diff options
| -rw-r--r-- | test/helper.py | 22 | ||||
| -rw-r--r-- | test/test_age_restriction.py | 53 | ||||
| -rw-r--r-- | test/test_download.py | 23 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 6 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornotube.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/viddler.py | 64 | ||||
| -rw-r--r-- | youtube_dl/extractor/youporn.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 3 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
14 files changed, 177 insertions, 27 deletions
| diff --git a/test/helper.py b/test/helper.py index 8e641e3cb..884cf32dc 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,3 +1,4 @@ +import errno  import io  import json  import os.path @@ -22,18 +23,33 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:      parameters = json.load(pf) + +def try_rm(filename): +    """ Remove a file if it exists """ +    try: +        os.remove(filename) +    except OSError as ose: +        if ose.errno != errno.ENOENT: +            raise + +  class FakeYDL(YoutubeDL):      def __init__(self): -        self.result = []          # Different instances of the downloader can't share the same dictionary          # some test set the "sublang" parameter, which would break the md5 checks. -        self.params = dict(parameters) -    def to_screen(self, s): +        params = dict(parameters) +        super(FakeYDL, self).__init__(params) +        self.result = [] +         +    def to_screen(self, s, skip_eol=None):          print(s) +      def trouble(self, s, tb=None):          raise Exception(s) +      def download(self, x):          self.result.append(x) +      def expect_warning(self, regex):          # Silence an expected warning matching a regex          old_report_warning = self.report_warning diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 000000000..943f9a315 --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl import YoutubeDL +from helper import try_rm + + +def _download_restricted(url, filename, age): +    """ Returns true iff the file has been downloaded """ + +    params = { +        'age_limit': age, +        'skip_download': True, +        'writeinfojson': True, +        "outtmpl": "%(id)s.%(ext)s", +    } +    ydl = YoutubeDL(params) +    ydl.add_default_info_extractors() +    json_filename = filename + '.info.json' +    try_rm(json_filename) +    ydl.download([url]) +    res = os.path.exists(json_filename) +    try_rm(json_filename) +    return res + + +class TestAgeRestriction(unittest.TestCase): +    def _assert_restricted(self, url, filename, age, old_age=None): +        self.assertTrue(_download_restricted(url, filename, old_age)) +        self.assertFalse(_download_restricted(url, filename, age)) + +    def test_youtube(self): +        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + +    def test_youporn(self): +        self._assert_restricted( +            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', +            '505835.mp4', 2, old_age=25) + +    def test_pornotube(self): +        self._assert_restricted( +            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', +            '1689755.flv', 13) + + +if __name__ == '__main__': +    unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 23a66254d..23d3853c4 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,6 +1,5 @@  #!/usr/bin/env python -import errno  import hashlib  import io  import os @@ -28,14 +27,6 @@ opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, You  compat_urllib_request.install_opener(opener)  socket.setdefaulttimeout(10) -def _try_rm(filename): -    """ Remove a file if it exists """ -    try: -        os.remove(filename) -    except OSError as ose: -        if ose.errno != errno.ENOENT: -            raise -  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()  class YoutubeDL(youtube_dl.YoutubeDL): @@ -54,7 +45,7 @@ def _file_md5(fn):      with open(fn, 'rb') as f:          return hashlib.md5(f.read()).hexdigest() -from helper import get_testcases +from helper import get_testcases, try_rm  defs = get_testcases()  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -97,9 +88,9 @@ def generator(test_case):          test_cases = test_case.get('playlist', [test_case])          for tc in test_cases: -            _try_rm(tc['file']) -            _try_rm(tc['file'] + '.part') -            _try_rm(tc['file'] + '.info.json') +            try_rm(tc['file']) +            try_rm(tc['file'] + '.part') +            try_rm(tc['file'] + '.info.json')          try:              for retry in range(1, RETRIES + 1):                  try: @@ -145,9 +136,9 @@ def generator(test_case):                      self.assertTrue(key in info_dict.keys() and info_dict[key])          finally:              for tc in test_cases: -                _try_rm(tc['file']) -                _try_rm(tc['file'] + '.part') -                _try_rm(tc['file'] + '.info.json') +                try_rm(tc['file']) +                try_rm(tc['file'] + '.part') +                try_rm(tc['file'] + '.info.json')      return test_template diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 856e9ac92..073a3837c 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -85,6 +85,8 @@ class YoutubeDL(object):      cachedir:          Location of the cache files in the filesystem.                         None to disable filesystem cache.      noplaylist:        Download single video instead of a playlist if in doubt. +    age_limit:         An integer representing the user's age in years. +                       Unsuitable videos for the given age are skipped.      downloadarchive:   File name of a file where all downloads are recorded.                         Videos already present in the file are not downloaded                         again. @@ -313,6 +315,10 @@ class YoutubeDL(object):              dateRange = self.params.get('daterange', DateRange())              if date not in dateRange:                  return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) +        age_limit = self.params.get('age_limit') +        if age_limit is not None: +            if age_limit < info_dict.get('age_limit', 0): +                return u'Skipping "' + title + '" because it is age restricted'          if self.in_download_archive(info_dict):              return (u'%(title)s has already been recorded in archive'                      % info_dict) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a680d7c55..ba5206387 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):      selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)      selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)      selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) +    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', +                         help='download only videos suitable for the given age', +                         default=None, type=int)      selection.add_option('--download-archive', metavar='FILE',                           dest='download_archive',                           help='Download only videos not present in the archive file. Record all downloaded videos in it.') @@ -634,6 +637,7 @@ def _real_main(argv=None):          'daterange': date,          'cachedir': opts.cachedir,          'youtube_print_sig_code': opts.youtube_print_sig_code, +        'age_limit': opts.age_limit,          'download_archive': opts.download_archive,          }) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d1b7e5f99..2b054e1c9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -117,6 +117,7 @@ from .veehd import VeeHDIE  from .veoh import VeohIE  from .vevo import VevoIE  from .vice import ViceIE +from .viddler import ViddlerIE  from .videofyme import VideofyMeIE  from .vimeo import VimeoIE, VimeoChannelIE  from .vine import VineIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 69cdcdc1b..2a5a85dc6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,7 @@ class InfoExtractor(object):      view_count:     How many users have watched the video on the platform.      urlhandle:      [internal] The urlHandle to be used to download the file,                      like returned by urllib.request.urlopen +    age_limit:      Age restriction for the video, as an integer (years)      formats:        A list of dictionaries for each format available, it must                      be ordered from worst to best quality. Potential fields:                      * url       Mandatory. The URL of the video file @@ -318,6 +319,15 @@ class InfoExtractor(object):                                          self._og_regex('video')],                                         html, name, **kargs) +    def _rta_search(self, html): +        # See http://www.rtalabel.org/index.php?content=howtofaq#single +        if re.search(r'(?ix)<meta\s+name="rating"\s+' +                     r'     content="RTA-5042-1996-1400-1577-RTA"', +                     html): +            return 18 +        return 0 + +  class SearchInfoExtractor(InfoExtractor):      """      Base class for paged search queries extractors. diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 764070635..7060c6f92 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -117,7 +117,7 @@ class GenericIE(InfoExtractor):          except ValueError:              # since this is the last-resort InfoExtractor, if              # this error is thrown, it'll be thrown here -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError(u'Failed to download URL: %s' % url)          self.report_extraction(video_id)          # Look for BrightCove: @@ -149,12 +149,12 @@ class GenericIE(InfoExtractor):              # HTML5 video              mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError(u'Unsupported URL: %s' % url)          # It's possible that one of the regexes          # matched, but returned an empty group:          if mobj.group(1) is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError(u'Did not find a valid video URL at %s' % url)          video_url = mobj.group(1)          video_url = compat_urlparse.urljoin(url, video_url) diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index add76a11e..5d770ec28 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):          VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'          upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)          if upload_date: upload_date = unified_strdate(upload_date) +        age_limit = self._rta_search(webpage)          info = {'id': video_id,                  'url': video_url, @@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):                  'upload_date': upload_date,                  'title': video_title,                  'ext': 'flv', -                'format': 'flv'} +                'format': 'flv', +                'age_limit': age_limit}          return [info] diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py new file mode 100644 index 000000000..12c84a985 --- /dev/null +++ b/youtube_dl/extractor/viddler.py @@ -0,0 +1,64 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +) + + +class ViddlerIE(InfoExtractor): +    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)' +    _TEST = { +        u"url": u"http://www.viddler.com/v/43903784", +        u'file': u'43903784.mp4', +        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', +        u'info_dict': { +            u"title": u"Video Made Easy", +            u"uploader": u"viddler", +            u"duration": 100.89, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        embed_url = mobj.group('domain') + u'/embed/' + video_id +        webpage = self._download_webpage(embed_url, video_id) + +        video_sources_code = self._search_regex( +            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') +        video_sources = json.loads(video_sources_code.replace("'", '"')) + +        formats = [{ +            'url': video_url, +            'format': format_id, +        } for video_url, format_id in video_sources.items()] + +        title = self._html_search_regex( +            r"title\s*:\s*'([^']*)'", webpage, u'title') +        uploader = self._html_search_regex( +            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) +        duration_s = self._html_search_regex( +            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) +        duration = float(duration_s) if duration_s else None +        thumbnail = self._html_search_regex( +            r"thumbnail\s*:\s*'([^']*)'", +            webpage, u'thumbnail', fatal=False) + +        info = { +            '_type': 'video', +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'duration': duration, +            'formats': formats, +        } + +        # TODO: Remove when #980 has been merged +        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) +        info.update(info['formats'][-1]) + +        return info diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index c85fd4b5a..b1f93dd1b 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):          req = compat_urllib_request.Request(url)          req.add_header('Cookie', 'age_verified=1')          webpage = self._download_webpage(req, video_id) +        age_limit = self._rta_search(webpage)          # Get JSON parameters          json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') @@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):                  'ext': extension,                  'format': format,                  'thumbnail': thumbnail, -                'description': video_description +                'description': video_description, +                'age_limit': age_limit,              })          if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1101011ea..b02ae2572 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'description':  video_description,                  'player_url':   player_url,                  'subtitles':    video_subtitles, -                'duration':     video_duration +                'duration':     video_duration, +                'age_limit':    18 if age_gate else 0,              })          return results diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a463049a4..de2654762 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -175,7 +175,7 @@ def compat_ord(c):  compiled_regex_type = type(re.compile(''))  std_headers = { -    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', +    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',      'Accept-Encoding': 'gzip, deflate', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e773e82da..08eda2197 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.04' +__version__ = '2013.10.06' | 
