diff options
42 files changed, 865 insertions, 210 deletions
diff --git a/.gitignore b/.gitignore index 24fdb3626..7dd0ad09b 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ updates_key.pem  *.mp4  *.part  test/testdata +.tox @@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like.      --datebefore DATE          download only videos uploaded before this date      --dateafter DATE           download only videos uploaded after this date      --no-playlist              download only the currently playing video +    --age-limit YEARS          download only videos suitable for the given age +    --download-archive FILE    Download only videos not present in the archive +                               file. Record all downloaded videos in it.  ## Download Options:      -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 33f242480..153e15c8a 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -16,10 +16,11 @@ def main():      ie_htmls = []      for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):          ie_html = '<b>{}</b>'.format(ie.IE_NAME) -        try: +        ie_desc = getattr(ie, 'IE_DESC', None) +        if ie_desc is False: +            continue +        elif ie_desc is not None:              ie_html += ': {}'.format(ie.IE_DESC) -        except AttributeError: -            pass          if ie.working() == False:              ie_html += ' (Currently broken)'          ie_htmls.append('<li>{}</li>'.format(ie_html)) diff --git a/test/helper.py b/test/helper.py index a2b468b50..ad1b74dd3 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,6 +1,9 @@ +import errno  import io  import json  import os.path +import re +import types  import youtube_dl.extractor  from youtube_dl import YoutubeDL, YoutubeDLHandler @@ -9,30 +12,47 @@ from youtube_dl.utils import (      compat_urllib_request,  ) -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) +youtube_dl._setup_opener(timeout=10)  PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:      parameters = json.load(pf) + +def try_rm(filename): +    """ Remove a file if it exists """ +    try: +        os.remove(filename) +    except OSError as ose: +        if ose.errno != errno.ENOENT: +            raise + +  class FakeYDL(YoutubeDL):      def __init__(self): -        self.result = []          # Different instances of the downloader can't share the same dictionary          # some test set the "sublang" parameter, which would break the md5 checks. -        self.params = dict(parameters) -    def to_screen(self, s): +        params = dict(parameters) +        super(FakeYDL, self).__init__(params) +        self.result = [] +         +    def to_screen(self, s, skip_eol=None):          print(s) +      def trouble(self, s, tb=None):          raise Exception(s) +      def download(self, x):          self.result.append(x) +    def expect_warning(self, regex): +        # Silence an expected warning matching a regex +        old_report_warning = self.report_warning +        def report_warning(self, message): +            if re.match(regex, message): return +            old_report_warning(message) +        self.report_warning = types.MethodType(report_warning, self) +  def get_testcases():      for ie in youtube_dl.extractor.gen_extractors():          t = getattr(ie, '_TEST', None) diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 000000000..943f9a315 --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl import YoutubeDL +from helper import try_rm + + +def _download_restricted(url, filename, age): +    """ Returns true iff the file has been downloaded """ + +    params = { +        'age_limit': age, +        'skip_download': True, +        'writeinfojson': True, +        "outtmpl": "%(id)s.%(ext)s", +    } +    ydl = YoutubeDL(params) +    ydl.add_default_info_extractors() +    json_filename = filename + '.info.json' +    try_rm(json_filename) +    ydl.download([url]) +    res = os.path.exists(json_filename) +    try_rm(json_filename) +    return res + + +class TestAgeRestriction(unittest.TestCase): +    def _assert_restricted(self, url, filename, age, old_age=None): +        self.assertTrue(_download_restricted(url, filename, old_age)) +        self.assertFalse(_download_restricted(url, filename, age)) + +    def test_youtube(self): +        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + +    def test_youporn(self): +        self._assert_restricted( +            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', +            '505835.mp4', 2, old_age=25) + +    def test_pornotube(self): +        self._assert_restricted( +            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', +            '1689755.flv', 13) + + +if __name__ == '__main__': +    unittest.main() diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index 83c65d57e..ed2ad311d 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -2,8 +2,6 @@  import sys  import unittest -import json -import io  import hashlib  # Allow direct execution @@ -45,15 +43,18 @@ class TestDailymotionSubtitles(unittest.TestCase):          subtitles = self.getSubtitles()          self.assertEqual(len(subtitles.keys()), 5)      def test_list_subtitles(self): +        self.DL.expect_warning(u'Automatic Captions not supported by this server')          self.DL.params['listsubtitles'] = True          info_dict = self.getInfoDict()          self.assertEqual(info_dict, None)      def test_automatic_captions(self): +        self.DL.expect_warning(u'Automatic Captions not supported by this server')          self.DL.params['writeautomaticsub'] = True          self.DL.params['subtitleslang'] = ['en']          subtitles = self.getSubtitles()          self.assertTrue(len(subtitles.keys()) == 0)      def test_nosubtitles(self): +        self.DL.expect_warning(u'video doesn\'t have subtitles')          self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'          self.DL.params['writesubtitles'] = True          self.DL.params['allsubtitles'] = True diff --git a/test/test_download.py b/test/test_download.py index 23a66254d..fdf59bb5c 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,6 +1,5 @@  #!/usr/bin/env python -import errno  import hashlib  import io  import os @@ -20,22 +19,6 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para  RETRIES = 3 -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) -socket.setdefaulttimeout(10) - -def _try_rm(filename): -    """ Remove a file if it exists """ -    try: -        os.remove(filename) -    except OSError as ose: -        if ose.errno != errno.ENOENT: -            raise -  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()  class YoutubeDL(youtube_dl.YoutubeDL): @@ -54,7 +37,8 @@ def _file_md5(fn):      with open(fn, 'rb') as f:          return hashlib.md5(f.read()).hexdigest() -from helper import get_testcases +import helper  # Set up remaining global configuration +from helper import get_testcases, try_rm  defs = get_testcases()  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -97,9 +81,9 @@ def generator(test_case):          test_cases = test_case.get('playlist', [test_case])          for tc in test_cases: -            _try_rm(tc['file']) -            _try_rm(tc['file'] + '.part') -            _try_rm(tc['file'] + '.info.json') +            try_rm(tc['file']) +            try_rm(tc['file'] + '.part') +            try_rm(tc['file'] + '.info.json')          try:              for retry in range(1, RETRIES + 1):                  try: @@ -145,9 +129,9 @@ def generator(test_case):                      self.assertTrue(key in info_dict.keys() and info_dict[key])          finally:              for tc in test_cases: -                _try_rm(tc['file']) -                _try_rm(tc['file'] + '.part') -                _try_rm(tc['file'] + '.info.json') +                try_rm(tc['file']) +                try_rm(tc['file'] + '.part') +                try_rm(tc['file'] + '.info.json')      return test_template diff --git a/test/test_playlists.py b/test/test_playlists.py index c33511333..de8bd298a 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -16,6 +16,7 @@ from youtube_dl.extractor import (      UstreamChannelIE,      SoundcloudUserIE,      LivestreamIE, +    NHLVideocenterIE,  )  from youtube_dl.utils import * @@ -74,5 +75,14 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['title'], u'TEDCity2.0 (English)')          self.assertTrue(len(result['entries']) >= 4) +    def test_nhl_videocenter(self): +        dl = FakeYDL() +        ie = NHLVideocenterIE(dl) +        result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], u'999') +        self.assertEqual(result['title'], u'Highlights') +        self.assertEqual(len(result['entries']), 12) +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 168e6c66c..f9b0c1ad0 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -2,8 +2,6 @@  import sys  import unittest -import json -import io  import hashlib  # Allow direct execution @@ -56,6 +54,7 @@ class TestYoutubeSubtitles(unittest.TestCase):          subtitles = self.getSubtitles()          self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')      def test_youtube_list_subtitles(self): +        self.DL.expect_warning(u'Video doesn\'t have automatic captions')          self.DL.params['listsubtitles'] = True          info_dict = self.getInfoDict()          self.assertEqual(info_dict, None) @@ -66,6 +65,7 @@ class TestYoutubeSubtitles(unittest.TestCase):          subtitles = self.getSubtitles()          self.assertTrue(subtitles['it'] is not None)      def test_youtube_nosubtitles(self): +        self.DL.expect_warning(u'video doesn\'t have subtitles')          self.url = 'sAjKT8FhjI8'          self.DL.params['writesubtitles'] = True          self.DL.params['allsubtitles'] = True diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..53b461fdb --- /dev/null +++ b/tox.ini @@ -0,0 +1,5 @@ +[tox] +envlist = py26,py27,py33 +[testenv] +deps = nose +commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index d6673fd3a..8ecabab1a 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -270,6 +270,7 @@ class FileDownloader(object):      def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):          self.report_destination(filename)          tmpfilename = self.temp_name(filename) +        test = self.params.get('test', False)          # Check for rtmpdump first          try: @@ -291,6 +292,8 @@ class FileDownloader(object):              basic_args += ['--playpath', play_path]          if tc_url is not None:              basic_args += ['--tcUrl', url] +        if test: +            basic_args += ['--stop', '1']          args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]          if self.params.get('verbose', False):              try: @@ -300,7 +303,7 @@ class FileDownloader(object):                  shell_quote = repr              self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))          retval = subprocess.call(args) -        while retval == 2 or retval == 1: +        while (retval == 2 or retval == 1) and not test:              prevsize = os.path.getsize(encodeFilename(tmpfilename))              self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)              time.sleep(5.0) # This seems to be needed @@ -313,7 +316,7 @@ class FileDownloader(object):                  self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')                  retval = 0                  break -        if retval == 0: +        if retval == 0 or (test and retval == 2):              fsize = os.path.getsize(encodeFilename(tmpfilename))              self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)              self.try_rename(tmpfilename, filename) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 3ee1d3c58..fbf8a7f98 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -2,6 +2,7 @@ import os  import subprocess  import sys  import time +import datetime  from .utils import * @@ -467,3 +468,35 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):          os.rename(encodeFilename(temp_filename), encodeFilename(filename))          return True, information + + +class FFmpegMetadataPP(FFmpegPostProcessor): +    def run(self, info): +        metadata = {} +        if info.get('title') is not None: +            metadata['title'] = info['title'] +        if info.get('upload_date') is not None: +            metadata['date'] = info['upload_date'] +        if info.get('uploader') is not None: +            metadata['artist'] = info['uploader'] +        elif info.get('uploader_id') is not None: +            metadata['artist'] = info['uploader_id'] + +        if not metadata: +            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add') +            return True, info + +        filename = info['filepath'] +        ext = os.path.splitext(filename)[1][1:] +        temp_filename = filename + u'.temp' + +        options = ['-c', 'copy'] +        for (name, value) in metadata.items(): +            options.extend(['-metadata', '%s="%s"' % (name, value)]) +        options.extend(['-f', ext]) + +        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename) +        self.run_ffmpeg(filename, temp_filename, options) +        os.remove(encodeFilename(filename)) +        os.rename(encodeFilename(temp_filename), encodeFilename(filename)) +        return True, info diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..e85e03fa4 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@  from __future__ import absolute_import +import errno  import io  import os  import re @@ -84,6 +85,11 @@ class YoutubeDL(object):      cachedir:          Location of the cache files in the filesystem.                         None to disable filesystem cache.      noplaylist:        Download single video instead of a playlist if in doubt. +    age_limit:         An integer representing the user's age in years. +                       Unsuitable videos for the given age are skipped. +    downloadarchive:   File name of a file where all downloads are recorded. +                       Videos already present in the file are not downloaded +                       again.      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -113,7 +119,7 @@ class YoutubeDL(object):                  and not params['restrictfilenames']):              # On Python 3, the Unicode filesystem API will throw errors (#1474)              self.report_warning( -                u'Assuming --restrict-filenames isnce file system encoding ' +                u'Assuming --restrict-filenames since file system encoding '                  u'cannot encode all charactes. '                  u'Set the LC_ALL environment variable to fix this.')              params['restrictfilenames'] = True @@ -309,6 +315,13 @@ class YoutubeDL(object):              dateRange = self.params.get('daterange', DateRange())              if date not in dateRange:                  return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) +        age_limit = self.params.get('age_limit') +        if age_limit is not None: +            if age_limit < info_dict.get('age_limit', 0): +                return u'Skipping "' + title + '" because it is age restricted' +        if self.in_download_archive(info_dict): +            return (u'%(title)s has already been recorded in archive' +                    % info_dict)          return None      def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +591,8 @@ class YoutubeDL(object):                      self.report_error(u'postprocessing: %s' % str(err))                      return +        self.record_download_archive(info_dict) +      def download(self, url_list):          """Download a given list of URLs."""          if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +632,26 @@ class YoutubeDL(object):                  os.remove(encodeFilename(filename))              except (IOError, OSError):                  self.report_warning(u'Unable to remove downloaded video file') + +    def in_download_archive(self, info_dict): +        fn = self.params.get('download_archive') +        if fn is None: +            return False +        vid_id = info_dict['extractor'] + u' ' + info_dict['id'] +        try: +            with locked_file(fn, 'r', encoding='utf-8') as archive_file: +                for line in archive_file: +                    if line.strip() == vid_id: +                        return True +        except IOError as ioe: +            if ioe.errno != errno.ENOENT: +                raise +        return False + +    def record_download_archive(self, info_dict): +        fn = self.params.get('download_archive') +        if fn is None: +            return +        vid_id = info_dict['extractor'] + u' ' + info_dict['id'] +        with locked_file(fn, 'a', encoding='utf-8') as archive_file: +            archive_file.write(vid_id + u'\n') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..3513d719f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -36,6 +36,7 @@ __authors__  = (  __license__ = 'Public Domain'  import codecs +import collections  import getpass  import optparse  import os @@ -188,6 +189,12 @@ def parseOpts(overrideArguments=None):      selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)      selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)      selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) +    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', +                         help='download only videos suitable for the given age', +                         default=None, type=int) +    selection.add_option('--download-archive', metavar='FILE', +                         dest='download_archive', +                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')      authentication.add_option('-u', '--username', @@ -351,6 +358,8 @@ def parseOpts(overrideArguments=None):              help='do not overwrite post-processed files; the post-processed files are overwritten by default')      postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,              help='embed subtitles in the video (only for mp4 videos)') +    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, +            help='add metadata to the files')      parser.add_option_group(general) @@ -441,27 +450,7 @@ def _real_main(argv=None):      all_urls = batchurls + args      all_urls = [url.strip() for url in all_urls] -    # General configuration -    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -    if opts.proxy is not None: -        if opts.proxy == '': -            proxies = {} -        else: -            proxies = {'http': opts.proxy, 'https': opts.proxy} -    else: -        proxies = compat_urllib_request.getproxies() -        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) -        if 'http' in proxies and 'https' not in proxies: -            proxies['https'] = proxies['http'] -    proxy_handler = compat_urllib_request.ProxyHandler(proxies) -    https_handler = make_HTTPS_handler(opts) -    opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) -    # Delete the default user-agent header, which would otherwise apply in -    # cases where our custom HTTP handler doesn't come into play -    # (See https://github.com/rg3/youtube-dl/issues/1309 for details) -    opener.addheaders =[] -    compat_urllib_request.install_opener(opener) -    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) +    opener = _setup_opener(jar=jar, opts=opts)      extractors = gen_extractors() @@ -478,6 +467,8 @@ def _real_main(argv=None):              if not ie._WORKING:                  continue              desc = getattr(ie, 'IE_DESC', ie.IE_NAME) +            if desc is False: +                continue              if hasattr(ie, 'SEARCH_KEY'):                  _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')                  _COUNTS = (u'', u'5', u'10', u'all') @@ -631,6 +622,8 @@ def _real_main(argv=None):          'daterange': date,          'cachedir': opts.cachedir,          'youtube_print_sig_code': opts.youtube_print_sig_code, +        'age_limit': opts.age_limit, +        'download_archive': opts.download_archive,          })      if opts.verbose: @@ -650,11 +643,19 @@ def _real_main(argv=None):              except:                  pass          write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') -        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') + +        proxy_map = {} +        for handler in opener.handlers: +            if hasattr(handler, 'proxies'): +                proxy_map.update(handler.proxies) +        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')      ydl.add_default_info_extractors()      # PostProcessors +    # Add the metadata pp first, the other pps will copy it +    if opts.addmetadata: +        ydl.add_post_processor(FFmpegMetadataPP())      if opts.extractaudio:          ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))      if opts.recodevideo: @@ -688,6 +689,37 @@ def _real_main(argv=None):      sys.exit(retcode) + +def _setup_opener(jar=None, opts=None, timeout=300): +    if opts is None: +        FakeOptions = collections.namedtuple( +            'FakeOptions', ['proxy', 'no_check_certificate']) +        opts = FakeOptions(proxy=None, no_check_certificate=False) + +    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) +    if opts.proxy is not None: +        if opts.proxy == '': +            proxies = {} +        else: +            proxies = {'http': opts.proxy, 'https': opts.proxy} +    else: +        proxies = compat_urllib_request.getproxies() +        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) +        if 'http' in proxies and 'https' not in proxies: +            proxies['https'] = proxies['http'] +    proxy_handler = compat_urllib_request.ProxyHandler(proxies) +    https_handler = make_HTTPS_handler(opts) +    opener = compat_urllib_request.build_opener( +        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) +    # Delete the default user-agent header, which would otherwise apply in +    # cases where our custom HTTP handler doesn't come into play +    # (See https://github.com/rg3/youtube-dl/issues/1309 for details) +    opener.addheaders = [] +    compat_urllib_request.install_opener(opener) +    socket.setdefaulttimeout(timeout) +    return opener + +  def main(argv=None):      try:          _real_main(argv) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db30edc27..226c3a762 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -35,6 +35,7 @@ from .eighttracks import EightTracksIE  from .escapist import EscapistIE  from .exfm import ExfmIE  from .facebook import FacebookIE +from .faz import FazIE  from .fktv import (      FKTVIE,      FKTVPosteckeIE, @@ -81,6 +82,7 @@ from .naver import NaverIE  from .nba import NBAIE  from .nbc import NBCNewsIE  from .newgrounds import NewgroundsIE +from .nhl import NHLIE, NHLVideocenterIE  from .ooyala import OoyalaIE  from .orf import ORFIE  from .pbs import PBSIE @@ -118,6 +120,7 @@ from .veehd import VeeHDIE  from .veoh import VeohIE  from .vevo import VevoIE  from .vice import ViceIE +from .viddler import ViddlerIE  from .videofyme import VideofyMeIE  from .vimeo import VimeoIE, VimeoChannelIE  from .vine import VineIE @@ -141,6 +144,7 @@ from .youtube import (      YoutubeShowIE,      YoutubeSubscriptionsIE,      YoutubeRecommendedIE, +    YoutubeTruncatedURLIE,      YoutubeWatchLaterIE,      YoutubeFavouritesIE,  ) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 69b3b0ad7..4707d7cca 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -109,17 +109,27 @@ class ArteTvIE(InfoExtractor):              return any(re.match(r, f['versionCode']) for r in regexes)          # Some formats may not be in the same language as the url          formats = filter(_match_lang, formats) +        # Some formats use the m3u8 protocol +        formats = filter(lambda f: f['videoFormat'] != 'M3U8', formats)          # We order the formats by quality          formats = sorted(formats, key=lambda f: int(f['height']))          # Prefer videos without subtitles in the same language          formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)          # Pick the best quality -        format_info = formats[-1] -        if format_info['mediaType'] == u'rtmp': -            info_dict['url'] = format_info['streamer'] -            info_dict['play_path'] = 'mp4:' + format_info['url'] -        else: -            info_dict['url'] = format_info['url'] +        def _format(format_info): +            info = {'ext': 'flv', +                    'width': format_info.get('width'), +                    'height': format_info.get('height'), +                    } +            if format_info['mediaType'] == u'rtmp': +                info['url'] = format_info['streamer'] +                info['play_path'] = 'mp4:' + format_info['url'] +            else: +                info_dict['url'] = format_info['url'] +            return info +        info_dict['formats'] = [_format(f) for f in formats] +        # TODO: Remove when #980 has been merged  +        info_dict.update(info_dict['formats'][-1])          return info_dict diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 08b28c994..493504f75 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor):                  ext = umobj.group(1)                  info = { -                    'id': data['item_id'], +                    'id': compat_str(data['item_id']),                      'url': video_url,                      'uploader': data['display_name'],                      'upload_date': upload_date, diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 558b3d009..745212f2f 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -49,6 +49,11 @@ class BrightcoveIE(InfoExtractor):          Build a Brightcove url from a xml string containing          <object class="BrightcoveExperience">{params}</object>          """ + +        # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 +        object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', +                            lambda m: m.group(1) + '/>', object_str) +          object_doc = xml.etree.ElementTree.fromstring(object_str)          assert u'BrightcoveExperience' in object_doc.attrib['class']          params = {'flashID': object_doc.attrib['id'], diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index bf8d711ee..69b2beece 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor):          '400': 'mp4',      }      _video_dimensions = { -        '3500': '1280x720', -        '2200': '960x540', -        '1700': '768x432', -        '1200': '640x360', -        '750': '512x288', -        '400': '384x216', +        '3500': (1280, 720), +        '2200': (960, 540), +        '1700': (768, 432), +        '1200': (640, 360), +        '750': (512, 288), +        '400': (384, 216),      }      @classmethod @@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor):          """Receives a URL and returns True if suitable for this IE."""          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None -    def _print_formats(self, formats): -        print('Available formats:') -        for x in formats: -            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) - +    @staticmethod +    def _transform_rtmp_url(rtmp_video_url): +        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) +        if not m: +            raise ExtractorError(u'Cannot transform RTMP url') +        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' +        return base + m.group('finalid')      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor):                  self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')                  continue -            if self._downloader.params.get('listformats', None): -                self._print_formats([i[0] for i in turls]) -                return - -            # For now, just pick the highest bitrate -            format,rtmp_video_url = turls[-1] - -            # Get the format arg from the arg stream -            req_format = self._downloader.params.get('format', None) - -            # Select format if we can find one -            for f,v in turls: -                if f == req_format: -                    format, rtmp_video_url = f, v -                    break - -            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) -            if not m: -                raise ExtractorError(u'Cannot transform RTMP url') -            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' -            video_url = base + m.group('finalid') +            formats = [] +            for format, rtmp_video_url in turls: +                w, h = self._video_dimensions.get(format, (None, None)) +                formats.append({ +                    'url': self._transform_rtmp_url(rtmp_video_url), +                    'ext': self._video_extensions.get(format, 'mp4'), +                    'format_id': format, +                    'height': h, +                    'width': w, +                })              effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)              info = {                  'id': shortMediaId, -                'url': video_url, +                'formats': formats,                  'uploader': showId,                  'upload_date': officialDate,                  'title': effTitle, -                'ext': 'mp4', -                'format': format,                  'thumbnail': None,                  'description': compat_str(officialTitle),              } + +            # TODO: Remove when #980 has been merged +            info.update(info['formats'][-1]) +              results.append(info)          return results diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 77726ee24..2a5a85dc6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -35,6 +35,8 @@ class InfoExtractor(object):      title:          Video title, unescaped.      ext:            Video filename extension. +    Instead of url and ext, formats can also specified. +      The following fields are optional:      format:         The video format, defaults to ext (used for --get-format) @@ -52,8 +54,20 @@ class InfoExtractor(object):      view_count:     How many users have watched the video on the platform.      urlhandle:      [internal] The urlHandle to be used to download the file,                      like returned by urllib.request.urlopen - -    The fields should all be Unicode strings. +    age_limit:      Age restriction for the video, as an integer (years) +    formats:        A list of dictionaries for each format available, it must +                    be ordered from worst to best quality. Potential fields: +                    * url       Mandatory. The URL of the video file +                    * ext       Will be calculated from url if missing +                    * format    A human-readable description of the format +                                ("mp4 container with h264/opus"). +                                Calculated from width and height if missing. +                    * format_id A short description of the format +                                ("mp4_h264_opus" or "19") +                    * width     Width of the video, if known +                    * height    Height of the video, if known + +    Unless mentioned otherwise, the fields should be Unicode strings.      Subclasses of this one should re-define the _real_initialize() and      _real_extract() methods and define a _VALID_URL regexp. @@ -305,6 +319,15 @@ class InfoExtractor(object):                                          self._og_regex('video')],                                         html, name, **kargs) +    def _rta_search(self, html): +        # See http://www.rtalabel.org/index.php?content=howtofaq#single +        if re.search(r'(?ix)<meta\s+name="rating"\s+' +                     r'     content="RTA-5042-1996-1400-1577-RTA"', +                     html): +            return 18 +        return 0 + +  class SearchInfoExtractor(InfoExtractor):      """      Base class for paged search queries extractors. diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 259806f38..7d8353946 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -10,6 +10,7 @@ from ..utils import (      compat_str,      get_element_by_attribute,      get_element_by_id, +    orderedSet,      ExtractorError,  ) @@ -158,12 +159,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):                                               id, u'Downloading page %s' % pagenum)              playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) -            video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) +            video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))              if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:                  break          return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') -                   for video_id in video_ids] +                   for video_id in orderedSet(video_ids)]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py new file mode 100644 index 000000000..deaa4ed2d --- /dev/null +++ b/youtube_dl/extractor/faz.py @@ -0,0 +1,60 @@ +# encoding: utf-8 +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    clean_html, +    get_element_by_attribute, +) + + +class FazIE(InfoExtractor): +    IE_NAME = u'faz.net' +    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html' + +    _TEST = { +        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', +        u'file': u'12610585.mp4', +        u'info_dict': { +            u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', +            u'description': u'md5:1453fbf9a0d041d985a47306192ea253', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        self.to_screen(video_id) +        webpage = self._download_webpage(url, video_id) +        config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, +            u'config xml url') +        config_xml = self._download_webpage(config_xml_url, video_id, +            u'Downloading config xml') +        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) + +        encodings = config.find('ENCODINGS') +        formats = [] +        for code in ['LOW', 'HIGH', 'HQ']: +            encoding = encodings.find(code) +            if encoding is None: +                continue +            encoding_url = encoding.find('FILENAME').text +            formats.append({ +                'url': encoding_url, +                'ext': determine_ext(encoding_url), +                'format_id': code.lower(), +            }) + +        descr_html = get_element_by_attribute('class', 'Content Copy', webpage) +        info = { +            'id': video_id, +            'title': self._og_search_title(webpage), +            'formats': formats, +            'description': clean_html(descr_html), +            'thumbnail': config.find('STILL/STILL_BIG').text, +        } +        # TODO: Remove when #980 has been merged +        info.update(formats[-1]) +        return info diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 80d96baf7..e1d2f0526 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -9,7 +9,7 @@ from ..utils import (  class FlickrIE(InfoExtractor):      """Information Extractor for Flickr videos""" -    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' +    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'      _TEST = {          u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',          u'file': u'5645318632.mp4', diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 461dac8ef..086cafca0 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -103,7 +103,7 @@ class France2IE(FranceTVBaseInfoExtractor):  class GenerationQuoiIE(InfoExtractor): -    IE_NAME = u'http://generation-quoi.france2.fr' +    IE_NAME = u'france2.fr:generation-quoi'      _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'      _TEST = { diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index cd3bbe65f..5edbf678a 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -8,6 +8,7 @@ from ..utils import (  )  class GameSpotIE(InfoExtractor): +    _WORKING = False      _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'      _TEST = {          u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 764070635..7060c6f92 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -117,7 +117,7 @@ class GenericIE(InfoExtractor):          except ValueError:              # since this is the last-resort InfoExtractor, if              # this error is thrown, it'll be thrown here -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError(u'Failed to download URL: %s' % url)          self.report_extraction(video_id)          # Look for BrightCove: @@ -149,12 +149,12 @@ class GenericIE(InfoExtractor):              # HTML5 video              mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError(u'Unsupported URL: %s' % url)          # It's possible that one of the regexes          # matched, but returned an empty group:          if mobj.group(1) is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError(u'Did not find a valid video URL at %s' % url)          video_url = mobj.group(1)          video_url = compat_urlparse.urljoin(url, video_url) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 8895ad289..ab12d7e93 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -41,7 +41,8 @@ class GooglePlusIE(InfoExtractor):          # Extract update date          upload_date = self._html_search_regex( -            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'], +            r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*> +                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',              webpage, u'upload date', fatal=False)          if upload_date:              # Convert timestring to a format suitable for filename diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 4327bc13d..6bb54b932 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -6,13 +6,14 @@ import xml.etree.ElementTree  from .common import InfoExtractor +  class JeuxVideoIE(InfoExtractor):      _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'      _TEST = {          u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',          u'file': u'5182.mp4', -        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0', +        u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',          u'info_dict': {              u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',              u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', @@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          title = re.match(self._VALID_URL, url).group(1)          webpage = self._download_webpage(url, title) -        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage) - -        xml_link = m_download.group(1) +        xml_link = self._html_search_regex( +            r'<param name="flashvars" value="config=(.*?)" />', +            webpage, u'config URL') -        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1) +        video_id = self._search_regex( +            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', +            xml_link, u'video ID') -        xml_config = self._download_webpage(xml_link, title, -                                                  'Downloading XML config') +        xml_config = self._download_webpage( +            xml_link, title, u'Downloading XML config')          config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) -        info = re.search(r'<format\.json>(.*?)</format\.json>', -                         xml_config, re.MULTILINE|re.DOTALL).group(1) -        info = json.loads(info)['versions'][0] +        info_json = self._search_regex( +            r'(?sm)<format\.json>(.*?)</format\.json>', +            xml_config, u'JSON information') +        info = json.loads(info_json)['versions'][0]          video_url = 'http://video720.jeuxvideo.com/' + info['file'] -        return {'id': id, -                'title' : config.find('titre_video').text, -                'ext' : 'mp4', -                'url' : video_url, -                'description': self._og_search_description(webpage), -                'thumbnail': config.find('image').text, -                } +        return { +            'id': video_id, +            'title': config.find('titre_video').text, +            'ext': 'mp4', +            'url': video_url, +            'description': self._og_search_description(webpage), +            'thumbnail': config.find('image').text, +        } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8f956571d..e520e2bb4 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -54,23 +54,26 @@ class MTVIE(InfoExtractor):      def _get_thumbnail_url(self, uri, itemdoc):          return 'http://mtv.mtvnimages.com/uri/' + uri -    def _extract_video_url(self, metadataXml): +    def _extract_video_formats(self, metadataXml):          if '/error_country_block.swf' in metadataXml:              raise ExtractorError(u'This video is not available from your country.', expected=True)          mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))          renditions = mdoc.findall('.//rendition') -        # For now, always pick the highest quality. -        rendition = renditions[-1] - -        try: -            _,_,ext = rendition.attrib['type'].partition('/') -            format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] -            rtmp_video_url = rendition.find('./src').text -        except KeyError: -            raise ExtractorError('Invalid rendition field.') -        video_url = self._transform_rtmp_url(rtmp_video_url) -        return {'ext': ext, 'url': video_url, 'format': format} +        formats = [] +        for rendition in mdoc.findall('.//rendition'): +            try: +                _, _, ext = rendition.attrib['type'].partition('/') +                rtmp_video_url = rendition.find('./src').text +                formats.append({'ext': ext, +                                'url': self._transform_rtmp_url(rtmp_video_url), +                                'format_id': rendition.get('bitrate'), +                                'width': int(rendition.get('width')), +                                'height': int(rendition.get('height')), +                                }) +            except (KeyError, TypeError): +                raise ExtractorError('Invalid rendition field.') +        return formats      def _get_video_info(self, itemdoc):          uri = itemdoc.find('guid').text @@ -81,19 +84,25 @@ class MTVIE(InfoExtractor):              mediagen_url += '&acceptMethods=fms'          mediagen_page = self._download_webpage(mediagen_url, video_id,                                                 u'Downloading video urls') -        video_info = self._extract_video_url(mediagen_page)          description_node = itemdoc.find('description')          if description_node is not None: -            description = description_node.text +            description = description_node.text.strip()          else:              description = None -        video_info.update({'title': itemdoc.find('title').text, -                           'id': video_id, -                           'thumbnail': self._get_thumbnail_url(uri, itemdoc), -                           'description': description, -                           }) -        return video_info + +        info = { +            'title': itemdoc.find('title').text, +            'formats': self._extract_video_formats(mediagen_page), +            'id': video_id, +            'thumbnail': self._get_thumbnail_url(uri, itemdoc), +            'description': description, +        } + +        # TODO: Remove when #980 has been merged +        info.update(info['formats'][-1]) + +        return info      def _get_videos_info(self, uri):          video_id = self._id_from_uri(uri) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py new file mode 100644 index 000000000..e8d43dd13 --- /dev/null +++ b/youtube_dl/extractor/nhl.py @@ -0,0 +1,120 @@ +import re +import json +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( +    compat_urlparse, +    compat_urllib_parse, +    determine_ext, +    unified_strdate, +) + + +class NHLBaseInfoExtractor(InfoExtractor): +    @staticmethod +    def _fix_json(json_string): +        return json_string.replace('\\\'', '\'') + +    def _extract_video(self, info): +        video_id = info['id'] +        self.report_extraction(video_id) + +        initial_video_url = info['publishPoint'] +        data = compat_urllib_parse.urlencode({ +            'type': 'fvod', +            'path': initial_video_url.replace('.mp4', '_sd.mp4'), +        }) +        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data +        path_response = self._download_webpage(path_url, video_id, +            u'Downloading final video url') +        path_doc = xml.etree.ElementTree.fromstring(path_response) +        video_url = path_doc.find('path').text + +        join = compat_urlparse.urljoin +        return { +            'id': video_id, +            'title': info['name'], +            'url': video_url, +            'ext': determine_ext(video_url), +            'description': info['description'], +            'duration': int(info['duration']), +            'thumbnail': join(join(video_url, '/u/'), info['bigImage']), +            'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), +        } + + +class NHLIE(NHLBaseInfoExtractor): +    IE_NAME = u'nhl.com' +    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)' + +    _TEST = { +        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', +        u'file': u'453614.mp4', +        u'info_dict': { +            u'title': u'Quick clip: Weise 4-3 goal vs Flames', +            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.', +            u'duration': 18, +            u'upload_date': u'20131006', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id +        info_json = self._download_webpage(json_url, video_id, +            u'Downloading info json') +        info_json = self._fix_json(info_json) +        info = json.loads(info_json)[0] +        return self._extract_video(info) + + +class NHLVideocenterIE(NHLBaseInfoExtractor): +    IE_NAME = u'nhl.com:videocenter' +    IE_DESC = u'Download the first 12 videos from a videocenter category' +    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?' + +    @classmethod +    def suitable(cls, url): +        if NHLIE.suitable(url): +            return False +        return super(NHLVideocenterIE, cls).suitable(url) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        team = mobj.group('team') +        webpage = self._download_webpage(url, team) +        cat_id = self._search_regex( +            [r'var defaultCatId = "(.+?)";', +             r'{statusIndex:0,index:0,.*?id:(.*?),'], +            webpage, u'category id') +        playlist_title = self._html_search_regex( +            r'\?catid=%s">(.*?)</a>' % cat_id, +            webpage, u'playlist title', flags=re.DOTALL) + +        data = compat_urllib_parse.urlencode({ +            'cid': cat_id, +            # This is the default value +            'count': 12, +            'ptrs': 3, +            'format': 'json', +        }) +        path = '/videocenter/servlets/browse?' + data +        request_url = compat_urlparse.urljoin(url, path) +        response = self._download_webpage(request_url, playlist_title) +        response = self._fix_json(response) +        if not response.strip(): +            self._downloader.report_warning(u'Got an empty reponse, trying ' +                                            u'adding the "newvideos" parameter') +            response = self._download_webpage(request_url + '&newvideos=true', +                playlist_title) +            response = self._fix_json(response) +        videos = json.loads(response) + +        return { +            '_type': 'playlist', +            'title': playlist_title, +            'id': cat_id, +            'entries': [self._extract_video(i) for i in videos], +        } diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index add76a11e..5d770ec28 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):          VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'          upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)          if upload_date: upload_date = unified_strdate(upload_date) +        age_limit = self._rta_search(webpage)          info = {'id': video_id,                  'url': video_url, @@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):                  'upload_date': upload_date,                  'title': video_title,                  'ext': 'flv', -                'format': 'flv'} +                'format': 'flv', +                'age_limit': age_limit}          return [info] diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 1d2cf1f56..365aade56 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -14,24 +14,30 @@ class RedTubeIE(InfoExtractor):          }      } -    def _real_extract(self,url): +    def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        video_extension = 'mp4'         +        video_extension = 'mp4'          webpage = self._download_webpage(url, video_id)          self.report_extraction(video_id) -        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">', -            webpage, u'video URL') +        video_url = self._html_search_regex( +            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') -        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', +        video_title = self._html_search_regex( +            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',              webpage, u'title') -        return [{ -            'id':       video_id, -            'url':      video_url, -            'ext':      video_extension, -            'title':    video_title, -        }] +        # No self-labeling, but they describe themselves as +        # "Home of Videos Porno" +        age_limit = 18 + +        return { +            'id':        video_id, +            'url':       video_url, +            'ext':       video_extension, +            'title':     video_title, +            'age_limit': age_limit, +        } diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 580f9e6d5..d1b08c9bc 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -86,8 +86,10 @@ class RTLnowIE(InfoExtractor):          u'params': {              u'skip_download': True,          }, +        u'skip': u'Only works from Germany',      }] +      def _real_extract(self,url):          mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 4c11f7a03..dfa1176a3 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -77,12 +77,20 @@ class TEDIE(InfoExtractor):          thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',                                         webpage, 'thumbnail') +        formats = [{ +            'ext': 'mp4', +            'url': stream['file'], +            'format': stream['id'] +            } for stream in info['htmlStreams']]          info = { -                'id': info['id'], -                'url': info['htmlStreams'][-1]['file'], -                'ext': 'mp4', -                'title': title, -                'thumbnail': thumbnail, -                'description': desc, -                } +            'id': info['id'], +            'title': title, +            'thumbnail': thumbnail, +            'description': desc, +            'formats': formats, +        } + +        # TODO: Remove when #980 has been merged +        info.update(info['formats'][-1]) +          return info diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 70408c4f0..1c1cc418d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,11 +1,15 @@  import re  import json +import xml.etree.ElementTree +import datetime  from .common import InfoExtractor  from ..utils import ( +    determine_ext,      ExtractorError,  ) +  class VevoIE(InfoExtractor):      """      Accepts urls from vevo.com or in the format 'vevo:{id}' @@ -15,11 +19,11 @@ class VevoIE(InfoExtractor):      _TEST = {          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',          u'file': u'GB1101300280.mp4', -        u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',          u'info_dict': {              u"upload_date": u"20130624",              u"uploader": u"Hurts", -            u"title": u"Somebody to Die For" +            u"title": u"Somebody to Die For", +            u'duration': 230,          }      } @@ -27,27 +31,47 @@ class VevoIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        json_url = 'http://www.vevo.com/data/video/%s' % video_id -        base_url = 'http://smil.lvl3.vevo.com' -        videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower()) +        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id          info_json = self._download_webpage(json_url, video_id, u'Downloading json info') -        links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')          self.report_extraction(video_id) -        video_info = json.loads(info_json) -        m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage)) -        if m_urls is None or len(m_urls) == 0: -            raise ExtractorError(u'Unable to extract video url') -        # They are sorted from worst to best quality -        m_url = m_urls[-1] -        video_url = base_url + '/' + m_url.group('url') -        ext = m_url.group('ext') - -        return {'url': video_url, -                'ext': ext, -                'id': video_id, -                'title': video_info['title'], -                'thumbnail': video_info['img'], -                'upload_date': video_info['launchDate'].replace('/',''), -                'uploader': video_info['Artists'][0]['title'], -                } +        video_info = json.loads(info_json)['video'] +        last_version = {'version': -1} +        for version in video_info['videoVersions']: +            # These are the HTTP downloads, other types are for different manifests +            if version['sourceType'] == 2: +                if version['version'] > last_version['version']: +                    last_version = version +        if last_version['version'] == -1: +            raise ExtractorError(u'Unable to extract last version of the video') + +        renditions = xml.etree.ElementTree.fromstring(last_version['data']) +        formats = [] +        # Already sorted from worst to best quality +        for rend in renditions.findall('rendition'): +            attr = rend.attrib +            f_url = attr['url'] +            formats.append({ +                'url': f_url, +                'ext': determine_ext(f_url), +                'height': int(attr['frameheight']), +                'width': int(attr['frameWidth']), +            }) + +        date_epoch = int(self._search_regex( +            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000 +        upload_date = datetime.datetime.fromtimestamp(date_epoch) +        info = { +            'id': video_id, +            'title': video_info['title'], +            'formats': formats, +            'thumbnail': video_info['imageUrl'], +            'upload_date': upload_date.strftime('%Y%m%d'), +            'uploader': video_info['mainArtists'][0]['artistName'], +            'duration': video_info['duration'], +        } + +        # TODO: Remove when #980 has been merged +        info.update(formats[-1]) + +        return info diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py new file mode 100644 index 000000000..12c84a985 --- /dev/null +++ b/youtube_dl/extractor/viddler.py @@ -0,0 +1,64 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +) + + +class ViddlerIE(InfoExtractor): +    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)' +    _TEST = { +        u"url": u"http://www.viddler.com/v/43903784", +        u'file': u'43903784.mp4', +        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', +        u'info_dict': { +            u"title": u"Video Made Easy", +            u"uploader": u"viddler", +            u"duration": 100.89, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        embed_url = mobj.group('domain') + u'/embed/' + video_id +        webpage = self._download_webpage(embed_url, video_id) + +        video_sources_code = self._search_regex( +            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') +        video_sources = json.loads(video_sources_code.replace("'", '"')) + +        formats = [{ +            'url': video_url, +            'format': format_id, +        } for video_url, format_id in video_sources.items()] + +        title = self._html_search_regex( +            r"title\s*:\s*'([^']*)'", webpage, u'title') +        uploader = self._html_search_regex( +            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) +        duration_s = self._html_search_regex( +            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) +        duration = float(duration_s) if duration_s else None +        thumbnail = self._html_search_regex( +            r"thumbnail\s*:\s*'([^']*)'", +            webpage, u'thumbnail', fatal=False) + +        info = { +            '_type': 'video', +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'duration': duration, +            'formats': formats, +        } + +        # TODO: Remove when #980 has been merged +        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) +        info.update(info['formats'][-1]) + +        return info diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4a7d82b7a..cea29f035 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor):      """Information extractor for vimeo.com."""      # _VALID_URL matches Vimeo URLs -    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$' +    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'      _NETRC_MACHINE = 'vimeo'      IE_NAME = u'vimeo'      _TESTS = [ diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 39126e631..464b498f5 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,17 +17,21 @@ class YahooIE(InfoExtractor):      _TESTS = [          {              u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', -            u'file': u'214727115.mp4', +            u'file': u'214727115.flv',              u'info_dict': {                  u'title': u'Julian Smith & Travis Legg Watch Julian Smith',                  u'description': u'Julian and Travis watch Julian Smith',              }, +            u'params': { +                # Requires rtmpdump +                u'skip_download': True, +            },          },          {              u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',              u'file': u'103000935.flv',              u'info_dict': { -                u'title': u'The Cougar Lies with Spanish Moss', +                u'title': u'Codefellas - The Cougar Lies with Spanish Moss',                  u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',              },              u'params': { @@ -46,6 +50,21 @@ class YahooIE(InfoExtractor):              webpage, u'items', flags=re.MULTILINE)          items = json.loads(items_json)          info = items['mediaItems']['query']['results']['mediaObj'][0] +        # The 'meta' field is not always in the video webpage, we request it +        # from another page +        long_id = info['id'] +        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' +                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id) +        data = compat_urllib_parse.urlencode({ +            'q': query, +            'env': 'prod', +            'format': 'json', +        }) +        query_result_json = self._download_webpage( +            'http://video.query.yahoo.com/v1/public/yql?' + data, +            video_id, u'Downloading video info') +        query_result = json.loads(query_result_json) +        info = query_result['query']['results']['mediaObj'][0]          meta = info['meta']          formats = [] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index c85fd4b5a..b1f93dd1b 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):          req = compat_urllib_request.Request(url)          req.add_header('Cookie', 'age_verified=1')          webpage = self._download_webpage(req, video_id) +        age_limit = self._rta_search(webpage)          # Get JSON parameters          json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') @@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):                  'ext': extension,                  'format': format,                  'thumbnail': thumbnail, -                'description': video_description +                'description': video_description, +                'age_limit': age_limit,              })          if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 39ff33290..8222a880f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1037,12 +1037,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          if player_url is not None:              try: -                if player_url not in self._player_cache: +                player_id = (player_url, len(s)) +                if player_id not in self._player_cache:                      func = self._extract_signature_function(                          video_id, player_url, len(s)                      ) -                    self._player_cache[player_url] = func -                func = self._player_cache[player_url] +                    self._player_cache[player_id] = func +                func = self._player_cache[player_id]                  if self._downloader.params.get('youtube_print_sig_code'):                      self._print_sig_code(func, len(s))                  return func(s) @@ -1115,6 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'lang': lang,                  'v': video_id,                  'fmt': self._downloader.params.get('subtitlesformat'), +                'name': l[0],              })              url = u'http://www.youtube.com/api/timedtext?' + params              sub_lang_list[lang] = url @@ -1249,9 +1251,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          return url_map      def _real_extract(self, url): -        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): -            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).') -          # Extract original video URL from URL with redirection, like age verification, using next_url parameter          mobj = re.search(self._NEXT_URL_RE, url)          if mobj: @@ -1494,7 +1493,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'description':  video_description,                  'player_url':   player_url,                  'subtitles':    video_subtitles, -                'duration':     video_duration +                'duration':     video_duration, +                'age_limit':    18 if age_gate else 0,              })          return results @@ -1635,7 +1635,7 @@ class YoutubeChannelIE(InfoExtractor):  class YoutubeUserIE(InfoExtractor):      IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' -    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' +    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'      _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'      _GDATA_PAGE_SIZE = 50      _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' @@ -1828,3 +1828,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):          webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')          playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')          return self.url_result(playlist_id, 'YoutubePlaylist') + + +class YoutubeTruncatedURLIE(InfoExtractor): +    IE_NAME = 'youtube:truncated_url' +    IE_DESC = False  # Do not list +    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$' + +    def _real_extract(self, url): +        raise ExtractorError( +            u'Did you forget to quote the URL? Remember that & is a meta ' +            u'character in most shells, so you want to put the URL in quotes, ' +            u'like  youtube-dl ' +            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' +            u' (or simply  youtube-dl BaW_jenozKc  ).', +            expected=True) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99..82a1daeb9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -175,7 +175,7 @@ def compat_ord(c):  compiled_regex_type = type(re.compile(''))  std_headers = { -    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', +    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',      'Accept-Encoding': 'gzip, deflate', @@ -715,6 +715,7 @@ def unified_strdate(date_str):          '%Y/%m/%d %H:%M:%S',          '%d.%m.%Y %H:%M',          '%Y-%m-%dT%H:%M:%SZ', +        '%Y-%m-%dT%H:%M:%S',      ]      for expression in format_expressions:          try: @@ -830,3 +831,99 @@ def get_cachedir(params={}):      cache_root = os.environ.get('XDG_CACHE_HOME',                                  os.path.expanduser('~/.cache'))      return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) + + +# Cross-platform file locking +if sys.platform == 'win32': +    import ctypes.wintypes +    import msvcrt + +    class OVERLAPPED(ctypes.Structure): +        _fields_ = [ +            ('Internal', ctypes.wintypes.LPVOID), +            ('InternalHigh', ctypes.wintypes.LPVOID), +            ('Offset', ctypes.wintypes.DWORD), +            ('OffsetHigh', ctypes.wintypes.DWORD), +            ('hEvent', ctypes.wintypes.HANDLE), +        ] + +    kernel32 = ctypes.windll.kernel32 +    LockFileEx = kernel32.LockFileEx +    LockFileEx.argtypes = [ +        ctypes.wintypes.HANDLE,     # hFile +        ctypes.wintypes.DWORD,      # dwFlags +        ctypes.wintypes.DWORD,      # dwReserved +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh +        ctypes.POINTER(OVERLAPPED)  # Overlapped +    ] +    LockFileEx.restype = ctypes.wintypes.BOOL +    UnlockFileEx = kernel32.UnlockFileEx +    UnlockFileEx.argtypes = [ +        ctypes.wintypes.HANDLE,     # hFile +        ctypes.wintypes.DWORD,      # dwReserved +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh +        ctypes.POINTER(OVERLAPPED)  # Overlapped +    ] +    UnlockFileEx.restype = ctypes.wintypes.BOOL +    whole_low = 0xffffffff +    whole_high = 0x7fffffff + +    def _lock_file(f, exclusive): +        overlapped = OVERLAPPED() +        overlapped.Offset = 0 +        overlapped.OffsetHigh = 0 +        overlapped.hEvent = 0 +        f._lock_file_overlapped_p = ctypes.pointer(overlapped) +        handle = msvcrt.get_osfhandle(f.fileno()) +        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0, +                          whole_low, whole_high, f._lock_file_overlapped_p): +            raise OSError('Locking file failed: %r' % ctypes.FormatError()) + +    def _unlock_file(f): +        assert f._lock_file_overlapped_p +        handle = msvcrt.get_osfhandle(f.fileno()) +        if not UnlockFileEx(handle, 0, +                            whole_low, whole_high, f._lock_file_overlapped_p): +            raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) + +else: +    import fcntl + +    def _lock_file(f, exclusive): +        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) + +    def _unlock_file(f): +        fcntl.lockf(f, fcntl.LOCK_UN) + + +class locked_file(object): +    def __init__(self, filename, mode, encoding=None): +        assert mode in ['r', 'a', 'w'] +        self.f = io.open(filename, mode, encoding=encoding) +        self.mode = mode + +    def __enter__(self): +        exclusive = self.mode != 'r' +        try: +            _lock_file(self.f, exclusive) +        except IOError: +            self.f.close() +            raise +        return self + +    def __exit__(self, etype, value, traceback): +        try: +            _unlock_file(self.f) +        finally: +            self.f.close() + +    def __iter__(self): +        return iter(self.f) + +    def write(self, *args): +        return self.f.write(*args) + +    def read(self, *args): +        return self.f.read(*args) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e773e82da..1004af116 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.04' +__version__ = '2013.10.09'  | 
