diff options
36 files changed, 529 insertions, 189 deletions
@@ -100,3 +100,4 @@ Cédric Luthi  Thijs Vermeir  Joel Leclerc  Christopher Krooss +Ondřej Caletka diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ff7b395a..7917abfc6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services  ###  Is anyone going to need the feature? -Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. +Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.  ###  Is your question about youtube-dl? @@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py  	chmod a+x youtube-dl  README.md: youtube_dl/*.py youtube_dl/*/*.py -	COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py +	COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py  CONTRIBUTING.md: README.md  	python devscripts/make_contributing.py README.md CONTRIBUTING.md @@ -248,14 +248,15 @@ which means you can modify it, redistribute it or use it however you like.  ## Video Format Options:      -f, --format FORMAT              video format code, specify the order of -                                     preference using slashes: -f 22/17/18 .  -f -                                     mp4 , -f m4a and  -f flv  are also -                                     supported. You can also use the special -                                     names "best", "bestvideo", "bestaudio", -                                     "worst", "worstvideo" and "worstaudio". By -                                     default, youtube-dl will pick the best -                                     quality. 
Use commas to download multiple -                                     audio formats, such as -f +                                     preference using slashes, as in -f 22/17/18 +                                     .  Instead of format codes, you can select +                                     by extension for the extensions aac, m4a, +                                     mp3, mp4, ogg, wav, webm. You can also use +                                     the special names "best", "bestvideo", +                                     "bestaudio", "worst".  By default, youtube- +                                     dl will pick the best quality. Use commas +                                     to download multiple audio formats, such as +                                     -f                                       136/137/mp4/bestvideo,140/m4a/bestaudio.                                       You can merge the video and audio of two                                       formats into a single file using -f <video- @@ -326,7 +327,7 @@ which means you can modify it, redistribute it or use it however you like.  # CONFIGURATION -You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`. +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.  
# OUTPUT TEMPLATE diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index f0f0481c7..d3ef5f0b5 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -16,7 +16,7 @@ def main():          template = tmplf.read()      ie_htmls = [] -    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()): +    for ie in youtube_dl.list_extractors(age_limit=None):          ie_html = '<b>{}</b>'.format(ie.IE_NAME)          ie_desc = getattr(ie, 'IE_DESC', None)          if ie_desc is False: diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 140010644..3df4385a6 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -23,12 +23,12 @@ def main():      def gen_ies_md(ies):          for ie in ies: -            ie_md = '**{}**'.format(ie.IE_NAME) +            ie_md = '**{0}**'.format(ie.IE_NAME)              ie_desc = getattr(ie, 'IE_DESC', None)              if ie_desc is False:                  continue              if ie_desc is not None: -                ie_md += ': {}'.format(ie.IE_DESC) +                ie_md += ': {0}'.format(ie.IE_DESC)              if not ie.working():                  ie_md += ' (Currently broken)'              yield ie_md diff --git a/test/helper.py b/test/helper.py index 96d58b7c1..77225e4f7 100644 --- a/test/helper.py +++ b/test/helper.py @@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):  def gettestcases(include_onlymatching=False):      for ie in youtube_dl.extractor.gen_extractors(): -        t = getattr(ie, '_TEST', None) -        if t: -            assert not hasattr(ie, '_TESTS'), \ -                '%s has _TEST and _TESTS' % type(ie).__name__ -            tests = [t] -        else: -            tests = getattr(ie, '_TESTS', []) -        for t in tests: -            if not include_onlymatching and t.get('only_matching', False): -                continue -            t['name'] = 
type(ie).__name__[:-len('IE')] -            yield t +        for tc in ie.get_testcases(include_onlymatching): +            yield tc  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 13c18ed95..be8d12997 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):          self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')          self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') +    def test_html_search_meta(self): +        ie = self.ie +        html = ''' +            <meta name="a" content="1" /> +            <meta name='b' content='2'> +            <meta name="c" content='3'> +            <meta name=d content='4'> +            <meta property="e" content='5' > +            <meta content="6" name="f"> +        ''' + +        self.assertEqual(ie._html_search_meta('a', html), '1') +        self.assertEqual(ie._html_search_meta('b', html), '2') +        self.assertEqual(ie._html_search_meta('c', html), '3') +        self.assertEqual(ie._html_search_meta('d', html), '4') +        self.assertEqual(ie._html_search_meta('e', html), '5') +        self.assertEqual(ie._html_search_meta('f', html), '6') +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 730f7ec26..85d87f2c3 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,6 +8,8 @@ import sys  import unittest  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import copy +  from test.helper import FakeYDL, assertRegexpMatches  from youtube_dl import YoutubeDL  from youtube_dl.extractor import YoutubeIE @@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):          downloaded = ydl.downloaded_info_dicts[0]          self.assertEqual(downloaded['format_id'], 
'vid-high') +    def test_format_selection_audio_exts(self): +        formats = [ +            {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, +            {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, +            {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, +            {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, +            {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, +        ] + +        info_dict = _make_result(formats) +        ydl = YDL({'format': 'best'}) +        ie = YoutubeIE(ydl) +        ie._sort_formats(info_dict['formats']) +        ydl.process_ie_result(copy.deepcopy(info_dict)) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'aac-64') + +        ydl = YDL({'format': 'mp3'}) +        ie = YoutubeIE(ydl) +        ie._sort_formats(info_dict['formats']) +        ydl.process_ie_result(copy.deepcopy(info_dict)) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'mp3-64') + +        ydl = YDL({'prefer_free_formats': True}) +        ie = YoutubeIE(ydl) +        ie._sort_formats(info_dict['formats']) +        ydl.process_ie_result(copy.deepcopy(info_dict)) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'ogg-64') +      def test_format_selection_video(self):          formats = [              {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'}, diff --git a/test/test_subtitles.py b/test/test_subtitles.py index d34565191..6336dd317 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -17,6 +17,7 @@ from youtube_dl.extractor import (      TEDIE,      VimeoIE,      WallaIE, +    CeskaTelevizeIE,  ) @@ -317,5 +318,32 @@ class 
TestWallaSubtitles(BaseTestSubtitles):          self.assertEqual(len(subtitles), 0) +class TestCeskaTelevizeSubtitles(BaseTestSubtitles): +    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' +    IE = CeskaTelevizeIE + +    def test_list_subtitles(self): +        self.DL.expect_warning('Automatic Captions not supported by this server') +        self.DL.params['listsubtitles'] = True +        info_dict = self.getInfoDict() +        self.assertEqual(info_dict, None) + +    def test_allsubtitles(self): +        self.DL.expect_warning('Automatic Captions not supported by this server') +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(set(subtitles.keys()), set(['cs'])) +        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + +    def test_nosubtitles(self): +        self.DL.expect_warning('video doesn\'t have subtitles') +        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(len(subtitles), 0) + +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index dd49a6d17..16e1a1ddf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,6 +16,7 @@ import json  import xml.etree.ElementTree  from youtube_dl.utils import ( +    age_restricted,      args_to_str,      clean_html,      DateRange, @@ -402,5 +403,12 @@ Trying to open render node...  Success at /dev/dri/renderD128.  
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') +    def test_age_restricted(self): +        self.assertFalse(age_restricted(None, 10))  # unrestricted content +        self.assertFalse(age_restricted(1, None))  # unrestricted policy +        self.assertFalse(age_restricted(8, 10)) +        self.assertTrue(age_restricted(18, 14)) +        self.assertFalse(age_restricted(18, 18)) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 806e7b239..24d6c2de7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -63,6 +63,7 @@ from .utils import (      YoutubeDLHandler,      prepend_extension,      args_to_str, +    age_restricted,  )  from .cache import Cache  from .extractor import get_info_extractor, gen_extractors @@ -550,13 +551,8 @@ class YoutubeDL(object):              max_views = self.params.get('max_views')              if max_views is not None and view_count > max_views:                  return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) -        age_limit = self.params.get('age_limit') -        if age_limit is not None: -            actual_age_limit = info_dict.get('age_limit') -            if actual_age_limit is None: -                actual_age_limit = 0 -            if age_limit < actual_age_limit: -                return 'Skipping "' + title + '" because it is age restricted' +        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): +            return 'Skipping "%s" because it is age restricted' % title          if self.in_download_archive(info_dict):              return '%s has already been recorded in archive' % video_title          return None @@ -790,7 +786,7 @@ class YoutubeDL(object):              if video_formats:                  return video_formats[0]          else: -            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] +            
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']              if format_spec in extensions:                  filter_f = lambda f: f['ext'] == format_spec              else: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 56f560d26..4aa7fba6a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -38,7 +38,7 @@ from .update import update_self  from .downloader import (      FileDownloader,  ) -from .extractor import gen_extractors +from .extractor import gen_extractors, list_extractors  from .YoutubeDL import YoutubeDL @@ -95,17 +95,15 @@ def _real_main(argv=None):      _enc = preferredencoding()      all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] -    extractors = gen_extractors() -      if opts.list_extractors: -        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): +        for ie in list_extractors(opts.age_limit):              compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))              matchedUrls = [url for url in all_urls if ie.suitable(url)]              for mu in matchedUrls:                  compat_print('  ' + mu)          sys.exit(0)      if opts.list_extractor_descriptions: -        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): +        for ie in list_extractors(opts.age_limit):              if not ie._WORKING:                  continue              desc = getattr(ie, 'IE_DESC', ie.IE_NAME) @@ -365,3 +363,5 @@ def main(argv=None):          sys.exit('ERROR: fixed output name but more than one file to download')      except KeyboardInterrupt:          sys.exit('\nERROR: Interrupted by user') + +__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8e47bd60d..8dacc2c54 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -159,6 +159,7 @@ from 
.gametrailers import GametrailersIE  from .gdcvault import GDCVaultIE  from .generic import GenericIE  from .giantbomb import GiantBombIE +from .giga import GigaIE  from .glide import GlideIE  from .globo import GloboIE  from .godtube import GodTubeIE @@ -574,6 +575,17 @@ def gen_extractors():      return [klass() for klass in _ALL_CLASSES] +def list_extractors(age_limit): +    """ +    Return a list of extractors that are suitable for the given age, +    sorted by extractor ID. +    """ + +    return sorted( +        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), +        key=lambda ie: ie.IE_NAME.lower()) + +  def get_info_extractor(ie_name):      """Returns the info extractor class with the given ie_name"""      return globals()[ie_name + 'IE'] diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 241b904a9..75d744852 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,9 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..compat import compat_parse_qs  from ..utils import ( -    ExtractorError,      int_or_none,      unified_strdate,  ) @@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor):          thumbnail = self._html_search_meta(              'thumbnailUrl', video_code, 'thumbnail', fatal=False) -        player_params = compat_parse_qs(self._html_search_regex( -            r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"', -            webpage, 'player params')) +        cid = self._search_regex(r'cid=(\d+)', webpage, 'cid') -        if 'cid' in player_params: -            cid = player_params['cid'][0] +        lq_doc = self._download_xml( +            'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, +            video_id, +            note='Downloading LQ video info' +        ) +        lq_durl = lq_doc.find('./durl') +        formats = [{ +            'format_id': 
'lq', +            'quality': 1, +            'url': lq_durl.find('./url').text, +            'filesize': int_or_none( +                lq_durl.find('./size'), get_attr='text'), +        }] -            lq_doc = self._download_xml( -                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid, -                video_id, -                note='Downloading LQ video info' -            ) -            lq_durl = lq_doc.find('.//durl') -            formats = [{ -                'format_id': 'lq', -                'quality': 1, -                'url': lq_durl.find('./url').text, +        hq_doc = self._download_xml( +            'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid, +            video_id, +            note='Downloading HQ video info', +            fatal=False, +        ) +        if hq_doc is not False: +            hq_durl = hq_doc.find('./durl') +            formats.append({ +                'format_id': 'hq', +                'quality': 2, +                'ext': 'flv', +                'url': hq_durl.find('./url').text,                  'filesize': int_or_none( -                    lq_durl.find('./size'), get_attr='text'), -            }] - -            hq_doc = self._download_xml( -                'http://interface.bilibili.cn/playurl?cid=%s' % cid, -                video_id, -                note='Downloading HQ video info', -                fatal=False, -            ) -            if hq_doc is not False: -                hq_durl = hq_doc.find('.//durl') -                formats.append({ -                    'format_id': 'hq', -                    'quality': 2, -                    'ext': 'flv', -                    'url': hq_durl.find('./url').text, -                    'filesize': int_or_none( -                        hq_durl.find('./size'), get_attr='text'), -                }) -        else: -            raise ExtractorError('Unsupported player parameters: %r' % (player_params,)) +                    hq_durl.find('./size'), 
get_attr='text'), +            })          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index ba8376338..f70e090bb 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor  from ..compat import (      compat_urllib_request,      compat_urllib_parse, @@ -15,7 +15,7 @@ from ..utils import (  ) -class CeskaTelevizeIE(InfoExtractor): +class CeskaTelevizeIE(SubtitlesInfoExtractor):      _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'      _TESTS = [ @@ -104,6 +104,17 @@ class CeskaTelevizeIE(InfoExtractor):          duration = float_or_none(item.get('duration'))          thumbnail = item.get('previewImageUrl') +        subtitles = {} +        subs = item.get('subtitles') +        if subs: +            subtitles['cs'] = subs[0]['url'] + +        if self._downloader.params.get('listsubtitles', False): +            self._list_available_subtitles(video_id, subtitles) +            return + +        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles)) +          return {              'id': episode_id,              'title': title, @@ -111,4 +122,34 @@ class CeskaTelevizeIE(InfoExtractor):              'thumbnail': thumbnail,              'duration': duration,              'formats': formats, +            'subtitles': subtitles,          } + +    @staticmethod +    def _fix_subtitles(subtitles): +        """ Convert millisecond-based subtitles to SRT """ +        if subtitles is None: +            return subtitles  # subtitles not requested + +        def _msectotimecode(msec): +            """ Helper utility to convert milliseconds to timecode """ +            components = [] +            for divider in [1000, 60, 60, 100]: +                
components.append(msec % divider) +                msec //= divider +            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components) + +        def _fix_subtitle(subtitle): +            for line in subtitle.splitlines(): +                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line) +                if m: +                    yield m.group(1) +                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) +                    yield "{0} --> {1}".format(start, stop) +                else: +                    yield line + +        fixed_subtitles = {} +        for k, v in subtitles.items(): +            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v)) +        return fixed_subtitles diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 562e656e0..d703893dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ from ..compat import (      compat_str,  )  from ..utils import ( +    age_restricted,      clean_html,      compiled_regex_type,      ExtractorError, @@ -593,7 +594,7 @@ class InfoExtractor(object):          return self._html_search_regex(              r'''(?isx)<meta                      (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) -                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name), +                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),              html, display_name, fatal=fatal, group='content', **kwargs)      def _dc_search_uploader(self, html): @@ -877,6 +878,35 @@ class InfoExtractor(object):              None, '/', True, False, expire_time, '', None, None, None)          self._downloader.cookiejar.set_cookie(cookie) +    def get_testcases(self, include_onlymatching=False): +        t = getattr(self, '_TEST', None) +        if t: +            assert not hasattr(self, '_TESTS'), \ +                '%s has _TEST and _TESTS' % type(self).__name__ +            tests = [t] +       
 else: +            tests = getattr(self, '_TESTS', []) +        for t in tests: +            if not include_onlymatching and t.get('only_matching', False): +                continue +            t['name'] = type(self).__name__[:-len('IE')] +            yield t + +    def is_suitable(self, age_limit): +        """ Test whether the extractor is generally suitable for the given +        age limit (i.e. pornographic sites are not, all others usually are) """ + +        any_restricted = False +        for tc in self.get_testcases(include_onlymatching=False): +            if 'playlist' in tc: +                tc = tc['playlist'][0] +            is_restricted = age_restricted( +                tc.get('info_dict', {}).get('age_limit'), age_limit) +            if not is_restricted: +                return True +            any_restricted = any_restricted or is_restricted +        return not any_restricted +  class SearchInfoExtractor(InfoExtractor):      """ diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index d09d1c13a..190d9f9ad 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -13,7 +13,7 @@ from ..utils import (  class FKTVIE(InfoExtractor):      IE_NAME = 'fernsehkritik.tv' -    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' +    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'      
_TEST = {          'url': 'http://fernsehkritik.tv/folge-1', @@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        episode = int(mobj.group('ep')) +        episode = int(self._match_id(url)) -        server = random.randint(2, 4) -        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode -        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, +        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode +        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,                                                 episode)          playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,                                        'playlist', flags=re.DOTALL)          files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) -        # TODO: return a single multipart video +          videos = []          for i, _ in enumerate(files, 1):              video_id = '%04d%d' % (episode, i) -            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) +            video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)              videos.append({ +                'ext': 'flv',                  'id': video_id,                  'url': video_url,                  'title': clean_html(get_element_by_id('eptitle', start_webpage)),                  'description': clean_html(get_element_by_id('contentlist', start_webpage)),                  'thumbnail': video_thumbnail              }) -        return videos +        return { +            '_type': 'multi_video', +            'entries': videos, +            'id': 'folge-%s' % episode, +        }  class FKTVPosteckeIE(InfoExtractor): diff --git a/youtube_dl/extractor/gameone.py 
b/youtube_dl/extractor/gameone.py index 75f180928..a07d69841 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -57,8 +57,7 @@ class GameOneIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          og_video = self._og_search_video_url(webpage, secure=False) diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py new file mode 100644 index 000000000..775890112 --- /dev/null +++ b/youtube_dl/extractor/giga.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..utils import ( +    qualities, +    compat_str, +    parse_duration, +    parse_iso8601, +    str_to_int, +) + + +class GigaIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)' +    _TESTS = [{ +        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/', +        'md5': '6bc5535e945e724640664632055a584f', +        'info_dict': { +            'id': '2622086', +            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss', +            'ext': 'mp4', +            'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss', +            'description': 'md5:afdf5862241aded4718a30dff6a57baf', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 578, +            'timestamp': 1414749706, +            'upload_date': '20141031', +            'uploader': 'Robin Schweiger', +            'view_count': int, +        }, +    }, { +        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/', +        'only_matching': True, +    }, { +        'url': 
'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/', +        'only_matching': True, +    }, { +        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        video_id = self._search_regex( +            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'], +            webpage, 'video id') + +        playlist = self._download_json( +            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/' +            % video_id, video_id)[0] + +        quality = qualities(['normal', 'hd720']) + +        formats = [] +        for format_id in itertools.count(0): +            fmt = playlist.get(compat_str(format_id)) +            if not fmt: +                break +            formats.append({ +                'url': fmt['src'], +                'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]), +                'quality': quality(fmt['quality']), +            }) +        self._sort_formats(formats) + +        title = self._html_search_meta( +            'title', webpage, 'title', fatal=True) +        description = self._html_search_meta( +            'description', webpage, 'description') +        thumbnail = self._og_search_thumbnail(webpage) + +        duration = parse_duration(self._search_regex( +            r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id), +            webpage, 'duration', fatal=False)) + +        timestamp = parse_iso8601(self._search_regex( +            r'datetime="([^"]+)"', webpage, 'upload date', fatal=False)) +        uploader = self._search_regex( +            
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False) + +        view_count = str_to_int(self._search_regex( +            r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False)) + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'timestamp': timestamp, +            'uploader': uploader, +            'view_count': view_count, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py index 4ccf6b9b8..a38eae421 100644 --- a/youtube_dl/extractor/huffpost.py +++ b/youtube_dl/extractor/huffpost.py @@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):          data = self._download_json(api_url, video_id)['data']          video_title = data['title'] -        duration = parse_duration(data['running_time']) -        upload_date = unified_strdate(data['schedule']['starts_at']) +        duration = parse_duration(data.get('running_time')) +        upload_date = unified_strdate( +            data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))          description = data.get('description')          thumbnails = [] @@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):              'ext': 'mp4',              'url': url,              'vcodec': 'none' if key.startswith('audio/') else None, -        } for key, url in data['sources']['live'].items()] -        if data.get('fivemin_id'): -            fid = data['fivemin_id'] -            fcat = str(int(fid) // 100 + 1) -            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4' -            formats.append({ -                'format': 'fivemin', -                'url': furl, -                'preference': 1, -            }) +        } for key, url in data.get('sources', 
{}).get('live', {}).items()] + +        if not formats and data.get('fivemin_id'): +            return self.url_result('5min:%s' % data['fivemin_id']) +          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 13a53a0cb..f29df36b5 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):      _TEST = {          'url': 'http://www.imdb.com/video/imdb/vi2524815897', -        'md5': '9f34fa777ade3a6e57a054fdbcb3a068',          'info_dict': {              'id': '2524815897',              'ext': 'mp4', diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 78787e8f1..3c61a850f 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -105,6 +105,9 @@ class OCWMITIE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',                  'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.', +                'upload_date': '20121109', +                'uploader_id': 'MIT', +                'uploader': 'MIT OpenCourseWare',                  # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'              }          }, @@ -114,6 +117,9 @@ class OCWMITIE(InfoExtractor):                  'id': '7K1sB05pE0A',                  'ext': 'mp4',                  'title': 'Session 1: Introduction to Derivatives', +                'upload_date': '20090818', +                'uploader_id': 'MIT', +                'uploader': 'MIT OpenCourseWare',                  'description': 'This section contains lecture video 
class MotorsportIE(InfoExtractor):
    """Extractor for motorsport.com video pages.

    The video page embeds a player iframe which in turn embeds a YouTube
    video, so the result is a transparent redirect to the YouTube URL.
    """
    IE_DESC = 'motorsport.com'
    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
    _TEST = {
        'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
        'info_dict': {
            'id': '2-T3WuR-KMM',
            'ext': 'mp4',
            'title': 'Red Bull Racing: 2014 Rules Explained',
            'duration': 208,
            'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
            'uploader': 'mcomstaff',
            'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
            'upload_date': '20140903',
            'thumbnail': r're:^https?://.+\.jpg$'
        },
        'add_ie': ['Youtube'],
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a motorsport.com video page to its embedded YouTube video.

        Returns a ``url_transparent`` result pointing at the YouTube watch
        URL, carrying the page's display id.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The player lives in a separate iframe document; its src may be
        # relative, so it is resolved against the page URL.
        iframe_path = self._html_search_regex(
            r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
            'iframe path')
        iframe = self._download_webpage(
            compat_urlparse.urljoin(url, iframe_path), display_id,
            'Downloading iframe')

        # Dots are escaped and the id is limited to the 11-character
        # [0-9A-Za-z_-] alphabet so unrelated text cannot be captured.
        youtube_id = self._search_regex(
            r'www\.youtube\.com/embed/([0-9A-Za-z_-]{11})', iframe,
            'youtube id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': 'https://youtube.com/watch?v=%s' % youtube_id,
        }
-    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})' +    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'      _TESTS = [          { @@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):                  'description': 'md5:bdea103bc35494c143c6a9acdd84887a',                  'upload_date': '20140523',                  'duration': 1741.52, -            } +            },          },          {              'url': 'http://tv.nrk.no/program/mdfp15000514', @@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):                  'description': 'md5:654c12511f035aed1e42bdf5db3b206a',                  'upload_date': '20140524',                  'duration': 4605.0, -            } +            },          }, +        { +            # single playlist video +            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', +            'md5': 'adbd1dbd813edaf532b0a253780719c2', +            'info_dict': { +                'id': 'MSPO40010515-part2', +                'ext': 'flv', +                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', +                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                'upload_date': '20150106', +            }, +            'skip': 'Only works from Norway', +        }, +        { +            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', +            'playlist': [ +                { +                    'md5': '9480285eff92d64f06e02a5367970a7a', +                    'info_dict': { +                        'id': 'MSPO40010515-part1', +                        'ext': 'flv', +                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', +                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                        'upload_date': '20150106', +     
               }, +                }, +                { +                    'md5': 'adbd1dbd813edaf532b0a253780719c2', +                    'info_dict': { +                        'id': 'MSPO40010515-part2', +                        'ext': 'flv', +                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', +                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                        'upload_date': '20150106', +                    }, +                }, +            ], +            'info_dict': { +                'id': 'MSPO40010515', +                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', +                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                'upload_date': '20150106', +                'duration': 6947.5199999999995, +            }, +            'skip': 'Only works from Norway', +        }      ] +    def _extract_f4m(self, manifest_url, video_id): +        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') - -        page = self._download_webpage(url, video_id) - -        title = self._html_search_meta('title', page, 'title') -        description = self._html_search_meta('description', page, 'description') -        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False) -        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False)) -        duration = float_or_none( -            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)) +        part_id = mobj.group('part_id') + +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_meta( +            'title', webpage, 'title') +        description = 
self._html_search_meta( +            'description', webpage, 'description') + +        thumbnail = self._html_search_regex( +            r'data-posterimage="([^"]+)"', +            webpage, 'thumbnail', fatal=False) +        upload_date = unified_strdate(self._html_search_meta( +            'rightsfrom', webpage, 'upload date', fatal=False)) +        duration = float_or_none(self._html_search_regex( +            r'data-duration="([^"]+)"', +            webpage, 'duration', fatal=False)) + +        # playlist +        parts = re.findall( +            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage) +        if parts: +            entries = [] +            for current_part_id, stream_url, part_title in parts: +                if part_id and current_part_id != part_id: +                    continue +                video_part_id = '%s-part%s' % (video_id, current_part_id) +                formats = self._extract_f4m(stream_url, video_part_id) +                entries.append({ +                    'id': video_part_id, +                    'title': part_title, +                    'description': description, +                    'thumbnail': thumbnail, +                    'upload_date': upload_date, +                    'formats': formats, +                }) +            if part_id: +                if entries: +                    return entries[0] +            else: +                playlist = self.playlist_result(entries, video_id, title, description) +                playlist.update({ +                    'thumbnail': thumbnail, +                    'upload_date': upload_date, +                    'duration': duration, +                }) +                return playlist          formats = [] -        f4m_url = re.search(r'data-media="([^"]+)"', page) +        f4m_url = re.search(r'data-media="([^"]+)"', webpage)          if f4m_url: -            formats.append({ -                'url': f4m_url.group(1) + 
'?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', -                'format_id': 'f4m', -                'ext': 'flv', -            }) +            formats.extend(self._extract_f4m(f4m_url.group(1), video_id)) -        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page) +        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)          if m3u8_url: -            formats.append({ -                'url': m3u8_url.group(1), -                'format_id': 'm3u8', -            }) +            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))          self._sort_formats(formats) diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py index c833fc8ee..6446d26dc 100644 --- a/youtube_dl/extractor/sexykarma.py +++ b/youtube_dl/extractor/sexykarma.py @@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):              'title': 'Taking a quick pee.',              'thumbnail': 're:^https?://.*\.jpg$',              'uploader': 'wildginger7', -            'upload_date': '20141007', +            'upload_date': '20141008',              'duration': 22,              'view_count': int,              'comment_count': int, @@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):              'view_count': int,              'comment_count': int,              'categories': list, +            'age_limit': 18,          }      }, {          'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', @@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):              'view_count': int,              'comment_count': int,              'categories': list, +            'age_limit': 18,          }      }] @@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):              'view_count': view_count,              'comment_count': comment_count,              'categories': categories, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/teachertube.py 
b/youtube_dl/extractor/teachertube.py index 6c3445d79..82675431f 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          title = self._html_search_meta('title', webpage, 'title', fatal=True) diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py index 4ce5aeeba..b6b1f2568 100644 --- a/youtube_dl/extractor/tunein.py +++ b/youtube_dl/extractor/tunein.py @@ -24,7 +24,7 @@ class TuneInIE(InfoExtractor):      _INFO_DICT = {          'id': '34682',          'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', -        'ext': 'AAC', +        'ext': 'aac',          'thumbnail': 're:^https?://.*\.png$',          'location': 'Tacoma, WA',      } @@ -78,14 +78,21 @@ class TuneInIE(InfoExtractor):          for stream in streams:              if stream.get('Type') == 'Live':                  is_live = True +            reliability = stream.get('Reliability') +            format_note = ( +                'Reliability: %d%%' % reliability +                if reliability is not None else None)              formats.append({ +                'preference': ( +                    0 if reliability is None or reliability > 90 +                    else 1),                  'abr': stream.get('Bandwidth'), -                'ext': stream.get('MediaType'), +                'ext': stream.get('MediaType').lower(),                  'acodec': stream.get('MediaType'),                  'vcodec': 'none',                  'url': stream.get('Url'), -                # Sometimes streams with the highest quality do not exist -                'preference': stream.get('Reliability'), +                'source_preference': reliability, +                'format_note': 
format_note,              })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 15f315298..944901e14 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor):      _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'      _TEST = {          'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', -        'md5': 'a21454021c2646f5433514177e2caa5f',          'info_dict': {              'id': '1023585v',              'ext': 'mp4', @@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group(1) +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          title = self._og_search_title(webpage) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index 88bbbb219..c17bebd6e 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -10,14 +10,14 @@ from ..utils import (  class WashingtonPostIE(InfoExtractor): -    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])' +    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'      _TEST = {          'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',          'info_dict': {              'title': 'Sinkhole of bureaucracy',          },          'playlist': [{ -            'md5': 'c3f4b4922ffa259243f68e928db2db8c', +            'md5': '79132cc09ec5309fa590ae46e4cc31bc',              'info_dict': {                  'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',                  'ext': 'mp4', @@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):                  'upload_date': '20140322',              },          }, { -            
'md5': 'f645a07652c2950cd9134bb852c5f5eb', +            'md5': 'e1d5734c06865cc504ad99dc2de0d443',              'info_dict': {                  'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',                  'ext': 'mp4', @@ -44,10 +44,9 @@ class WashingtonPostIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        page_id = mobj.group('id') - +        page_id = self._match_id(url)          webpage = self._download_webpage(url, page_id) +          title = self._og_search_title(webpage)          uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)          entries = [] diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 95f1c8f3c..e8490b028 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):          'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',          'info_dict': {              'id': 'greenshowers', +            'age_limit': 18,          },          'playlist_mincount': 155,      } @@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):          return {              '_type': 'playlist',              'id': username, +            'age_limit': 18,              'entries': [{                  '_type': 'url',                  'url': eurl, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d1bbf0b01..e71956071 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 
'fps': 60, 'vcodec': 'VP9'}, +        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'}, +        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          # Dash webm audio          '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, @@ -412,7 +414,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'id': 'HtVdAasjOgU',                  'ext': 'mp4',                  'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', -                'description': 'md5:eca57043abae25130f58f655ad9a7771', +                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',                  'uploader': 'The Witcher',                  'uploader_id': 'WitcherGame',                  'upload_date': '20140605', @@ -1046,7 +1048,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                      for f in formats:                          if f['format_id'] in dash_keys:                              f['format_id'] = 'nondash-%s' % f['format_id'] -                            f['preference'] -= 10000 +                            f['preference'] = f.get('preference', 0) - 10000                      formats.extend(dash_formats)          self._sort_formats(formats) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 74c76a9a0..98f15177b 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -119,7 +119,7 @@ class ZDFChannelIE(InfoExtractor):          'info_dict': {              'id': '1586442',          }, -        'playlist_count': 4, +        'playlist_count': 3,      }      _PAGE_SIZE = 50 diff 
--git a/youtube_dl/options.py b/youtube_dl/options.py index a018c1d71..058342dec 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -267,10 +267,12 @@ def parseOpts(overrideArguments=None):          action='store', dest='format', metavar='FORMAT', default=None,          help=(              'video format code, specify the order of preference using' -            ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also' -            ' supported. You can also use the special names "best",' -            ' "bestvideo", "bestaudio", "worst", "worstvideo" and' -            ' "worstaudio". By default, youtube-dl will pick the best quality.' +            ' slashes, as in -f 22/17/18 . ' +            ' Instead of format codes, you can select by extension for the ' +            'extensions aac, m4a, mp3, mp4, ogg, wav, webm. ' +            'You can also use the special names "best",' +            ' "bestvideo", "bestaudio", "worst". ' +            ' By default, youtube-dl will pick the best quality.'              ' Use commas to download multiple audio formats, such as'              ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'              
def age_restricted(content_limit, age_limit):
    """Return True iff the content should be blocked.

    A value of None for age_limit means no limit is set; None for
    content_limit means the content is available for everyone. In both
    cases nothing is blocked. Otherwise the content is blocked exactly
    when age_limit is lower than content_limit.
    """
    both_limits_set = age_limit is not None and content_limit is not None
    return both_limits_set and age_limit < content_limit
