diff options
| -rw-r--r-- | test/test_YoutubeDL.py | 55 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 54 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/twitch.py | 2 | ||||
| -rw-r--r-- | youtube_dl/options.py | 11 | 
5 files changed, 131 insertions, 1 deletions
| diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 85d87f2c3..678b9f7d1 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -281,6 +281,61 @@ class TestFormatSelection(unittest.TestCase):              downloaded = ydl.downloaded_info_dicts[0]              self.assertEqual(downloaded['format_id'], f1id) +    def test_format_filtering(self): +        formats = [ +            {'format_id': 'A', 'filesize': 500, 'width': 1000}, +            {'format_id': 'B', 'filesize': 1000, 'width': 500}, +            {'format_id': 'C', 'filesize': 1000, 'width': 400}, +            {'format_id': 'D', 'filesize': 2000, 'width': 600}, +            {'format_id': 'E', 'filesize': 3000}, +            {'format_id': 'F'}, +            {'format_id': 'G', 'filesize': 1000000}, +        ] +        for f in formats: +            f['url'] = 'http://_/' +            f['ext'] = 'unknown' +        info_dict = _make_result(formats) + +        ydl = YDL({'format': 'best[filesize<3000]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'D') + +        ydl = YDL({'format': 'best[filesize<=3000]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'E') + +        ydl = YDL({'format': 'best[filesize <= ? 3000]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'F') + +        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'B') + +        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'C') + +        ydl = YDL({'format': '[filesize>?1]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'G') + +        ydl = YDL({'format': '[filesize<1M]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'E') + +        ydl = YDL({'format': '[filesize<1MiB]'}) +        ydl.process_ie_result(info_dict) +        downloaded = ydl.downloaded_info_dicts[0] +        self.assertEqual(downloaded['format_id'], 'G') +      def test_add_extra_info(self):          test_dict = {              'extractor': 'Foo', diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 772fddd45..8ef74e414 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -10,6 +10,7 @@ import io  import itertools  import json  import locale +import operator  import os  import platform  import re @@ -49,6 +50,7 @@ from .utils import (      make_HTTPS_handler,      MaxDownloadsReached,      PagedList, +    parse_filesize,      PostProcessingError,      platform_name,      preferredencoding, @@ -768,7 +770,59 @@ class YoutubeDL(object):          else:              raise Exception('Invalid result type: %s' % result_type) +    def _apply_format_filter(self, format_spec, available_formats): +        " Returns a tuple of the remaining format_spec and filtered formats " + +        OPERATORS = { +            '<': operator.lt, +            '<=': operator.le, +            '>': operator.gt, +            '>=': operator.ge, +            '=': operator.eq, +            '!=': operator.ne, +        } +        operator_rex = re.compile(r'''(?x)\s*\[ +            (?P<key>width|height|tbr|abr|vbr|filesize) +            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* +            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) +            \]$ +            ''' % '|'.join(map(re.escape, OPERATORS.keys()))) +        m = operator_rex.search(format_spec) +        if not m: +            raise ValueError('Invalid format specification %r' % format_spec) + +        try: +            comparison_value = int(m.group('value')) +        except ValueError: +            comparison_value = parse_filesize(m.group('value')) +            if comparison_value is None: +                comparison_value = parse_filesize(m.group('value') + 'B') +            if comparison_value is None: +                raise ValueError( +                    'Invalid value %r in format specification %r' % ( +                        m.group('value'), format_spec)) +        op = OPERATORS[m.group('op')] + +        def _filter(f): +            actual_value = f.get(m.group('key')) +            if actual_value is None: +                return m.group('none_inclusive') +            return op(actual_value, comparison_value) +        new_formats = [f for f in available_formats if _filter(f)] + +        new_format_spec = format_spec[:-len(m.group(0))] +        if not new_format_spec: +            new_format_spec = 'best' + +        return (new_format_spec, new_formats) +      def select_format(self, format_spec, available_formats): +        while format_spec.endswith(']'): +            format_spec, available_formats = self._apply_format_filter( +                format_spec, available_formats) +        if not available_formats: +            return None +          if format_spec == 'best' or format_spec is None:              return available_formats[-1]          elif format_spec == 'worst': diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 634142d0d..fb2032832 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -10,6 +10,7 @@ from ..compat import (      compat_urllib_request,  )  from ..utils import ( +    ExtractorError,      str_to_int,  )  from ..aes import ( @@ -44,6 +45,15 @@ class PornHubIE(InfoExtractor):          req.add_header('Cookie', 'age_verified=1')          webpage = self._download_webpage(req, video_id) +        error_msg = self._html_search_regex( +            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>', +            webpage, 'error message', default=None) +        if error_msg: +            error_msg = re.sub(r'\s+', ' ', error_msg) +            raise ExtractorError( +                'PornHub said: %s' % error_msg, +                expected=True, video_id=video_id) +          video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')          video_uploader = self._html_search_regex(              r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 87e3c453d..340cadcf5 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -348,4 +348,4 @@ class TwitchStreamIE(TwitchBaseIE):              'view_count': view_count,              'formats': formats,              'is_live': True, -        }
\ No newline at end of file +        } diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f25c12e52..fd7b400b2 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -289,6 +289,17 @@ def parseOpts(overrideArguments=None):              'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '              'You can also use the special names "best",'              ' "bestvideo", "bestaudio", "worst". ' +            ' You can filter the video results by putting a condition in' +            ' brackets, as in -f "best[height=720]"' +            ' (or -f "[filesize>10M]"). ' +            ' This works for filesize, height, width, tbr, abr, and vbr' +            ' and the comparisons <, <=, >, >=, =, != .' +            ' Formats for which the value is not known are excluded unless you' +            ' put a question mark (?) after the operator.' +            ' You can combine format filters, so  ' +            '-f "[height <=? 720][tbr>500]" ' +            'selects up to 720p videos (or videos where the height is not ' +            'known) with a bitrate of at least 500 KBit/s.'              ' By default, youtube-dl will pick the best quality.'              ' Use commas to download multiple audio formats, such as'              ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.' | 
