From a047eeb6d2cbbee944e5088f7cba111746f4bb0b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 5 Aug 2021 03:01:23 +0530 Subject: Add regex to `--match-filter` This does not fully deprecate `--match-title`/`--reject-title` since `--match-filter` is only checked after the extraction is complete, while `--match-title` can often be checked from the flat playlist. Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035 --- yt_dlp/utils.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'yt_dlp/utils.py') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index d06b18e00..b04fbd22c 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4664,23 +4664,28 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): def _match_one(filter_part, dct): # TODO: Generalize code with YoutubeDL._build_format_filter + STRING_OPERATORS = { + '*=': operator.contains, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), + '~=': lambda attr, value: re.search(value, attr), + } COMPARISON_OPERATORS = { + **STRING_OPERATORS, + '<=': operator.le, # "<=" must be defined above "<" '<': operator.lt, - '<=': operator.le, - '>': operator.gt, '>=': operator.ge, + '>': operator.gt, '=': operator.eq, - '*=': operator.contains, - '^=': lambda attr, value: attr.startswith(value), - '$=': lambda attr, value: attr.endswith(value), } + operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-z_]+) \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| - (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| - (?P<strval>(?![0-9.])[a-z0-9A-Z]*) + (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)| + (?P<strval>.+?) 
) \s*$ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) @@ -4705,9 +4710,8 @@ def _match_one(filter_part, dct): if quote is not None: comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: - if m.group('op') in ('*=', '^=', '$='): - raise ValueError( - 'Operator %s only supports string values!' % m.group('op')) + if m.group('op') in STRING_OPERATORS: + raise ValueError('Operator %s only supports string values!' % m.group('op')) try: comparison_value = int(m.group('intval')) except ValueError: @@ -4743,7 +4747,8 @@ def match_str(filter_str, dct): """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ return all( - _match_one(filter_part, dct) for filter_part in filter_str.split('&')) + _match_one(filter_part.replace(r'\&', '&'), dct) + for filter_part in re.split(r'(?<!\\)&', filter_str))