diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-05-01 07:15:23 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-05-01 07:15:23 +0600 |
commit | 68bb2fef9565159eba4a47f464b6b420cf2d5cda (patch) | |
tree | abd7689fa7090838ac5bb8ca9ff30f0458e22b63 | |
parent | 854cc54bc1d0488d8fa88bd5dfed6f7f8981847e (diff) |
[tagesschau] Restrict playlist entry regex
-rw-r--r-- | youtube_dl/extractor/tagesschau.py | 6 |
1 file changed, 5 insertions, 1 deletion
```diff
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index 499bd260b..136e18f96 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/100sekunden/index.html',
         'only_matching': True,
+    }, {
+        # playlist article with collapsing sections
+        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -275,7 +279,7 @@ class TagesschauIE(InfoExtractor):
         if webpage_type == 'website':  # Article
             entries = []
             for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
-                    r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
                     webpage), 1):
                 entries.append({
                     'id': '%s-%d' % (display_id, num),
```