From cea364f70c97dad933fa38698f3c9df1bdb485cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:40:28 +0800 Subject: [extractor/common] Support HTML media elements without child nodes --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da192728f..431cef831 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1802,7 +1802,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + media_tags.extend(re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {}, -- cgit v1.2.3