diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-10-12 01:40:28 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-10-12 01:40:28 +0800 |
commit | cea364f70c97dad933fa38698f3c9df1bdb485cf (patch) | |
tree | fd5f43969b383f15474506bc4939b5da23d40692 /youtube_dl/extractor | |
parent | 55642487f072565bea3b2826b836a1a3159a3807 (diff) |
[extractor/common] Support HTML media elements without child nodes
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 6 |
1 file changed, 5 insertions, 1 deletion
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index da192728f..431cef831 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1802,7 +1802,11 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
-        for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+        media_tags = [(media_tag, media_type, '')
+                      for media_tag, media_type
+                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
                 'subtitles': {},
```