diff options
author | Sergey M․ <dstftw@gmail.com> | 2018-02-09 21:17:02 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2018-02-09 21:17:02 +0700 |
commit | 9e167e1ee311b43dbe9c3911bd0a79f0db15efef (patch) | |
tree | 5fe8bc2c0816c6639ea7cfbe716f40e2d8685fac | |
parent | 5828489072e97f7e29a611ce018ab62cf1c2d4d9 (diff) |
[newgrounds] Fix metadata extraction (closes #15531)
-rw-r--r-- | youtube_dl/extractor/newgrounds.py | 16 |
1 file changed, 9 insertions, 7 deletions
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
index 0e26f8399..82e7cf522 100644
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
         self._check_formats(formats, media_id)
         self._sort_formats(formats)
 
-        uploader = self._search_regex(
-            r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+        uploader = self._html_search_regex(
+            (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
             fatal=False)
 
-        timestamp = unified_timestamp(self._search_regex(
-            r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+        timestamp = unified_timestamp(self._html_search_regex(
+            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
             default=None))
         duration = parse_duration(self._search_regex(
-            r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
-            default=None))
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+            'duration', default=None))
 
         filesize_approx = parse_filesize(self._html_search_regex(
-            r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
             default=None))
         if len(formats) == 1:
             formats[0]['filesize_approx'] = filesize_approx