diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-06-11 21:27:32 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-06-11 21:30:06 +0700 | 
| commit | 28a4d6cce89d159f9fe701e6cf716c7e3ffa4415 (patch) | |
| tree | 00c968a43a4553b7abc81d0eebb3a87fbf2eb031 | |
| parent | 2ae2ffda5eae9c64d40d2fec839ba5deb07717f2 (diff) | |
[newgrounds] Extract more metadata (closes #13232)
| -rw-r--r-- | youtube_dl/extractor/newgrounds.py | 38 | 
1 file changed, 33 insertions, 5 deletions
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index dc183caec..0e26f8399 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -6,6 +6,9 @@ from .common import InfoExtractor  from ..utils import (      extract_attributes,      int_or_none, +    parse_duration, +    parse_filesize, +    unified_timestamp,  ) @@ -19,7 +22,10 @@ class NewgroundsIE(InfoExtractor):              'ext': 'mp3',              'title': 'B7 - BusMode',              'uploader': 'Burn7', -        } +            'timestamp': 1378878540, +            'upload_date': '20130911', +            'duration': 143, +        },      }, {          'url': 'https://www.newgrounds.com/portal/view/673111',          'md5': '3394735822aab2478c31b1004fe5e5bc', @@ -28,6 +34,8 @@ class NewgroundsIE(InfoExtractor):              'ext': 'mp4',              'title': 'Dancin',              'uploader': 'Squirrelman82', +            'timestamp': 1460256780, +            'upload_date': '20160410',          },      }, {          # source format unavailable, additional mp4 formats @@ -37,6 +45,8 @@ class NewgroundsIE(InfoExtractor):              'ext': 'mp4',              'title': 'ZTV News Episode 8',              'uploader': 'BennettTheSage', +            'timestamp': 1487965140, +            'upload_date': '20170224',          },          'params': {              'skip_download': True, @@ -51,11 +61,11 @@ class NewgroundsIE(InfoExtractor):          title = self._html_search_regex(              r'<title>([^>]+)</title>', webpage, 'title') -        video_url = self._parse_json(self._search_regex( +        media_url = self._parse_json(self._search_regex(              r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)          formats = [{ -            'url': video_url, +            'url': media_url,              'format_id': 'source',              'quality': 1,          }] @@ -64,7 +74,7 @@ class NewgroundsIE(InfoExtractor):              
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',              default=None))          if max_resolution: -            url_base = video_url.rpartition('.')[0] +            url_base = media_url.rpartition('.')[0]              for resolution in (360, 720, 1080):                  if resolution > max_resolution:                      break @@ -77,14 +87,32 @@ class NewgroundsIE(InfoExtractor):          self._check_formats(formats, media_id)          self._sort_formats(formats) -        uploader = self._html_search_regex( +        uploader = self._search_regex(              r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',              fatal=False) +        timestamp = unified_timestamp(self._search_regex( +            r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp', +            default=None)) +        duration = parse_duration(self._search_regex( +            r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration', +            default=None)) + +        filesize_approx = parse_filesize(self._html_search_regex( +            r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize', +            default=None)) +        if len(formats) == 1: +            formats[0]['filesize_approx'] = filesize_approx + +        if '<dd>Song' in webpage: +            formats[0]['vcodec'] = 'none' +          return {              'id': media_id,              'title': title,              'uploader': uploader, +            'timestamp': timestamp, +            'duration': duration,              'formats': formats,          }  | 
