diff options
Diffstat (limited to 'youtube_dl/extractor/esri.py')
| -rw-r--r-- | youtube_dl/extractor/esri.py | 74 | 
1 files changed, 74 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py new file mode 100644 index 000000000..bf5d2019f --- /dev/null +++ b/youtube_dl/extractor/esri.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( +    int_or_none, +    parse_filesize, +    unified_strdate, +) + + +class EsriVideoIE(InfoExtractor): +    _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' +    _TEST = { +        'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', +        'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc', +        'info_dict': { +            'id': '1124', +            'ext': 'mp4', +            'title': 'ArcGIS Online - Developing Applications', +            'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 185, +            'upload_date': '20120419', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        formats = [] +        for width, height, content in re.findall( +                r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage): +            for video_url, ext, filesize in re.findall( +                    r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content): +                formats.append({ +                    'url': compat_urlparse.urljoin(url, video_url), +                    'ext': ext.lower(), +                    'format_id': '%s-%s' % (ext.lower(), height), +                    'width': int(width), +                    'height': int(height), +                    'filesize_approx': parse_filesize(filesize), +                }) +        self._sort_formats(formats) + +        title = self._html_search_meta('title', webpage, 'title') +        description = self._html_search_meta( +            'description', webpage, 'description', fatal=False) + +        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False) +        if thumbnail: +            thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail) + +        duration = int_or_none(self._search_regex( +            [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"], +            webpage, 'duration', fatal=False)) + +        upload_date = unified_strdate(self._html_search_meta( +            'last-modified', webpage, 'upload date', fatal=None)) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'upload_date': upload_date, +            'formats': formats +        } | 
