# coding: utf-8
# Module state after applying, in order:
#   3793090b1b1c (remitamine, 2015-11-07) "[amp] Add generic extractor for
#       Akamai AMP feeds and use it in dramafever and foxnews extractors"
#   c7fa5fa42cb9 (remitamine, 2015-12-21) "[bleacherreport] fix style issues
#       and simplify"
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_iso8601,
)


class AMPIE(InfoExtractor):
    """Shared base for extractors that read Akamai AMP JSON feeds."""

    # parse Akamai Adaptive Media Player feed
    def _extract_feed_info(self, url):
        """Download the AMP feed at *url* and build an info dict from it.

        The feed is JSON; the single entry under ``channel.item`` is used.
        Media nodes (``thumbnail``, ``subTitle``, ``content``, ``title``,
        ``description``) are looked up under ``media-group`` first, then on
        the item itself, with and without the ``media-`` prefix.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnails``, ``subtitles``, ``timestamp``, ``duration`` and
        ``formats``.

        Raises ``KeyError`` if the feed lacks ``channel``, ``item`` or
        ``guid``; download and format-sorting errors come from the
        inherited helpers.
        """
        item = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')['channel']['item']

        video_id = item['guid']

        def get_media_node(name, default=None):
            media_name = 'media-%s' % name
            media_group = item.get('media-group') or item
            return (
                media_group.get(media_name) or
                item.get(media_name) or
                item.get(name, default))

        def as_list(node):
            # A node may be a single dict or a list of dicts; a missing
            # node becomes an empty list so callers can always iterate.
            if not node:
                return []
            return [node] if isinstance(node, dict) else node

        thumbnails = []
        for thumbnail_data in as_list(get_media_node('thumbnail')):
            thumbnail = thumbnail_data.get('@attributes') or {}
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                # One malformed thumbnail must not abort the extraction.
                continue
            thumbnails.append({
                'url': self._proto_relative_url(thumbnail_url, 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in as_list(get_media_node('subTitle')):
            subtitle = subtitle_data.get('@attributes') or {}
            subtitle_url = subtitle.get('href')
            if not subtitle_url:
                continue
            lang = subtitle.get('lang') or 'en'
            # Accumulate rather than assign, so several tracks for the
            # same language are all kept.
            subtitles.setdefault(lang, []).append({'url': subtitle_url})

        formats = []
        media_content = as_list(get_media_node('content'))
        for media_data in media_content:
            media = media_data.get('@attributes') or {}
            media_url = media.get('url')
            if not media_url:
                continue
            media_type = media.get('type')
            if media_type == 'video/f4m':
                # Use '&' when the manifest URL already has a query string.
                separator = '&' if '?' in media_url else '?'
                f4m_formats = self._extract_f4m_formats(
                    media_url + separator +
                    'hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            elif media_type == 'application/x-mpegURL':
                m3u8_formats = self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                # The category label is optional, so a missing one gives
                # format_id None instead of a KeyError.
                category = media_data.get('media-category') or {}
                formats.append({
                    'format_id': (category.get('@attributes') or {}).get('label'),
                    'url': media_url,
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                })

        self._sort_formats(formats)

        # Duration is read from the first content entry, if there is one.
        duration = None
        if media_content:
            first_attributes = media_content[0].get('@attributes') or {}
            duration = int_or_none(first_attributes.get('duration'))

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
            'duration': duration,
            'formats': formats,
        }