aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author remitamine <remitamine@gmail.com> 2015-11-07 16:54:35 +0100
committer remitamine <remitamine@gmail.com> 2015-11-07 16:54:35 +0100
commit 3793090b1b1c1e3462b80dd3045a3573545cfb29 (patch)
tree 4a63e921b217008bcce519531428c0f6cf090354 /youtube_dl/extractor
parent 5d0f84d32cc038dd71673987cb6efaa85e953474 (diff)
download youtube-dl-3793090b1b1c1e3462b80dd3045a3573545cfb29.tar.xz
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/amp.py | 84
-rw-r--r-- youtube_dl/extractor/dramafever.py | 65
-rw-r--r-- youtube_dl/extractor/foxnews.py | 64
3 files changed, 105 insertions, 108 deletions
diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py
new file mode 100644
index 000000000..b573b9280
--- /dev/null
+++ b/youtube_dl/extractor/amp.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class AMPIE(InfoExtractor):
+    """Base extractor for sites that publish Akamai Adaptive Media Player (AMP) feeds."""
+
+    def _get_media_node(self, item, name, default=None):
+        """Return the 'media-<name>' node of a feed item.
+
+        The node is looked up in item['media-group'] first, then on the item
+        itself; as a last resort item[name] is returned, or `default` when
+        that key is absent as well.
+        """
+        media_name = 'media-%s' % name
+        media_group = item.get('media-group') or item
+        return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
+
+    @staticmethod
+    def _as_list(node):
+        """Normalize a feed node to a list.
+
+        The code handles three shapes: a single dict (wrapped in a list),
+        a list (returned as is) and a missing/empty node (empty list).
+        """
+        if isinstance(node, dict):
+            return [node]
+        return node or []
+
+    def _extract_feed_info(self, url):
+        """Download the Akamai AMP JSON feed at `url` and build an info dict.
+
+        Returns a dict with the keys 'id', 'title', 'description',
+        'thumbnails', 'subtitles', 'timestamp', 'duration' and 'formats'.
+        Raises KeyError when the feed has no ['channel']['item'] or the item
+        has no 'guid'.
+        """
+        item = self._download_json(
+            url, None,
+            'Downloading Akamai AMP feed',
+            'Unable to download Akamai AMP feed'
+        )['channel']['item']
+
+        video_id = item['guid']
+
+        thumbnails = []
+        for thumbnail_data in self._as_list(self._get_media_node(item, 'thumbnail')):
+            thumbnail = thumbnail_data['@attributes']
+            thumbnails.append({
+                'url': self._proto_relative_url(thumbnail['url'], 'http:'),
+                'width': int_or_none(thumbnail.get('width')),
+                'height': int_or_none(thumbnail.get('height')),
+            })
+
+        subtitles = {}
+        for subtitle_data in self._as_list(self._get_media_node(item, 'subTitle')):
+            subtitle = subtitle_data['@attributes']
+            lang = subtitle.get('lang') or 'en'
+            # Append instead of assigning so that a second track in the same
+            # language does not silently replace the first one.
+            subtitles.setdefault(lang, []).append({'url': subtitle['href']})
+
+        formats = []
+        media_content = self._as_list(self._get_media_node(item, 'content'))
+        for media_data in media_content:
+            media = media_data.get('@attributes', {})
+            media_url = media.get('url')
+            if not media_url:
+                # Nothing to download for this entry; skip it rather than fail.
+                continue
+            media_type = media.get('type')
+            if media_type == 'video/f4m':
+                f4m_formats = self._extract_f4m_formats(
+                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
+                    video_id, f4m_id='hds', fatal=False)
+                if f4m_formats:
+                    formats.extend(f4m_formats)
+            elif media_type == 'application/x-mpegURL':
+                m3u8_formats = self._extract_m3u8_formats(
+                    media_url, video_id, m3u8_id='hls', fatal=False)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
+            else:
+                # Any other type is treated as a direct media URL; the
+                # category label is optional and only used as format id.
+                category = media_data.get('media-category') or {}
+                formats.append({
+                    'format_id': category.get('@attributes', {}).get('label'),
+                    'url': media_url,
+                    'preference': 1,
+                    'vbr': int_or_none(media.get('bitrate')),
+                    'filesize': int_or_none(media.get('fileSize')),
+                })
+
+        self._sort_formats(formats)
+
+        # The duration is read from the first media entry, when there is one.
+        duration = None
+        if media_content:
+            duration = int_or_none(media_content[0].get('@attributes', {}).get('duration'))
+
+        return {
+            'id': video_id,
+            'title': self._get_media_node(item, 'title'),
+            'description': self._get_media_node(item, 'description'),
+            'thumbnails': thumbnails,
+            'subtitles': subtitles,
+            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index 38e6597c8..80a928827 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools
-from .common import InfoExtractor
+from .amp import AMPIE
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@@ -19,7 +19,7 @@ from ..utils import (
)
-class DramaFeverBaseIE(InfoExtractor):
+class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
@@ -80,60 +80,24 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
- }
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
- feed = self._download_json(
- 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
- video_id, 'Downloading episode JSON')['channel']['item']
+ info = self._extract_feed_info('http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
raise ExtractorError(
'Currently unavailable in your country.', expected=True)
raise
- media_group = feed.get('media-group', {})
-
- formats = []
- for media_content in media_group['media-content']:
- src = media_content.get('@attributes', {}).get('url')
- if not src:
- continue
- ext = determine_ext(src)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src, video_id, f4m_id='hds'))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id='hls'))
- else:
- formats.append({
- 'url': src,
- })
- self._sort_formats(formats)
-
- title = media_group.get('media-title')
- description = media_group.get('media-description')
- duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
- thumbnail = self._proto_relative_url(
- media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
- timestamp = parse_iso8601(feed.get('pubDate'), ' ')
-
- subtitles = {}
- for media_subtitle in media_group.get('media-subTitle', []):
- lang = media_subtitle.get('@attributes', {}).get('lang')
- href = media_subtitle.get('@attributes', {}).get('href')
- if not lang or not href:
- continue
- subtitles[lang] = [{
- 'ext': 'ttml',
- 'url': href,
- }]
-
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
@@ -146,21 +110,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
- subtitles.setdefault('English', []).append({
+ info.setdefault('subtitles', {}).setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
+ return info
class DramaFeverSeriesIE(DramaFeverBaseIE):
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index 3a4a59135..0cd0f9fa8 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -2,14 +2,14 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .amp import AMPIE
from ..utils import (
parse_iso8601,
int_or_none,
)
-class FoxNewsIE(InfoExtractor):
+class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
@@ -20,10 +20,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
- 'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+ 'description': '16-year-old girl is size of toddler',
'duration': 265,
- 'timestamp': 1304411491,
- 'upload_date': '20110503',
+ #'timestamp': 1304411491,
+ #'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -34,10 +34,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
- 'description': "Congressman discusses the president's executive action",
+ 'description': "Congressman discusses president's plan",
'duration': 292,
- 'timestamp': 1417662047,
- 'upload_date': '20141204',
+ #'timestamp': 1417662047,
+ #'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -56,48 +56,6 @@ class FoxNewsIE(InfoExtractor):
video_id = mobj.group('id')
host = mobj.group('host')
- video = self._download_json(
- 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
-
- item = video['channel']['item']
- title = item['title']
- description = item['description']
- timestamp = parse_iso8601(item['dc-date'])
-
- media_group = item['media-group']
- duration = None
- formats = []
- for media in media_group['media-content']:
- attributes = media['@attributes']
- video_url = attributes['url']
- if video_url.endswith('.f4m'):
- formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
- elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
- elif not video_url.endswith('.smil'):
- duration = int_or_none(attributes.get('duration'))
- formats.append({
- 'url': video_url,
- 'format_id': media['media-category']['@attributes']['label'],
- 'preference': 1,
- 'vbr': int_or_none(attributes.get('bitrate')),
- 'filesize': int_or_none(attributes.get('fileSize'))
- })
- self._sort_formats(formats)
-
- media_thumbnail = media_group['media-thumbnail']['@attributes']
- thumbnails = [{
- 'url': media_thumbnail['url'],
- 'width': int_or_none(media_thumbnail.get('width')),
- 'height': int_or_none(media_thumbnail.get('height')),
- }] if media_thumbnail else []
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
+ info = self._extract_feed_info('http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
+ info['id'] = video_id
+ return info