about summary refs log tree commit diff
diff options
context:
space:
mode:
author remitamine <remitamine@gmail.com> 2016-03-18 21:17:45 +0100
committer remitamine <remitamine@gmail.com> 2016-03-18 21:18:18 +0100
commit 4cd70099ea79a4a82b26694937ca46d31f7436ca (patch)
tree 1300022f93117d5f383698ff4dc99956dea92616
parent 09fc33198a4cfc93a98ce1ba7d51d41c487e5f56 (diff)
[hbo] Add new extractor
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/hbo.py 122
2 files changed, 123 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index acc0b03bd..529051a93 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -284,6 +284,7 @@ from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
+from .hbo import HBOIE
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py
new file mode 100644
index 000000000..dad0f3994
--- /dev/null
+++ b/youtube_dl/extractor/hbo.py
@@ -0,0 +1,122 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ xpath_element,
+ int_or_none,
+ parse_duration,
+)
+
+
+class HBOIE(InfoExtractor):
+    """Extractor for hbo.com video pages.
+
+    The numeric ``vid`` query parameter identifies the video. Its metadata
+    is read from an XML document on render.lv3.hbo.com and turned into
+    HTTP, RTMP and HLS formats plus a list of thumbnails.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'info_dict': {
+            'id': '1437839',
+            'ext': 'mp4',
+            'title': 'Ep. 64 Clip: Encryption',
+        }
+    }
+    # Dimensions looked up by either the ``width`` attribute of a <size>
+    # source or the tag name of any other source element.
+    # NOTE(review): '1920' maps to 1280x720 and '640' to 768x432; presumably
+    # the labels used by the feed do not match the real frame size -- confirm.
+    _FORMATS_INFO = {
+        '1920': {
+            'width': 1280,
+            'height': 720,
+        },
+        '640': {
+            'width': 768,
+            'height': 432,
+        },
+        'highwifi': {
+            'width': 640,
+            'height': 360,
+        },
+        'high3g': {
+            'width': 640,
+            'height': 360,
+        },
+        'medwifi': {
+            'width': 400,
+            'height': 224,
+        },
+        'med3g': {
+            'width': 400,
+            'height': 224,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict (id, title, duration, formats, thumbnails) for *url*."""
+        video_id = self._match_id(url)
+        video_data = self._download_xml(
+            'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
+        title = xpath_text(video_data, 'title', 'title', True)
+
+        formats = []
+        for source in xpath_element(video_data, 'videos', 'sources', True):
+            if source.tag == 'size':
+                # <size> sources keep the media location in a nested <path>.
+                path = xpath_text(source, './/path')
+                if not path:
+                    continue
+                width = source.attrib.get('width')
+                format_info = self._FORMATS_INFO.get(width, {})
+                height = format_info.get('height')
+                fmt = {
+                    'url': path,
+                    'format_id': 'http%s' % ('-%dp' % height if height else ''),
+                    'width': format_info.get('width'),
+                    'height': height,
+                }
+                # An rtmp(e) location is split into connection URL, app and
+                # play path, and the format is relabelled from http to rtmp.
+                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
+                if rtmp:
+                    fmt.update({
+                        'url': rtmp.group('url'),
+                        'play_path': rtmp.group('playpath'),
+                        'app': rtmp.group('app'),
+                        'ext': 'flv',
+                        'format_id': fmt['format_id'].replace('http', 'rtmp'),
+                    })
+                formats.append(fmt)
+            else:
+                # Every other source element holds its URL as element text.
+                video_url = source.text
+                if not video_url:
+                    continue
+                if source.tag == 'tarball':
+                    # The HLS playlist URL is derived from the tarball URL.
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url.replace('.tar', '/base_index_w8.m3u8'),
+                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+                else:
+                    format_info = self._FORMATS_INFO.get(source.tag, {})
+                    formats.append({
+                        'format_id': 'http-%s' % source.tag,
+                        'url': video_url,
+                        'width': format_info.get('width'),
+                        'height': format_info.get('height'),
+                    })
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+        thumbnails = []
+        card_sizes = xpath_element(video_data, 'titleCardSizes')
+        if card_sizes is not None:
+            for size in card_sizes:
+                path = xpath_text(size, 'path')
+                if not path:
+                    continue
+                width = int_or_none(size.get('width'))
+                thumbnails.append({
+                    'id': width,
+                    'url': path,
+                    'width': width,
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            # parse_duration works on text, so hand it the element's text
+            # (xpath_text) rather than the Element itself (xpath_element),
+            # which would leave the duration unset.
+            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }