about summary refs log tree commit diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2015-05-08 22:50:01 +0600
committer: Sergey M․ <dstftw@gmail.com> 2015-05-08 22:50:01 +0600
commit: 2c0c9dc46cda490137b6788d6d66f31ca092f58f (patch)
tree: c4074b4b2eefe5125933d0e03f1fc258acf7835f /youtube_dl/extractor
parent: 0ceab8474924c4e7a6e28497c8da40cc5002c8d3 (diff)
[xstream] Move xstream to separate extractor
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/xstream.py 115
2 files changed, 116 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 587a45940..5cc35c8eb 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -653,6 +653,7 @@ from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
+from .xstream import XstreamIE
from .xvideos import XVideosIE
from .xtube import XTubeUserIE, XTubeIE
from .xuite import XuiteIE
diff --git a/youtube_dl/extractor/xstream.py b/youtube_dl/extractor/xstream.py
new file mode 100644
index 000000000..71584c291
--- /dev/null
+++ b/youtube_dl/extractor/xstream.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class XstreamIE(InfoExtractor):
+    """Extractor for videos published through the xstream frontend Atom feed.
+
+    Matches either an ``xstream:<partner_id>:<id>`` reference or a
+    ``frontend.xstream.(dk|net)/<partner_id>/feed/video/?...id=<id>`` URL.
+    """
+
+    _VALID_URL = r'''(?x)
+                    (?:
+                        xstream:|
+                        https?://frontend\.xstream\.(?:dk|net)/
+                    )
+                    (?P<partner_id>[^/]+)
+                    (?:
+                        :|
+                        /feed/video/\?.*?\bid=
+                    )
+                    (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
+        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+        'info_dict': {
+            'id': '86588',
+            'ext': 'mov',
+            'title': 'Otto Wollertsen',
+            'description': 'Vestlendingen Otto Fredrik Wollertsen',
+            'timestamp': 1430473209,
+            'upload_date': '20150501',
+        },
+    }, {
+        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Fetch the partner feed entry for the video and build its info dict.
+
+        Returns a dict with the keys ``id``, ``title``, ``description``,
+        ``timestamp``, ``formats`` and ``thumbnails``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        partner_id = mobj.group('partner_id')
+        video_id = mobj.group('id')
+
+        # The feed is always requested from the .dk frontend, whichever of
+        # the accepted URL forms matched.
+        data = self._download_xml(
+            'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
+            % (partner_id, video_id),
+            video_id)
+
+        NS_MAP = {
+            'atom': 'http://www.w3.org/2005/Atom',
+            'xt': 'http://xstream.dk/',
+            'media': 'http://search.yahoo.com/mrss/',
+        }
+
+        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+        title = xpath_text(
+            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+        description = xpath_text(
+            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+        timestamp = parse_iso8601(xpath_text(
+            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+        # find() yields None when the entry has no <media:group>; treat that
+        # as "no media, no splash images" instead of crashing on None.findall.
+        if media_group is None:
+            media_contents = []
+            splashes = []
+        else:
+            media_contents = media_group.findall(
+                xpath_with_ns('./media:content', NS_MAP))
+            splashes = media_group.findall(
+                xpath_with_ns('./xt:splash', NS_MAP))
+
+        formats = []
+        for media_content in media_contents:
+            media_url = media_content.get('url')
+            if not media_url:
+                continue
+            tbr = int_or_none(media_content.get('bitrate'))
+            # Separate name so the URL match held in `mobj` is not clobbered.
+            rtmp_mobj = re.search(
+                r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$',
+                media_url)
+            if rtmp_mobj:
+                formats.append({
+                    'url': rtmp_mobj.group('url'),
+                    'play_path': 'mp4:%s' % rtmp_mobj.group('playpath'),
+                    'app': rtmp_mobj.group('app'),
+                    'ext': 'flv',
+                    'tbr': tbr,
+                    # tbr may be None (e.g. no bitrate attribute), and
+                    # '%d' % None raises TypeError, so use a bare id then.
+                    'format_id': (
+                        'rtmp-%d' % tbr if tbr is not None else 'rtmp'),
+                })
+            else:
+                formats.append({
+                    'url': media_url,
+                    'tbr': tbr,
+                })
+        self._sort_formats(formats)
+
+        # The 'original' link is appended after sorting, so it always ends up
+        # last in the list regardless of how the other formats were ordered.
+        link = find_xpath_attr(
+            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+        if link is not None:
+            formats.append({
+                'url': link.get('href'),
+                'format_id': link.get('rel'),
+            })
+
+        thumbnails = [{
+            'url': splash.get('url'),
+            'width': int_or_none(splash.get('width')),
+            'height': int_or_none(splash.get('height')),
+        } for splash in splashes]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }