aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Remita Amine <remitamine@gmail.com> 2019-01-09 14:23:26 +0100
committer Remita Amine <remitamine@gmail.com> 2019-01-09 14:23:26 +0100
commit 3c1089dba41f137dcc6c373daacdc19a24aef81c (patch)
tree 3fe78b540f8da252f7cc2384dd2e45b4c044d98b
parent 6089ff40e7cc7710e399db1be87fea103a190ee6 (diff)
[gaia] Add new extractor(#14605)
-rw-r--r-- youtube_dl/extractor/extractors.py 1
-rw-r--r-- youtube_dl/extractor/gaia.py 98
2 files changed, 99 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ddeb70284..c7d04d366 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -411,6 +411,7 @@ from .funk import (
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
+from .gaia import GaiaIE
from .gameinformer import GameInformerIE
from .gameone import (
GameOneIE,
diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py
new file mode 100644
index 000000000..f2eef3f4c
--- /dev/null
+++ b/youtube_dl/extractor/gaia.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ try_get,
+)
+
+
+class GaiaIE(InfoExtractor):
+    """Extractor for gaia.com video pages opened in the full player.
+
+    Only URLs that carry ``fullplayer=feature`` or ``fullplayer=preview``
+    are matched; that value selects which media entry of the node is
+    extracted.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
+    _TESTS = [{
+        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
+        'info_dict': {
+            'id': '89356',
+            'ext': 'mp4',
+            'title': 'Connecting with Universal Consciousness',
+            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+            'upload_date': '20151116',
+            'timestamp': 1447707266,
+            'duration': 936,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
+        'info_dict': {
+            'id': '89351',
+            'ext': 'mp4',
+            'title': 'Connecting with Universal Consciousness',
+            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+            'upload_date': '20151116',
+            'timestamp': 1447707266,
+            'duration': 53,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the media selected by *url*.
+
+        Three JSON documents are requested from brooklyn.gaia.com: the path
+        info (URL slug -> node id), the node (title and metadata) and the
+        media (HLS manifest URL and text tracks).  The node id, the entry
+        for the requested player type, its ``nid``, the node title and the
+        ``bcHLS`` URL are read by subscript and are therefore required;
+        every other field is optional and falls back to ``None``.
+        """
+        # Group 1 is the URL slug, group 2 the player type
+        # ('feature' or 'preview').
+        display_id, vtype = re.search(self._VALID_URL, url).groups()
+        node_id = self._download_json(
+            'https://brooklyn.gaia.com/pathinfo', display_id, query={
+                'path': 'video/' + display_id,
+            })['id']
+        # %s instead of %d: identical output for an integer id, and an id
+        # delivered as a string no longer raises TypeError.
+        node = self._download_json(
+            'https://brooklyn.gaia.com/node/%s' % node_id, node_id)
+        # The node holds one entry per player type; it provides the media
+        # id ('nid') and the duration of that variant.
+        vdata = node[vtype]
+        media_id = compat_str(vdata['nid'])
+        title = node['title']
+
+        media = self._download_json(
+            'https://brooklyn.gaia.com/media/' + media_id, media_id)
+        formats = self._extract_m3u8_formats(
+            media['mediaUrls']['bcHLS'], media_id, 'mp4')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        # Text tracks are optional metadata.  `or {}` also covers keys that
+        # are present but null, so a missing track map cannot abort the
+        # extraction of an otherwise playable video.
+        text_tracks = media.get('textTracks') or {}
+        for key in ('captions', 'subtitles'):
+            for lang, sub_url in (text_tracks.get(key) or {}).items():
+                if not sub_url:
+                    # Skip languages listed without a usable URL.
+                    continue
+                subtitles.setdefault(lang, []).append({
+                    'url': sub_url,
+                })
+
+        # Both are only read through try_get below, which tolerates
+        # missing or null containers.
+        fivestar = node.get('fivestar', {})
+        fields = node.get('fields', {})
+
+        def get_field_value(key, value_key='value'):
+            # Each field is read as fields[key][0][value_key]; any missing
+            # level yields None.
+            return try_get(fields, lambda x: x[key][0][value_key])
+
+        return {
+            'id': media_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
+            'timestamp': int_or_none(node.get('created')),
+            'subtitles': subtitles,
+            'duration': int_or_none(vdata.get('duration')),
+            'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
+            'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
+            'comment_count': int_or_none(node.get('comment_count')),
+            'series': try_get(node, lambda x: x['series']['title'], compat_str),
+            'season_number': int_or_none(get_field_value('season')),
+            'season_id': str_or_none(get_field_value('series_nid', 'nid')),
+            'episode_number': int_or_none(get_field_value('episode')),
+        }