aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRemita Amine <remitamine@gmail.com>2019-11-06 19:56:10 +0100
committerRemita Amine <remitamine@gmail.com>2019-11-06 19:56:10 +0100
commit55adb63e5412fa5556be22e97d61b8d27c7a5e67 (patch)
treefc9a5d7473670b2b64e6038b62c4c09fc062449c
parentd64ec1242e9dec03ea2aa86b6e913db78c8619e0 (diff)
downloadyoutube-dl-55adb63e5412fa5556be22e97d61b8d27c7a5e67.tar.xz
[kinja] add support for Kinja embeds
closes #5756 closes #11282 closes #22237 closes #22384
-rw-r--r--youtube_dl/extractor/extractors.py1
-rw-r--r--youtube_dl/extractor/generic.py17
-rw-r--r--youtube_dl/extractor/kinja.py221
-rw-r--r--youtube_dl/extractor/onionstudios.py54
4 files changed, 241 insertions, 52 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9f43b284d..9e3b554fa 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -513,6 +513,7 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 1c0780e98..3d919f656 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -119,6 +119,7 @@ from .viqeo import ViqeoIE
from .expressen import ExpressenIE
from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE
+from .kinja import KinjaEmbedIE
class GenericIE(InfoExtractor):
@@ -1487,16 +1488,18 @@ class GenericIE(InfoExtractor):
'timestamp': 1432570283,
},
},
- # OnionStudios embed
+ # Kinja embed
{
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
'info_dict': {
- 'id': '2855',
+ 'id': '106351',
'ext': 'mp4',
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
'thumbnail': r're:^https?://.*\.jpe?g$',
- 'uploader': 'ClickHole',
- 'uploader_id': 'clickhole',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
}
},
# SnagFilms embed
@@ -2894,6 +2897,12 @@ class GenericIE(InfoExtractor):
if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP')
+ # Look for Kinja embeds
+ kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
+ if kinja_embed_urls:
+ return self.playlist_from_matches(
+ kinja_embed_urls, video_id, video_title)
+
# Look for OnionStudios embeds
onionstudios_url = OnionStudiosIE._extract_url(webpage)
if onionstudios_url:
diff --git a/youtube_dl/extractor/kinja.py b/youtube_dl/extractor/kinja.py
new file mode 100644
index 000000000..79e3026d2
--- /dev/null
+++ b/youtube_dl/extractor/kinja.py
@@ -0,0 +1,221 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class KinjaEmbedIE(InfoExtractor):
+ IENAME = 'kinja:embed'
+ _DOMAIN_REGEX = r'''(?:[^.]+\.)?
+ (?:
+ avclub|
+ clickhole|
+ deadspin|
+ gizmodo|
+ jalopnik|
+ jezebel|
+ kinja|
+ kotaku|
+ lifehacker|
+ splinternews|
+ the(?:inventory|onion|root|takeout)
+ )\.com'''
+ _COMMON_REGEX = r'''/
+ (?:
+ ajax/inset|
+ embed/video
+ )/iframe\?.*?\bid='''
+ _VALID_URL = r'''(?x)https?://%s%s
+ (?P<type>
+ fb|
+ imgur|
+ instagram|
+ jwp(?:layer)?-video|
+ kinjavideo|
+ mcp|
+ megaphone|
+ ooyala|
+ soundcloud(?:-playlist)?|
+ tumblr-post|
+ twitch-stream|
+ twitter|
+ ustream-channel|
+ vimeo|
+ vine|
+ youtube-(?:list|video)
+ )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _TESTS = [{
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
+ 'only_matching': True,
+ }]
+ _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
+ _PROVIDER_MAP = {
+ 'fb': ('facebook.com/video.php?v=', 'Facebook'),
+ 'imgur': ('imgur.com/', 'Imgur'),
+ 'instagram': ('instagram.com/p/', 'Instagram'),
+ 'jwplayer-video': _JWPLATFORM_PROVIDER,
+ 'jwp-video': _JWPLATFORM_PROVIDER,
+ 'megaphone': ('player.megaphone.fm/', 'Generic'),
+ 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
+ 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
+ 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
+ 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
+ 'twitch-stream': ('twitch.tv/', 'TwitchStream'),
+ 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
+ 'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
+ 'vimeo': ('vimeo.com/', 'Vimeo'),
+ 'vine': ('vine.co/v/', 'Vine'),
+ 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
+ 'youtube-video': ('youtube.com/embed/', 'Youtube'),
+ }
+
+ @staticmethod
+ def _extract_urls(webpage, url):
+ return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
+ r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_type, video_id = re.match(self._VALID_URL, url).groups()
+
+ provider = self._PROVIDER_MAP.get(video_type)
+ if provider:
+ video_id = compat_urllib_parse_unquote(video_id)
+ if video_type == 'tumblr-post':
+ video_id, blog = video_id.split('-', 1)
+ result_url = provider[0] % (blog, video_id)
+ elif video_type == 'youtube-list':
+ video_id, playlist_id = video_id.split('/')
+ result_url = provider[0] % (video_id, playlist_id)
+ else:
+ if video_type == 'ooyala':
+ video_id = video_id.split('/')[0]
+ result_url = provider[0] + video_id
+ return self.url_result('http://' + result_url, provider[1])
+
+ if video_type == 'kinjavideo':
+ data = self._download_json(
+ 'https://kinja.com/api/core/video/views/videoById',
+ video_id, query={'videoId': video_id})['data']
+ title = data['title']
+
+ formats = []
+ for k in ('signedPlaylist', 'streaming'):
+ m3u8_url = data.get(k + 'Url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnail = None
+ poster = data.get('poster') or {}
+ poster_id = poster.get('id')
+ if poster_id:
+ thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'formats': formats,
+ 'tags': data.get('tags'),
+ 'timestamp': int_or_none(try_get(
+ data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
+ 'thumbnail': thumbnail,
+ 'uploader': data.get('network'),
+ }
+ else:
+ video_data = self._download_json(
+ 'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
+ video_id)['videoMetadata']
+ iptc = video_data['photoVideoMetadataIPTC']
+ title = iptc['title']['en']
+ fmg = video_data.get('photoVideoMetadata_fmg') or {}
+ tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
+ data = self._download_json(
+ tvss_domain + '/api/v3/video-auth/url-signature-tokens',
+ video_id, query={'mcpids': video_id})['data'][0]
+ formats = []
+
+ rendition_url = data.get('renditionUrl')
+ if rendition_url:
+ formats = self._extract_m3u8_formats(
+ rendition_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ fallback_rendition_url = data.get('fallbackRenditionUrl')
+ if fallback_rendition_url:
+ formats.append({
+ 'format_id': 'fallback',
+ 'tbr': int_or_none(self._search_regex(
+ r'_(\d+)\.mp4', fallback_rendition_url,
+ 'bitrate', default=None)),
+ 'url': fallback_rendition_url,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
+ 'uploader': fmg.get('network'),
+ 'duration': int_or_none(iptc.get('fileDuration')),
+ 'formats': formats,
+ 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
+ 'timestamp': parse_iso8601(iptc.get('dateReleased')),
+ }
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py
index 7f8c6f0d3..cf5c39e66 100644
--- a/youtube_dl/extractor/onionstudios.py
+++ b/youtube_dl/extractor/onionstudios.py
@@ -4,13 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
- compat_str,
- int_or_none,
- js_to_json,
- parse_iso8601,
- try_get,
-)
+from ..compat import compat_str
+from ..utils import js_to_json
class OnionStudiosIE(InfoExtractor):
@@ -20,7 +15,7 @@ class OnionStudiosIE(InfoExtractor):
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': '5a118d466d62b5cd03647cf2c593977f',
'info_dict': {
- 'id': '2937',
+ 'id': '3459881',
'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail',
'description': 'md5:545299bda6abf87e5ec666548c6a9448',
@@ -53,43 +48,6 @@ class OnionStudiosIE(InfoExtractor):
mcp_id = compat_str(self._parse_json(self._search_regex(
r'window\.mcpMapping\s*=\s*({.+?});', webpage,
'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
- video_data = self._download_json(
- 'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id,
- mcp_id)['videoMetadata']
- iptc = video_data['photoVideoMetadataIPTC']
- title = iptc['title']['en']
- fmg = video_data.get('photoVideoMetadata_fmg') or {}
- tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
- data = self._download_json(
- tvss_domain + '/api/v3/video-auth/url-signature-tokens',
- mcp_id, query={'mcpids': mcp_id})['data'][0]
- formats = []
-
- rendition_url = data.get('renditionUrl')
- if rendition_url:
- formats = self._extract_m3u8_formats(
- rendition_url, mcp_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False)
-
- fallback_rendition_url = data.get('fallbackRenditionUrl')
- if fallback_rendition_url:
- formats.append({
- 'format_id': 'fallback',
- 'tbr': int_or_none(self._search_regex(
- r'_(\d+)\.mp4', fallback_rendition_url,
- 'bitrate', default=None)),
- 'url': fallback_rendition_url,
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
- 'uploader': fmg.get('network'),
- 'duration': int_or_none(iptc.get('fileDuration')),
- 'formats': formats,
- 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
- 'timestamp': parse_iso8601(iptc.get('dateReleased')),
- }
+ return self.url_result(
+ 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
+ 'KinjaEmbed', mcp_id)