aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/extractor/__init__.py17
-rw-r--r--youtube_dl/extractor/aljazeera.py4
-rw-r--r--youtube_dl/extractor/bbc.py6
-rw-r--r--youtube_dl/extractor/brightcove.py176
-rw-r--r--youtube_dl/extractor/cbs.py12
-rw-r--r--youtube_dl/extractor/cbsnews.py5
-rw-r--r--youtube_dl/extractor/dumpert.py9
-rw-r--r--youtube_dl/extractor/funnyordie.py15
-rw-r--r--youtube_dl/extractor/generic.py33
-rw-r--r--youtube_dl/extractor/instagram.py9
-rw-r--r--youtube_dl/extractor/lynda.py6
-rw-r--r--youtube_dl/extractor/novamov.py43
-rw-r--r--youtube_dl/extractor/nowness.py6
-rw-r--r--youtube_dl/extractor/nowtv.py164
-rw-r--r--youtube_dl/extractor/nowvideo.py4
-rw-r--r--youtube_dl/extractor/periscope.py30
-rw-r--r--youtube_dl/extractor/ruutu.py15
-rw-r--r--youtube_dl/extractor/safari.py6
-rw-r--r--youtube_dl/extractor/space.py8
-rw-r--r--youtube_dl/extractor/tlc.py6
-rw-r--r--youtube_dl/extractor/twitter.py104
-rw-r--r--youtube_dl/extractor/vimeo.py63
-rw-r--r--youtube_dl/extractor/wsj.py1
-rw-r--r--youtube_dl/extractor/xfileshare.py (renamed from youtube_dl/extractor/gorillavid.py)30
-rw-r--r--youtube_dl/version.py2
25 files changed, 565 insertions, 209 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 0a90da73c..59c82f65d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -60,7 +60,10 @@ from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
-from .brightcove import BrightcoveIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@@ -221,7 +224,6 @@ from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
-from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
from .groupon import GrouponIE
from .hark import HarkIE
@@ -418,7 +420,10 @@ from .nowness import (
NownessPlaylistIE,
NownessSeriesIE,
)
-from .nowtv import NowTVIE
+from .nowtv import (
+ NowTVIE,
+ NowTVListIE,
+)
from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
@@ -456,10 +461,7 @@ from .orf import (
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
-from .periscope import (
- PeriscopeIE,
- QuickscopeIE,
-)
+from .periscope import PeriscopeIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
@@ -786,6 +788,7 @@ from .wrzuta import WrzutaIE
from .wsj import WSJIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py
index 184a14a4f..5b2c0dc9a 100644
--- a/youtube_dl/extractor/aljazeera.py
+++ b/youtube_dl/extractor/aljazeera.py
@@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
'uploader': 'Al Jazeera English',
},
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
'skip': 'Not accessible from Travis CI server',
}
@@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
'&%40videoPlayer={0}'.format(brightcove_id)
),
- 'ie_key': 'Brightcove',
+ 'ie_key': 'BrightcoveLegacy',
}
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index a55a6dbc9..33b296eaf 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -27,7 +27,7 @@ class BBCCoUkIE(InfoExtractor):
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails
# with geolocation in some cases when it's even not geo restricted at all (e.g.
- # http://www.bbc.co.uk/programmes/b06bp7lf)
+ # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
]
@@ -334,7 +334,7 @@ class BBCCoUkIE(InfoExtractor):
return self._download_media_selector_url(
mediaselector_url % programme_id, programme_id)
except BBCCoUkIE.MediaSelectionError as e:
- if e.id in ('notukerror', 'geolocation'):
+ if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
@@ -345,7 +345,7 @@ class BBCCoUkIE(InfoExtractor):
media_selection = self._download_xml(
url, programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
- if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
else:
raise
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 1686cdde1..14ee05f21 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -20,12 +20,17 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
+ float_or_none,
+ js_to_json,
+ int_or_none,
+ parse_iso8601,
unescapeHTML,
unsmuggle_url,
)
-class BrightcoveIE(InfoExtractor):
+class BrightcoveLegacyIE(InfoExtractor):
+ IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -346,3 +351,172 @@ class BrightcoveIE(InfoExtractor):
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info
+
+
+class BrightcoveNewIE(InfoExtractor):
+ IE_NAME = 'brightcove:new'
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
+ _TESTS = [{
+ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+ 'md5': 'c8100925723840d4b0d243f7025703be',
+ 'info_dict': {
+ 'id': '4463358922001',
+ 'ext': 'mp4',
+ 'title': 'Meet the man behind Popcorn Time',
+ 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+ 'duration': 165.768,
+ 'timestamp': 1441391203,
+ 'upload_date': '20150904',
+ 'uploader_id': '929656772001',
+ 'formats': 'mincount:22',
+ },
+ }, {
+ # with rtmp streams
+ 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
+ 'info_dict': {
+ 'id': '4279049078001',
+ 'ext': 'mp4',
+ 'title': 'Titansgrave: Chapter 0',
+ 'description': 'Titansgrave: Chapter 0',
+ 'duration': 1242.058,
+ 'timestamp': 1433556729,
+ 'upload_date': '20150606',
+ 'uploader_id': '4036320279001',
+ 'formats': 'mincount:41',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
+ # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
+ # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
+
+ entries = []
+
+ # Look for iframe embeds [1]
+ for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+ entries.append(url)
+
+ # Look for embed_in_page embeds [2]
+ for video_id, account_id, player_id, embed in re.findall(
+ # According to examples from [3] it's unclear whether video id
+ # may be optional and what to do when it is
+ r'''(?sx)
+ <video[^>]+
+ data-video-id=["\'](\d+)["\'][^>]*>.*?
+ </video>.*?
+ <script[^>]+
+ src=["\'](?:https?:)?//players\.brightcove\.net/
+ (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
+ ''', webpage):
+ entries.append(
+ 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+ % (account_id, player_id, embed, video_id))
+
+ return entries
+
+ def _real_extract(self, url):
+ account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(
+ 'http://players.brightcove.net/%s/%s_%s/index.min.js'
+ % (account_id, player_id, embed), video_id)
+
+ policy_key = None
+
+ catalog = self._search_regex(
+ r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+ if catalog:
+ catalog = self._parse_json(
+ js_to_json(catalog), video_id, fatal=False)
+ if catalog:
+ policy_key = catalog.get('policyKey')
+
+ if not policy_key:
+ policy_key = self._search_regex(
+ r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+ webpage, 'policy key', group='pk')
+
+ req = compat_urllib_request.Request(
+ 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
+ % (account_id, video_id),
+ headers={'Accept': 'application/json;pk=%s' % policy_key})
+ json_data = self._download_json(req, video_id)
+
+ title = json_data['name']
+
+ formats = []
+ for source in json_data.get('sources', []):
+ source_type = source.get('type')
+ src = source.get('src')
+ if source_type == 'application/x-mpegURL':
+ if not src:
+ continue
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ else:
+ streaming_src = source.get('streaming_src')
+ stream_name, app_name = source.get('stream_name'), source.get('app_name')
+ if not src and not streaming_src and (not stream_name or not app_name):
+ continue
+ tbr = float_or_none(source.get('avg_bitrate'), 1000)
+ height = int_or_none(source.get('height'))
+ f = {
+ 'tbr': tbr,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'filesize': int_or_none(source.get('size')),
+ 'container': source.get('container'),
+ 'vcodec': source.get('codec'),
+ 'ext': source.get('container').lower(),
+ }
+
+ def build_format_id(kind):
+ format_id = kind
+ if tbr:
+ format_id += '-%dk' % int(tbr)
+ if height:
+ format_id += '-%dp' % height
+ return format_id
+
+ if src or streaming_src:
+ f.update({
+ 'url': src or streaming_src,
+ 'format_id': build_format_id('http' if src else 'http-streaming'),
+ 'preference': 2 if src else 1,
+ })
+ else:
+ f.update({
+ 'url': app_name,
+ 'play_path': stream_name,
+ 'format_id': build_format_id('rtmp'),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ description = json_data.get('description')
+ thumbnail = json_data.get('thumbnail')
+ timestamp = parse_iso8601(json_data.get('published_at'))
+ duration = float_or_none(json_data.get('duration'), 1000)
+ tags = json_data.get('tags', [])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader_id': account_id,
+ 'formats': formats,
+ 'tags': tags,
+ }
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index 75fffb156..43f05d278 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -1,6 +1,8 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import smuggle_url
class CBSIE(InfoExtractor):
@@ -46,13 +48,19 @@ class CBSIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ request = compat_urllib_request.Request(url)
+ # Android UA is served with higher quality (720p) streams (see
+ # https://github.com/rg3/youtube-dl/issues/7490)
+ request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
+ webpage = self._download_webpage(request, display_id)
real_id = self._search_regex(
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
webpage, 'real video ID')
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
- 'url': 'theplatform:%s' % real_id,
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
+ {'force_smil_url': True}),
'display_id': display_id,
}
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index 52e61d85b..f9a64a0a2 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
'format_id': format_id,
}
if uri.startswith('rtmp'):
+ play_path = re.sub(
+ r'{slistFilePath}', '',
+ uri.split('<break>')[-1].split('{break}')[-1])
fmt.update({
'app': 'ondemand?auth=cbs',
- 'play_path': 'mp4:' + uri.split('<break>')[-1],
+ 'play_path': 'mp4:' + play_path,
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
'page_url': 'http://www.cbsnews.com',
'ext': 'flv',
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
index 1f00386fe..f5a31058d 100644
--- a/youtube_dl/extractor/dumpert.py
+++ b/youtube_dl/extractor/dumpert.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import base64
+import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
@@ -9,7 +10,7 @@ from ..utils import qualities
class DumpertIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+ _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -26,9 +27,11 @@ class DumpertIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ protocol = mobj.group('protocol')
- url = 'https://www.dumpert.nl/mediabase/' + video_id
+ url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index f5f13689c..7f21d7410 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
- bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
- bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
- bitrates.sort()
+ m3u8_url = self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
+ webpage, 'm3u8 url', default=None, group='url')
formats = []
+
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+
+ bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
+ bitrates.sort()
+
for bitrate in bitrates:
for link in links:
formats.append({
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index d0b486d2a..51516a38a 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -30,7 +30,10 @@ from ..utils import (
url_basename,
xpath_text,
)
-from .brightcove import BrightcoveIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
@@ -275,7 +278,7 @@ class GenericIE(InfoExtractor):
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
'id': '2765128793001',
@@ -299,7 +302,7 @@ class GenericIE(InfoExtractor):
'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
},
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
},
{
'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -314,7 +317,7 @@ class GenericIE(InfoExtractor):
},
{
# https://github.com/rg3/youtube-dl/issues/3541
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
'id': '3866516442001',
@@ -1031,6 +1034,17 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'cinemasnob',
},
+ },
+ # BrightcoveInPageEmbed embed
+ {
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
}
]
@@ -1290,14 +1304,14 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
- # Look for BrightCove:
- bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+ # Look for Brightcove Legacy Studio embeds
+ bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
self.to_screen('Brightcove video detected.')
entries = [{
'_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
- 'ie_key': 'Brightcove'
+ 'ie_key': 'BrightcoveLegacy'
} for bc_url in bc_urls]
return {
@@ -1307,6 +1321,11 @@ class GenericIE(InfoExtractor):
'entries': entries,
}
+ # Look for Brightcove New Studio embeds
+ bc_urls = BrightcoveNewIE._extract_urls(webpage)
+ if bc_urls:
+ return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 3d78f78c4..fce179000 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -10,8 +10,8 @@ from ..utils import (
class InstagramIE(InfoExtractor):
- _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
- _TEST = {
+ _VALID_URL = r'https://instagram\.com/p/(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'info_dict': {
@@ -21,7 +21,10 @@ class InstagramIE(InfoExtractor):
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
- }
+ }, {
+ 'url': 'https://instagram.com/p/-Cmh1cukG2/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 9a207b2cd..3d7e7e003 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -25,7 +25,7 @@ class LyndaBaseIE(InfoExtractor):
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
@@ -83,6 +83,10 @@ class LyndaBaseIE(InfoExtractor):
raise ExtractorError('Unable to log in')
def _logout(self):
+ username, _ = self._get_login_info()
+ if username is None:
+ return
+
self._download_webpage(
'http://www.lynda.com/ajax/logout.aspx', None,
'Logging out', 'Unable to log out', fatal=False)
diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py
index 04d779890..6b15fc2e5 100644
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -4,10 +4,14 @@ import re
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
+ NO_DEFAULT,
+ encode_dict,
+ urlencode_postdata,
)
@@ -38,19 +42,40 @@ class NovaMovIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
- page = self._download_webpage(
- 'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
+ url = 'http://%s/video/%s' % (self._HOST, video_id)
- if re.search(self._FILE_DELETED_REGEX, page) is not None:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading video page')
- filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
+ if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
- description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
+ def extract_filekey(default=NO_DEFAULT):
+ return self._search_regex(
+ self._FILEKEY_REGEX, webpage, 'filekey', default=default)
+
+ filekey = extract_filekey(default=None)
+
+ if not filekey:
+ fields = self._hidden_inputs(webpage)
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
+ 'post url', default=url, group='url')
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(url, post_url)
+ request = compat_urllib_request.Request(
+ post_url, urlencode_postdata(encode_dict(fields)))
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ request.add_header('Referer', post_url)
+ webpage = self._download_webpage(
+ request, video_id, 'Downloading continue to the video page')
+
+ filekey = extract_filekey()
+
+ title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
+ description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
api_response = self._download_webpage(
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py
index b97f62fdb..0fba55833 100644
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -1,7 +1,7 @@
# encoding: utf-8
from __future__ import unicode_literals
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import (
@@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
note='Downloading player JavaScript',
errnote='Unable to download player JavaScript')
- bc_url = BrightcoveIE._extract_brightcove_url(player_code)
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None:
raise ExtractorError('Could not find player definition')
- return self.url_result(bc_url, 'Brightcove')
+ return self.url_result(bc_url, 'BrightcoveLegacy')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index b0bdffc4e..67e34b294 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -13,8 +15,63 @@ from ..utils import (
)
-class NowTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
+class NowTVBaseIE(InfoExtractor):
+ _VIDEO_FIELDS = (
+ 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
+ 'broadcastStartDate', 'seoUrl', 'duration', 'files',
+ 'format.defaultImage169Format', 'format.defaultImage169Logo')
+
+ def _extract_video(self, info, display_id=None):
+ video_id = compat_str(info['id'])
+
+ files = info['files']
+ if not files:
+ if info.get('geoblocked', False):
+ raise ExtractorError(
+ 'Video %s is not available from your location due to geo restriction' % video_id,
+ expected=True)
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+
+ formats = []
+ for item in files['items']:
+ if determine_ext(item['path']) != 'f4v':
+ continue
+ app, play_path = remove_start(item['path'], '/').split('/', 1)
+ formats.append({
+ 'url': 'rtmpe://fms.rtl.de',
+ 'app': app,
+ 'play_path': 'mp4:%s' % play_path,
+ 'ext': 'flv',
+ 'page_url': 'http://rtlnow.rtl.de',
+ 'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
+ 'tbr': int_or_none(item.get('bitrate')),
+ })
+ self._sort_formats(formats)
+
+ title = info['title']
+ description = info.get('articleLong') or info.get('articleShort')
+ timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+ duration = parse_duration(info.get('duration'))
+
+ f = info.get('format', {})
+ thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id or info.get('seoUrl'),
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class NowTVIE(NowTVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
# rtl
@@ -23,7 +80,7 @@ class NowTVIE(InfoExtractor):
'id': '203519',
'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
'ext': 'flv',
- 'title': 'Die neuen Bauern und eine Hochzeit',
+ 'title': 'Inka Bause stellt die neuen Bauern vor',
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1432580700,
@@ -136,58 +193,65 @@ class NowTVIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
- display_id_split = display_id.split('/')
- if len(display_id) > 2:
- display_id = '/'.join((display_id_split[0], display_id_split[-1]))
+ mobj = re.match(self._VALID_URL, url)
+ display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
info = self._download_json(
- 'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
- display_id)
+ 'https://api.nowtv.de/v3/movies/%s?fields=%s'
+ % (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
- video_id = compat_str(info['id'])
+ return self._extract_video(info, display_id)
- files = info['files']
- if not files:
- if info.get('geoblocked', False):
- raise ExtractorError(
- 'Video %s is not available from your location due to geo restriction' % video_id,
- expected=True)
- if not info.get('free', True):
- raise ExtractorError(
- 'Video %s is not available for free' % video_id, expected=True)
- formats = []
- for item in files['items']:
- if determine_ext(item['path']) != 'f4v':
- continue
- app, play_path = remove_start(item['path'], '/').split('/', 1)
- formats.append({
- 'url': 'rtmpe://fms.rtl.de',
- 'app': app,
- 'play_path': 'mp4:%s' % play_path,
- 'ext': 'flv',
- 'page_url': 'http://rtlnow.rtl.de',
- 'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
- 'tbr': int_or_none(item.get('bitrate')),
- })
- self._sort_formats(formats)
+class NowTVListIE(NowTVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
- title = info['title']
- description = info.get('articleLong') or info.get('articleShort')
- timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
- duration = parse_duration(info.get('duration'))
+ _SHOW_FIELDS = ('title', )
+ _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
- f = info.get('format', {})
- thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+ _TESTS = [{
+ 'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
+ 'info_dict': {
+ 'id': '17006',
+ 'title': 'stern TV - Aktuell',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
+ 'info_dict': {
+ 'id': '20716',
+ 'title': 'Das Supertalent - FREE Staffel 8',
+ },
+ 'playlist_count': 14,
+ }]
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- }
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ show_id = mobj.group('show_id')
+ season_id = mobj.group('id')
+
+ fields = []
+ fields.extend(self._SHOW_FIELDS)
+ fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
+ fields.extend(
+ 'formatTabs.formatTabPages.container.movies.%s' % field
+ for field in self._VIDEO_FIELDS)
+
+ list_info = self._download_json(
+ 'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
+ % (','.join(fields), show_id),
+ season_id)
+
+ season = next(
+ season for season in list_info['formatTabs']['items']
+ if season.get('seoheadline') == season_id)
+
+ title = '%s - %s' % (list_info['title'], season['headline'])
+
+ entries = []
+ for container in season['formatTabPages']['items']:
+ for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
+ entries.append(self._extract_video(info))
+
+ return self.playlist_result(
+ entries, compat_str(season.get('id') or season_id), title)
diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py
index 17baa9679..57ee3d366 100644
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@@ -7,9 +7,9 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
- _HOST = 'www.nowvideo.ch'
+ _HOST = 'www.nowvideo.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
index 887c8020d..63cc764bb 100644
--- a/youtube_dl/extractor/periscope.py
+++ b/youtube_dl/extractor/periscope.py
@@ -2,16 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
from ..utils import parse_iso8601
class PeriscopeIE(InfoExtractor):
IE_DESC = 'Periscope'
- _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -29,6 +25,9 @@ class PeriscopeIE(InfoExtractor):
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
+ }, {
+ 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
+ 'only_matching': True,
}]
def _call_api(self, method, value):
@@ -81,24 +80,3 @@ class PeriscopeIE(InfoExtractor):
'thumbnails': thumbnails,
'formats': formats,
}
-
-
-class QuickscopeIE(InfoExtractor):
- IE_DESC = 'Quick Scope'
- _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://watchonperiscope.com/broadcast/56180087',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- broadcast_id = self._match_id(url)
- request = compat_urllib_request.Request(
- 'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({
- 'broadcast_id': broadcast_id,
- 'entry_ticket': '',
- 'from_push': 'false',
- 'uses_sessions': 'true',
- }).encode('utf-8'))
- return self.url_result(
- self._download_json(request, broadcast_id)['share_url'], 'Periscope')
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index a16b73ff4..e417bf661 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -57,16 +57,21 @@ class RuutuIE(InfoExtractor):
extract_formats(child)
elif child.tag.endswith('File'):
video_url = child.text
- if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
+ if (not video_url or video_url in processed_urls or
+ any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
return
processed_urls.append(video_url)
ext = determine_ext(video_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id='hls'))
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id='hds'))
+ f4m_formats = self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
else:
proto = compat_urllib_parse_urlparse(video_url).scheme
if not child.tag.startswith('HTTP') and proto != 'rtmp':
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index a602af692..e9e33d0a3 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
from ..compat import (
compat_urllib_parse,
@@ -112,11 +112,11 @@ class SafariIE(SafariBaseIE):
'%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
part)
- bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if not bc_url:
raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
- return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+ return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
class SafariCourseIE(SafariBaseIE):
diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py
index c2d0d36a6..ebb5d6ec0 100644
--- a/youtube_dl/extractor/space.py
+++ b/youtube_dl/extractor/space.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = {
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
'info_dict': {
'id': '2780937028001',
@@ -31,8 +31,8 @@ class SpaceIE(InfoExtractor):
brightcove_url = self._og_search_video_url(webpage)
except RegexNotFoundError:
# Other videos works fine with the info from the object
- brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+ brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if brightcove_url is None:
raise ExtractorError(
'The webpage does not contain a video', expected=True)
- return self.url_result(brightcove_url, BrightcoveIE.ie_key())
+ return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py
index 13263614c..d6d038a8d 100644
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
from .discovery import DiscoveryIE
from ..compat import compat_urlparse
@@ -66,6 +66,6 @@ class TlcDeIE(InfoExtractor):
return {
'_type': 'url',
- 'url': BrightcoveIE._extract_brightcove_url(iframe),
- 'ie': BrightcoveIE.ie_key(),
+ 'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
+ 'ie': BrightcoveLegacyIE.ie_key(),
}
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 9d3e46b94..055047340 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -9,6 +9,8 @@ from ..utils import (
float_or_none,
xpath_text,
remove_end,
+ int_or_none,
+ ExtractorError,
)
@@ -18,7 +20,7 @@ class TwitterCardIE(InfoExtractor):
_TESTS = [
{
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
- 'md5': '7d2f6b4d2eb841a7ccc893d479bfceb4',
+ 'md5': '4fa26a35f9d1bf4b646590ba8e84be19',
'info_dict': {
'id': '560070183650213889',
'ext': 'mp4',
@@ -50,6 +52,20 @@ class TwitterCardIE(InfoExtractor):
'uploader': 'OMG! Ubuntu!',
'uploader_id': 'omgubuntu',
},
+ 'add_ie': ['Youtube'],
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
+ 'md5': 'ab2745d0b0ce53319a534fccaa986439',
+ 'info_dict': {
+ 'id': 'iBb2x00UVlv',
+ 'ext': 'mp4',
+ 'upload_date': '20151113',
+ 'uploader_id': '1189339351084113920',
+ 'uploader': '@ArsenalTerje',
+ 'title': 'Vine by @ArsenalTerje',
+ },
+ 'add_ie': ['Vine'],
}
]
@@ -69,11 +85,11 @@ class TwitterCardIE(InfoExtractor):
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(request, video_id)
- youtube_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
- webpage, 'youtube iframe', default=None)
- if youtube_url:
- return self.url_result(youtube_url, 'Youtube')
+ iframe_url = self._html_search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+ webpage, 'video iframe', default=None)
+ if iframe_url:
+ return self.url_result(iframe_url)
config = self._parse_json(self._html_search_regex(
r'data-player-config="([^"]+)"', webpage, 'data player config'),
@@ -120,9 +136,9 @@ class TwitterIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
- _TEST = {
+ _TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
- 'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
+ 'md5': 'db6612ec5d03355953c3ca9250c97e5e',
'info_dict': {
'id': '643211948184596480',
'ext': 'mp4',
@@ -133,7 +149,30 @@ class TwitterIE(InfoExtractor):
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
},
- }
+ }, {
+ 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
+ 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
+ 'info_dict': {
+ 'id': '657991469417025536',
+ 'ext': 'mp4',
+ 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
+ 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
+ 'thumbnail': 're:^https?://.*\.png',
+ 'uploader': 'Gifs',
+ 'uploader_id': 'giphz',
+ },
+ }, {
+ 'url': 'https://twitter.com/starwars/status/665052190608723968',
+ 'md5': '39b7199856dee6cd4432e72c74bc69d4',
+ 'info_dict': {
+ 'id': '665052190608723968',
+ 'ext': 'mp4',
+ 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
+ 'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."',
+ 'uploader_id': 'starwars',
+ 'uploader': 'Star Wars',
+ },
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -144,23 +183,46 @@ class TwitterIE(InfoExtractor):
username = remove_end(self._og_search_title(webpage), ' on Twitter')
- title = self._og_search_description(webpage).strip('').replace('\n', ' ')
+ title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
- mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
- title, short_url = mobj.groups()
-
- card_id = self._search_regex(
- r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
- card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+ title = re.sub(r'\s+(https?://[^ ]+)', '', title)
- return {
- '_type': 'url_transparent',
- 'ie_key': 'TwitterCard',
+ info = {
'uploader_id': user_id,
'uploader': username,
- 'url': card_url,
'webpage_url': url,
- 'description': '%s on Twitter: "%s %s"' % (username, title, short_url),
+ 'description': '%s on Twitter: "%s"' % (username, description),
'title': username + ' - ' + title,
}
+
+ card_id = self._search_regex(
+ r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
+ if card_id:
+ card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'TwitterCard',
+ 'url': card_url,
+ })
+ return info
+
+ mobj = re.search(r'''(?x)
+ <video[^>]+class="animated-gif"[^>]+
+ (?:data-height="(?P<height>\d+)")?[^>]+
+ (?:data-width="(?P<width>\d+)")?[^>]+
+ (?:poster="(?P<poster>[^"]+)")?[^>]*>\s*
+ <source[^>]+video-src="(?P<url>[^"]+)"
+ ''', webpage)
+
+ if mobj:
+ info.update({
+ 'id': twid,
+ 'url': mobj.group('url'),
+ 'height': int_or_none(mobj.group('height')),
+ 'width': int_or_none(mobj.group('width')),
+ 'thumbnail': mobj.group('poster'),
+ })
+ return info
+
+ raise ExtractorError('There\'s not video in this tweet.')
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index ca716c8f5..b72341a2b 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -49,8 +49,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
}))
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- login_request.add_header('Cookie', 'vuid=%s' % vuid)
login_request.add_header('Referer', self._LOGIN_URL)
+ self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info')
def _extract_xsrft_and_vuid(self, webpage):
@@ -62,6 +62,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
webpage, 'vuid', group='vuid')
return xsrft, vuid
+ def _set_vimeo_cookie(self, name, value):
+ self._set_cookie('vimeo.com', name, value)
+
class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
@@ -217,8 +220,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = url.replace('http://', 'https://')
password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
password_request.add_header('Referer', url)
+ self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
@@ -384,47 +387,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
like_count = None
comment_count = None
- # Vimeo specific: extract request signature and timestamp
- sig = config['request']['signature']
- timestamp = config['request']['timestamp']
-
- # Vimeo specific: extract video codec and quality information
- # First consider quality, then codecs, then take everything
- codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
- files = {'hd': [], 'sd': [], 'other': []}
- config_files = config["video"].get("files") or config["request"].get("files")
- for codec_name, codec_extension in codecs:
- for quality in config_files.get(codec_name, []):
- format_id = '-'.join((codec_name, quality)).lower()
- key = quality if quality in files else 'other'
- video_url = None
- if isinstance(config_files[codec_name], dict):
- file_info = config_files[codec_name][quality]
- video_url = file_info.get('url')
- else:
- file_info = {}
- if video_url is None:
- video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- % (video_id, sig, timestamp, quality, codec_name.upper())
-
- files[key].append({
- 'ext': codec_extension,
- 'url': video_url,
- 'format_id': format_id,
- 'width': int_or_none(file_info.get('width')),
- 'height': int_or_none(file_info.get('height')),
- 'tbr': int_or_none(file_info.get('bitrate')),
- })
formats = []
- m3u8_url = config_files.get('hls', {}).get('all')
+ config_files = config['video'].get('files') or config['request'].get('files', {})
+ for f in config_files.get('progressive', []):
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http-%s' % f.get('quality'),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'fps': int_or_none(f.get('fps')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ })
+ m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
- for key in ('other', 'sd', 'hd'):
- formats += files[key]
- self._sort_formats(formats)
+ # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+ # at the same time without actual units specified. This lead to wrong sorting.
+ self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
subtitles = {}
text_tracks = config['request'].get('text_tracks')
@@ -494,8 +479,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_url = compat_urlparse.urljoin(page_url, password_path)
password_request = compat_urllib_request.Request(password_url, post)
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
- password_request.add_header('Cookie', 'vuid=%s' % vuid)
- self._set_cookie('vimeo.com', 'xsrft', token)
+ self._set_vimeo_cookie('vuid', vuid)
+ self._set_vimeo_cookie('xsrft', token)
return self._download_webpage(
password_request, list_id,
diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py
index 2ddf29a69..5a897371d 100644
--- a/youtube_dl/extractor/wsj.py
+++ b/youtube_dl/extractor/wsj.py
@@ -84,6 +84,5 @@ class WSJIE(InfoExtractor):
'duration': duration,
'upload_date': upload_date,
'title': title,
- 'formats': formats,
'categories': categories,
}
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/xfileshare.py
index d23e3eac1..952515c98 100644
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/xfileshare.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -15,11 +15,11 @@ from ..utils import (
)
-class GorillaVidIE(InfoExtractor):
- IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com'
+class XFileShareIE(InfoExtractor):
+ IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
- (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/
+ (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
@@ -76,6 +76,13 @@ class GorillaVidIE(InfoExtractor):
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
'thumbnail': 're:http://.*\.jpg',
}
+ }, {
+ 'url': 'http://vidto.me/ku5glz52nqe1.html',
+ 'info_dict': {
+ 'id': 'ku5glz52nqe1',
+ 'ext': 'mp4',
+ 'title': 'test'
+ }
}]
def _real_extract(self, url):
@@ -104,13 +111,18 @@ class GorillaVidIE(InfoExtractor):
webpage = self._download_webpage(req, video_id, 'Downloading video page')
- title = self._search_regex(
- [r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '],
- webpage, 'title', default=None) or self._og_search_title(webpage)
+ title = (self._search_regex(
+ [r'style="z-index: [0-9]+;">([^<]+)</span>',
+ r'<td nowrap>([^<]+)</td>',
+ r'>Watch (.+) ',
+ r'<h2 class="video-page-head">([^<]+)</h2>'],
+ webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
video_url = self._search_regex(
- r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
+ [r'file\s*:\s*["\'](http[^"\']+)["\'],',
+ r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'],
+ webpage, 'file url')
thumbnail = self._search_regex(
- r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
+ r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index b3d254005..6585d60d5 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.11.10'
+__version__ = '2015.11.13'