Diffstat (limited to 'youtube_dl')
-rwxr-xr-x  youtube_dl/YoutubeDL.py                   6
-rw-r--r--  youtube_dl/extractor/__init__.py          7
-rw-r--r--  youtube_dl/extractor/animeondemand.py   160
-rw-r--r--  youtube_dl/extractor/cbc.py             113
-rw-r--r--  youtube_dl/extractor/comcarcoff.py       16
-rw-r--r--  youtube_dl/extractor/comedycentral.py     9
-rw-r--r--  youtube_dl/extractor/common.py           18
-rw-r--r--  youtube_dl/extractor/crackle.py          95
-rw-r--r--  youtube_dl/extractor/fox.py               5
-rw-r--r--  youtube_dl/extractor/generic.py          19
-rw-r--r--  youtube_dl/extractor/hotstar.py          12
-rw-r--r--  youtube_dl/extractor/nbc.py               2
-rw-r--r--  youtube_dl/extractor/pbs.py              61
-rw-r--r--  youtube_dl/extractor/plays.py            51
-rw-r--r--  youtube_dl/extractor/theplatform.py       5
-rw-r--r--  youtube_dl/extractor/viddler.py          50
-rw-r--r--  youtube_dl/extractor/vimeo.py             2
-rw-r--r--  youtube_dl/extractor/youku.py             3
-rw-r--r--  youtube_dl/extractor/youtube.py          20
-rw-r--r--  youtube_dl/utils.py                       2
-rw-r--r--  youtube_dl/version.py                     2
21 files changed, 587 insertions, 71 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 50228bb32..0072c7d35 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1288,6 +1288,9 @@ class YoutubeDL(object):
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
+ else:
+ # Sanitize format_id from characters used in format selector expression
+ format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
format_id = format['format_id']
if format_id not in formats_dict:
formats_dict[format_id] = []
@@ -1338,7 +1341,6 @@ class YoutubeDL(object):
if req_format is None:
req_format_list = []
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- info_dict['extractor'] in ['youtube', 'ted'] and
not info_dict.get('is_live')):
merger = FFmpegMergerPP(self)
if merger.available and merger.can_merge():
@@ -1795,7 +1797,7 @@ class YoutubeDL(object):
else:
res = '%sp' % format['height']
elif format.get('width') is not None:
- res = '?x%d' % format['width']
+ res = '%dx?' % format['width']
else:
res = default
return res
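
The sanitization added above keeps format_ids usable inside format selector expressions such as "bestvideo[height<=480]+bestaudio". A quick sketch of its effect on a made-up format_id (the input string is invented, the pattern is the one from the change):

    import re

    # Characters that carry meaning in format selectors (whitespace, commas,
    # slashes, plus signs, brackets, parentheses) are replaced with '_'.
    print(re.sub(r'[\s,/+\[\]()]', '_', 'hls 640x360 (eng)'))
    # -> hls_640x360__eng_
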
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 18951c287..6937f28d3 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -20,6 +20,7 @@ from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
+from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anysex import AnySexIE
from .aol import AolIE
@@ -89,6 +90,10 @@ from .camdemy import (
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .canvas import CanvasIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+)
from .cbs import CBSIE
from .cbsnews import (
CBSNewsIE,
@@ -126,6 +131,7 @@ from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
+from .crackle import CrackleIE
from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import (
@@ -533,6 +539,7 @@ from .planetaplay import PlanetaPlayIE
from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
+from .plays import PlaysTVIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py
new file mode 100644
index 000000000..a7d8daf7b
--- /dev/null
+++ b/youtube_dl/extractor/animeondemand.py
@@ -0,0 +1,160 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ determine_ext,
+ encode_dict,
+ ExtractorError,
+ sanitized_Request,
+ urlencode_postdata,
+)
+
+
+class AnimeOnDemandIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
+ _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
+ _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
+ _NETRC_MACHINE = 'animeondemand'
+ _TEST = {
+ 'url': 'https://www.anime-on-demand.de/anime/161',
+ 'info_dict': {
+ 'id': '161',
+ 'title': 'Grimgar, Ashes and Illusions (OmU)',
+ 'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
+ },
+ 'playlist_mincount': 4,
+ }
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._form_hidden_inputs('new_user', login_page)
+
+ login_form.update({
+ 'user[login]': username,
+ 'user[password]': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ request = sanitized_Request(
+ post_url, urlencode_postdata(encode_dict(login_form)))
+ request.add_header('Referer', self._LOGIN_URL)
+
+ response = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
+ error = self._search_regex(
+ r'<p class="alert alert-danger">(.+?)</p>',
+ response, 'error', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ anime_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, anime_id)
+
+ if 'data-playlist=' not in webpage:
+ self._download_webpage(
+ self._APPLY_HTML5_URL, anime_id,
+ 'Activating HTML5 beta', 'Unable to apply HTML5 beta')
+ webpage = self._download_webpage(url, anime_id)
+
+ csrf_token = self._html_search_meta(
+ 'csrf-token', webpage, 'csrf token', fatal=True)
+
+ anime_title = self._html_search_regex(
+ r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
+ webpage, 'anime name')
+ anime_description = self._html_search_regex(
+ r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
+ webpage, 'anime description', default=None)
+
+ entries = []
+
+ for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
+ m = re.search(
+ r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
+ if not m:
+ continue
+
+ episode_number = int(m.group('number'))
+ episode_title = m.group('title')
+ video_id = 'episode-%d' % episode_number
+
+ common_info = {
+ 'id': video_id,
+ 'series': anime_title,
+ 'episode': episode_title,
+ 'episode_number': episode_number,
+ }
+
+ formats = []
+
+ playlist_url = self._search_regex(
+ r'data-playlist=(["\'])(?P<url>.+?)\1',
+ episode_html, 'data playlist', default=None, group='url')
+ if playlist_url:
+ request = sanitized_Request(
+ compat_urlparse.urljoin(url, playlist_url),
+ headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-CSRF-Token': csrf_token,
+ 'Referer': url,
+ 'Accept': 'application/json, text/javascript, */*; q=0.01',
+ })
+
+ playlist = self._download_json(
+ request, video_id, 'Downloading playlist JSON', fatal=False)
+ if playlist:
+ playlist = playlist['playlist'][0]
+ title = playlist['title']
+ description = playlist.get('description')
+ for source in playlist.get('sources', []):
+ file_ = source.get('file')
+ if file_ and determine_ext(file_) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ file_, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ if formats:
+ f = common_info.copy()
+ f.update({
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ })
+ entries.append(f)
+
+ m = re.search(
+ r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
+ episode_html)
+ if m:
+ f = common_info.copy()
+ f.update({
+ 'id': '%s-teaser' % f['id'],
+ 'title': m.group('title'),
+ 'url': compat_urlparse.urljoin(url, m.group('href')),
+ })
+ entries.append(f)
+
+ return self.playlist_result(entries, anime_id, anime_title, anime_description)
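
The episode loop in the new AnimeOnDemand extractor reads the number and name out of each episode box's title attribute. A minimal sketch of that regex on a hypothetical snippet of page markup (the episode name is invented):

    import re

    snippet = '<h3 class="episodebox-title" title="Episode 3 - Whispers in the Dark">'
    m = re.search(
        r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"',
        snippet)
    print(m.group('number'), m.group('title'))
    # -> 3 Whispers in the Dark
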
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
new file mode 100644
index 000000000..d8aa31038
--- /dev/null
+++ b/youtube_dl/extractor/cbc.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class CBCIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # with mediaId
+ 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
+ 'info_dict': {
+ 'id': '2682904050',
+ 'ext': 'flv',
+ 'title': 'Don Cherry – All-Stars',
+ 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
+ 'timestamp': 1454475540,
+ 'upload_date': '20160203',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ # with clipId
+ 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
+ 'info_dict': {
+ 'id': '2487345465',
+ 'ext': 'flv',
+ 'title': 'Robin Williams freestyles on 90 Minutes Live',
+ 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
+ 'upload_date': '19700101',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ # multiple iframes
+ 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2680832926',
+ 'ext': 'flv',
+ 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
+ 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
+ 'upload_date': '19700101',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '2658915080',
+ 'ext': 'flv',
+ 'title': 'Fly like an eagle!',
+ 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
+ 'upload_date': '19700101',
+ },
+ }],
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_init = self._search_regex(
+ r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
+ default=None)
+ if player_init:
+ player_info = self._parse_json(player_init, display_id, js_to_json)
+ media_id = player_info.get('mediaId')
+ if not media_id:
+ clip_id = player_info['clipId']
+ media_id = self._download_json(
+ 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+ clip_id)['entries'][0]['id'].split('/')[-1]
+ return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+ else:
+ entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
+ return self.playlist_result(entries)
+
+
+class CBCPlayerIE(InfoExtractor):
+ _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.cbc.ca/player/play/2683190193',
+ 'info_dict': {
+ 'id': '2683190193',
+ 'ext': 'flv',
+ 'title': 'Gerry Runs a Sweat Shop',
+ 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
+ 'timestamp': 1455067800,
+ 'upload_date': '20160210',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
+ 'ThePlatformFeed', video_id)
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 2efa200b5..7dff68492 100644
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
_TESTS = [{
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
'info_dict': {
- 'id': 'miranda-sings-happy-thanksgiving-miranda',
+ 'id': '2494164',
'ext': 'mp4',
'upload_date': '20141127',
'timestamp': 1417107600,
'duration': 1232,
'title': 'Happy Thanksgiving Miranda',
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
- 'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
},
'params': {
'skip_download': 'requires ffmpeg',
@@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
display_id)['videoData']
- video_id = full_data['activeVideo']['video']
- video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
+ display_id = full_data['activeVideo']['video']
+ video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
+ video_id = compat_str(video_data['mediaId'])
thumbnails = [{
'url': video_data['images']['thumb'],
}, {
'url': video_data['images']['poster'],
}]
- formats = self._extract_m3u8_formats(
- video_data['mediaUrl'], video_id, ext='mp4')
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
video_data.get('pubDate'))
@@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
video_data.get('duration'))
return {
+ '_type': 'url_transparent',
+ 'url': 'crackle:%s' % video_id,
'id': video_id,
'display_id': display_id,
'title': video_data['title'],
@@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
'timestamp': timestamp,
'duration': duration,
'thumbnails': thumbnails,
- 'formats': formats,
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
}
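
The returned dict above is a url_transparent result: the media itself is now resolved through the new Crackle extractor via the crackle: pseudo-scheme, while ComCarCoff keeps contributing its own metadata. A small check, assuming the _VALID_URL that crackle.py introduces further down:

    import re

    CRACKLE_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    # 'crackle:2494164' (the mediaId from the test above) hits the first alternative
    print(re.match(CRACKLE_VALID_URL, 'crackle:2494164').group('id'))
    # -> 2494164
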
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 3e4bd10b6..055c9eec5 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -16,11 +16,11 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (video-clips|episodes|cc-studios|video-collections|full-episodes)
+ (video-clips|episodes|cc-studios|video-collections|full-episodes|shows)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': {
@@ -29,7 +29,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
},
- }
+ }, {
+ 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+ 'only_matching': True,
+ }]
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1143f6dbb..444d412d9 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1186,12 +1186,13 @@ class InfoExtractor(object):
http_count = 0
m3u8_count = 0
- src_urls = []
+ srcs = []
videos = smil.findall(self._xpath_ns('.//video', namespace))
for video in videos:
src = video.get('src')
- if not src:
+ if not src or src in srcs:
continue
+ srcs.append(src)
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
filesize = int_or_none(video.get('size') or video.get('fileSize'))
@@ -1223,9 +1224,7 @@ class InfoExtractor(object):
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
- if src_url in src_urls:
- continue
- src_urls.append(src_url)
+ src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
@@ -1436,12 +1435,16 @@ class InfoExtractor(object):
base_url = base_url_e.text + base_url
if re.match(r'^https?://', base_url):
break
- if not re.match(r'^https?://', base_url):
+ if mpd_base_url and not re.match(r'^https?://', base_url):
+ if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
+ mpd_base_url += '/'
base_url = mpd_base_url + base_url
representation_id = representation_attrib.get('id')
lang = representation_attrib.get('lang')
+ url_el = representation.find(_add_ns('BaseURL'))
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
f = {
- 'format_id': mpd_id or representation_id,
+ 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'url': base_url,
'width': int_or_none(representation_attrib.get('width')),
'height': int_or_none(representation_attrib.get('height')),
@@ -1452,6 +1455,7 @@ class InfoExtractor(object):
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
+ 'filesize': filesize,
}
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
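
The BaseURL handling above now only prepends mpd_base_url when one was actually passed in, and inserts the missing slash between the two halves. A rough sketch of the joining behaviour with made-up URLs:

    import re

    def join_mpd_base(mpd_base_url, base_url):
        # Mirrors the new logic: join only when base_url is relative, and
        # add a '/' if neither side provides one.
        if mpd_base_url and not re.match(r'^https?://', base_url):
            if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
                mpd_base_url += '/'
            base_url = mpd_base_url + base_url
        return base_url

    print(join_mpd_base('http://example.com/dash', 'video/init.mp4'))
    # -> http://example.com/dash/video/init.mp4
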
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py
new file mode 100644
index 000000000..79238cce7
--- /dev/null
+++ b/youtube_dl/extractor/crackle.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class CrackleIE(InfoExtractor):
+ _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.crackle.com/the-art-of-more/2496419',
+ 'info_dict': {
+ 'id': '2496419',
+ 'ext': 'mp4',
+ 'title': 'Heavy Lies the Head',
+ 'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
+ _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
+ _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
+ _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
+
+ # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
+ _MEDIA_FILE_SLOTS = {
+ 'c544.flv': {
+ 'width': 544,
+ 'height': 306,
+ },
+ '360p.mp4': {
+ 'width': 640,
+ 'height': 360,
+ },
+ '480p.mp4': {
+ 'width': 852,
+ 'height': 478,
+ },
+ '480p_1mbps.mp4': {
+ 'width': 852,
+ 'height': 478,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ item = self._download_xml(
+ 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
+ video_id).find('i')
+ title = item.attrib['t']
+
+ thumbnail = None
+ subtitles = {}
+ formats = self._extract_m3u8_formats(
+ 'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
+ video_id, 'mp4', m3u8_id='hls', fatal=None)
+ path = item.attrib.get('p')
+ if path:
+ thumbnail = self._THUMBNAIL_TEMPLATE % path
+ http_base_url = 'http://ahttp.crackle.com/' + path
+ for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
+ formats.append({
+ 'url': http_base_url + mfs_path,
+ 'format_id': 'http-' + mfs_path.split('.')[0],
+ 'width': mfs_info['width'],
+ 'height': mfs_info['height'],
+ })
+ for cc in item.findall('cc'):
+ locale = cc.attrib.get('l')
+ v = cc.attrib.get('v')
+ if locale and v:
+ if locale not in subtitles:
+ subtitles[locale] = []
+ subtitles[locale] = [{
+ 'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
+ 'ext': 'ttml',
+ }]
+ self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.attrib.get('d'),
+ 'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
+ 'series': item.attrib.get('sn'),
+ 'season_number': int_or_none(item.attrib.get('se')),
+ 'episode_number': int_or_none(item.attrib.get('ep')),
+ 'thumbnail': thumbnail,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
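
Note that the duration in Crackle's vidwallcache XML (the 'r' attribute) is read as a hexadecimal string, hence the int(..., 16) above. For a hypothetical value:

    # '12c' in base 16 is 300, i.e. a five-minute duration in seconds
    print(int('12c', 16))
    # -> 300
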
diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py
index 4a8acd53d..fa05af50d 100644
--- a/youtube_dl/extractor/fox.py
+++ b/youtube_dl/extractor/fox.py
@@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.fox.com/watch/255180355939/7684182528',
+ 'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
'info_dict': {
'id': '255180355939',
'ext': 'mp4',
@@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
'duration': 129,
},
'add_ie': ['ThePlatform'],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index bf61ab2e7..45adbb7a3 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
+ # MPD from http://dash-mse-test.appspot.com/media.html
+ {
+ 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
+ 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
+ 'info_dict': {
+ 'id': 'car-20120827-manifest',
+ 'ext': 'mp4',
+ 'title': 'car-20120827-manifest',
+ 'formats': 'mincount:9',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ },
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -1302,7 +1316,8 @@ class GenericIE(InfoExtractor):
return {
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
- 'formats': self._parse_mpd_formats(doc, video_id),
+ 'formats': self._parse_mpd_formats(
+ doc, video_id, mpd_base_url=url.rpartition('/')[0]),
}
except compat_xml_parse_error:
pass
@@ -1413,7 +1428,7 @@ class GenericIE(InfoExtractor):
# Look for embedded Dailymotion player
matches = re.findall(
- r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
+ r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
if matches:
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
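
For the raw .mpd manifest case added above, the manifest's own URL now doubles as the base URL for relative references; rpartition simply drops everything after the last slash:

    url = 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd'
    # Everything before the last '/' becomes mpd_base_url
    print(url.rpartition('/')[0])
    # -> http://yt-dash-mse-test.commondatastorage.googleapis.com/media
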
diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py
index a7c3ce4ab..f05d765d6 100644
--- a/youtube_dl/extractor/hotstar.py
+++ b/youtube_dl/extractor/hotstar.py
@@ -10,8 +10,8 @@ from ..utils import (
class HotStarIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
+ _TESTS = [{
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
'info_dict': {
'id': '1000076273',
@@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
- }
+ }, {
+ 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.hotstar.com/1000000515',
+ 'only_matching': True,
+ }]
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 18d01f423..2202cfa33 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -57,7 +57,7 @@ class NBCIE(InfoExtractor):
{
# This video has expired but with an escaped embedURL
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
- 'skip': 'Expired'
+ 'only_matching': True,
}
]
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 97e8ffc97..cca012953 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
+ js_to_json,
strip_jsonp,
unified_strdate,
US_RATINGS,
@@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):
'id': '2365006249',
'ext': 'mp4',
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
- 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+ 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
'duration': 3190,
},
'params': {
@@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):
'id': '2365297690',
'ext': 'mp4',
'title': 'FRONTLINE - Losing Iraq',
- 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+ 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
'duration': 5050,
},
'params': {
@@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):
'id': '2201174722',
'ext': 'mp4',
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
- 'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
+ 'description': 'md5:95a19f568689d09a166dff9edada3301',
'duration': 801,
},
},
@@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):
'info_dict': {
'id': '2365297708',
'ext': 'mp4',
- 'description': 'md5:68d87ef760660eb564455eb30ca464fe',
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+ 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
'duration': 6559,
'thumbnail': 're:^https?://.*\.jpg$',
},
@@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):
'display_id': 'player',
'ext': 'mp4',
'title': 'American Experience - Death and the Civil War, Chapter 1',
- 'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
+ 'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
'duration': 682,
'thumbnail': 're:^https?://.*\.jpg$',
},
@@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):
},
},
{
- 'url': 'http://video.pbs.org/video/2365367186/',
+ 'url': 'http://www.pbs.org/video/2365245528/',
'info_dict': {
- 'id': '2365367186',
- 'display_id': '2365367186',
+ 'id': '2365245528',
+ 'display_id': '2365245528',
'ext': 'mp4',
- 'title': 'To Catch A Comet - Full Episode',
- 'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
- 'duration': 3342,
+ 'title': 'FRONTLINE - United States of Secrets (Part One)',
+ 'description': 'md5:55756bd5c551519cc4b7703e373e217e',
+ 'duration': 6851,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # requires ffmpeg
},
- 'skip': 'Expired',
},
{
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
@@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
'ext': 'mp4',
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
- 'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
+ 'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
'duration': 1480,
'thumbnail': 're:^https?://.*\.jpg$',
},
@@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):
'display_id': 'the-atomic-artists',
'ext': 'mp4',
'title': 'FRONTLINE - The Atomic Artists',
- 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+ 'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
'duration': 723,
'thumbnail': 're:^https?://.*\.jpg$',
},
@@ -365,10 +366,14 @@ class PBSIE(InfoExtractor):
webpage, 'upload date', default=None))
# tabbed frontline videos
- tabbed_videos = re.findall(
- r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
- if tabbed_videos:
- return tabbed_videos, presumptive_id, upload_date
+ MULTI_PART_REGEXES = (
+ r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
+ r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+ )
+ for p in MULTI_PART_REGEXES:
+ tabbed_videos = re.findall(p, webpage)
+ if tabbed_videos:
+ return tabbed_videos, presumptive_id, upload_date
MEDIA_ID_REGEXES = [
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
@@ -432,9 +437,21 @@ class PBSIE(InfoExtractor):
for vid_id in video_id]
return self.playlist_result(entries, display_id)
- info = self._download_json(
- 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
- display_id)
+ try:
+ info = self._download_json(
+ 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+ display_id, 'Downloading video info JSON')
+ except ExtractorError as e:
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
+ raise
+ # videoInfo API may not work for some videos, fallback to portalplayer API
+ player = self._download_webpage(
+ 'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
+ info = self._parse_json(
+ self._search_regex(
+ r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+ player, 'video data', default='{}'),
+ display_id, transform_source=js_to_json, fatal=False)
formats = []
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
@@ -493,7 +510,7 @@ class PBSIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'title': info['title'],
- 'description': info['program'].get('description'),
+ 'description': info.get('description') or info.get('program', {}).get('description'),
'thumbnail': info.get('image_url'),
'duration': int_or_none(info.get('duration')),
'age_limit': age_limit,
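
The second pattern added to MULTI_PART_REGEXES picks up tabbed layouts that expose part IDs via data-coveid. A quick check against a hypothetical anchor tag (the markup is invented, the ID is the FRONTLINE clip from the tests):

    import re

    snippet = '<a href="#video-1" data-coveid="2365297690">Part 1</a>'
    print(re.findall(r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', snippet))
    # -> ['2365297690']
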
diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py
new file mode 100644
index 000000000..c3c38cf4a
--- /dev/null
+++ b/youtube_dl/extractor/plays.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PlaysTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
+ _TEST = {
+ 'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
+ 'md5': 'dfeac1198506652b5257a62762cec7bc',
+ 'info_dict': {
+ 'id': '56af17f56c95335490',
+ 'ext': 'mp4',
+ 'title': 'When you outplay the Azir wall',
+ 'description': 'Posted by Bjergsen',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ content = self._parse_json(
+ self._search_regex(
+ r'R\.bindContent\(({.+?})\);', webpage,
+ 'content'), video_id)['content']
+ mpd_url, sources = re.search(
+ r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
+ content).groups()
+ formats = self._extract_mpd_formats(
+ self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
+ for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
+ formats.append({
+ 'url': self._proto_relative_url(format_url),
+ 'format_id': 'http-' + format_id,
+ 'height': int_or_none(height),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 10f2cad55..755f816ff 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -20,7 +20,6 @@ from ..utils import (
int_or_none,
sanitized_Request,
unsmuggle_url,
- url_basename,
xpath_with_ns,
)
@@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
first_video_id = None
duration = None
for item in entry['media$content']:
- smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
- cur_video_id = url_basename(smil_url)
+ smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
+ cur_video_id = ThePlatformIE._match_id(smil_url)
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
index 40ffbad2a..6bfbd4d85 100644
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -1,6 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urlparse,
+)
from ..utils import (
float_or_none,
int_or_none,
@@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TESTS = [{
'url': 'http://www.viddler.com/v/43903784',
- 'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
+ 'md5': '9eee21161d2c7f5b39690c3e325fab2f',
'info_dict': {
'id': '43903784',
- 'ext': 'mp4',
+ 'ext': 'mov',
'title': 'Video Made Easy',
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
'uploader': 'viddler',
@@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
}
}, {
'url': 'http://www.viddler.com/v/4d03aad9/',
- 'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
+ 'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
'info_dict': {
'id': '4d03aad9',
- 'ext': 'mp4',
+ 'ext': 'ts',
'title': 'WALL-TO-GORTAT',
'upload_date': '20150126',
'uploader': 'deadspin',
@@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
}
}, {
'url': 'http://www.viddler.com/player/221ebbbd/0/',
- 'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
+ 'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
'info_dict': {
'id': '221ebbbd',
- 'ext': 'mp4',
+ 'ext': 'mov',
'title': 'LETeens-Grammar-snack-third-conditional',
'description': ' ',
'upload_date': '20140929',
@@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
'view_count': int,
'comment_count': int,
}
+ }, {
+ # secret protected
+ 'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
+ 'info_dict': {
+ 'id': '890c0985',
+ 'ext': 'mp4',
+ 'title': 'Complete Property Training - Traineeships',
+ 'description': ' ',
+ 'upload_date': '20130606',
+ 'uploader': 'TiffanyBowtell',
+ 'timestamp': 1370496993,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- json_url = (
- 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
- video_id)
+ query = {
+ 'video_id': video_id,
+ 'key': 'v0vhrt7bg2xq1vyxhkct',
+ }
+
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ secret = qs.get('secret', [None])[0]
+ if secret:
+ query['secret'] = secret
+
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
- request = sanitized_Request(json_url, None, headers)
+ request = sanitized_Request(
+ 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
+ % compat_urllib_parse.urlencode(query), None, headers)
data = self._download_json(request, video_id)['video']
formats = []
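
The reworked Viddler request builds the API query from a dict so the optional secret from the page URL can be passed through. Roughly what the secret lookup does for the new test URL:

    from youtube_dl.compat import compat_urlparse

    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(
        'http://www.viddler.com/v/890c0985?secret=34051570').query)
    # parse_qs returns lists, hence the [None] default and the [0] index
    print(qs.get('secret', [None])[0])
    # -> 34051570
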
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 2389e7f0f..6a8f9b49d 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
- r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
+ r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
webpage, 'login token', group='xsrft')
vuid = self._search_regex(
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index f767fa15f..49687371a 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
if error_note is not None and '因版权原因无法观看此视频' in error_note:
raise ExtractorError(
'Youku said: Sorry, this video is available in China only', expected=True)
+ elif error_note and '该视频被设为私密' in error_note:
+ raise ExtractorError(
+ 'Youku said: Sorry, this video is private', expected=True)
else:
msg = 'Youku server reported error %i' % error.get('code')
if error_note is not None:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b9a91dea2..18f7d37f4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -375,7 +375,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube'
_TESTS = [
{
- 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
+ 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
'info_dict': {
'id': 'BaW_jenozKc',
'ext': 'mp4',
@@ -441,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
},
{
- 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
+ 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
'note': 'Use the first video ID in the URL',
'info_dict': {
'id': 'BaW_jenozKc',
@@ -705,6 +705,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
},
{
+ # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
+ 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+ 'info_dict': {
+ 'id': 'gVfLd0zydlo',
+ 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+ },
+ 'playlist_count': 2,
+ },
+ {
'url': 'http://vid.plus/FlRa-iH7PGw',
'only_matching': True,
},
@@ -1196,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not self._downloader.params.get('noplaylist'):
entries = []
feed_ids = []
- multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
+ multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
for feed in multifeed_metadata_list.split(','):
- feed_data = compat_parse_qs(feed)
+ # Unquote should take place before split on comma (,) since textual
+ # fields may contain comma as well (see
+ # https://github.com/rg3/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
entries.append({
'_type': 'url_transparent',
'ie_key': 'Youtube',
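
The comment above is the core of the fix: feed entries are separated by literal commas, while commas inside textual fields stay percent-encoded, so splitting has to happen before unquoting. A small illustration with a made-up metadata list:

    from youtube_dl.compat import compat_parse_qs, compat_urllib_parse_unquote_plus

    multifeed_metadata_list = (
        'id=abc123&title=Workshops+1+14%3A00+-+15%3A30,'
        'id=def456&title=Workshops+2%2C+Room+B')

    for feed in multifeed_metadata_list.split(','):
        # Unquoting per feed keeps the encoded comma (%2C) in the second
        # title from being mistaken for a feed separator.
        print(compat_parse_qs(compat_urllib_parse_unquote_plus(feed))['title'])
    # -> ['Workshops 1 14:00 - 15:30']
    # -> ['Workshops 2, Room B']
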
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index dce67de8a..a82a262a0 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -56,7 +56,7 @@ from .compat import (
compiled_regex_type = type(re.compile(''))
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 53b210290..9aca8001a 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.02.09'
+__version__ = '2016.02.13'