aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py3
-rw-r--r--youtube_dl/extractor/bet.py4
-rw-r--r--youtube_dl/extractor/bliptv.py65
-rw-r--r--youtube_dl/extractor/ceskatelevize.py3
-rw-r--r--youtube_dl/extractor/common.py46
-rw-r--r--youtube_dl/extractor/crunchyroll.py3
-rw-r--r--youtube_dl/extractor/dailymotion.py2
-rw-r--r--youtube_dl/extractor/dfb.py27
-rw-r--r--youtube_dl/extractor/dramafever.py49
-rw-r--r--youtube_dl/extractor/ehow.py6
-rw-r--r--youtube_dl/extractor/facebook.py4
-rw-r--r--youtube_dl/extractor/gamespot.py4
-rw-r--r--youtube_dl/extractor/generic.py7
-rw-r--r--youtube_dl/extractor/gorillavid.py2
-rw-r--r--youtube_dl/extractor/hostingbulk.py2
-rw-r--r--youtube_dl/extractor/howstuffworks.py6
-rw-r--r--youtube_dl/extractor/infoq.py4
-rw-r--r--youtube_dl/extractor/jeuxvideo.py9
-rw-r--r--youtube_dl/extractor/karaoketv.py4
-rw-r--r--youtube_dl/extractor/malemotion.py6
-rw-r--r--youtube_dl/extractor/metacafe.py3
-rw-r--r--youtube_dl/extractor/mitele.py3
-rw-r--r--youtube_dl/extractor/mixcloud.py6
-rw-r--r--youtube_dl/extractor/mofosex.py4
-rw-r--r--youtube_dl/extractor/myspass.py3
-rw-r--r--youtube_dl/extractor/myvideo.py11
-rw-r--r--youtube_dl/extractor/nowtv.py2
-rw-r--r--youtube_dl/extractor/npo.py142
-rw-r--r--youtube_dl/extractor/odnoklassniki.py4
-rw-r--r--youtube_dl/extractor/openfilm.py4
-rw-r--r--youtube_dl/extractor/photobucket.py4
-rw-r--r--youtube_dl/extractor/played.py2
-rw-r--r--youtube_dl/extractor/playvid.py7
-rw-r--r--youtube_dl/extractor/pornhub.py9
-rw-r--r--youtube_dl/extractor/primesharetv.py2
-rw-r--r--youtube_dl/extractor/promptfile.py2
-rw-r--r--youtube_dl/extractor/prosiebensat1.py17
-rw-r--r--youtube_dl/extractor/shared.py2
-rw-r--r--youtube_dl/extractor/spankwire.py4
-rw-r--r--youtube_dl/extractor/twitch.py2
-rw-r--r--youtube_dl/extractor/udemy.py35
-rw-r--r--youtube_dl/extractor/veehd.py3
-rw-r--r--youtube_dl/extractor/vimeo.py2
-rw-r--r--youtube_dl/extractor/vk.py2
-rw-r--r--youtube_dl/extractor/vodlocker.py2
-rw-r--r--youtube_dl/extractor/xbef.py6
-rw-r--r--youtube_dl/extractor/xnxx.py6
-rw-r--r--youtube_dl/extractor/xtube.py6
-rw-r--r--youtube_dl/extractor/xvideos.py4
-rw-r--r--youtube_dl/extractor/ynet.py4
-rw-r--r--youtube_dl/extractor/youtube.py12
51 files changed, 384 insertions, 187 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3f4f23521..06f21064b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -389,7 +389,8 @@ from .npo import (
NPOLiveIE,
NPORadioIE,
NPORadioFragmentIE,
- TegenlichtVproIE,
+ VPROIE,
+ WNLIE
)
from .nrk import (
NRKIE,
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
index 26b934543..03dad4636 100644
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@@ -1,7 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
xpath_text,
xpath_with_ns,
@@ -57,7 +57,7 @@ class BetIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- media_url = compat_urllib_parse.unquote(self._search_regex(
+ media_url = compat_urllib_parse_unquote(self._search_regex(
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
webpage, 'media URL'))
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index fb56cd78d..a69ee482b 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -14,6 +14,8 @@ from ..utils import (
int_or_none,
parse_iso8601,
unescapeHTML,
+ xpath_text,
+ xpath_with_ns,
)
@@ -23,10 +25,10 @@ class BlipTVIE(InfoExtractor):
_TESTS = [
{
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
- 'md5': 'c6934ad0b6acf2bd920720ec888eb812',
+ 'md5': '80baf1ec5c3d2019037c1c707d676b9f',
'info_dict': {
'id': '5779306',
- 'ext': 'mov',
+ 'ext': 'm4v',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'timestamp': 1323138843,
@@ -100,6 +102,20 @@ class BlipTVIE(InfoExtractor):
'vcodec': 'none',
}
},
+ {
+ # missing duration
+ 'url': 'http://blip.tv/rss/flash/6700880',
+ 'info_dict': {
+ 'id': '6684191',
+ 'ext': 'm4v',
+ 'title': 'Cowboy Bebop: Gateway Shuffle Review',
+ 'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
+ 'timestamp': 1386639757,
+ 'upload_date': '20131210',
+ 'uploader': 'sfdebris',
+ 'uploader_id': '706520',
+ }
+ }
]
@staticmethod
@@ -128,35 +144,34 @@ class BlipTVIE(InfoExtractor):
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
- def blip(s):
- return '{http://blip.tv/dtd/blip/1.0}%s' % s
-
- def media(s):
- return '{http://search.yahoo.com/mrss/}%s' % s
-
- def itunes(s):
- return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
+ def _x(p):
+ return xpath_with_ns(p, {
+ 'blip': 'http://blip.tv/dtd/blip/1.0',
+ 'media': 'http://search.yahoo.com/mrss/',
+ 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+ })
item = rss.find('channel/item')
- video_id = item.find(blip('item_id')).text
- title = item.find('./title').text
- description = clean_html(compat_str(item.find(blip('puredescription')).text))
- timestamp = parse_iso8601(item.find(blip('datestamp')).text)
- uploader = item.find(blip('user')).text
- uploader_id = item.find(blip('userid')).text
- duration = int(item.find(blip('runtime')).text)
- media_thumbnail = item.find(media('thumbnail'))
- thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
- categories = [category.text for category in item.findall('category')]
+ video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
+ title = xpath_text(item, 'title', 'title', fatal=True)
+ description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
+ timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
+ uploader = xpath_text(item, _x('blip:user'), 'uploader')
+ uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
+ duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
+ media_thumbnail = item.find(_x('media:thumbnail'))
+ thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
+ else xpath_text(item, 'image', 'thumbnail'))
+ categories = [category.text for category in item.findall('category') if category is not None]
formats = []
subtitles_urls = {}
- media_group = item.find(media('group'))
- for media_content in media_group.findall(media('content')):
+ media_group = item.find(_x('media:group'))
+ for media_content in media_group.findall(_x('media:content')):
url = media_content.get('url')
- role = media_content.get(blip('role'))
+ role = media_content.get(_x('blip:role'))
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
@@ -175,8 +190,8 @@ class BlipTVIE(InfoExtractor):
'url': real_url,
'format_id': role,
'format_note': media_type,
- 'vcodec': media_content.get(blip('vcodec')) or 'none',
- 'acodec': media_content.get(blip('acodec')),
+ 'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
+ 'acodec': media_content.get(_x('blip:acodec')),
'filesize': media_content.get('filesize'),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 65f6be623..dda583680 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import (
@@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
- req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
+ req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist = self._download_json(req, video_id)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 82f5de2d8..5a2d0d995 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -27,7 +27,9 @@ from ..utils import (
bug_reports_message,
clean_html,
compiled_regex_type,
+ determine_ext,
ExtractorError,
+ fix_xml_ampersands,
float_or_none,
int_or_none,
RegexNotFoundError,
@@ -706,10 +708,23 @@ class InfoExtractor(object):
'twitter card player')
@staticmethod
- def _form_hidden_inputs(html):
- return dict(re.findall(
- r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
- html))
+ def _hidden_inputs(html):
+ return dict([
+ (input.group('name'), input.group('value')) for input in re.finditer(
+ r'''(?x)
+ <input\s+
+ type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
+ name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
+ (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
+ value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
+ ''', html)
+ ])
+
+ def _form_hidden_inputs(self, form_id, html):
+ form = self._search_regex(
+ r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ html, '%s form' % form_id, group='form')
+ return self._hidden_inputs(form)
def _sort_formats(self, formats, field_preference=None):
if not formats:
@@ -821,10 +836,14 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
- def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip()):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
- 'Unable to download f4m manifest')
+ 'Unable to download f4m manifest',
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+ transform_source=transform_source)
formats = []
manifest_version = '1.0'
@@ -834,8 +853,19 @@ class InfoExtractor(object):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
- manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
- (media_el.attrib.get('href') or media_el.attrib.get('url')))
+ media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
+ if not media_url:
+ continue
+ manifest_url = (
+ media_url if media_url.startswith('http://') or media_url.startswith('https://')
+ else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
+ # If media_url is itself a f4m manifest do the recursive extraction
+ # since bitrates in parent manifest (this one) and media_url manifest
+ # may differ leading to inability to resolve the format by requested
+ # bitrate in f4m downloader
+ if determine_ext(manifest_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
+ continue
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 73f1e22ef..d1b6d7366 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -12,6 +12,7 @@ from math import pow, sqrt, floor
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -254,7 +255,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
- playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
+ playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 8852f0add..1a41c0db1 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -53,6 +53,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader': 'IGN',
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
'upload_date': '20150306',
+ 'duration': 74,
}
},
# Vevo video
@@ -164,6 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
'view_count': view_count,
+ 'duration': info['duration']
}
def _get_subtitles(self, video_id, webpage):
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
index 8049779b0..263532cc6 100644
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import unified_strdate
class DFBIE(InfoExtractor):
IE_NAME = 'tv.dfb.de'
- _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+ _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
_TEST = {
- 'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+ 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
# The md5 is different each time
'info_dict': {
- 'id': '9070',
+ 'id': '11633',
+ 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
'ext': 'flv',
- 'title': 'Highlights des Empfangs in Berlin',
- 'upload_date': '20140716',
+ 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
+ 'upload_date': '20150714',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, display_id)
player_info = self._download_xml(
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
- video_id)
+ display_id)
video_info = player_info.find('video')
- f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
+ f4m_info = self._download_xml(
+ self._proto_relative_url(video_info.find('url').text.strip()), display_id)
token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
+ formats = self._extract_f4m_formats(manifest_url, display_id)
return {
'id': video_id,
+ 'display_id': display_id,
'title': video_info.find('title').text,
- 'url': manifest_url,
- 'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
- 'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
+ 'upload_date': unified_strdate(video_info.find('time_date').text),
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index ca41a3abf..38e6597c8 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -23,8 +23,23 @@ class DramaFeverBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
+ _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
+
+ _consumer_secret = None
+
+ def _get_consumer_secret(self):
+ mainjs = self._download_webpage(
+ 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
+ None, 'Downloading main.js', fatal=False)
+ if not mainjs:
+ return self._CONSUMER_SECRET
+ return self._search_regex(
+ r"var\s+cs\s*=\s*'([^']+)'", mainjs,
+ 'consumer secret', default=self._CONSUMER_SECRET)
+
def _real_initialize(self):
self._login()
+ self._consumer_secret = self._get_consumer_secret()
def _login(self):
(username, password) = self._get_login_info()
@@ -119,6 +134,23 @@ class DramaFeverIE(DramaFeverBaseIE):
'url': href,
}]
+ series_id, episode_number = video_id.split('.')
+ episode_info = self._download_json(
+ # We only need a single episode info, so restricting page size to one episode
+ # and dealing with page number as with episode number
+ r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
+ % (self._consumer_secret, series_id, episode_number),
+ video_id, 'Downloading episode info JSON', fatal=False)
+ if episode_info:
+ value = episode_info.get('value')
+ if value:
+ subfile = value[0].get('subfile') or value[0].get('new_subfile')
+ if subfile and subfile != 'http://www.dramafever.com/st/':
+ subtitles.setdefault('English', []).append({
+ 'ext': 'srt',
+ 'url': subfile,
+ })
+
return {
'id': video_id,
'title': title,
@@ -152,27 +184,14 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
'playlist_count': 20,
}]
- _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
_PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
- def _get_consumer_secret(self, video_id):
- mainjs = self._download_webpage(
- 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
- video_id, 'Downloading main.js', fatal=False)
- if not mainjs:
- return self._CONSUMER_SECRET
- return self._search_regex(
- r"var\s+cs\s*=\s*'([^']+)'", mainjs,
- 'consumer secret', default=self._CONSUMER_SECRET)
-
def _real_extract(self, url):
series_id = self._match_id(url)
- consumer_secret = self._get_consumer_secret(series_id)
-
series = self._download_json(
'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
- % (consumer_secret, series_id),
+ % (self._consumer_secret, series_id),
series_id, 'Downloading series JSON')['series'][series_id]
title = clean_html(series['name'])
@@ -182,7 +201,7 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
for page_num in itertools.count(1):
episodes = self._download_json(
'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
- % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
+ % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
series_id, 'Downloading episodes JSON page #%d' % page_num)
for episode in episodes.get('value', []):
episode_url = episode.get('episode_url')
diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py
index 9cb1bf301..b1cd4f5d4 100644
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@@ -1,9 +1,7 @@
from __future__ import unicode_literals
-from ..compat import (
- compat_urllib_parse,
-)
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
class EHowIE(InfoExtractor):
@@ -26,7 +24,7 @@ class EHowIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
- final_url = compat_urllib_parse.unquote(video_url)
+ final_url = compat_urllib_parse_unquote(video_url)
uploader = self._html_search_meta('uploader', webpage)
title = self._og_search_title(webpage).replace(' | eHow', '')
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 82dc27bc6..e17bb9aea 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -9,7 +9,7 @@ from ..compat import (
compat_http_client,
compat_str,
compat_urllib_error,
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -136,7 +136,7 @@ class FacebookIE(InfoExtractor):
else:
raise ExtractorError('Cannot parse data')
data = dict(json.loads(m.group(1)))
- params_raw = compat_urllib_parse.unquote(data['params'])
+ params_raw = compat_urllib_parse_unquote(data['params'])
params = json.loads(params_raw)
video_data = params['video_data'][0]
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 2d33fa7f5..b3f1bafcc 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -5,7 +5,7 @@ import json
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
@@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor):
return {
'id': data_video['guid'],
'display_id': page_id,
- 'title': compat_urllib_parse.unquote(data_video['title']),
+ 'title': compat_urllib_parse_unquote(data_video['title']),
'formats': formats,
'description': self._html_search_meta('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 392ad3648..a62287e50 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -8,7 +8,6 @@ import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
- compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
@@ -1115,7 +1114,7 @@ class GenericIE(InfoExtractor):
# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
# Unescaping the whole page allows to handle those cases in a generic way
- webpage = compat_urllib_parse.unquote(webpage)
+ webpage = compat_urllib_parse_unquote(webpage)
# it's tempting to parse this further, but you would
# have to take into account all the variations like
@@ -1369,7 +1368,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
- return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+ return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -1682,7 +1681,7 @@ class GenericIE(InfoExtractor):
entries = []
for video_url in found:
video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+ video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py
index aabf07a20..f006f0cb1 100644
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -78,7 +78,7 @@ class GorillaVidIE(InfoExtractor):
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- fields = self._form_hidden_inputs(webpage)
+ fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1':
countdown = int_or_none(self._search_regex(
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py
index 63f579592..a3154cfde 100644
--- a/youtube_dl/extractor/hostingbulk.py
+++ b/youtube_dl/extractor/hostingbulk.py
@@ -58,7 +58,7 @@ class HostingBulkIE(InfoExtractor):
r'<img src="([^"]+)".+?class="pic"',
webpage, 'thumbnail', fatal=False)
- fields = self._form_hidden_inputs(webpage)
+ fields = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
request.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index e97339121..663e6632a 100644
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -10,7 +10,7 @@ from ..utils import (
class HowStuffWorksIE(InfoExtractor):
- _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
+ _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_TESTS = [
{
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
@@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
},
},
+ {
+ 'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index 91a1b3ccb..71cfd12c5 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -4,7 +4,7 @@ import base64
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urlparse,
)
@@ -39,7 +39,7 @@ class InfoQIE(InfoExtractor):
# Extract video URL
encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
- real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
+ real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
playpath = 'mp4:' + real_id
video_filename = playpath.split('/')[-1]
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index d0720ff56..1df084d87 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -8,9 +8,9 @@ from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor):
- _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
+ _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
'info_dict': {
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
},
- }
+ }, {
+ 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py
index e3b43ff8d..06daf5a89 100644
--- a/youtube_dl/extractor/karaoketv.py
+++ b/youtube_dl/extractor/karaoketv.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
js_to_json,
)
@@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
page_video_url = self._og_search_video_url(webpage, video_id)
- config_json = compat_urllib_parse.unquote_plus(self._search_regex(
+ config_json = compat_urllib_parse_unquote_plus(self._search_regex(
r'config=(.*)', page_video_url, 'configuration'))
urls_info_json = self._download_json(
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py
index 0b85a59d1..92511a671 100644
--- a/youtube_dl/extractor/malemotion.py
+++ b/youtube_dl/extractor/malemotion.py
@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
class MalemotionIE(InfoExtractor):
@@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = compat_urllib_parse.unquote(self._search_regex(
+ video_url = compat_urllib_parse_unquote(self._search_regex(
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)</title', webpage, 'title')
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 8bc333b02..6e2e73a51 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor):
video_url = None
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
if mobj is not None:
- mediaURL = compat_urllib_parse.unquote(mobj.group(1))
+ mediaURL = compat_urllib_parse_unquote(mobj.group(1))
video_ext = mediaURL[-3:]
# Extract gdaKey if available
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 7091f3335..852d72266 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -5,6 +5,7 @@ import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
@@ -48,7 +49,7 @@ class MiTeleIE(InfoExtractor):
domain = 'http://' + domain
info_url = compat_urlparse.urljoin(
domain,
- compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+ compat_urllib_parse_unquote(embed_data['flashvars']['host'])
)
info_el = self._download_xml(info_url, episode).find('./video/info')
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 425a4ccf1..d47aeceda 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -3,9 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
HEADRequest,
@@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group(1)
cloudcast_name = mobj.group(2)
- track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
+ track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
webpage = self._download_webpage(url, track_id)
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py
index 2cec12d35..9bf99a54a 100644
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -5,9 +5,9 @@ import re
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
- compat_urllib_parse,
)
@@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
- video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
+ video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 5b9b9fbcd..4557a2b13 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
- metadata = self._download_xml(metadata_url, video_id)
+ metadata = self._download_xml(
+ metadata_url, video_id, transform_source=lambda s: s.strip())
# extract values from metadata
url_flv_el = metadata.find('url_flv')
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index 5e754fcff..c96f472a3 100644
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -10,6 +10,7 @@ from .common import InfoExtractor
from ..compat import (
compat_ord,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor):
if not a == '_encxml':
params[a] = b
else:
- encxml = compat_urllib_parse.unquote(b)
+ encxml = compat_urllib_parse_unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
@@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor):
video_url = None
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj:
- video_url = compat_urllib_parse.unquote(mobj.group(1))
+ video_url = compat_urllib_parse_unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
'Rewriting URL to use unencrypted rtmp:// ...',
@@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor):
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError('unable to extract url')
- video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
+ video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
- video_file = compat_urllib_parse.unquote(video_file)
+ video_file = compat_urllib_parse_unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
@@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor):
video_playpath = ''
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
- video_swfobj = compat_urllib_parse.unquote(video_swfobj)
+ video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, 'title')
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index 173e46cd8..0b5ff4760 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -133,7 +133,7 @@ class NowTVIE(InfoExtractor):
station = mobj.group('station')
info = self._download_json(
- 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+ 'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
display_id)
video_id = compat_str(info['id'])
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 62d12b7a6..0c2d02c10 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
fix_xml_ampersands,
@@ -7,7 +9,6 @@ from ..utils import (
qualities,
strip_jsonp,
unified_strdate,
- url_basename,
)
@@ -37,8 +38,21 @@ class NPOBaseIE(InfoExtractor):
class NPOIE(NPOBaseIE):
- IE_NAME = 'npo.nl'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
+ IE_NAME = 'npo'
+ IE_DESC = 'npo.nl and ntr.nl'
+ _VALID_URL = r'''(?x)
+ (?:
+ npo:|
+ https?://
+ (?:www\.)?
+ (?:
+ npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+ ntr\.nl/(?:[^/]+/){2,}|
+ omroepwnl\.nl/video/fragment/[^/]+__
+ )
+ )
+ (?P<id>[^/?#]+)
+ '''
_TESTS = [
{
@@ -58,7 +72,7 @@ class NPOIE(NPOBaseIE):
'info_dict': {
'id': 'VARA_101191800',
'ext': 'm4v',
- 'title': 'De Mega Mike & Mega Thomas show',
+ 'title': 'De Mega Mike & Mega Thomas show: The best of.',
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
'upload_date': '20090227',
'duration': 2400,
@@ -70,8 +84,8 @@ class NPOIE(NPOBaseIE):
'info_dict': {
'id': 'VPWON_1169289',
'ext': 'm4v',
- 'title': 'Tegenlicht',
- 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+ 'title': 'Tegenlicht: De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
'upload_date': '20130225',
'duration': 3000,
},
@@ -100,6 +114,30 @@ class NPOIE(NPOBaseIE):
'title': 'Hoe gaat Europa verder na Parijs?',
},
},
+ {
+ 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+ 'md5': '01c6a2841675995da1f0cf776f03a9c3',
+ 'info_dict': {
+ 'id': 'VPWON_1233944',
+ 'ext': 'm4v',
+ 'title': 'Aap, poot, pies',
+ 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+ 'upload_date': '20150508',
+ 'duration': 599,
+ },
+ },
+ {
+ 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+ 'md5': 'd30cd8417b8b9bca1fdff27428860d08',
+ 'info_dict': {
+ 'id': 'POW_00996502',
+ 'ext': 'm4v',
+ 'title': '''"Dit is wel een 'landslide'..."''',
+ 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+ 'upload_date': '20150508',
+ 'duration': 462,
+ },
+ }
]
def _real_extract(self, url):
@@ -114,6 +152,18 @@ class NPOIE(NPOBaseIE):
transform_source=strip_jsonp,
)
+ # For some videos actual video id (prid) is different (e.g. for
+ # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
+ # video id is POMS_WNL_853698 but prid is POW_00996502)
+ video_id = metadata.get('prid') or video_id
+
+ # titel is too generic in some cases so utilize aflevering_titel as well
+ # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
+ title = metadata['titel']
+ sub_title = metadata.get('aflevering_titel')
+ if sub_title and sub_title != title:
+ title += ': %s' % sub_title
+
token = self._get_token(video_id)
formats = []
@@ -186,8 +236,8 @@ class NPOIE(NPOBaseIE):
return {
'id': video_id,
- 'title': metadata['titel'],
- 'description': metadata['info'],
+ 'title': title,
+ 'description': metadata.get('info'),
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
'upload_date': unified_strdate(metadata.get('gidsdatum')),
'duration': parse_duration(metadata.get('tijdsduur')),
@@ -356,9 +406,8 @@ class NPORadioFragmentIE(InfoExtractor):
}
-class TegenlichtVproIE(NPOIE):
- IE_NAME = 'tegenlicht.vpro.nl'
- _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+class VPROIE(NPOIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
_TESTS = [
{
@@ -367,17 +416,72 @@ class TegenlichtVproIE(NPOIE):
'info_dict': {
'id': 'VPWON_1169289',
'ext': 'm4v',
- 'title': 'Tegenlicht',
- 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+ 'title': 'De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
'upload_date': '20130225',
},
},
+ {
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+ 'info_dict': {
+ 'id': 'sergio-herman',
+ 'title': 'Sergio Herman: Fucking perfect',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # playlist with youtube embed
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
+ 'info_dict': {
+ 'id': 'education-education',
+ 'title': '2Doc',
+ },
+ 'playlist_count': 2,
+ }
]
def _real_extract(self, url):
- name = url_basename(url)
- webpage = self._download_webpage(url, name)
- urn = self._html_search_meta('mediaurn', webpage)
- info_page = self._download_json(
- 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
- return self._get_info(info_page['mid'])
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+ for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
+ ]
+
+ playlist_title = self._search_regex(
+ r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
+ webpage, 'playlist title', default=None) or self._og_search_title(webpage)
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class WNLIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+
+ _TEST = {
+ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+ 'info_dict': {
+ 'id': 'vandaag-de-dag-6-mei',
+ 'title': 'Vandaag de Dag 6 mei',
+ },
+ 'playlist_count': 4,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id, 'NPO')
+ for video_id, part in re.findall(
+ r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
+ ]
+
+ playlist_title = self._html_search_regex(
+ r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
+ webpage, 'playlist title')
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index 6c7149fe3..215ffe87b 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
unified_strdate,
int_or_none,
@@ -62,7 +62,7 @@ class OdnoklassnikiIE(InfoExtractor):
metadata = self._parse_json(metadata, video_id)
else:
metadata = self._download_json(
- compat_urllib_parse.unquote(flashvars['metadataUrl']),
+ compat_urllib_parse_unquote(flashvars['metadataUrl']),
video_id, 'Downloading metadata JSON')
movie = metadata['movie']
diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py
index 2249657eb..d2ceedd01 100644
--- a/youtube_dl/extractor/openfilm.py
+++ b/youtube_dl/extractor/openfilm.py
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
parse_iso8601,
- compat_urllib_parse,
parse_age_limit,
int_or_none,
)
@@ -37,7 +37,7 @@ class OpenFilmIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- player = compat_urllib_parse.unquote_plus(
+ player = compat_urllib_parse_unquote_plus(
self._og_search_video_url(webpage))
video = json.loads(self._search_regex(
diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py
index c66db3cdc..788411ccc 100644
--- a/youtube_dl/extractor/photobucket.py
+++ b/youtube_dl/extractor/photobucket.py
@@ -4,7 +4,7 @@ import json
import re
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
class PhotobucketIE(InfoExtractor):
@@ -34,7 +34,7 @@ class PhotobucketIE(InfoExtractor):
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
webpage, 'info json')
info = json.loads(info_json)
- url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
+ url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
return {
'id': video_id,
'url': url,
diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py
index 9fe1524f2..8a1c296dd 100644
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -38,7 +38,7 @@ class PlayedIE(InfoExtractor):
if m_error:
raise ExtractorError(m_error.group('msg'), expected=True)
- data = self._form_hidden_inputs(orig_webpage)
+ data = self._hidden_inputs(orig_webpage)
self._sleep(2, video_id)
diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py
index c3e667e9e..2eb4fd96d 100644
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -4,7 +4,8 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
)
from ..utils import (
clean_html,
@@ -44,7 +45,7 @@ class PlayvidIE(InfoExtractor):
flashvars = self._html_search_regex(
r'flashvars="(.+?)"', webpage, 'flashvars')
- infos = compat_urllib_parse.unquote(flashvars).split(r'&')
+ infos = compat_urllib_parse_unquote(flashvars).split(r'&')
for info in infos:
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
if videovars_match:
@@ -52,7 +53,7 @@ class PlayvidIE(InfoExtractor):
val = videovars_match.group(2)
if key == 'title':
- video_title = compat_urllib_parse.unquote_plus(val)
+ video_title = compat_urllib_parse_unquote_plus(val)
if key == 'duration':
try:
duration = int(val)
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 8172bc997..0b7886840 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -5,7 +5,8 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
@@ -69,7 +70,7 @@ class PornHubIE(InfoExtractor):
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
- thumbnail = compat_urllib_parse.unquote(thumbnail)
+ thumbnail = compat_urllib_parse_unquote(thumbnail)
view_count = self._extract_count(
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
@@ -80,9 +81,9 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+ video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
- password = compat_urllib_parse.unquote_plus(
+ password = compat_urllib_parse_unquote_plus(
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
index 9aa0c862a..304359dc5 100644
--- a/youtube_dl/extractor/primesharetv.py
+++ b/youtube_dl/extractor/primesharetv.py
@@ -29,7 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
if '>File not exist<' in webpage:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- fields = self._form_hidden_inputs(webpage)
+ fields = self._hidden_inputs(webpage)
headers = {
'Referer': url,
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py
index 81a63c7fc..8190ed676 100644
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -35,7 +35,7 @@ class PromptFileIE(InfoExtractor):
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
- fields = self._form_hidden_inputs(webpage)
+ fields = self._hidden_inputs(webpage)
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 536a42dc8..fec008ce7 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -9,8 +9,9 @@ from ..compat import (
compat_urllib_parse,
)
from ..utils import (
- unified_strdate,
+ determine_ext,
int_or_none,
+ unified_strdate,
)
@@ -21,6 +22,11 @@ class ProSiebenSat1IE(InfoExtractor):
_TESTS = [
{
+ # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
+ # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+ # - malformed f4m manifest support
+ # - proper handling of URLs starting with `https?://` in 2.0 manifests
+ # - recursive child f4m manifests extraction
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': {
'id': '2104602',
@@ -208,7 +214,7 @@ class ProSiebenSat1IE(InfoExtractor):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'prosieben'
- client_name = 'kolibri-1.12.6'
+ client_name = 'kolibri-2.0.19-splec4'
client_location = url
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
@@ -275,8 +281,9 @@ class ProSiebenSat1IE(InfoExtractor):
for source in urls_sources:
protocol = source['protocol']
+ source_url = source['url']
if protocol == 'rtmp' or protocol == 'rtmpe':
- mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url'])
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
continue
path = mobj.group('path')
@@ -293,9 +300,11 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4',
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
})
+ elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(source_url, clip_id))
else:
formats.append({
- 'url': source['url'],
+ 'url': source_url,
'vbr': fix_bitrate(source['bitrate']),
})
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index 6e2b94e7d..a07677686 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -34,7 +34,7 @@ class SharedIE(InfoExtractor):
raise ExtractorError(
'Video %s does not exist' % video_id, expected=True)
- download_form = self._form_hidden_inputs(webpage)
+ download_form = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(
url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index bff75d6b2..5fa6faf18 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -4,7 +4,7 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
@@ -68,7 +68,7 @@ class SpankwireIE(InfoExtractor):
webpage, 'comment count', fatal=False))
video_urls = list(map(
- compat_urllib_parse.unquote,
+ compat_urllib_parse_unquote,
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
password = self._search_regex(
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index af2b798fb..92b6dc1b8 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -59,7 +59,7 @@ class TwitchBaseIE(InfoExtractor):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
- login_form = self._form_hidden_inputs(login_page)
+ login_form = self._hidden_inputs(login_page)
login_form.update({
'login': username.encode('utf-8'),
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
index 4667ed83b..e2bab52fe 100644
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -15,7 +15,8 @@ from ..utils import (
class UdemyIE(InfoExtractor):
IE_NAME = 'udemy'
_VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
- _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
+ _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+ _ORIGIN_URL = 'https://www.udemy.com'
_NETRC_MACHINE = 'udemy'
_TESTS = [{
@@ -74,29 +75,33 @@ class UdemyIE(InfoExtractor):
expected=True)
login_popup = self._download_webpage(
- 'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
- 'Downloading login popup')
+ self._LOGIN_URL, None, 'Downloading login popup')
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
return
- csrf = self._html_search_regex(
- r'<input type="hidden" name="csrf" value="(.+?)"',
- login_popup, 'csrf token')
+ login_form = self._form_hidden_inputs('login-form', login_popup)
+
+ login_form.update({
+ 'email': username.encode('utf-8'),
+ 'password': password.encode('utf-8'),
+ })
- login_form = {
- 'email': username,
- 'password': password,
- 'csrf': csrf,
- 'displayType': 'json',
- 'isSubmitted': '1',
- }
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
- response = self._download_json(
+ request.add_header('Referer', self._ORIGIN_URL)
+ request.add_header('Origin', self._ORIGIN_URL)
+
+ response = self._download_webpage(
request, None, 'Logging in as %s' % username)
- if 'returnUrl' not in response:
+ if all(logout_pattern not in response
+ for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
index 346edf485..0d8d832cc 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -5,6 +5,7 @@ import json
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
@@ -76,7 +77,7 @@ class VeeHDIE(InfoExtractor):
if config_json:
config = json.loads(config_json)
- video_url = compat_urlparse.unquote(config['clip']['url'])
+ video_url = compat_urllib_parse_unquote(config['clip']['url'])
if not video_url:
video_url = self._html_search_regex(
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index d63c03183..10d6745af 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -452,7 +452,7 @@ class VimeoChannelIE(InfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
- fields = self._form_hidden_inputs(login_form)
+ fields = self._hidden_inputs(login_form)
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
fields['token'] = token
fields['password'] = password
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 8ac3aeac0..8f677cae3 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -168,7 +168,7 @@ class VKIE(InfoExtractor):
login_page = self._download_webpage(
'https://vk.com', None, 'Downloading login page')
- login_form = self._form_hidden_inputs(login_page)
+ login_form = self._hidden_inputs(login_page)
login_form.update({
'email': username.encode('cp1251'),
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index 4804692bf..ccf1928b5 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -26,7 +26,7 @@ class VodlockerIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- fields = self._form_hidden_inputs(webpage)
+ fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1':
self._sleep(3, video_id) # they do detect when requests happen too fast!
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py
index 80c48c37d..4ff99e5ca 100644
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dl/extractor/xbef.py
@@ -1,9 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
class XBefIE(InfoExtractor):
@@ -30,7 +28,7 @@ class XBefIE(InfoExtractor):
config_url_enc = self._download_webpage(
'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
note='Retrieving config URL')
- config_url = compat_urllib_parse.unquote(config_url_enc)
+ config_url = compat_urllib_parse_unquote(config_url_enc)
config = self._download_xml(
config_url, video_id, note='Retrieving config')
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py
index 79ed6c744..5a41f8ffa 100644
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
class XNXXIE(InfoExtractor):
@@ -26,7 +24,7 @@ class XNXXIE(InfoExtractor):
video_url = self._search_regex(r'flv_url=(.*?)&amp;',
webpage, 'video URL')
- video_url = compat_urllib_parse.unquote(video_url)
+ video_url = compat_urllib_parse_unquote(video_url)
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
webpage, 'title')
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index 1644f53c8..779e4f46a 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -5,7 +5,7 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
)
from ..utils import (
parse_duration,
@@ -59,7 +59,7 @@ class XTubeIE(InfoExtractor):
for format_id, video_url in re.findall(
r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
fmt = {
- 'url': compat_urllib_parse.unquote(video_url),
+ 'url': compat_urllib_parse_unquote(video_url),
'format_id': format_id,
}
m = re.search(r'^(?P<height>\d+)[pP]', format_id)
@@ -68,7 +68,7 @@ class XTubeIE(InfoExtractor):
formats.append(fmt)
if not formats:
- video_url = compat_urllib_parse.unquote(self._search_regex(
+ video_url = compat_urllib_parse_unquote(self._search_regex(
r'flashvars\.video_url\s*=\s*"([^"]+)"',
webpage, 'video URL'))
formats.append({'url': video_url})
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index d8415bed4..5dcf2fdd1 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -4,7 +4,7 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
@@ -37,7 +37,7 @@ class XVideosIE(InfoExtractor):
if mobj:
raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
- video_url = compat_urllib_parse.unquote(
+ video_url = compat_urllib_parse_unquote(
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py
index 894678a23..869f3e819 100644
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dl/extractor/ynet.py
@@ -5,7 +5,7 @@ import re
import json
from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote_plus
class YnetIE(InfoExtractor):
@@ -34,7 +34,7 @@ class YnetIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage))
+ content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
f4m_url = config['clip']['url']
title = self._og_search_title(webpage)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3c629d38a..e7f5c7861 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -17,6 +17,8 @@ from ..compat import (
compat_chr,
compat_parse_qs,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
compat_str,
@@ -865,7 +867,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
- url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+ url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
video_id = self.extract_id(url)
# Get video webpage
@@ -973,7 +975,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# uploader
if 'author' not in video_info:
raise ExtractorError('Unable to extract uploader name')
- video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
+ video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
# uploader_id
video_uploader_id = None
@@ -1000,7 +1002,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning('unable to extract video thumbnail')
video_thumbnail = None
else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
+ video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
# upload date
upload_date = self._html_search_meta(
@@ -1062,7 +1064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning('unable to extract video duration')
video_duration = None
else:
- video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
+ video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
# annotations
video_annotations = None
@@ -1609,7 +1611,7 @@ class YoutubeSearchURLIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+ query = compat_urllib_parse_unquote_plus(mobj.group('query'))
webpage = self._download_webpage(url, query)
result_code = self._search_regex(