aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py432
1 files changed, 333 insertions, 99 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 480f403da..ad2e933ee 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
+ compat_kwargs,
compat_parse_qs,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
@@ -38,7 +39,6 @@ from ..utils import (
parse_duration,
remove_quotes,
remove_start,
- sanitized_Request,
smuggle_url,
str_to_int,
try_get,
@@ -54,7 +54,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
- _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
+
+ _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
+ _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
+ _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
@@ -96,74 +100,157 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
login_form = self._hidden_inputs(login_page)
- login_form.update({
- 'checkConnection': 'youtube',
- 'Email': username,
- 'Passwd': password,
- })
+ def req(url, f_req, note, errnote):
+ data = login_form.copy()
+ data.update({
+ 'pstMsg': 1,
+ 'checkConnection': 'youtube',
+ 'checkedDomains': 'youtube',
+ 'hl': 'en',
+ 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
+ 'f.req': json.dumps(f_req),
+ 'flowName': 'GlifWebSignIn',
+ 'flowEntry': 'ServiceLogin',
+ })
+ return self._download_json(
+ url, None, note=note, errnote=errnote,
+ transform_source=lambda s: re.sub(r'^[^[]*', '', s),
+ fatal=False,
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
+ 'Google-Accounts-XSRF': 1,
+ })
- login_results = self._download_webpage(
- self._PASSWORD_CHALLENGE_URL, None,
- note='Logging in', errnote='unable to log in', fatal=False,
- data=urlencode_postdata(login_form))
- if login_results is False:
- return False
+ def warn(message):
+ self._downloader.report_warning(message)
+
+ lookup_req = [
+ username,
+ None, [], None, 'US', None, None, 2, False, True,
+ [
+ None, None,
+ [2, 1, None, 1,
+ 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
+ None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ],
+ username,
+ ]
- error_msg = self._html_search_regex(
- r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
- login_results, 'error message', default=None)
- if error_msg:
- raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
+ lookup_results = req(
+ self._LOOKUP_URL, lookup_req,
+ 'Looking up account info', 'Unable to look up account info')
- if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
- raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
+ if lookup_results is False:
+ return False
- # Two-Factor
- # TODO add SMS and phone call support - these require making a request and then prompting the user
+ user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
+ if not user_hash:
+ warn('Unable to extract user hash')
+ return False
- if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
- tfa_code = self._get_tfa_info('2-step verification code')
+ challenge_req = [
+ user_hash,
+ None, 1, None, [1, None, None, None, [password, None, True]],
+ [
+ None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ]]
- if not tfa_code:
- self._downloader.report_warning(
- 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
- '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
- return False
+ challenge_results = req(
+ self._CHALLENGE_URL, challenge_req,
+ 'Logging in', 'Unable to log in')
- tfa_code = remove_start(tfa_code, 'G-')
+ if challenge_results is False:
+ return
- tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
+ login_res = try_get(challenge_results, lambda x: x[0][5], list)
+ if login_res:
+ login_msg = try_get(login_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to login: %s' % 'Invalid password'
+ if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
+ return False
- tfa_form_strs.update({
- 'Pin': tfa_code,
- 'TrustDevice': 'on',
- })
+ res = try_get(challenge_results, lambda x: x[0][-1], list)
+ if not res:
+ warn('Unable to extract result entry')
+ return False
- tfa_data = urlencode_postdata(tfa_form_strs)
+ tfa = try_get(res, lambda x: x[0][0], list)
+ if tfa:
+ tfa_str = try_get(tfa, lambda x: x[2], compat_str)
+ if tfa_str == 'TWO_STEP_VERIFICATION':
+ # SEND_SUCCESS - TFA code has been successfully sent to phone
+ # QUOTA_EXCEEDED - reached the limit of TFA codes
+ status = try_get(tfa, lambda x: x[5], compat_str)
+ if status == 'QUOTA_EXCEEDED':
+ warn('Exceeded the limit of TFA codes, try later')
+ return False
+
+ tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
+ if not tl:
+ warn('Unable to extract TL')
+ return False
+
+ tfa_code = self._get_tfa_info('2-step verification code')
+
+ if not tfa_code:
+ warn(
+ 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
+ '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+ return False
+
+ tfa_code = remove_start(tfa_code, 'G-')
+
+ tfa_req = [
+ user_hash, None, 2, None,
+ [
+ 9, None, None, None, None, None, None, None,
+ [None, tfa_code, True, 2]
+ ]]
+
+ tfa_results = req(
+ self._TFA_URL.format(tl), tfa_req,
+ 'Submitting TFA code', 'Unable to submit TFA code')
+
+ if tfa_results is False:
+ return False
+
+ tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
+ if tfa_res:
+ tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to finish TFA: %s' % 'Invalid TFA code'
+ if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
+ return False
+
+ check_cookie_url = try_get(
+ tfa_results, lambda x: x[0][-1][2], compat_str)
+ else:
+ check_cookie_url = try_get(res, lambda x: x[2], compat_str)
- tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
- tfa_results = self._download_webpage(
- tfa_req, None,
- note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
+ if not check_cookie_url:
+ warn('Unable to extract CheckCookie URL')
+ return False
- if tfa_results is False:
- return False
+ check_cookie_results = self._download_webpage(
+ check_cookie_url, None, 'Checking cookie', fatal=False)
- if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
- self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
- return False
- if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
- self._downloader.report_warning('unable to log in - did the page structure change?')
- return False
- if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
- self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
- return False
+ if check_cookie_results is False:
+ return False
- if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
- self._downloader.report_warning('unable to log in: bad username or password')
+ if 'https://myaccount.google.com/' not in check_cookie_results:
+ warn('Unable to log in')
return False
+
return True
+ def _download_webpage(self, *args, **kwargs):
+ kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+ return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+ *args, **compat_kwargs(kwargs))
+
def _real_initialize(self):
if self._downloader is None:
return
@@ -592,6 +679,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
},
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
{
'url': '__2ABJjxzNo',
'info_dict': {
@@ -922,6 +1010,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
],
},
{
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ 'uploader': 'New Century Foundation',
+ 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+ 'license': 'Standard YouTube License',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
# itag 212
'url': '1t24XAntNCY',
'only_matching': True,
@@ -1188,37 +1297,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
+ def make_captions(sub_url, sub_langs):
+ parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
+ caption_qs = compat_parse_qs(parsed_sub_url.query)
+ captions = {}
+ for sub_lang in sub_langs:
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ caption_qs.update({
+ 'tlang': [sub_lang],
+ 'fmt': [ext],
+ })
+ sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
+ query=compat_urllib_parse_urlencode(caption_qs, True)))
+ sub_formats.append({
+ 'url': sub_url,
+ 'ext': ext,
+ })
+ captions[sub_lang] = sub_formats
+ return captions
+
+ # New captions format as of 22.06.2017
+ player_response = args.get('player_response')
+ if player_response and isinstance(player_response, compat_str):
+ player_response = self._parse_json(
+ player_response, video_id, fatal=False)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
+
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
+ # Does not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
- parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
- caption_qs = compat_parse_qs(parsed_caption_url.query)
-
- sub_lang_list = {}
+ sub_lang_list = []
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
- if not sub_lang:
- continue
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- caption_qs.update({
- 'tlang': [sub_lang],
- 'fmt': [ext],
- })
- sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
- query=compat_urllib_parse_urlencode(caption_qs, True)))
- sub_formats.append({
- 'url': sub_url,
- 'ext': ext,
- })
- sub_lang_list[sub_lang] = sub_formats
- return sub_lang_list
+ if sub_lang:
+ sub_lang_list.append(sub_lang)
+ return make_captions(caption_url, sub_lang_list)
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
- except (KeyError, ExtractorError):
+ except (KeyError, IndexError, ExtractorError):
self._downloader.report_warning(err_msg)
return {}
@@ -1245,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
playback_url, video_id, 'Marking watched',
'Unable to mark watched', fatal=False)
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/.+?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
@classmethod
def extract_id(cls, url):
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
@@ -1257,6 +1423,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
+ @staticmethod
+ def _extract_chapters(description, duration):
+ if not description:
+ return None
+ chapter_lines = re.findall(
+ r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
+ description)
+ if not chapter_lines:
+ return None
+ chapters = []
+ for next_num, (chapter_line, time_point) in enumerate(
+ chapter_lines, start=1):
+ start_time = parse_duration(time_point)
+ if start_time is None:
+ continue
+ if start_time > duration:
+ break
+ end_time = (duration if next_num == len(chapter_lines)
+ else parse_duration(chapter_lines[next_num][1]))
+ if end_time is None:
+ continue
+ if end_time > duration:
+ end_time = duration
+ if start_time > end_time:
+ break
+ chapter_title = re.sub(
+ r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
+ chapter_title = re.sub(r'\s+', ' ', chapter_title)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': chapter_title,
+ })
+ return chapters
+
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -1300,9 +1501,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if dash_mpd and dash_mpd[0] not in dash_mpds:
dash_mpds.append(dash_mpd[0])
+ is_live = None
+ view_count = None
+
+ def extract_view_count(v_info):
+ return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
# Get video info
embed_webpage = None
- is_live = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -1325,6 +1531,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
age_gate = False
video_info = None
+ sts = None
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
@@ -1341,6 +1548,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
+ sts = ytplayer_config.get('sts')
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags
@@ -1349,17 +1557,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# The general idea is to take a union of itags of both DASH manifests (for example
# video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
self.report_video_info_webpage_download(video_id)
- for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
- video_info_url = (
- '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
- % (proto, video_id, el_type))
+ for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
+ query = {
+ 'video_id': video_id,
+ 'ps': 'default',
+ 'eurl': '',
+ 'gl': 'US',
+ 'hl': 'en',
+ }
+ if el:
+ query['el'] = el
+ if sts:
+ query['sts'] = sts
video_info_webpage = self._download_webpage(
- video_info_url,
+ '%s://www.youtube.com/get_video_info' % proto,
video_id, note=False,
- errnote='unable to download video info webpage')
+ errnote='unable to download video info webpage',
+ fatal=False, query=query)
+ if not video_info_webpage:
+ continue
get_video_info = compat_parse_qs(video_info_webpage)
- if get_video_info.get('use_cipher_signature') != ['True']:
- add_dash_mpd(get_video_info)
+ add_dash_mpd(get_video_info)
+ if view_count is None:
+ view_count = extract_view_count(get_video_info)
if not video_info:
video_info = get_video_info
if 'token' in get_video_info:
@@ -1399,9 +1619,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_title = '_'
# description
- video_description = get_element_by_id("eow-description", video_webpage)
+ description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
- video_description = re.sub(r'''(?x)
+ description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
@@ -1443,10 +1663,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self.playlist_result(entries, video_id, video_title, video_description)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- if 'view_count' in video_info:
- view_count = int(video_info['view_count'][0])
- else:
- view_count = None
+ if view_count is None:
+ view_count = extract_view_count(video_info)
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@@ -1490,10 +1708,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not upload_date:
upload_date = self._search_regex(
[r'(?s)id="eow-date.*?>(.*?)</span>',
- r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
video_webpage, 'upload date', default=None)
- if upload_date:
- upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
video_license = self._html_search_regex(
@@ -1501,7 +1717,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_webpage, 'license', default=None)
m_music = re.search(
- r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
+ r'''(?x)
+ <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
+ <ul[^>]*>\s*
+ <li>(?P<title>.+?)
+ by (?P<creator>.+?)
+ (?:
+ \(.+?\)|
+ <a[^>]*
+ (?:
+ \bhref=["\']/red[^>]*>| # drop possible
+ >\s*Listen ad-free with YouTube Red # YouTube Red ad
+ )
+ .*?
+ )?</li
+ ''',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
@@ -1558,6 +1788,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
+ chapters = self._extract_chapters(description_original, video_duration)
+
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@@ -1591,12 +1823,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
format_id = url_data['itag'][0]
url = url_data['url'][0]
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-
jsplayer_url_json = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
@@ -1617,6 +1845,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_webpage, 'age gate player URL')
player_url = json.loads(player_url_json)
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
if self._downloader.params.get('verbose'):
if player_url is None:
player_version = 'unknown'
@@ -1790,6 +2023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
+ 'chapters': chapters,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
@@ -1861,7 +2095,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
(%(playlist_id)s)
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
+ _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist'
_TESTS = [{