-rw-r--r--  youtube_dl/extractor/extractors.py |   5
-rw-r--r--  youtube_dl/extractor/firsttv.py    | 133
-rw-r--r--  youtube_dl/extractor/kaltura.py    |  11
-rw-r--r--  youtube_dl/extractor/twitch.py     |  80
-rw-r--r--  youtube_dl/extractor/zingmp3.py    | 131
5 files changed, 173 insertions(+), 187 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d4d90c1f8..b0644be11 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1115,7 +1115,4 @@ from .youtube import (
)
from .zapiks import ZapiksIE
from .zdf import ZDFIE, ZDFChannelIE
-from .zingmp3 import (
- ZingMp3SongIE,
- ZingMp3AlbumIE,
-)
+from .zingmp3 import ZingMp3IE
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py
index 88bca1007..af7de10b7 100644
--- a/youtube_dl/extractor/firsttv.py
+++ b/youtube_dl/extractor/firsttv.py
@@ -2,130 +2,75 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_xpath
+from ..compat import compat_urlparse
from ..utils import (
int_or_none,
qualities,
unified_strdate,
- xpath_attr,
- xpath_element,
- xpath_text,
- xpath_with_ns,
)
class FirstTVIE(InfoExtractor):
IE_NAME = '1tv'
IE_DESC = 'Первый канал'
- _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{
- # single format via video_materials.json API
- 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
- 'md5': '82a2777648acae812d58b3f5bd42882b',
+ 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
+ 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
'info_dict': {
- 'id': '35930',
+ 'id': '40049',
'ext': 'mp4',
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
- 'description': 'md5:357933adeede13b202c7c21f91b871b2',
+ 'description': 'md5:36a39c1d19618fec57d12efe212a8370',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20150212',
'duration': 2694,
},
}, {
- # multiple formats via video_materials.json API
- 'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
- 'info_dict': {
- 'id': '113641',
- 'ext': 'mp4',
- 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
- 'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
- 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
- 'upload_date': '20160407',
- 'duration': 179,
- 'formats': 'mincount:3',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
- 'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
- 'md5': '519d306c5b5669761fd8906c39dbee23',
- 'info_dict': {
- 'id': '47038',
- 'ext': 'mp4',
- 'title': '"Побег". Второй сезон. 3 серия',
- 'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
- 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
- 'upload_date': '20120516',
- 'duration': 3080,
- },
- }, {
- 'url': 'http://www.1tv.ru/videoarchive/9967',
- 'only_matching': True,
+ 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- # Videos with multiple formats only available via this API
- video = self._download_json(
- 'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
- video_id, fatal=False)
-
- description, thumbnail, upload_date, duration = [None] * 4
-
- if video:
- item = video[0]
- title = item['title']
- quality = qualities(('ld', 'sd', 'hd', ))
- formats = [{
- 'url': f['src'],
- 'format_id': f.get('name'),
- 'quality': quality(f.get('name')),
- } for f in item['mbr'] if f.get('src')]
- thumbnail = item.get('poster')
- else:
- # Some videos are not available via video_materials.json
- video = self._download_xml(
- 'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
- video_id)
-
- NS_MAP = {
- 'media': 'http://search.yahoo.com/mrss/',
- }
+ display_id = self._match_id(url)
- item = xpath_element(video, './channel/item', fatal=True)
- title = xpath_text(item, './title', fatal=True)
- formats = [{
- 'url': content.attrib['url'],
- } for content in item.findall(
- compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
- thumbnail = xpath_attr(
- item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
+ webpage = self._download_webpage(url, display_id)
+ playlist_url = compat_urlparse.urljoin(url, self._search_regex(
+ r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
+ item = self._download_json(playlist_url, display_id)[0]
+ video_id = item['id']
+ quality = qualities(('ld', 'sd', 'hd', ))
+ formats = []
+ for f in item.get('mbr', []):
+ src = f.get('src')
+ if not src:
+ continue
+ fname = f.get('name')
+ formats.append({
+ 'url': src,
+ 'format_id': fname,
+ 'quality': quality(fname),
+ })
self._sort_formats(formats)
- webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
- if webpage:
- title = self._html_search_regex(
- (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
- r"'title'\s*:\s*'([^']+)'"),
- webpage, 'title', default=None) or title
- description = self._html_search_regex(
- r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
- webpage, 'description', default=None) or self._html_search_meta(
- 'description', webpage, 'description')
- thumbnail = thumbnail or self._og_search_thumbnail(webpage)
- duration = int_or_none(self._html_search_meta(
- 'video:duration', webpage, 'video duration', fatal=False))
- upload_date = unified_strdate(self._html_search_meta(
- 'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
+ title = self._html_search_regex(
+ (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+ r"'title'\s*:\s*'([^']+)'"),
+ webpage, 'title', default=None) or item['title']
+ description = self._html_search_regex(
+ r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
+ webpage, 'description', default=None) or self._html_search_meta(
+ 'description', webpage, 'description')
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'video duration', fatal=False))
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
return {
'id': video_id,
- 'thumbnail': thumbnail,
+ 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
'title': title,
'description': description,
'upload_date': upload_date,
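For context before the next file: the reworked FirstTVIE ranks the `mbr` variants with youtube-dl's `qualities` helper. A minimal usage sketch of how that preference ordering behaves (the `('ld', 'sd', 'hd')` tuple comes straight from the diff; the sample names are illustrative):

```python
from youtube_dl.utils import qualities

# Preference order used by the patched extractor: later names rank higher.
quality = qualities(('ld', 'sd', 'hd'))

assert quality('ld') == 0      # lowest preference
assert quality('hd') == 2      # highest preference
assert quality('webm') == -1   # unknown names sort below everything

# Each format dict gets 'quality': quality(f.get('name')), so
# _sort_formats() ends up preferring the 'hd' entry from item['mbr'].
```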
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index e0f7366c2..6a8464998 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -36,6 +36,12 @@ class KalturaIE(InfoExtractor):
'''
_SERVICE_URL = 'http://cdnapi.kaltura.com'
_SERVICE_BASE = '/api_v3/index.php'
+ # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
+ _CAPTION_TYPES = {
+ 1: 'srt',
+ 2: 'ttml',
+ 3: 'vtt',
+ }
_TESTS = [
{
'url': 'kaltura:269692:1_1jc2y3e4',
@@ -285,9 +291,12 @@ class KalturaIE(InfoExtractor):
# Continue if caption is not ready
if f.get('status') != 2:
continue
+ if not caption.get('id'):
+ continue
+ caption_format = int_or_none(caption.get('format'))
subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
- 'ext': caption.get('fileExt', 'ttml'),
+ 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml',
})
return {
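The caption change above falls back from Kaltura's `fileExt` field to the numeric `format` enum before settling on the old `ttml` default. A small sketch of the resulting lookup chain (`caption_ext` is a hypothetical helper added here only for illustration):

```python
from youtube_dl.utils import int_or_none

# Values from Kaltura's CaptionType enum, as referenced in the diff.
_CAPTION_TYPES = {
    1: 'srt',
    2: 'ttml',
    3: 'vtt',
}

def caption_ext(caption):
    # Prefer the explicit fileExt, then the numeric 'format' field,
    # then keep the previous 'ttml' default.
    return (caption.get('fileExt')
            or _CAPTION_TYPES.get(int_or_none(caption.get('format')))
            or 'ttml')

print(caption_ext({'fileExt': 'srt'}))  # srt
print(caption_ext({'format': '3'}))     # vtt
print(caption_ext({}))                  # ttml
```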
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 890f55180..359a8859c 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -7,6 +7,7 @@ import random
from .common import InfoExtractor
from ..compat import (
+ compat_HTTPError,
compat_parse_qs,
compat_str,
compat_urllib_parse_urlencode,
@@ -14,13 +15,13 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
+ clean_html,
ExtractorError,
int_or_none,
js_to_json,
orderedSet,
parse_duration,
parse_iso8601,
- sanitized_Request,
urlencode_postdata,
)
@@ -42,7 +43,7 @@ class TwitchBaseIE(InfoExtractor):
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
expected=True)
- def _download_json(self, url, video_id, note='Downloading JSON metadata'):
+ def _call_api(self, path, item_id, note):
headers = {
'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
'X-Requested-With': 'XMLHttpRequest',
@@ -50,8 +51,8 @@ class TwitchBaseIE(InfoExtractor):
for cookie in self._downloader.cookiejar:
if cookie.name == 'api_token':
headers['Twitch-Api-Token'] = cookie.value
- request = sanitized_Request(url, headers=headers)
- response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
+ response = self._download_json(
+ '%s/%s' % (self._API_BASE, path), item_id, note)
self._handle_error(response)
return response
@@ -63,9 +64,17 @@ class TwitchBaseIE(InfoExtractor):
if username is None:
return
+ def fail(message):
+ raise ExtractorError(
+ 'Unable to login. Twitch said: %s' % message, expected=True)
+
login_page, handle = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login page')
+ # Some TOR nodes and public proxies are blocked completely
+ if 'blacklist_message' in login_page:
+ fail(clean_html(login_page))
+
login_form = self._hidden_inputs(login_page)
login_form.update({
@@ -82,21 +91,24 @@ class TwitchBaseIE(InfoExtractor):
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(redirect_url, post_url)
- request = sanitized_Request(
- post_url, urlencode_postdata(login_form))
- request.add_header('Referer', redirect_url)
- response = self._download_webpage(
- request, None, 'Logging in as %s' % username)
-
- error_message = self._search_regex(
- r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
- response, 'error message', default=None)
- if error_message:
- raise ExtractorError(
- 'Unable to login. Twitch said: %s' % error_message, expected=True)
+ headers = {'Referer': redirect_url}
- if '>Reset your password<' in response:
- self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
+ try:
+ response = self._download_json(
+ post_url, None, 'Logging in as %s' % username,
+ data=urlencode_postdata(login_form),
+ headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ response = self._parse_json(
+ e.cause.read().decode('utf-8'), None)
+ fail(response['message'])
+ raise
+
+ if response.get('redirect'):
+ self._download_webpage(
+ response['redirect'], None, 'Downloading login redirect page',
+ headers=headers)
def _prefer_source(self, formats):
try:
@@ -109,14 +121,14 @@ class TwitchBaseIE(InfoExtractor):
class TwitchItemBaseIE(TwitchBaseIE):
def _download_info(self, item, item_id):
- return self._extract_info(self._download_json(
- '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+ return self._extract_info(self._call_api(
+ 'kraken/videos/%s%s' % (item, item_id), item_id,
'Downloading %s info JSON' % self._ITEM_TYPE))
def _extract_media(self, item_id):
info = self._download_info(self._ITEM_SHORTCUT, item_id)
- response = self._download_json(
- '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
+ response = self._call_api(
+ 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
'Downloading %s playlist JSON' % self._ITEM_TYPE)
entries = []
chunks = response['chunks']
@@ -246,8 +258,8 @@ class TwitchVodIE(TwitchItemBaseIE):
item_id = self._match_id(url)
info = self._download_info(self._ITEM_SHORTCUT, item_id)
- access_token = self._download_json(
- '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+ access_token = self._call_api(
+ 'api/vods/%s/access_token' % item_id, item_id,
'Downloading %s access token' % self._ITEM_TYPE)
formats = self._extract_m3u8_formats(
@@ -275,12 +287,12 @@ class TwitchVodIE(TwitchItemBaseIE):
class TwitchPlaylistBaseIE(TwitchBaseIE):
- _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+ _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
_PAGE_LIMIT = 100
def _extract_playlist(self, channel_id):
- info = self._download_json(
- '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+ info = self._call_api(
+ 'kraken/channels/%s' % channel_id,
channel_id, 'Downloading channel info JSON')
channel_name = info.get('display_name') or info.get('name')
entries = []
@@ -289,8 +301,8 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
broken_paging_detected = False
counter_override = None
for counter in itertools.count(1):
- response = self._download_json(
- self._PLAYLIST_URL % (channel_id, offset, limit),
+ response = self._call_api(
+ self._PLAYLIST_PATH % (channel_id, offset, limit),
channel_id,
'Downloading %s videos JSON page %s'
% (self._PLAYLIST_TYPE, counter_override or counter))
@@ -345,7 +357,7 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
IE_NAME = 'twitch:past_broadcasts'
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
- _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
+ _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true'
_PLAYLIST_TYPE = 'past broadcasts'
_TEST = {
@@ -389,8 +401,8 @@ class TwitchStreamIE(TwitchBaseIE):
def _real_extract(self, url):
channel_id = self._match_id(url)
- stream = self._download_json(
- '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
+ stream = self._call_api(
+ 'kraken/streams/%s' % channel_id, channel_id,
'Downloading stream JSON').get('stream')
# Fallback on profile extraction if stream is offline
@@ -405,8 +417,8 @@ class TwitchStreamIE(TwitchBaseIE):
# JSON and fallback to lowercase if it's not available.
channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
- access_token = self._download_json(
- '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
+ access_token = self._call_api(
+ 'api/channels/%s/access_token' % channel_id, channel_id,
'Downloading channel access token')
query = {
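One part of the login rewrite worth spelling out: Twitch answers a failed login with HTTP 400 but still puts the human-readable reason in a JSON body, which is why the patch inspects `e.cause` instead of discarding the error. A standalone sketch of that pattern, assuming a hypothetical endpoint URL and pre-encoded form data:

```python
import json

from youtube_dl.compat import compat_HTTPError, compat_urllib_request
from youtube_dl.utils import ExtractorError

def post_login(post_url, data):
    # data is the urlencoded login form; post_url is hypothetical here.
    try:
        return json.loads(compat_urllib_request.urlopen(
            post_url, data).read().decode('utf-8'))
    except compat_HTTPError as e:
        if e.code == 400:
            # The HTTPError doubles as a file object; its body holds
            # the JSON error explaining why the login failed.
            message = json.loads(e.read().decode('utf-8'))['message']
            raise ExtractorError(
                'Unable to login. Twitch said: %s' % message, expected=True)
        raise
```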
diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py
index 437eecb67..bd708b42c 100644
--- a/youtube_dl/extractor/zingmp3.py
+++ b/youtube_dl/extractor/zingmp3.py
@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ update_url_query,
+)
class ZingMp3BaseInfoExtractor(InfoExtractor):
- def _extract_item(self, item, fatal=True):
- error_message = item.find('./errormessage').text
+ def _extract_item(self, item, page_type, fatal=True):
+ error_message = item.get('msg')
if error_message:
if not fatal:
return
@@ -18,25 +22,48 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error_message),
expected=True)
- title = item.find('./title').text.strip()
- source = item.find('./source').text
- extension = item.attrib['type']
- thumbnail = item.find('./backimage').text
+ formats = []
+ for quality, source_url in zip(item.get('qualities') or item.get('quality', []), item.get('source_list') or item.get('source', [])):
+ if not source_url or source_url == 'require vip':
+ continue
+ if not re.match(r'https?://', source_url):
+ source_url = '//' + source_url
+ source_url = self._proto_relative_url(source_url, 'http:')
+ quality_num = int_or_none(quality)
+ f = {
+ 'format_id': quality,
+ 'url': source_url,
+ }
+ if page_type == 'video':
+ f.update({
+ 'height': quality_num,
+ 'ext': 'mp4',
+ })
+ else:
+ f.update({
+ 'abr': quality_num,
+ 'ext': 'mp3',
+ })
+ formats.append(f)
+
+ cover = item.get('cover')
return {
- 'title': title,
- 'url': source,
- 'ext': extension,
- 'thumbnail': thumbnail,
+ 'title': (item.get('name') or item.get('title')).strip(),
+ 'formats': formats,
+ 'thumbnail': 'http://' + cover if cover else None,
+ 'artist': item.get('artist'),
}
- def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
- player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
- items = player_xml.findall('./item')
+ def _extract_player_json(self, player_json_url, id, page_type, playlist_title=None):
+ player_json = self._download_json(player_json_url, id, 'Downloading Player JSON')
+ items = player_json['data']
+ if 'item' in items:
+ items = items['item']
if len(items) == 1:
# one single song
- data = self._extract_item(items[0])
+ data = self._extract_item(items[0], page_type)
data['id'] = id
return data
@@ -45,7 +72,7 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
entries = []
for i, item in enumerate(items, 1):
- entry = self._extract_item(item, fatal=False)
+ entry = self._extract_item(item, page_type, fatal=False)
if not entry:
continue
entry['id'] = '%s-%d' % (id, i)
@@ -59,8 +86,8 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
}
-class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
- _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
+class ZingMp3IE(ZingMp3BaseInfoExtractor):
+ _VALID_URL = r'https?://mp3\.zing\.vn/(?:bai-hat|album|playlist|video-clip)/[^/]+/(?P<id>\w+)\.html'
_TESTS = [{
'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
'md5': 'ead7ae13693b3205cbc89536a077daed',
@@ -70,51 +97,47 @@ class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
'ext': 'mp3',
'thumbnail': 're:^https?://.*\.jpg$',
},
- }]
- IE_NAME = 'zingmp3:song'
- IE_DESC = 'mp3.zing.vn songs'
-
- def _real_extract(self, url):
- matched = re.match(self._VALID_URL, url)
- slug = matched.group('slug')
- song_id = matched.group('song_id')
-
- webpage = self._download_webpage(
- 'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
-
- player_xml_url = self._search_regex(
- r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
-
- return self._extract_player_xml(player_xml_url, song_id)
-
-
-class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
- _VALID_URL = r'https?://mp3\.zing\.vn/(?:album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
- _TESTS = [{
+ }, {
+ 'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html',
+ 'md5': '870295a9cd8045c0e15663565902618d',
+ 'info_dict': {
+ 'id': 'ZW6BAEA0',
+ 'title': 'Let It Go (Frozen OST)',
+ 'ext': 'mp4',
+ },
+ }, {
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
'info_dict': {
'_type': 'playlist',
'id': 'ZWZBWDAF',
- 'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
+ 'title': 'Lâu Đài Tình Ái - Bằng Kiều,Minh Tuyết | Album 320 lossless',
},
'playlist_count': 10,
+ 'skip': 'removed at the request of the owner',
}, {
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
'only_matching': True,
}]
- IE_NAME = 'zingmp3:album'
- IE_DESC = 'mp3.zing.vn albums'
+ IE_NAME = 'zingmp3'
+ IE_DESC = 'mp3.zing.vn'
def _real_extract(self, url):
- matched = re.match(self._VALID_URL, url)
- slug = matched.group('slug')
- album_id = matched.group('album_id')
-
- webpage = self._download_webpage(
- 'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id)
- player_xml_url = self._search_regex(
- r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
-
- return self._extract_player_xml(
- player_xml_url, album_id,
- playlist_title=self._og_search_title(webpage))
+ page_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, page_id)
+
+ player_json_url = self._search_regex([
+ r'data-xml="([^"]+)',
+ r'&amp;xmlURL=([^&]+)&'
+ ], webpage, 'player xml url')
+
+ playlist_title = None
+ page_type = self._search_regex(r'/(?:html5)?xml/([^/-]+)', player_json_url, 'page type')
+ if page_type == 'video':
+ player_json_url = update_url_query(player_json_url, {'format': 'json'})
+ else:
+ player_json_url = player_json_url.replace('/xml/', '/html5xml/')
+ if page_type == 'album':
+ playlist_title = self._og_search_title(webpage)
+
+ return self._extract_player_json(player_json_url, page_id, page_type, playlist_title)
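Finally, the zingmp3 rewrite derives the player JSON endpoint from the XML player URL found in the page: video players take an explicit `format=json` query, while song and album players have a parallel API under `/html5xml/`. A sketch of that rewriting with made-up example URLs (the real ones come from the page's `data-xml`/`xmlURL` attributes):

```python
from youtube_dl.utils import update_url_query

def player_json_url(player_url, page_type):
    if page_type == 'video':
        # The video endpoint speaks JSON when asked explicitly.
        return update_url_query(player_url, {'format': 'json'})
    # Audio endpoints mirror the XML API under /html5xml/.
    return player_url.replace('/xml/', '/html5xml/')

print(player_json_url('http://mp3.zing.vn/xml/video-xml/abc', 'video'))
# http://mp3.zing.vn/xml/video-xml/abc?format=json
print(player_json_url('http://mp3.zing.vn/xml/song-xml/abc', 'song'))
# http://mp3.zing.vn/html5xml/song-xml/abc
```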