diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/__init__.py | 6 | ||||
-rw-r--r-- | youtube_dl/downloader/http.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/amcnetworks.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/byutv.py | 90 | ||||
-rw-r--r-- | youtube_dl/extractor/clubic.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/criterion.py | 12 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/jwplatform.py | 12 | ||||
-rw-r--r-- | youtube_dl/extractor/periscope.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/rudo.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/twitch.py | 1 | ||||
-rw-r--r-- | youtube_dl/utils.py | 30 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
14 files changed, 129 insertions, 63 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 72141b983..f84b866df 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -266,8 +266,6 @@ def _real_main(argv=None): postprocessors.append({ 'key': 'FFmpegEmbedSubtitle', }) - if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails postprocessors.append({ @@ -276,6 +274,10 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True + # XAttrMetadataPP should be run after post-processors that may change file + # contents + if opts.xattrs: + postprocessors.append({'key': 'XAttrMetadata'}) # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. if opts.exec_cmd: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 11294d106..af405b950 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -182,7 +182,7 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index c739d2c99..d2b03b177 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE): # m3u8 download 'skip_download': True, }, + 'skip': 'Requires TV provider accounts', }, { 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'only_matching': True, diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3aec601f8..4be175d70 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -8,15 +7,15 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' + _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', - 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { - 'id': 'studio-c-season-5-episode-5', + 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'display_id': 'studio-c-season-5-episode-5', 'ext': 'mp4', - 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'title': 'Season 5 Episode 5', + 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, @@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], - } + }, { + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - episode_json = re.sub( - r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) - ep = json.loads(episode_json) - - if ep['providerType'] == 'Ooyala': - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'title': ep['title'], - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } - else: + + ep = self._parse_json( + episode_code, display_id, transform_source=lambda s: + re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) + + if ep['providerType'] != 'Ooyala': raise ExtractorError('Unsupported provider %s' % ep['provider']) + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'display_id': display_id, + 'title': ep['title'], + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } + + +class BYUtvEventIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)' + _TEST = { + 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'info_dict': { + 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'ext': 'mp4', + 'title': 'Toledo vs. BYU (9/30/16)', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + ooyala_id = self._search_regex( + r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'ooyala id', group='id') + + title = self._search_regex( + r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage, + 'title').strip() + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ooyala_id, + 'id': video_id, + 'title': title, + } diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 2fba93543..f7ee3a8f8 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -1,9 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( clean_html, @@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id player_page = self._download_webpage(player_url, video_id) - config_json = self._search_regex( + config = self._parse_json(self._search_regex( r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, - 'configuration') - config = json.loads(config_json) + 'configuration'), video_id) video_info = config['videoInfo'] sources = config['sources'] diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index ad32673a8..cf6a5d6cb 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le Samouraï', 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', + 'thumbnail': 're:^https?://.*\.jpg$', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) final_url = self._search_regex( - r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') title = self._og_search_title(webpage) description = self._html_search_meta('description', webpage) thumbnail = self._search_regex( - r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', webpage, 'thumbnail url') return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09b3b4942..e8928307c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -116,7 +116,10 @@ from .brightcove import ( BrightcoveNewIE, ) from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE +from .byutv import ( + BYUtvIE, + BYUtvEventIE, +) from .c56 import C56IE from .camdemy import ( CamdemyIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1792c534..489b3c7c1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2332,12 +2332,23 @@ class GenericIE(InfoExtractor): info_dict.update(json_ld) return info_dict + # Look for HTML5 media + entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') + if entries: + for entry in entries: + entry.update({ + 'id': video_id, + 'title': video_title, + }) + self._sort_formats(entry['formats']) + return self.playlist_result(entries) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True vpath = compat_urlparse.urlparse(vurl).path vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') + return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js') def filter_video(urls): return list(filter(check_video, urls)) @@ -2388,9 +2399,6 @@ class GenericIE(InfoExtractor): if m_video_type is not None: found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)) if not found: - # HTML5 video - found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage) - if not found: REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 38199fcd0..5d56e0a28 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -32,13 +32,20 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: jwplayer_data = {'playlist': [jwplayer_data]} entries = [] + + # JWPlayer backward compatibility: single playlist item + # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], list): + jwplayer_data['playlist'] = [jwplayer_data['playlist']] + for video_data in jwplayer_data['playlist']: # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 @@ -57,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor): if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, this_video_id, mpd_id=mpd_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 61043cad5..0e3623024 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE): user = list(data_store['UserCache']['users'].values())[0]['user'] user_id = user['id'] - session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] + session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] broadcasts = self._call_api( 'getUserBroadcastsPublic', diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 38366b784..9a330c196 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE): transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls') + jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') info_dict.update({ 'title': self._og_search_title(webpage), diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bc352391e..46c2cfe7b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': 'HTTP Error 404: Not Found', }] def _real_extract(self, url): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2dfa8013..044520037 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -782,6 +782,7 @@ class XAttrMetadataError(Exception): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code + self.msg = msg # Parsing code and msg if (self.code in (errno.ENOSPC, errno.EDQUOT) or @@ -3161,20 +3162,25 @@ def write_xattr(path, key, value): # try the pyxattr module... import xattr - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - # TODO: fallback to CLI tools - raise XAttrUnavailableError( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) + if hasattr(xattr, 'set'): # pyxattr + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. ' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + setxattr = xattr.set + else: # xattr + setxattr = xattr.setxattr try: - xattr.set(path, key, value) + setxattr(path, key, value) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index af0c2cfc4..161ba4391 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.27' +__version__ = '2016.10.02' |