about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .github/ISSUE_TEMPLATE.md 6
-rw-r--r-- ChangeLog 20
-rw-r--r-- docs/supportedsites.md 2
-rw-r--r-- youtube_dl/__init__.py 6
-rw-r--r-- youtube_dl/downloader/http.py 2
-rw-r--r-- youtube_dl/extractor/amcnetworks.py 1
-rw-r--r-- youtube_dl/extractor/byutv.py 90
-rw-r--r-- youtube_dl/extractor/clubic.py 11
-rw-r--r-- youtube_dl/extractor/criterion.py 12
-rw-r--r-- youtube_dl/extractor/extractors.py 5
-rw-r--r-- youtube_dl/extractor/generic.py 16
-rw-r--r-- youtube_dl/extractor/jwplatform.py 12
-rw-r--r-- youtube_dl/extractor/periscope.py 2
-rw-r--r-- youtube_dl/extractor/rudo.py 2
-rw-r--r-- youtube_dl/extractor/twitch.py 1
-rw-r--r-- youtube_dl/utils.py 30
-rw-r--r-- youtube_dl/version.py 2
17 files changed, 152 insertions, 68 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 273eb8c0b..e813e4c59 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.09.27
+[debug] youtube-dl version 2016.10.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/ChangeLog b/ChangeLog
index efc3e494e..4f64edabb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,26 @@
-version <unreleased>
+version 2016.10.02
+
+Core
+* Fix possibly lost extended attributes during post-processing
++ Support pyxattr as well as python-xattr for --xattrs and
+ --xattr-set-filesize (#9054)
Extractors
++ [jwplatform] Support DASH streams in JWPlayer
++ [jwplatform] Support old-style JWPlayer playlists
++ [byutv:event] Add extractor
+* [periscope:user] Fix extraction (#10820)
* [dctp] Fix extraction (#10734)
++ [instagram] Extract video dimensions (#10790)
++ [tvland] Extend URL regular expression (#10812)
++ [vgtv] Add support for tv.aftonbladet.se (#10800)
+- [aftonbladet] Remove extractor
+* [vk] Fix timestamp and view count extraction (#10760)
++ [vk] Add support for running and finished live streams (#10799)
+ [leeco] Recognize more Le Sports URLs (#10794)
++ [instagram] Extract comments (#10788)
++ [ketnet] Extract mzsource formats (#10770)
+* [limelight:media] Improve HTTP formats extraction
version 2016.09.27
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 26f275577..828ed0ba9 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -34,7 +34,6 @@
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **AfreecaTV**: afreecatv.com
- - **Aftonbladet**
- **AirMozilla**
- **AlJazeera**
- **Allocine**
@@ -112,6 +111,7 @@
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed**
- **BYUtv**
+ - **BYUtvEvent**
- **Camdemy**
- **CamdemyFolder**
- **CamWithHer**
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 72141b983..f84b866df 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -266,8 +266,6 @@ def _real_main(argv=None):
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
})
- if opts.xattrs:
- postprocessors.append({'key': 'XAttrMetadata'})
if opts.embedthumbnail:
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({
@@ -276,6 +274,10 @@ def _real_main(argv=None):
})
if not already_have_thumbnail:
opts.writethumbnail = True
+ # XAttrMetadataPP should be run after post-processors that may change file
+ # contents
+ if opts.xattrs:
+ postprocessors.append({'key': 'XAttrMetadata'})
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd:
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 11294d106..af405b950 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -182,7 +182,7 @@ class HttpFD(FileDownloader):
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
- write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+ write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py
index c739d2c99..d2b03b177 100644
--- a/youtube_dl/extractor/amcnetworks.py
+++ b/youtube_dl/extractor/amcnetworks.py
@@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'Requires TV provider accounts',
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 3aec601f8..4be175d70 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
@@ -8,15 +7,15 @@ from ..utils import ExtractorError
class BYUtvIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TESTS = [{
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
- 'md5': '05850eb8c749e2ee05ad5a1c34668493',
'info_dict': {
- 'id': 'studio-c-season-5-episode-5',
+ 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+ 'display_id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
- 'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'title': 'Season 5 Episode 5',
+ 'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 1486.486,
},
@@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
'skip_download': True,
},
'add_ie': ['Ooyala'],
- }
+ }, {
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, display_id)
episode_code = self._search_regex(
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
- episode_json = re.sub(
- r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
- ep = json.loads(episode_json)
-
- if ep['providerType'] == 'Ooyala':
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % ep['providerId'],
- 'id': video_id,
- 'title': ep['title'],
- 'description': ep.get('description'),
- 'thumbnail': ep.get('imageThumbnail'),
- }
- else:
+
+ ep = self._parse_json(
+ episode_code, display_id, transform_source=lambda s:
+ re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
+
+ if ep['providerType'] != 'Ooyala':
raise ExtractorError('Unsupported provider %s' % ep['provider'])
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ep['providerId'],
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': ep['title'],
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ }
+
+
+class BYUtvEventIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
+ _TEST = {
+ 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+ 'info_dict': {
+ 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+ 'ext': 'mp4',
+ 'title': 'Toledo vs. BYU (9/30/16)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ ooyala_id = self._search_regex(
+ r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'ooyala id', group='id')
+
+ title = self._search_regex(
+ r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
+ 'title').strip()
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ooyala_id,
+ 'id': video_id,
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index 2fba93543..f7ee3a8f8 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -1,9 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
player_page = self._download_webpage(player_url, video_id)
- config_json = self._search_regex(
+ config = self._parse_json(self._search_regex(
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
- 'configuration')
- config = json.loads(config_json)
+ 'configuration'), video_id)
video_info = config['videoInfo']
sources = config['sources']
diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py
index ad32673a8..cf6a5d6cb 100644
--- a/youtube_dl/extractor/criterion.py
+++ b/youtube_dl/extractor/criterion.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
'ext': 'mp4',
'title': 'Le Samouraï',
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
+ 'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
final_url = self._search_regex(
- r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+ r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._og_search_title(webpage)
description = self._html_search_meta('description', webpage)
thumbnail = self._search_regex(
- r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+ r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url')
return {
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 09b3b4942..e8928307c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -116,7 +116,10 @@ from .brightcove import (
BrightcoveNewIE,
)
from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
+from .byutv import (
+ BYUtvIE,
+ BYUtvEventIE,
+)
from .c56 import C56IE
from .camdemy import (
CamdemyIE,
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c1792c534..489b3c7c1 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2332,12 +2332,23 @@ class GenericIE(InfoExtractor):
info_dict.update(json_ld)
return info_dict
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ for entry in entries:
+ entry.update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries)
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
def filter_video(urls):
return list(filter(check_video, urls))
@@ -2388,9 +2399,6 @@ class GenericIE(InfoExtractor):
if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
- # HTML5 video
- found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
- if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 38199fcd0..5d56e0a28 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -32,13 +32,20 @@ class JWPlatformBaseIE(InfoExtractor):
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
- def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
jwplayer_data = {'playlist': [jwplayer_data]}
entries = []
+
+ # JWPlayer backward compatibility: single playlist item
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ if not isinstance(jwplayer_data['playlist'], list):
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
for video_data in jwplayer_data['playlist']:
# JWPlayer backward compatibility: flattened sources
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
@@ -57,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):
if source_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, this_video_id, mpd_id=mpd_id, fatal=False))
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
formats.append({
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
index 61043cad5..0e3623024 100644
--- a/youtube_dl/extractor/periscope.py
+++ b/youtube_dl/extractor/periscope.py
@@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE):
user = list(data_store['UserCache']['users'].values())[0]['user']
user_id = user['id']
- session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id']
+ session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
broadcasts = self._call_api(
'getUserBroadcastsPublic',
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
index 38366b784..9a330c196 100644
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE):
transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
info_dict = self._parse_jwplayer_data(
- jwplayer_data, video_id, require_title=False, m3u8_id='hls')
+ jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
info_dict.update({
'title': self._og_search_title(webpage),
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index bc352391e..46c2cfe7b 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'HTTP Error 404: Not Found',
}]
def _real_extract(self, url):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d2dfa8013..044520037 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -782,6 +782,7 @@ class XAttrMetadataError(Exception):
def __init__(self, code=None, msg='Unknown error'):
super(XAttrMetadataError, self).__init__(msg)
self.code = code
+ self.msg = msg
# Parsing code and msg
if (self.code in (errno.ENOSPC, errno.EDQUOT) or
@@ -3161,20 +3162,25 @@ def write_xattr(path, key, value):
# try the pyxattr module...
import xattr
- # Unicode arguments are not supported in python-pyxattr until
- # version 0.5.0
- # See https://github.com/rg3/youtube-dl/issues/5498
- pyxattr_required_version = '0.5.0'
- if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
- # TODO: fallback to CLI tools
- raise XAttrUnavailableError(
- 'python-pyxattr is detected but is too old. '
- 'youtube-dl requires %s or above while your version is %s. '
- 'Falling back to other xattr implementations' % (
- pyxattr_required_version, xattr.__version__))
+ if hasattr(xattr, 'set'): # pyxattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/rg3/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dl requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ setxattr = xattr.set
+ else: # xattr
+ setxattr = xattr.setxattr
try:
- xattr.set(path, key, value)
+ setxattr(path, key, value)
except EnvironmentError as e:
raise XAttrMetadataError(e.errno, e.strerror)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index af0c2cfc4..161ba4391 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.09.27'
+__version__ = '2016.10.02'