about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 9
-rw-r--r-- test/test_utils.py 2
-rw-r--r-- youtube_dl/__init__.py 6
-rw-r--r-- youtube_dl/downloader/http.py 8
-rw-r--r-- youtube_dl/extractor/aftonbladet.py 64
-rw-r--r-- youtube_dl/extractor/clubic.py 11
-rw-r--r-- youtube_dl/extractor/dctp.py 67
-rw-r--r-- youtube_dl/extractor/extractors.py 1
-rw-r--r-- youtube_dl/extractor/instagram.py 23
-rw-r--r-- youtube_dl/extractor/ketnet.py 26
-rw-r--r-- youtube_dl/extractor/leeco.py 8
-rw-r--r-- youtube_dl/extractor/limelight.py 2
-rw-r--r-- youtube_dl/extractor/tvland.py 5
-rw-r--r-- youtube_dl/extractor/twitch.py 1
-rw-r--r-- youtube_dl/extractor/vgtv.py 13
-rw-r--r-- youtube_dl/extractor/vk.py 88
-rw-r--r-- youtube_dl/postprocessor/xattrpp.py 114
-rw-r--r-- youtube_dl/utils.py 101
18 files changed, 285 insertions, 264 deletions
diff --git a/ChangeLog b/ChangeLog
index f8149cc30..efc3e494e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,11 @@
-vesion 2016.09.27
+version <unreleased>
+
+Extractors
+* [dctp] Fix extraction (#10734)
++ [leeco] Recognize more Le Sports URLs (#10794)
+
+
+version 2016.09.27
Core
+ Add hdcore query parameter to akamai f4m formats
diff --git a/test/test_utils.py b/test/test_utils.py
index 9789d8611..b1b2effca 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
+ self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
+ self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1cf3140a0..72141b983 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -283,12 +283,6 @@ def _real_main(argv=None):
'key': 'ExecAfterDownload',
'exec_cmd': opts.exec_cmd,
})
- if opts.xattr_set_filesize:
- try:
- import xattr
- xattr # Confuse flake8
- except ImportError:
- parser.error('setting filesize xattr requested but python-xattr is not available')
external_downloader_args = None
if opts.external_downloader_args:
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index f8b69d186..11294d106 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -13,6 +13,9 @@ from ..utils import (
encodeFilename,
sanitize_open,
sanitized_Request,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
)
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
- import xattr
- xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
- except(OSError, IOError, ImportError) as err:
+ write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
deleted file mode 100644
index 5766b4fe8..000000000
--- a/youtube_dl/extractor/aftonbladet.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class AftonbladetIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
- 'info_dict': {
- 'id': '36015',
- 'ext': 'mp4',
- 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
- 'description': 'Jupiters måne mest aktiv av alla himlakroppar',
- 'timestamp': 1394142732,
- 'upload_date': '20140306',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- # find internal video meta data
- meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
- player_config = self._parse_json(self._html_search_regex(
- r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
- internal_meta_id = player_config['aptomaVideoId']
- internal_meta_url = meta_url % internal_meta_id
- internal_meta_json = self._download_json(
- internal_meta_url, video_id, 'Downloading video meta data')
-
- # find internal video formats
- format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
- internal_video_id = internal_meta_json['videoId']
- internal_formats_url = format_url % internal_video_id
- internal_formats_json = self._download_json(
- internal_formats_url, video_id, 'Downloading video formats')
-
- formats = []
- for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
- p = fmt['paths'][0]
- formats.append({
- 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
- 'ext': 'mp4',
- 'width': int_or_none(fmt.get('width')),
- 'height': int_or_none(fmt.get('height')),
- 'tbr': int_or_none(fmt.get('bitrate')),
- 'protocol': 'http',
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': internal_meta_json['title'],
- 'formats': formats,
- 'thumbnail': internal_meta_json.get('imageUrl'),
- 'description': internal_meta_json.get('shortPreamble'),
- 'timestamp': int_or_none(internal_meta_json.get('timePublished')),
- 'duration': int_or_none(internal_meta_json.get('duration')),
- 'view_count': int_or_none(internal_meta_json.get('views')),
- }
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index 2fba93543..f7ee3a8f8 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -1,9 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
player_page = self._download_webpage(player_url, video_id)
- config_json = self._search_regex(
+ config = self._parse_json(self._search_regex(
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
- 'configuration')
- config = json.loads(config_json)
+ 'configuration'), video_id)
video_info = config['videoInfo']
sources = config['sources']
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index a47e04993..14ba88715 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+ 'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
- 'id': '1324',
+ 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
- 'ext': 'flv',
- 'title': 'Videoinstallation für eine Kaufhausfassade'
+ 'ext': 'mp4',
+ 'title': 'Videoinstallation für eine Kaufhausfassade',
+ 'description': 'Kurzfilm',
+ 'upload_date': '20110407',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}
def _real_extract(self, url):
video_id = self._match_id(url)
- base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
- version_json = self._download_json(
- base_url + 'version.json',
- video_id, note='Determining file version')
- version = version_json['version_name']
- info_json = self._download_json(
- '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
- video_id, note='Fetching object ID')
- object_id = compat_str(info_json['object_id'])
- meta_json = self._download_json(
- '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
- video_id, note='Downloading metadata')
- uuid = meta_json['uuid']
- title = meta_json['title']
- wide = meta_json['is_wide']
- if wide:
- ratio = '16x9'
- else:
- ratio = '4x3'
- play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+ webpage = self._download_webpage(url, video_id)
+
+ object_id = self._html_search_meta('DC.identifier', webpage)
servers_json = self._download_json(
- 'http://www.dctp.tv/streaming_servers/',
+ 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
- url = servers_json[0]['endpoint']
+ server = servers_json[0]['server']
+ m3u8_path = self._search_regex(
+ r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+ formats = self._extract_m3u8_formats(
+ 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+ entry_protocol='m3u8_native')
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_meta('DC.description', webpage)
+ upload_date = unified_strdate(
+ self._html_search_meta('DC.date.created', webpage))
+ thumbnail = self._og_search_thumbnail(webpage)
return {
'id': object_id,
'title': title,
- 'format': 'rtmp',
- 'url': url,
- 'play_path': play_path,
- 'rtmp_real_time': True,
- 'ext': 'flv',
- 'display_id': video_id
+ 'formats': formats,
+ 'display_id': video_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 23fd2a308..09b3b4942 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -31,7 +31,6 @@ from .aenetworks import (
HistoryTopicIE,
)
from .afreecatv import AfreecaTVIE
-from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 8f7f232be..196407b06 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
'uploader': 'Naomi Leonor Phan-Quang',
'like_count': int,
'comment_count': int,
+ 'comments': list,
},
}, {
# missing description
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
'uploader': 'Britney Spears',
'like_count': int,
'comment_count': int,
+ 'comments': list,
},
'params': {
'skip_download': True,
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
(video_url, description, thumbnail, timestamp, uploader,
- uploader_id, like_count, comment_count) = [None] * 8
+ uploader_id, like_count, comment_count, height, width) = [None] * 10
shared_data = self._parse_json(
self._search_regex(
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
if media:
video_url = media.get('video_url')
+ height = int_or_none(media.get('dimensions', {}).get('height'))
+ width = int_or_none(media.get('dimensions', {}).get('width'))
description = media.get('caption')
thumbnail = media.get('display_src')
timestamp = int_or_none(media.get('date'))
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
uploader_id = media.get('owner', {}).get('username')
like_count = int_or_none(media.get('likes', {}).get('count'))
comment_count = int_or_none(media.get('comments', {}).get('count'))
+ comments = [{
+ 'author': comment.get('user', {}).get('username'),
+ 'author_id': comment.get('user', {}).get('id'),
+ 'id': comment.get('id'),
+ 'text': comment.get('text'),
+ 'timestamp': int_or_none(comment.get('created_at')),
+ } for comment in media.get(
+ 'comments', {}).get('nodes', []) if comment.get('text')]
if not video_url:
video_url = self._og_search_video_url(webpage, secure=False)
+ formats = [{
+ 'url': video_url,
+ 'width': width,
+ 'height': height,
+ }]
+
if not uploader_id:
uploader_id = self._search_regex(
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
return {
'id': video_id,
- 'url': video_url,
+ 'formats': formats,
'ext': 'mp4',
'title': 'Video by %s' % uploader_id,
'description': description,
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
'uploader': uploader,
'like_count': like_count,
'comment_count': comment_count,
+ 'comments': comments,
}
diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py
index aaf3f807a..eb0a16008 100644
--- a/youtube_dl/extractor/ketnet.py
+++ b/youtube_dl/extractor/ketnet.py
@@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor):
}, {
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
'only_matching': True,
+ }, {
+ # mzsource, geo restricted to Belgium
+ 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor):
title = config['title']
- formats = self._extract_m3u8_formats(
- config['source']['hls'], video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
+ formats = []
+ for source_key in ('', 'mz'):
+ source = config.get('%ssource' % source_key)
+ if not isinstance(source, dict):
+ continue
+ for format_id, format_url in source.items():
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False))
+ elif format_id == 'hds':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py
index e9cc9aa59..c48a5aad1 100644
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dl/extractor/leeco.py
@@ -29,7 +29,7 @@ from ..utils import (
class LeIE(InfoExtractor):
IE_DESC = '乐视网'
- _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
@@ -73,6 +73,12 @@ class LeIE(InfoExtractor):
}, {
'url': 'http://sports.le.com/video/25737697.html',
'only_matching': True,
+ }, {
+ 'url': 'http://www.lesports.com/match/1023203003.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sports.le.com/match/1023203003.html',
+ 'only_matching': True,
}]
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index 6752ffee2..b7bfa7a6d 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor):
format_id = 'rtmp'
if stream.get('videoBitRate'):
format_id += '-%d' % int_or_none(stream['videoBitRate'])
- http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
+ http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
urls.append(http_url)
http_fmt = fmt.copy()
http_fmt.update({
diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py
index cb76a2a58..957cf1ea2 100644
--- a/youtube_dl/extractor/tvland.py
+++ b/youtube_dl/extractor/tvland.py
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
class TVLandIE(MTVServicesInfoExtractor):
IE_NAME = 'tvland.com'
- _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+ _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
_TESTS = [{
# Geo-restricted. Without a proxy metadata are still there. With a
@@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor):
'upload_date': '20151228',
'timestamp': 1451289600,
},
+ }, {
+ 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+ 'only_matching': True,
}]
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index bc352391e..46c2cfe7b 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'HTTP Error 404: Not Found',
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 185756301..3b38ac700 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -22,6 +22,7 @@ class VGTVIE(XstreamIE):
'fvn.no/fvntv': 'fvntv',
'aftenposten.no/webtv': 'aptv',
'ap.vgtv.no/webtv': 'aptv',
+ 'tv.aftonbladet.se/abtv': 'abtv',
}
_APP_NAME_TO_VENDOR = {
@@ -30,6 +31,7 @@ class VGTVIE(XstreamIE):
'satv': 'sa',
'fvntv': 'fvn',
'aptv': 'ap',
+ 'abtv': 'ab',
}
_VALID_URL = r'''(?x)
@@ -40,7 +42,8 @@ class VGTVIE(XstreamIE):
/?
(?:
\#!/(?:video|live)/|
- embed?.*id=
+ embed?.*id=|
+ articles/
)|
(?P<appname>
%s
@@ -135,6 +138,14 @@ class VGTVIE(XstreamIE):
'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
'only_matching': True,
},
+ {
+ 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'abtv:140026',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index f26e0732c..58799d413 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -20,7 +20,7 @@ from ..utils import (
remove_start,
str_to_int,
unescapeHTML,
- unified_strdate,
+ unified_timestamp,
urlencode_postdata,
)
from .dailymotion import DailymotionIE
@@ -106,6 +106,7 @@ class VKIE(VKBaseIE):
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
+ 'timestamp': 1329060660,
'upload_date': '20120212',
'view_count': int,
},
@@ -119,6 +120,7 @@ class VKIE(VKBaseIE):
'uploader': 'Tom Cruise',
'title': 'No name',
'duration': 9,
+ 'timestamp': 1374374880,
'upload_date': '20130721',
'view_count': int,
}
@@ -195,6 +197,7 @@ class VKIE(VKBaseIE):
'upload_date': '20150709',
'view_count': int,
},
+ 'skip': 'Removed',
},
{
# youtube embed
@@ -226,7 +229,7 @@ class VKIE(VKBaseIE):
},
'params': {
'skip_download': True,
- }
+ },
},
{
# video key is extra_data not url\d+
@@ -237,11 +240,31 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля',
+ 'timestamp': 1454870100,
'upload_date': '20160207',
'view_count': int,
},
},
{
+ # finished live stream, live_mp4
+ 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+ 'md5': '90d22d051fccbbe9becfccc615be6791',
+ 'info_dict': {
+ 'id': '456242764',
+ 'ext': 'mp4',
+ 'title': 'ИгроМир 2016 — день 1',
+ 'uploader': 'Игромания',
+ 'duration': 5239,
+ 'view_count': int,
+ },
+ },
+ {
+ # live stream, hls and rtmp links, most likely already finished live
+ # stream by the time you are reading this comment
+ 'url': 'https://vk.com/video-140332_456239111',
+ 'only_matching': True,
+ },
+ {
# removed video, just testing that we match the pattern
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'only_matching': True,
@@ -349,42 +372,51 @@ class VKIE(VKBaseIE):
data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
data = json.loads(data_json)
- # Extract upload date
- upload_date = None
- mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
- if mobj is not None:
- mobj.group(1) + ' ' + mobj.group(2)
- upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
-
- view_count = None
- views = self._html_search_regex(
- r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
- info_page, 'view count', default=None)
- if views:
- view_count = str_to_int(self._search_regex(
- r'([\d,.]+)', views, 'view count', fatal=False))
+ title = unescapeHTML(data['md_title'])
+
+ if data.get('live') == 2:
+ title = self._live_title(title)
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+ 'upload date', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+ info_page, 'view count', fatal=False))
formats = []
- for k, v in data.items():
- if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
+ for format_id, format_url in data.items():
+ if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
continue
- height = int_or_none(self._search_regex(
- r'^(?:url|cache)(\d+)', k, 'height', default=None))
- formats.append({
- 'format_id': k,
- 'url': v,
- 'height': height,
- })
+ if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'):
+ height = int_or_none(self._search_regex(
+ r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': height,
+ })
+ elif format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id,
+ fatal=False, live=True))
+ elif format_id == 'rtmp':
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': 'flv',
+ })
self._sort_formats(formats)
return {
- 'id': compat_str(data['vid']),
+ 'id': compat_str(data.get('vid') or video_id),
'formats': formats,
- 'title': unescapeHTML(data['md_title']),
+ 'title': title,
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
'duration': data.get('duration'),
- 'upload_date': upload_date,
+ 'timestamp': timestamp,
'view_count': view_count,
}
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
index e39ca60aa..fbdfa02ac 100644
--- a/youtube_dl/postprocessor/xattrpp.py
+++ b/youtube_dl/postprocessor/xattrpp.py
@@ -1,37 +1,15 @@
from __future__ import unicode_literals
-import os
-import subprocess
-import sys
-import errno
-
from .common import PostProcessor
from ..compat import compat_os_name
from ..utils import (
- check_executable,
hyphenate_date,
- version_tuple,
- PostProcessingError,
- encodeArgument,
- encodeFilename,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
)
-class XAttrMetadataError(PostProcessingError):
- def __init__(self, code=None, msg='Unknown error'):
- super(XAttrMetadataError, self).__init__(msg)
- self.code = code
-
- # Parsing code and msg
- if (self.code in (errno.ENOSPC, errno.EDQUOT) or
- 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
- self.reason = 'NO_SPACE'
- elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
- self.reason = 'VALUE_TOO_LONG'
- else:
- self.reason = 'NOT_SUPPORTED'
-
-
class XAttrMetadataPP(PostProcessor):
#
@@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor):
def run(self, info):
""" Set extended attributes on downloaded file (if xattr support is found). """
- # This mess below finds the best xattr tool for the job and creates a
- # "write_xattr" function.
- try:
- # try the pyxattr module...
- import xattr
-
- # Unicode arguments are not supported in python-pyxattr until
- # version 0.5.0
- # See https://github.com/rg3/youtube-dl/issues/5498
- pyxattr_required_version = '0.5.0'
- if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
- self._downloader.report_warning(
- 'python-pyxattr is detected but is too old. '
- 'youtube-dl requires %s or above while your version is %s. '
- 'Falling back to other xattr implementations' % (
- pyxattr_required_version, xattr.__version__))
-
- raise ImportError
-
- def write_xattr(path, key, value):
- try:
- xattr.set(path, key, value)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
-
- except ImportError:
- if compat_os_name == 'nt':
- # Write xattrs to NTFS Alternate Data Streams:
- # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
- def write_xattr(path, key, value):
- assert ':' not in key
- assert os.path.exists(path)
-
- ads_fn = path + ':' + key
- try:
- with open(ads_fn, 'wb') as f:
- f.write(value)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
- else:
- user_has_setfattr = check_executable('setfattr', ['--version'])
- user_has_xattr = check_executable('xattr', ['-h'])
-
- if user_has_setfattr or user_has_xattr:
-
- def write_xattr(path, key, value):
- value = value.decode('utf-8')
- if user_has_setfattr:
- executable = 'setfattr'
- opts = ['-n', key, '-v', value]
- elif user_has_xattr:
- executable = 'xattr'
- opts = ['-w', key, value]
-
- cmd = ([encodeFilename(executable, True)] +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(path, True)])
-
- try:
- p = subprocess.Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
- stdout, stderr = p.communicate()
- stderr = stderr.decode('utf-8', 'replace')
- if p.returncode != 0:
- raise XAttrMetadataError(p.returncode, stderr)
-
- else:
- # On Unix, and can't find pyxattr, setfattr, or xattr.
- if sys.platform.startswith('linux'):
- self._downloader.report_error(
- "Couldn't find a tool to set the xattrs. "
- "Install either the python 'pyxattr' or 'xattr' "
- "modules, or the GNU 'attr' package "
- "(which contains the 'setfattr' tool).")
- else:
- self._downloader.report_error(
- "Couldn't find a tool to set the xattrs. "
- "Install either the python 'xattr' module, "
- "or the 'xattr' binary.")
-
# Write the metadata to the file's xattrs
self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
@@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor):
return [], info
+ except XAttrUnavailableError as e:
+ self._downloader.report_error(str(e))
+ return [], info
+
except XAttrMetadataError as e:
if e.reason == 'NO_SPACE':
self._downloader.report_warning(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 69ca88c85..d2dfa8013 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -42,6 +42,7 @@ from .compat import (
compat_html_entities_html5,
compat_http_client,
compat_kwargs,
+ compat_os_name,
compat_parse_qs,
compat_shlex_quote,
compat_socket_create_connection,
@@ -141,6 +142,8 @@ DATE_FORMATS = (
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -775,6 +778,25 @@ class ContentTooShortError(Exception):
self.expected = expected
+class XAttrMetadataError(Exception):
+ def __init__(self, code=None, msg='Unknown error'):
+ super(XAttrMetadataError, self).__init__(msg)
+ self.code = code
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+ 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(Exception):
+ pass
+
+
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
# expected HTTP responses to meet HTTP/1.0 or later (see also
@@ -3131,3 +3153,82 @@ def decode_png(png_data):
current_row.append(color)
return width, height, pixels
+
+
+def write_xattr(path, key, value):
+ # This mess below finds the best xattr tool for the job
+ try:
+ # try the pyxattr module...
+ import xattr
+
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/rg3/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dl requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ try:
+ xattr.set(path, key, value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+
+ except ImportError:
+ if compat_os_name == 'nt':
+ # Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ assert ':' not in key
+ assert os.path.exists(path)
+
+ ads_fn = path + ':' + key
+ try:
+ with open(ads_fn, 'wb') as f:
+ f.write(value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ else:
+ user_has_setfattr = check_executable('setfattr', ['--version'])
+ user_has_xattr = check_executable('xattr', ['-h'])
+
+ if user_has_setfattr or user_has_xattr:
+
+ value = value.decode('utf-8')
+ if user_has_setfattr:
+ executable = 'setfattr'
+ opts = ['-n', key, '-v', value]
+ elif user_has_xattr:
+ executable = 'xattr'
+ opts = ['-w', key, value]
+
+ cmd = ([encodeFilename(executable, True)] +
+ [encodeArgument(o) for o in opts] +
+ [encodeFilename(path, True)])
+
+ try:
+ p = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ stdout, stderr = p.communicate()
+ stderr = stderr.decode('utf-8', 'replace')
+ if p.returncode != 0:
+ raise XAttrMetadataError(p.returncode, stderr)
+
+ else:
+ # On Unix, and none of pyxattr, setfattr, or xattr could be found.
+ if sys.platform.startswith('linux'):
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'pyxattr' or 'xattr' "
+ "modules, or the GNU 'attr' package "
+ "(which contains the 'setfattr' tool).")
+ else:
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'xattr' module, "
+ "or the 'xattr' binary.")