about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 6
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/aftenposten.py 103
-rw-r--r-- youtube_dl/extractor/aparat.py 7
-rw-r--r-- youtube_dl/extractor/common.py 34
-rw-r--r-- youtube_dl/extractor/goshgay.py 4
-rw-r--r-- youtube_dl/extractor/izlesene.py 5
-rw-r--r-- youtube_dl/extractor/rtp.py 43
-rw-r--r-- youtube_dl/extractor/rts.py 28
-rw-r--r-- youtube_dl/extractor/soulanime.py 80
-rw-r--r-- youtube_dl/extractor/teamcoco.py 7
-rw-r--r-- youtube_dl/extractor/tvigle.py 22
-rw-r--r-- youtube_dl/extractor/tweakers.py 58
-rw-r--r-- youtube_dl/postprocessor/ffmpeg.py 13
-rw-r--r-- youtube_dl/version.py 2
15 files changed, 269 insertions, 144 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1730df4cd..633e3d8a1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -938,6 +938,9 @@ class YoutubeDL(object):
def has_header(self, h):
return h in self.headers
+ def get_header(self, h, default=None):
+ return self.headers.get(h, default)
+
pr = _PseudoRequest(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.headers.get('Cookie')
@@ -1076,7 +1079,8 @@ class YoutubeDL(object):
else self.params['merge_output_format'])
selected_format = {
'requested_formats': formats_info,
- 'format': rf,
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
formats_info[1].get('format_id')),
'width': formats_info[0].get('width'),
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 047f7002a..0d7a120bc 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
+from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py
new file mode 100644
index 000000000..2b257ede7
--- /dev/null
+++ b/youtube_dl/extractor/aftenposten.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class AftenpostenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+
+ _TEST = {
+ 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mov',
+ 'title': 'TRAILER: "Sweatshop" - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'timestamp': 1416927969,
+ 'upload_date': '20141125',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._html_search_regex(
+ r'data-xs-id="(\d+)"', webpage, 'video id')
+
+ data = self._download_xml(
+ 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
+
+ NS_MAP = {
+ 'atom': 'http://www.w3.org/2005/Atom',
+ 'xt': 'http://xstream.dk/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ }
+
+ entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+ title = xpath_text(
+ entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+ description = xpath_text(
+ entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+ timestamp = parse_iso8601(xpath_text(
+ entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+ formats = []
+ media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+ for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+ media_url = media_content.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media_content.get('bitrate'))
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+ if mobj:
+ formats.append({
+ 'url': mobj.group('url'),
+ 'play_path': 'mp4:%s' % mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'format_id': 'rtmp-%d' % tbr,
+ })
+ else:
+ formats.append({
+ 'url': media_url,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ link = find_xpath_attr(
+ entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+ if link is not None:
+ formats.append({
+ 'url': link.get('href'),
+ 'format_id': link.get('rel'),
+ })
+
+ thumbnails = [{
+ 'url': splash.get('url'),
+ 'width': int_or_none(splash.get('width')),
+ 'height': int_or_none(splash.get('height')),
+ } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
index 15006336f..63429780e 100644
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -20,6 +20,7 @@ class AparatIE(InfoExtractor):
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
+ 'age_limit': 0,
},
# 'skip': 'Extremely unreliable',
}
@@ -34,7 +35,8 @@ class AparatIE(InfoExtractor):
video_id + '/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
- video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+ video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
+ r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
@@ -46,7 +48,7 @@ class AparatIE(InfoExtractor):
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
thumbnail = self._search_regex(
- r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+ r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
@@ -54,4 +56,5 @@ class AparatIE(InfoExtractor):
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
+ 'age_limit': self._family_friendly_search(webpage),
}
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 602601b24..2f5ba7aee 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -656,6 +656,21 @@ class InfoExtractor(object):
}
return RATING_TABLE.get(rating.lower(), None)
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObj
+ family_friendly = self._html_search_meta('isFamilyFriendly', html)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower(), None)
+
def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html,
'twitter card player')
@@ -707,9 +722,9 @@ class InfoExtractor(object):
f.get('quality') if f.get('quality') is not None else -1,
f.get('tbr') if f.get('tbr') is not None else -1,
f.get('vbr') if f.get('vbr') is not None else -1,
- ext_preference,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
+ ext_preference,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('fps') if f.get('fps') is not None else -1,
@@ -765,7 +780,7 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
- def _extract_f4m_formats(self, manifest_url, video_id):
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest')
@@ -778,26 +793,28 @@ class InfoExtractor(object):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
- manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
+ manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
+ + (media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
- format_id = 'f4m-%d' % (i if tbr is None else tbr)
formats.append({
- 'format_id': format_id,
+ 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
'url': manifest_url,
'ext': 'flv',
'tbr': tbr,
'width': int_or_none(media_el.attrib.get('width')),
'height': int_or_none(media_el.attrib.get('height')),
+ 'preference': preference,
})
self._sort_formats(formats)
return formats
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
- entry_protocol='m3u8', preference=None):
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None):
formats = [{
- 'format_id': 'm3u8-meta',
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
@@ -833,9 +850,8 @@ class InfoExtractor(object):
formats.append({'url': format_url(line)})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
-
f = {
- 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
'url': format_url(line.strip()),
'tbr': tbr,
'ext': ext,
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
index b116d251d..1d9166455 100644
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dl/extractor/goshgay.py
@@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor):
duration = parse_duration(self._html_search_regex(
r'<span class="duration">\s*-?\s*(.*?)</span>',
webpage, 'duration', fatal=False))
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', webpage, default='false')
flashvars = compat_parse_qs(self._html_search_regex(
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
@@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'duration': duration,
- 'age_limit': 0 if family_friendly == 'true' else 18,
+ 'age_limit': self._family_friendly_search(webpage),
}
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
index d16d483ee..99a1361f8 100644
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor):
r'comment_count\s*=\s*\'([^\']+)\';',
webpage, 'comment_count', fatal=False)
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', webpage, 'age limit', fatal=False)
-
content_url = self._html_search_meta(
'contentURL', webpage, 'content URL', fatal=False)
ext = determine_ext(content_url, 'mp4')
@@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor):
'duration': duration,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
- 'age_limit': 18 if family_friendly == 'False' else 0,
+ 'age_limit': self._family_friendly_search(webpage),
'formats': formats,
}
diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py
index 7736cabba..ecf4939cd 100644
--- a/youtube_dl/extractor/rtp.py
+++ b/youtube_dl/extractor/rtp.py
@@ -1,16 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
+import re
from .common import InfoExtractor
-from ..utils import js_to_json
class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+ 'md5': 'e736ce0c665e459ddb818546220b4ef8',
'info_dict': {
'id': 'e174042',
'ext': 'mp3',
@@ -18,9 +18,6 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': 're:^https?://.*\.jpg',
},
- 'params': {
- 'skip_download': True, # RTMP download
- },
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
@@ -37,20 +34,48 @@ class RTPIE(InfoExtractor):
player_config = self._search_regex(
r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
- config = json.loads(js_to_json(player_config))
+ config = self._parse_json(player_config, video_id)
path, ext = config.get('file').rsplit('.', 1)
formats = [{
+ 'format_id': 'rtmp',
+ 'ext': ext,
+ 'vcodec': config.get('type') == 'audio' and 'none' or None,
+ 'preference': -2,
+ 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
'app': config.get('application'),
'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
'page_url': url,
- 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
'rtmp_live': config.get('live', False),
- 'ext': ext,
- 'vcodec': config.get('type') == 'audio' and 'none' or None,
'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+ 'rtmp_real_time': True,
}]
+ # Construct regular HTTP download URLs
+ replacements = {
+ 'audio': {
+ 'format_id': 'mp3',
+ 'pattern': r'^nas2\.share/wavrss/',
+ 'repl': 'http://rsspod.rtp.pt/podcasts/',
+ 'vcodec': 'none',
+ },
+ 'video': {
+ 'format_id': 'mp4_h264',
+ 'pattern': r'^nas2\.share/h264/',
+ 'repl': 'http://rsspod.rtp.pt/videocasts/',
+ 'vcodec': 'h264',
+ },
+ }
+ r = replacements[config['type']]
+ if re.match(r['pattern'], config['file']) is not None:
+ formats.append({
+ 'format_id': r['format_id'],
+ 'url': re.sub(r['pattern'], r['repl'], config['file']),
+ 'vcodec': r['vcodec'],
+ })
+
+ self._sort_formats(formats)
+
return {
'id': video_id,
'title': title,
diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py
index 5e84c1098..d0981115d 100644
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -6,12 +6,14 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_str,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
unescapeHTML,
+ xpath_text,
)
@@ -159,11 +161,27 @@ class RTSIE(InfoExtractor):
return int_or_none(self._search_regex(
r'-([0-9]+)k\.', url, 'bitrate', default=None))
- formats = [{
- 'format_id': fid,
- 'url': furl,
- 'tbr': extract_bitrate(furl),
- } for fid, furl in info['streams'].items()]
+ formats = []
+ for format_id, format_url in info['streams'].items():
+ if format_url.endswith('.f4m'):
+ token = self._download_xml(
+ 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
+ video_id, 'Downloading %s token' % format_id)
+ auth_params = xpath_text(token, './/authparams', 'auth params')
+ if not auth_params:
+ continue
+ formats.extend(self._extract_f4m_formats(
+ '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
+ video_id, f4m_id=format_id))
+ elif format_url.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'tbr': extract_bitrate(format_url),
+ })
if 'media' in info:
formats.extend([{
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
deleted file mode 100644
index feef33e27..000000000
--- a/youtube_dl/extractor/soulanime.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- HEADRequest,
- urlhandle_detect_ext,
-)
-
-
-class SoulAnimeWatchingIE(InfoExtractor):
- IE_NAME = "soulanime:watching"
- IE_DESC = "SoulAnime video"
- _TEST = {
- 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
- 'md5': '05fae04abf72298098b528e98abf4298',
- 'info_dict': {
- 'id': 'seirei-tsukai-no-blade-dance-episode-9',
- 'ext': 'mp4',
- 'title': 'seirei-tsukai-no-blade-dance-episode-9',
- 'description': 'seirei-tsukai-no-blade-dance-episode-9'
- }
- }
- _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- domain = mobj.group('domain')
-
- page = self._download_webpage(url, video_id)
-
- video_url_encoded = self._html_search_regex(
- r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
- video_url = "http://www.soul-anime." + domain + video_url_encoded
-
- ext_req = HEADRequest(video_url)
- ext_handle = self._request_webpage(
- ext_req, video_id, note='Determining extension')
- ext = urlhandle_detect_ext(ext_handle)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': ext,
- 'title': video_id,
- 'description': video_id
- }
-
-
-class SoulAnimeSeriesIE(InfoExtractor):
- IE_NAME = "soulanime:series"
- IE_DESC = "SoulAnime Series"
-
- _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
-
- _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
-
- _TEST = {
- 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
- 'info_dict': {
- 'id': 'black-rock-shooter-tv'
- },
- 'playlist_count': 8
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- series_id = mobj.group('id')
- domain = mobj.group('domain')
-
- pattern = re.compile(self._EPISODE_REGEX)
-
- page = self._download_webpage(url, series_id, "Downloading series page")
- mobj = pattern.findall(page)
-
- entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
-
- return self.playlist_result(entries, series_id)
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 18a823719..e85d452a3 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -15,7 +15,8 @@ class TeamcocoIE(InfoExtractor):
'id': '80187',
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
- 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+ 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'age_limit': 0,
}
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
@@ -24,7 +25,8 @@ class TeamcocoIE(InfoExtractor):
'id': '19705',
'ext': 'mp4',
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
- "title": "Louis C.K. Interview Pt. 1 11/3/11"
+ "title": "Louis C.K. Interview Pt. 1 11/3/11",
+ 'age_limit': 0,
}
}
]
@@ -83,4 +85,5 @@ class TeamcocoIE(InfoExtractor):
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
+ 'age_limit': self._family_friendly_search(webpage),
}
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index ba65996dc..102362b29 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -1,6 +1,8 @@
# encoding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
float_or_none,
@@ -11,7 +13,7 @@ from ..utils import (
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
- _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
+ _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
_TESTS = [
{
@@ -38,16 +40,22 @@ class TvigleIE(InfoExtractor):
'duration': 186.080,
'age_limit': 0,
},
- },
+ }, {
+ 'url': 'https://cloud.tvigle.ru/video/5267604/',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
- video_id = self._html_search_regex(
- r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ r'<li class="video-preview current_playing" id="(\d+)">',
+ webpage, 'video id')
video_data = self._download_json(
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py
index e332d4694..c80ec15cf 100644
--- a/youtube_dl/extractor/tweakers.py
+++ b/youtube_dl/extractor/tweakers.py
@@ -1,35 +1,65 @@
-# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ xpath_with_ns,
+ int_or_none,
+ float_or_none,
+)
class TweakersIE(InfoExtractor):
- _VALID_URL = r'https?://tweakers\.net/video/(?P<id>[0-9]+).*'
+ _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
_TEST = {
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
- 'md5': 'f7f7f3027166a7f32f024b4ae6571ced',
+ 'md5': '1b5afa817403bb5baa08359dca31e6df',
'info_dict': {
'id': '9926',
'ext': 'mp4',
- 'title': 'New-Nintendo-3Ds-Xl-Op-Alle-Fronten-Beter',
+ 'title': 'New Nintendo 3DS XL - Op alle fronten beter',
+ 'description': 'md5:f97324cc71e86e11c853f0763820e3ba',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'duration': 386,
}
}
def _real_extract(self, url):
- splitted_url = re.split('.html|/', url)
- del splitted_url[-1] # To remove extra '/' at the end
video_id = self._match_id(url)
- title = splitted_url[5].title() # Retrieve title for URL and capitalize
- splitted_url[3] = splitted_url[3] + '/player' # Add /player to get the player page
- player_url = '/'.join(splitted_url) + '.html'
- player_page = self._download_webpage(player_url, video_id)
+
+ playlist = self._download_xml(
+ 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
+ video_id)
+
+ NS_MAP = {
+ 'xspf': 'http://xspf.org/ns/0/',
+ 's1': 'http://static.streamone.nl/player/ns/0',
+ }
+
+ track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
+
+ title = xpath_text(
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
+ description = xpath_text(
+ track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+ thumbnail = xpath_text(
+ track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+ duration = float_or_none(
+ xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
+ 1000)
+
+ formats = [{
+ 'url': location.text,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
return {
'id': video_id,
- 'ext': 'mp4',
'title': title,
- 'url': re.findall('http.*mp4', player_page)[0],
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
}
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 4a4422c5a..01d25f760 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -166,14 +166,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
if filecodec is None:
raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
- uses_avconv = self._uses_avconv()
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = 'm4a'
- more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+ more_opts = ['-bsf:a', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible
acodec = 'copy'
@@ -189,9 +188,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
more_opts = []
if self._preferredquality is not None:
if int(self._preferredquality) < 10:
- more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+ more_opts += ['-q:a', self._preferredquality]
else:
- more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+ more_opts += ['-b:a', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
@@ -200,13 +199,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
if self._preferredquality is not None:
# The opus codec doesn't support the -aq option
if int(self._preferredquality) < 10 and extension != 'opus':
- more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+ more_opts += ['-q:a', self._preferredquality]
else:
- more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+ more_opts += ['-b:a', self._preferredquality + 'k']
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
if self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+ more_opts += ['-bsf:a', 'aac_adtstoasc']
if self._preferredcodec == 'vorbis':
extension = 'ogg'
if self._preferredcodec == 'wav':
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 36591a43d..1091ae61b 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.02.04'
+__version__ = '2015.02.06'