about summary refs log tree commit diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-09-22 12:53:41 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2014-09-22 12:53:41 +0200
commit 45c85d7ba1dbca09c7ded9130fa5670b302e099b (patch)
tree 8352b3afcdb8f4599257533216c52b678b0b0d4a
parent df8f53f752c0f01577dcc5d63c6d9a81d924770b (diff)
parent d0df92928bc099775e18f6413e387713839012ba (diff)
Merge remote-tracking branch 'origin/master'
-rw-r--r-- youtube_dl/downloader/f4m.py 5
-rw-r--r-- youtube_dl/extractor/__init__.py 5
-rw-r--r-- youtube_dl/extractor/nbc.py 4
-rw-r--r-- youtube_dl/extractor/npo.py 30
-rw-r--r-- youtube_dl/extractor/sbs.py 2
-rw-r--r-- youtube_dl/extractor/theplatform.py 53
6 files changed, 73 insertions, 26 deletions
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 71353f607..b3be16ff1 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -16,6 +16,7 @@ from ..utils import (
format_bytes,
encodeFilename,
sanitize_open,
+ xpath_text,
)
@@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
# We only download the first fragment
fragments_list = fragments_list[:1]
total_frags = len(fragments_list)
+ # For some akamai manifests we'll need to add a query to the fragment url
+ akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
for (seg_i, frag_i) in fragments_list:
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
+ if akamai_pv:
+ url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url})
if not success:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 8a5eb8cf1..244d22297 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
-from .npo import NPOIE
+from .npo import (
+ NPOIE,
+ TegenlichtVproIE,
+)
from .nrk import (
NRKIE,
NRKTVIE,
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index d2e4acbad..e75ab7c39 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
_TEST = {
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
- 'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+ # md5 checksum is not stable
'info_dict': {
- 'id': 'u1RInQZRN7QJ',
+ 'id': 'bTmnLCvIbaaH',
'ext': 'flv',
'title': 'I Am a Firefighter',
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 7a154e94a..f36d446d2 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -7,6 +7,7 @@ from ..utils import (
unified_strdate,
parse_duration,
qualities,
+ url_basename,
)
@@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ return self._get_info(video_id)
+ def _get_info(self, video_id):
metadata = self._download_json(
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id,
@@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):
'duration': parse_duration(metadata.get('tijdsduur')),
'formats': formats,
}
+
+
+class TegenlichtVproIE(NPOIE):
+ IE_NAME = 'tegenlicht.vpro.nl'
+ _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+
+ _TESTS = [
+ {
+ 'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
+ 'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+ 'info_dict': {
+ 'id': 'VPWON_1169289',
+ 'ext': 'm4v',
+ 'title': 'Tegenlicht',
+ 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+ 'upload_date': '20130225',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ name = url_basename(url)
+ webpage = self._download_webpage(url, name)
+ urn = self._html_search_meta('mediaurn', webpage)
+ info_page = self._download_json(
+ 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
+ return self._get_info(info_page['mid'])
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index 34058fd4b..214990e7a 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
'info_dict': {
'id': '320403011771',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Dingo Conservation',
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
'thumbnail': 're:http://.*\.jpg',
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index b6b2dba9c..0be793b1c 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -5,6 +5,7 @@ import json
from .common import InfoExtractor
from ..utils import (
+ compat_str,
ExtractorError,
xpath_with_ns,
)
@@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):
body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq//smil:video'))
- if f4m_node is not None:
+ if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
f4m_url += '?'
# the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
- formats = [{
- 'ext': 'flv',
- 'url': f4m_url,
- }]
+ formats = self._extract_f4m_formats(f4m_url, video_id)
else:
- base_url = head.find(_x('smil:meta')).attrib['base']
- switch = body.find(_x('smil:switch'))
formats = []
- for f in switch.findall(_x('smil:video')):
- attr = f.attrib
- width = int(attr['width'])
- height = int(attr['height'])
- vbr = int(attr['system-bitrate']) // 1000
- format_id = '%dx%d_%dk' % (width, height, vbr)
- formats.append({
- 'format_id': format_id,
- 'url': base_url,
- 'play_path': 'mp4:' + attr['src'],
- 'ext': 'flv',
- 'width': width,
- 'height': height,
- 'vbr': vbr,
- })
+ switch = body.find(_x('smil:switch'))
+ if switch is not None:
+ base_url = head.find(_x('smil:meta')).attrib['base']
+ for f in switch.findall(_x('smil:video')):
+ attr = f.attrib
+ width = int(attr['width'])
+ height = int(attr['height'])
+ vbr = int(attr['system-bitrate']) // 1000
+ format_id = '%dx%d_%dk' % (width, height, vbr)
+ formats.append({
+ 'format_id': format_id,
+ 'url': base_url,
+ 'play_path': 'mp4:' + attr['src'],
+ 'ext': 'flv',
+ 'width': width,
+ 'height': height,
+ 'vbr': vbr,
+ })
+ else:
+ switch = body.find(_x('smil:seq//smil:switch'))
+ for f in switch.findall(_x('smil:video')):
+ attr = f.attrib
+ vbr = int(attr['system-bitrate']) // 1000
+ formats.append({
+ 'format_id': compat_str(vbr),
+ 'url': attr['src'],
+ 'vbr': vbr,
+ })
self._sort_formats(formats)
return {