aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2014-07-28 15:25:56 +0200
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2014-07-28 15:32:38 +0200
commit31bb8d3f5142f68c9da389caf9fa2356464cc35e (patch)
treee85f304a6387d3bd856c66534763b58e0369f52b
parent4958ae205873980189793885824418533cd27041 (diff)
downloadyoutube-dl-31bb8d3f5142f68c9da389caf9fa2356464cc35e.tar.xz
[bloomberg] Extract the available formats (closes #2776)
It uses a helper method in the InfoExtractor class. The downloader will pick the requested formats using the bitrate in the info dict.
-rw-r--r--youtube_dl/downloader/f4m.py11
-rw-r--r--youtube_dl/extractor/bloomberg.py5
-rw-r--r--youtube_dl/extractor/common.py17
3 files changed, 28 insertions, 5 deletions
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index e6be6ae6c..71353f607 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
+ requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
- formats = sorted(formats, key=lambda f: f[0])
- rate, media = formats[-1]
+ if requested_bitrate is None:
+ # get the best format
+ formats = sorted(formats, key=lambda f: f[0])
+ rate, media = formats[-1]
+ else:
+ rate, media = list(filter(
+ lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 25fb79e14..c51a97ce4 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
- 'md5': '7bf08858ff7c203c870e8a6190e221e5',
+ # The md5 checksum changes
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
return {
'id': name.split('-')[-1],
'title': title,
- 'url': f4m_url,
- 'ext': 'flv',
+ 'formats': self._extract_f4m_formats(f4m_url, name),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 52c00186e..59030e127 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
+ int_or_none,
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
@@ -590,6 +591,22 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
+ def _extract_f4m_formats(self, manifest_url, video_id):
+ manifest = self._download_xml(manifest_url, video_id)
+
+ formats = []
+ for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+ formats.append({
+ 'url': manifest_url,
+ 'ext': 'flv',
+ 'tbr': int_or_none(media_el.attrib.get('bitrate')),
+ 'width': int_or_none(media_el.attrib.get('width')),
+ 'height': int_or_none(media_el.attrib.get('height')),
+ })
+ self._sort_formats(formats)
+
+ return formats
+
class SearchInfoExtractor(InfoExtractor):
"""