[bloomberg] Extract the available formats (closes #2776)

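The Bloomberg extractor now builds its format list with a new helper method, `_extract_f4m_formats`, added to the InfoExtractor class; the helper reads the bitrate, width and height of each media entry in the f4m manifest. The downloader will pick the requested formats using the bitrate in the info dict.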
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2014-07-28 15:25:56 +0200
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2014-07-28 15:32:38 +0200
commit: 31bb8d3f5142f68c9da389caf9fa2356464cc35e (patch)
tree: e85f304a6387d3bd856c66534763b58e0369f52b /youtube_dl/extractor
parent: 4958ae205873980189793885824418533cd27041 (diff)
2 files changed, 19 insertions, 3 deletions
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 25fb79e14..c51a97ce4 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        # The md5 checksum changes
         'info_dict': {
             'id': 'qurhIVlJSB6hzkVi229d8g',
             'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
         return {
             'id': name.split('-')[-1],
             'title': title,
-            'url': f4m_url,
-            'ext': 'flv',
+            'formats': self._extract_f4m_formats(f4m_url, name),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 52c00186e..59030e127 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
+    int_or_none,
     RegexNotFoundError,
     sanitize_filename,
     unescapeHTML,
@@ -590,6 +591,22 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)
 
+    def _extract_f4m_formats(self, manifest_url, video_id):
+        manifest = self._download_xml(manifest_url, video_id)
+
+        formats = []
+        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+            formats.append({
+                'url': manifest_url,
+                'ext': 'flv',
+                'tbr': int_or_none(media_el.attrib.get('bitrate')),
+                'width': int_or_none(media_el.attrib.get('width')),
+                'height': int_or_none(media_el.attrib.get('height')),
+            })
+        self._sort_formats(formats)
+
+        return formats
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2014-07-28 15:25:56 +0200
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2014-07-28 15:32:38 +0200
commit	31bb8d3f5142f68c9da389caf9fa2356464cc35e (patch)
tree	e85f304a6387d3bd856c66534763b58e0369f52b /youtube_dl/extractor
parent	4958ae205873980189793885824418533cd27041 (diff)