Merge branch 'master' into rtmpdump

Conflicts: youtube_dl/FileDownloader.py Merge
author: rzhxeo <rzhxeot7z81b4700@mailcatch.com> 2013-11-18 00:27:06 +0100
committer: rzhxeo <rzhxeot7z81b4700@mailcatch.com> 2013-11-18 00:27:06 +0100
commit: 2b35c9ef742bf261078ea10c6c0bba848db1a0df (patch)
tree: fe80c838c7529c8cab6f1b44d730a2849cd68c48 /youtube_dl/extractor/common.py
parent: 4894fe8c5baec8b1f21ac6fdebe08175abc7f094 (diff)
parent: 73c566695fac926e7e9e6922fe4e6d82c64a1850 (diff)
1 files changed, 21 insertions, 6 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ce349fe20..f787d0a3c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -63,7 +63,7 @@ class InfoExtractor(object):
                     * ext       Will be calculated from url if missing
                     * format    A human-readable description of the format
                                 ("mp4 container with h264/opus").
-                                Calculated from the format_id, width, height 
+                                Calculated from the format_id, width, height
                                 and format_note fields if missing.
                     * format_id A short description of the format
                                 ("mp4_h264_opus" or "19")
@@ -71,6 +71,13 @@ class InfoExtractor(object):
                                 ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * height    Height of the video, if known
+                    * abr       Average audio bitrate in KBit/s
+                    * acodec    Name of the audio codec in use
+                    * vbr       Average video bitrate in KBit/s
+                    * vcodec    Name of the video codec in use
+    webpage_url:    The URL of the video webpage; if given to youtube-dl, it
+                    should yield the same result again. (It will be set by
+                    YoutubeDL if it is missing.)
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
@@ -312,13 +319,21 @@ class InfoExtractor(object):
 
     # Helper functions for extracting OpenGraph info
     @staticmethod
-    def _og_regex(prop):
-        return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
+    def _og_regexes(prop):
+        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        template = r'<meta[^>]+?%s[^>]+?%s'
+        return [
+            template % (property_re, content_re),
+            template % (content_re, property_re),
+        ]
 
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
-        escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+        escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
+        if escaped is None:
+            return None
         return unescapeHTML(escaped)
 
     def _og_search_thumbnail(self, html, **kargs):
@@ -331,8 +346,8 @@ class InfoExtractor(object):
         return self._og_search_property('title', html, **kargs)
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
-        regexes = [self._og_regex('video')]
-        if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+        regexes = self._og_regexes('video')
+        if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
     def _rta_search(self, html):
author	rzhxeo <rzhxeot7z81b4700@mailcatch.com>	2013-11-18 00:27:06 +0100
committer	rzhxeo <rzhxeot7z81b4700@mailcatch.com>	2013-11-18 00:27:06 +0100
commit	2b35c9ef742bf261078ea10c6c0bba848db1a0df (patch)
tree	fe80c838c7529c8cab6f1b44d730a2849cd68c48 /youtube_dl/extractor/common.py
parent	4894fe8c5baec8b1f21ac6fdebe08175abc7f094 (diff)
parent	73c566695fac926e7e9e6922fe4e6d82c64a1850 (diff)