aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2013-10-15 12:05:13 +0200
committer: Philipp Hagemeister <phihag@phihag.de> 2013-10-15 12:05:13 +0200
commit: 9d4660cab15f374176f87d3f747a559142e4af9b (patch)
tree: 6b3fe1b19cc7ca4c8123a0e0dcd80508105f5028
parent: cd054fc491198a5a7c69d76f19693b1cd4d5c086 (diff)
download: youtube-dl-9d4660cab15f374176f87d3f747a559142e4af9b.tar.xz
[generic] Support embedded vimeo videos (#1602)
-rw-r--r-- test/test_utils.py | 16
-rw-r--r-- youtube_dl/extractor/generic.py | 21
-rw-r--r-- youtube_dl/extractor/vimeo.py | 11
-rw-r--r-- youtube_dl/utils.py | 17
4 files changed, 63 insertions, 2 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 270669044..f3fbff042 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# coding: utf-8
# Allow direct execution
import os
@@ -21,6 +22,8 @@ from youtube_dl.utils import (
find_xpath_attr,
get_meta_content,
xpath_with_ns,
+ smuggle_url,
+ unsmuggle_url,
)
if sys.version_info < (3, 0):
@@ -155,5 +158,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find('media:song/media:author').text, u'The Author')
self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+    def test_smuggle_url(self):
+        """Smuggled data survives a round trip; a plain URL yields no data."""
+        original_url = 'https://foo.bar/baz?x=y#a'
+        payload = {u"ö": u"ö", u"abc": [3]}
+        recovered_url, recovered_payload = unsmuggle_url(
+            smuggle_url(original_url, payload))
+        self.assertEqual(original_url, recovered_url)
+        self.assertEqual(payload, recovered_payload)
+
+        # A URL that was never smuggled comes back untouched, with None data.
+        plain_url, plain_data = unsmuggle_url(original_url)
+        self.assertEqual(plain_url, original_url)
+        self.assertEqual(plain_data, None)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index d48c84f8d..89805250c 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -11,6 +11,8 @@ from ..utils import (
compat_urlparse,
ExtractorError,
+ smuggle_url,
+ unescapeHTML,
)
from .brightcove import BrightcoveIE
@@ -29,6 +31,17 @@ class GenericIE(InfoExtractor):
u"title": u"R\u00e9gis plante sa Jeep"
}
},
+ # embedded vimeo video
+ {
+ u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
+ u'file': u'22444065.mp4',
+ u'md5': u'2903896e23df39722c33f015af0666e2',
+ u'info_dict': {
+ u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
+ u"uploader_id": u"skillsmatter",
+ u"uploader": u"Skills Matter",
+ }
+ }
]
def report_download_webpage(self, video_id):
@@ -127,6 +140,14 @@ class GenericIE(InfoExtractor):
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
return self.url_result(bc_url, 'Brightcove')
+        # Look for embedded Vimeo player
+        # (dots in the host name are escaped so that only the literal
+        # player.vimeo.com host matches, not e.g. "playerXvimeo.com")
+        mobj = re.search(
+            r'<iframe\s+src="(https?://player\.vimeo\.com/video/.*?)"', webpage)
+        if mobj:
+            player_url = unescapeHTML(mobj.group(1))
+            # Smuggle the embedding page's URL along as a Referer entry so
+            # the Vimeo extractor can merge it into its request headers.
+            surl = smuggle_url(player_url, {'Referer': url})
+            return self.url_result(surl, 'Vimeo')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index cea29f035..2de56ac81 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -11,6 +11,7 @@ from ..utils import (
get_element_by_attribute,
ExtractorError,
std_headers,
+ unsmuggle_url,
)
class VimeoIE(InfoExtractor):
@@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor):
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
u'uploader': u'The BLN & Business of Software',
},
- },
+ }
]
def _login(self):
@@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor):
self._login()
def _real_extract(self, url, new_video=True):
+ url, data = unsmuggle_url(url)
+ headers = std_headers
+ if data is not None:
+ headers = headers.copy()
+ headers.update(data)
+
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
- request = compat_urllib_request.Request(url, None, std_headers)
+ request = compat_urllib_request.Request(url, None, headers)
webpage = self._download_webpage(request, video_id)
# Now we begin extracting as much information as we can from what we
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 3e81c308b..833f981f2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -945,3 +945,20 @@ class locked_file(object):
def shell_quote(args):
return ' '.join(map(pipes.quote, args))
+
+
+def smuggle_url(url, data):
+    """ Return url with data JSON-encoded into an extra trailing fragment.
+
+    The result is meant for internal hand-off only; unsmuggle_url undoes it.
+    """
+    payload = {u'__youtubedl_smuggle': json.dumps(data)}
+    encoded = compat_urllib_parse.urlencode(payload)
+    return u'#'.join((url, encoded))
+
+
+def unsmuggle_url(smug_url):
+    """ Split a URL created by smuggle_url back into (url, data).
+
+    Returns (smug_url, None) unchanged when the URL carries no smuggled
+    data; otherwise returns the original URL and the decoded JSON payload.
+    """
+    if '#__youtubedl_smuggle' not in smug_url:
+        return smug_url, None
+    # smuggle_url appends its payload after a final '#', so split on the
+    # last one to keep any fragment the original URL already had.
+    url, _, sdata = smug_url.rpartition(u'#')
+    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
+    data = json.loads(jsond)
+    return url, data