diff options
| -rw-r--r-- | test/test_utils.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 21 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 11 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 17 | 
4 files changed, 63 insertions, 2 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index 270669044..f3fbff042 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,5 @@  #!/usr/bin/env python +# coding: utf-8  # Allow direct execution  import os @@ -21,6 +22,8 @@ from youtube_dl.utils import (      find_xpath_attr,      get_meta_content,      xpath_with_ns, +    smuggle_url, +    unsmuggle_url,  )  if sys.version_info < (3, 0): @@ -155,5 +158,18 @@ class TestUtil(unittest.TestCase):          self.assertEqual(find('media:song/media:author').text, u'The Author')          self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') +    def test_smuggle_url(self): +        data = {u"ö": u"ö", u"abc": [3]} +        url = 'https://foo.bar/baz?x=y#a' +        smug_url = smuggle_url(url, data) +        unsmug_url, unsmug_data = unsmuggle_url(smug_url) +        self.assertEqual(url, unsmug_url) +        self.assertEqual(data, unsmug_data) + +        res_url, res_data = unsmuggle_url(url) +        self.assertEqual(res_url, url) +        self.assertEqual(res_data, None) + +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d48c84f8d..89805250c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,8 @@ from ..utils import (      compat_urlparse,      ExtractorError, +    smuggle_url, +    unescapeHTML,  )  from .brightcove import BrightcoveIE @@ -29,6 +31,17 @@ class GenericIE(InfoExtractor):                  u"title": u"R\u00e9gis plante sa Jeep"              }          }, +        # embedded vimeo video +        { +            u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', +            u'file': u'22444065.mp4', +            u'md5': u'2903896e23df39722c33f015af0666e2', +            u'info_dict': { +                u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011', +                u"uploader_id": u"skillsmatter", +                u"uploader": u"Skills Matter", +            } +        }      ]      def report_download_webpage(self, video_id): @@ -127,6 +140,14 @@ class GenericIE(InfoExtractor):              bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())              return self.url_result(bc_url, 'Brightcove') +        # Look for embedded Vimeo player +        mobj = re.search( +            r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage) +        if mobj: +            player_url = unescapeHTML(mobj.group(1)) +            surl = smuggle_url(player_url, {'Referer': url}) +            return self.url_result(surl, 'Vimeo') +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cea29f035..2de56ac81 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -11,6 +11,7 @@ from ..utils import (      get_element_by_attribute,      ExtractorError,      std_headers, +    unsmuggle_url,  )  class VimeoIE(InfoExtractor): @@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor):                  u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',                  u'uploader': u'The BLN & Business of Software',              }, -        }, +        }      ]      def _login(self): @@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor):          self._login()      def _real_extract(self, url, new_video=True): +        url, data = unsmuggle_url(url) +        headers = std_headers +        if data is not None: +            headers = headers.copy() +            headers.update(data) +          # Extract ID from URL          mobj = re.match(self._VALID_URL, url)          if mobj is None: @@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor):              url = 'https://vimeo.com/' + video_id          # Retrieve video webpage to extract further information -        request = compat_urllib_request.Request(url, None, std_headers) +        request = compat_urllib_request.Request(url, None, headers)          webpage = self._download_webpage(request, video_id)          # Now we begin extracting as much information as we can from what we diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3e81c308b..833f981f2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -945,3 +945,20 @@ class locked_file(object):  def shell_quote(args):      return ' '.join(map(pipes.quote, args)) + + +def smuggle_url(url, data): +    """ Pass additional data in a URL for internal use. """ + +    sdata = compat_urllib_parse.urlencode( +        {u'__youtubedl_smuggle': json.dumps(data)}) +    return url + u'#' + sdata + + +def unsmuggle_url(smug_url): +    if not '#__youtubedl_smuggle' in smug_url: +        return smug_url, None +    url, _, sdata = smug_url.rpartition(u'#') +    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] +    data = json.loads(jsond) +    return url, data | 
