diff options
| -rw-r--r-- | youtube_dl/extractor/tumblr.py | 35 | 
1 files changed, 13 insertions, 22 deletions
| diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 306fe8974..40c53ff17 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -4,9 +4,6 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -)  class TumblrIE(InfoExtractor): @@ -18,7 +15,7 @@ class TumblrIE(InfoExtractor):              'id': '54196191430',              'ext': 'mp4',              'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', -            'description': 'md5:dfac39636969fe6bf1caa2d50405f069', +            'description': 'md5:37db8211e40b50c7c44e95da14f630b7',              'thumbnail': 're:http://.*\.jpg',          }      }, { @@ -27,7 +24,7 @@ class TumblrIE(InfoExtractor):          'info_dict': {              'id': '90208453769',              'ext': 'mp4', -            'title': '5SOS STRUM ;)', +            'title': '5SOS STRUM ;]',              'description': 'md5:dba62ac8639482759c8eb10ce474586a',              'thumbnail': 're:http://.*\.jpg',          } @@ -41,18 +38,12 @@ class TumblrIE(InfoExtractor):          url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)          webpage = self._download_webpage(url, video_id) -        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id) -        video = re.search(re_video, webpage) -        if video is None: -            raise ExtractorError('Unable to extract video') -        video_url = video.group('video_url') -        ext = video.group('ext') - -        video_thumbnail = self._search_regex( -            r'posters.*?\[\\x22(.*?)\\x22', -            webpage, 'thumbnail', fatal=False)  # We pick the first poster -        if video_thumbnail: -            video_thumbnail = video_thumbnail.replace('\\\\/', '/') +        iframe_url = self._search_regex( +            r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', +            webpage, 'iframe url') +        iframe = self._download_webpage(iframe_url, video_id) +        video_url = self._search_regex(r'<source src="([^"]+)"', +            iframe, 'video url')          # The only place where you can get a title, it's not complete,          # but searching in other places doesn't work for all videos @@ -62,9 +53,9 @@ class TumblrIE(InfoExtractor):          return {              'id': video_id, -             'url': video_url, -             'title': video_title, -             'description': self._html_search_meta('description', webpage), -             'thumbnail': video_thumbnail, -             'ext': ext, +            'url': video_url, +            'ext': 'mp4', +            'title': video_title, +            'description': self._og_search_description(webpage), +            'thumbnail': self._og_search_thumbnail(webpage),          } | 
