diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-03-11 16:51:36 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-03-11 16:51:36 +0100 | 
| commit | 60cc4dc4b49c6ebd4a86a4d7f998133474662eee (patch) | |
| tree | b244e93d9f9d2c2d65494361f0f0f1683d9a0e45 | |
| parent | db95dc13a1a8e5951d09d3fd555f7a4be590821f (diff) | |
[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)
| -rw-r--r-- | youtube_dl/extractor/funnyordie.py | 17 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 19 |
2 files changed, 33 insertions, 3 deletions
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 7c40e6753..5522e4954 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -1,12 +1,13 @@  from __future__ import unicode_literals +import json  import re  from .common import InfoExtractor  class FunnyOrDieIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' +    _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'      _TEST = {          'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',          'file': '0732f586d7.mp4', @@ -30,10 +31,20 @@ class FunnyOrDieIE(InfoExtractor):              [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],              webpage, 'video URL', flags=re.DOTALL) +        if mobj.group('type') == 'embed': +            post_json = self._search_regex( +                r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') +            post = json.loads(post_json)['attachment'] +            title = post['name'] +            description = post.get('description') +        else: +            title = self._og_search_title(webpage) +            description = self._og_search_description(webpage) +          return {              'id': video_id,              'url': video_url,              'ext': 'mp4', -            'title': self._og_search_title(webpage), -            'description': self._og_search_description(webpage), +            'title': title, +            'description': description,          } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7666cf207..6e6324779 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -134,6 +134,17 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, +        # funnyordie embed +        { +            
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', +            'md5': '7cf780be104d40fea7bae52eed4a470e', +            'info_dict': { +                'id': '18e820ec3f', +                'ext': 'mp4', +                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', +                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', +            } +        },      ]      def report_download_webpage(self, video_id): @@ -432,6 +443,14 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) +        # Look for funnyordie embed +        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) +        if matches: +            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') +                     for eurl in matches] +            return self.playlist_result( +                urlrs, playlist_id=video_id, playlist_title=video_title) +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None:  | 
