[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

author: Philipp Hagemeister <phihag@phihag.de> 2014-03-11 16:51:36 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2014-03-11 16:51:36 +0100
commit: 60cc4dc4b49c6ebd4a86a4d7f998133474662eee (patch)
tree: b244e93d9f9d2c2d65494361f0f0f1683d9a0e45
parent: db95dc13a1a8e5951d09d3fd555f7a4be590821f (diff)
2 files changed, 33 insertions, 3 deletions
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 7c40e6753..5522e4954 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 
 
 class FunnyOrDieIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+    _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
     _TEST = {
         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
         'file': '0732f586d7.mp4',
@@ -30,10 +31,20 @@ class FunnyOrDieIE(InfoExtractor):
             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
             webpage, 'video URL', flags=re.DOTALL)
 
+        if mobj.group('type') == 'embed':
+            post_json = self._search_regex(
+                r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
+            post = json.loads(post_json)['attachment']
+            title = post['name']
+            description = post.get('description')
+        else:
+            title = self._og_search_title(webpage)
+            description = self._og_search_description(webpage)
+
         return {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
+            'title': title,
+            'description': description,
         }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7666cf207..6e6324779 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -134,6 +134,17 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        # funnyordie embed
+        {
+            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+            'md5': '7cf780be104d40fea7bae52eed4a470e',
+            'info_dict': {
+                'id': '18e820ec3f',
+                'ext': 'mp4',
+                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+            }
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -432,6 +443,14 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
 
+        # Look for funnyordie embed
+        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+        if matches:
+            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
+                     for eurl in matches]
+            return self.playlist_result(
+                urlrs, playlist_id=video_id, playlist_title=video_title)
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None:
author	Philipp Hagemeister <phihag@phihag.de>	2014-03-11 16:51:36 +0100
committer	Philipp Hagemeister <phihag@phihag.de>	2014-03-11 16:51:36 +0100
commit	60cc4dc4b49c6ebd4a86a4d7f998133474662eee (patch)
tree	b244e93d9f9d2c2d65494361f0f0f1683d9a0e45
parent	db95dc13a1a8e5951d09d3fd555f7a4be590821f (diff)