Add support for SnagFilms embed links

author: remitamine <remitamine@gmail.com> 2015-06-27 00:13:14 +0100
committer: remitamine <remitamine@gmail.com> 2015-06-27 00:13:14 +0100
commit: 7d7d4690259f343385a240efcef8d157fc99c72d (patch)
tree: 66f2afdcac4cb12344ccf58f1d6492a34f429924 /youtube_dl/extractor/snagfilms.py
parent: fd40bdc0be8984bf6043e70796c1d465a0499d03 (diff)
1 file changed, 34 insertions, 14 deletions
diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py
index 60354b9af..cd345474e 100644
--- a/youtube_dl/extractor/snagfilms.py
+++ b/youtube_dl/extractor/snagfilms.py
@@ -1,10 +1,10 @@
+from re import match,DOTALL
 from .common import InfoExtractor
 from ..utils import js_to_json
-from re import DOTALL
 
 class SnagFilmsIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www.)?snagfilms\.com/films/title/(?P<display_id>.+?)(?:/|$)'
-    _TEST = {
+    _VALID_URL = r'(?:https?://)?(?:www.|embed.)?snagfilms\.com/(?:films/title/(?P<display_id>.+?)|embed/player\?.*filmId=(?P<id>.+?))(?:&|/|$)'
+    _TESTS = [{
         'url': 'http://www.snagfilms.com/films/title/lost_for_life',
         'info_dict':
         {
@@ -16,29 +16,48 @@ class SnagFilmsIE(InfoExtractor):
             'description': 'In the United States, more than 2500 individuals are serving life-without-parole sentences for crimes they committed when they were seventeen years old or younger. Children as young as thirteen are among the thousands serving these sentences. Directed by Joshua Rof&eacute; (who spent four intensive years on the project), LOST FOR LIFE tells the stories of these individuals, their families and the families of juvenile murder victims. This searingly powerful documentary tackles this contentious issue from multiple perspectives and explores the complexity of the lives of those affected. What is justice when a kid kills? Can a horrific act place a life beyond redemption? Could you forgive?<br />',
             'categories': ['Documentary','Crime','Award Winning','Festivals']
         }
-    }
+    },{
+        'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831',
+        'info_dict':
+        {
+            'id': '74849a00-85a9-11e1-9660-123139220831',
+            'display_id': 'while_we_watch',
+            'ext': 'mp4',
+            'title': '#whilewewatch',
+            'duration': 2311,
+            'description': 'A gripping portrait of the Occupy Wall Street media revolution,&nbsp;#WHILEWEWATCH is the first definitive film to emerge from Zuccotti Park—with full access and cooperation from masterminds who made #OccupyWallStreet a reality.&nbsp;The #OccupyWallStreet media team had no fear of a critical city government, big corporations, hostile police or a lagging mainstream media to tell their story. Through rain, snow, grueling days and sleeping on concrete, they pump out exhilarating ideas to the world. With little money, they rely on Twitter, texting, Wi-Fi, posters, Tumblr, live streams, YouTube, Facebook, dramatic marches, drumbeats and chants. As the film unfolds, we witness the burgeoning power of social media.<br />',
+            'categories': ['Documentary','Politics']
+        }
+    }]
 
     def _real_extract(self, url):
-        display_id = self._search_regex(
-            self._VALID_URL,
-            url,
-            'display_id',
-            group='display_id'
-        )
-        webpage = self._download_webpage(url, display_id)
+        display_id, video_id = match(self._VALID_URL,url).groups()
+        if display_id is None:
+            embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id)
+
+            display_id = self._html_search_regex(
+                r"snagfilms\.com/films/title/(?P<display_id>.+?)(?:/|')",
+                embed_webpage,
+                'display_id'
+            )
+        webpage = self._download_webpage('http://www.snagfilms.com/films/title/' + display_id, display_id)
 
         json_data = self._parse_json(self._html_search_regex(
             r'"data":{"film":(?P<data>{.*?}})}',
             webpage,
-            'data',
+            'data'
         ), display_id)
+
         title = json_data['title']
-        video_id = json_data['id']
         duration = int(json_data['duration'])
         description = json_data['synopsis']
         categories = [category['title'] for category in json_data['categories']]
+        thumbnail = json_data['image']
+
+        if video_id is None:
+            video_id = json_data['id']
+            embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id)
 
-        embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id)
         sources = self._parse_json(js_to_json(self._html_search_regex(
             r'sources: (?P<sources>\[.*?\])',
             embed_webpage,
@@ -61,5 +80,6 @@ class SnagFilmsIE(InfoExtractor):
             'duration': duration,
             'description': description,
             'categories': categories,
+            'thumbnail': thumbnail,
             'formats': formats,
         }
author	remitamine <remitamine@gmail.com>	2015-06-27 00:13:14 +0100
committer	remitamine <remitamine@gmail.com>	2015-06-27 00:13:14 +0100
commit	7d7d4690259f343385a240efcef8d157fc99c72d (patch)
tree	66f2afdcac4cb12344ccf58f1d6492a34f429924 /youtube_dl/extractor/snagfilms.py
parent	fd40bdc0be8984bf6043e70796c1d465a0499d03 (diff)