[fivemin] improve extraction

- skip m3u8 formats(404 error) - skip unavailable test - download embed page only when it's needed - update _VALID_URL regex(joystiq.com redirect to engadget.com)
author: remitamine <remitamine@gmail.com> 2016-03-04 10:25:16 +0100
committer: remitamine <remitamine@gmail.com> 2016-03-04 10:25:16 +0100
commit: 6d6536acb27483087a8a4ff73f501eaca313ec6a (patch)
tree: 80c723fc6aa9f365f8f83d187208a9e09a8f2c02 /youtube_dl/extractor/fivemin.py
parent: e1dd521e4964eae3aa4207c00e1673ab1ea0d4c1 (diff)
1 files changed, 30 insertions, 21 deletions
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py
index 2955965d9..67d50a386 100644
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
@@ -16,12 +18,7 @@ from ..utils import (
 
 class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
-    _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
-            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
-            5min:)
-        (?P<id>\d+)
-        '''
+    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
 
     _TESTS = [
         {
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
                 'title': 'How to Make a Next-Level Fruit Salad',
                 'duration': 184,
             },
+            'skip': 'no longer available',
         },
     ]
     _ERRORS = {
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sid = mobj.group('sid')
+
+        if mobj.group('query'):
+            qs = compat_parse_qs(mobj.group('query'))
+            if not qs.get('playList'):
+                raise ExtractorError('Invalid URL', expected=True)
+            video_id = qs['playList'][0]
+            if qs.get('sid'):
+                sid = qs['sid'][0]
+
         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
-        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
-        query = compat_urllib_parse.urlencode({
-            'func': 'GetResults',
-            'playlist': video_id,
-            'sid': sid,
-            'isPlayerSeed': 'true',
-            'url': embed_url,
-        })
+        if not sid:
+            embed_page = self._download_webpage(embed_url, video_id,
+                                                'Downloading embed page')
+            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+
         response = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
+            compat_urllib_parse.urlencode({
+                'func': 'GetResults',
+                'playlist': video_id,
+                'sid': sid,
+                'isPlayerSeed': 'true',
+                'url': embed_url,
+            }),
             video_id)
         if not response['success']:
             raise ExtractorError(
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
         parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
             compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
         for rendition in info['Renditions']:
-            if rendition['RenditionType'] == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
-            elif rendition['RenditionType'] == 'aac':
+            if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
                 continue
             else:
                 rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
author	remitamine <remitamine@gmail.com>	2016-03-04 10:25:16 +0100
committer	remitamine <remitamine@gmail.com>	2016-03-04 10:25:16 +0100
commit	6d6536acb27483087a8a4ff73f501eaca313ec6a (patch)
tree	80c723fc6aa9f365f8f83d187208a9e09a8f2c02 /youtube_dl/extractor/fivemin.py
parent	e1dd521e4964eae3aa4207c00e1673ab1ea0d4c1 (diff)