diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-06-26 16:34:36 +0200 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-06-26 16:34:36 +0200 | 
| commit | 78338f71ca2d96e4bf507c438fbb2751742989b1 (patch) | |
| tree | ec2ffc549fce527031640777c0c1b8c28c456003 | |
| parent | f5172a308418a54d077e19495a5c560f2bd644e6 (diff) | |
[livestream:original] Add support for folder urls (closes #2631)
The webpage only contains shortened links for the videos. Since the server
doesn't support HEAD requests, the generic extractor can't detect the redirection, so we use a specific extractor for them.
| -rw-r--r-- | test/test_playlists.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/livestream.py | 52 | 
4 files changed, 64 insertions, 6 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py index 42051fe2a..71dac1b02 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -30,6 +30,7 @@ from youtube_dl.extractor import (      SoundcloudPlaylistIE,      TeacherTubeClassroomIE,      LivestreamIE, +    LivestreamOriginalIE,      NHLVideocenterIE,      BambuserChannelIE,      BandcampAlbumIE, @@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['title'], 'TEDCity2.0 (English)')          self.assertTrue(len(result['entries']) >= 4) +    def test_livestreamoriginal_folder(self): +        dl = FakeYDL() +        ie = LivestreamOriginalIE(dl) +        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') +        self.assertTrue(len(result['entries']) >= 28) +      def test_nhl_videocenter(self):          dl = FakeYDL()          ie = NHLVideocenterIE(dl) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 02143de9e..a1cdcf0f7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -147,7 +147,11 @@ from .ku6 import Ku6IE  from .la7 import LA7IE  from .lifenews import LifeNewsIE  from .liveleak import LiveLeakIE -from .livestream import LivestreamIE, LivestreamOriginalIE +from .livestream import ( +    LivestreamIE, +    LivestreamOriginalIE, +    LivestreamShortenerIE, +)  from .lynda import (      LyndaIE,      LyndaCourseIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 49e75405e..e4e4feef9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -459,6 +459,9 @@ class InfoExtractor(object):          if secure: regexes = self._og_regexes('video:secure_url') + regexes          return self._html_search_regex(regexes, html, name, **kargs) +    def 
_og_search_url(self, html, **kargs): +        return self._og_search_property('url', html, **kargs) +      def _html_search_meta(self, name, html, display_name=None, fatal=False):          if display_name is None:              display_name = name diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5c71f4f09..2c100d424 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -9,6 +9,7 @@ from ..utils import (      compat_urlparse,      xpath_with_ns,      compat_str, +    orderedSet,  ) @@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):  # The original version of Livestream uses a different system  class LivestreamOriginalIE(InfoExtractor):      IE_NAME = 'livestream:original' -    _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' +    _VALID_URL = r'''(?x)https?://www\.livestream\.com/ +        (?P<user>[^/]+)/(?P<type>video|folder) +        (?:\?.*?Id=|/)(?P<id>.*?)(&|$) +        '''      _TEST = {          'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',          'info_dict': { @@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):          },      } -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        user = mobj.group('user') +    def _extract_video(self, user, video_id):          api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)          info = self._download_xml(api_url, video_id) @@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):              'ext': 'flv',              'thumbnail': thumbnail_url,          } + +    def _extract_folder(self, url, folder_id): +        webpage = self._download_webpage(url, folder_id) +        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage)) + +        return { +          
  '_type': 'playlist', +            'id': folder_id, +            'entries': [{ +                '_type': 'url', +                'url': video_url, +            } for video_url in urls], +        } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        id = mobj.group('id') +        user = mobj.group('user') +        url_type = mobj.group('type') +        if url_type == 'folder': +            return self._extract_folder(url, id) +        else: +            return self._extract_video(user, id) + + +# The server doesn't support HEAD request, the generic extractor can't detect +# the redirection +class LivestreamShortenerIE(InfoExtractor): +    IE_NAME = 'livestream:shortener' +    IE_DESC = False  # Do not list +    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        id = mobj.group('id') +        webpage = self._download_webpage(url, id) + +        return { +            '_type': 'url', +            'url': self._og_search_url(webpage), +        }  | 
