diff options
| author | Sergey M․ <dstftw@gmail.com> | 2014-09-12 20:51:48 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2014-09-12 20:51:48 +0700 | 
| commit | adf2c0989d6d525b3a691eb64651b3330f5a76fc (patch) | |
| tree | f5c646c6444ae8d006eb1ef1284bea80abf607b0 | |
| parent | 8fb7ff25c5056ed0f23f35129bb0d6eba5dd6555 (diff) | |
[telemb] Extract all formats and modernize
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/telemb.py | 93 | 
2 files changed, 66 insertions, 29 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 94e370281..13b3616d3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -345,7 +345,7 @@ from .teachingchannel import TeachingChannelIE  from .teamcoco import TeamcocoIE  from .techtalks import TechTalksIE  from .ted import TEDIE -from .telemb import TelembIE +from .telemb import TeleMBIE  from .tenplay import TenPlayIE  from .testurl import TestURLIE  from .tf1 import TF1IE diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py index 383c26d96..cf5bb89b1 100644 --- a/youtube_dl/extractor/telemb.py +++ b/youtube_dl/extractor/telemb.py @@ -1,40 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals +  import re -# -*- coding: utf-8 -*- -# needed for the title french ê!  coding utf-8- -*-  -# based on the vine.co and lots of help from https://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/ -from .common import InfoExtractor +from .common import InfoExtractor +from ..utils import remove_start -class TelembIE(InfoExtractor): -    _VALID_URL = r'https?://www\.telemb\.be/(?P<id>.*)' - -    _TEST = { -        u'url': u'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html', -        u'file': u'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html.mp4', -        u'md5': u'f45ea69878516ba039835794e0f8f783', -        u'info_dict': {  -            u"title": u'TéléMB : Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages' -        } -    } +class TeleMBIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html' +    _TESTS = [ +        { +            'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html', +            'md5': 'f45ea69878516ba039835794e0f8f783', +            'info_dict': { +                'id': '13466', +                'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-', +                'ext': 'mp4', +                'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages', +                'description': 'md5:bc5225f47b17c309761c856ad4776265', +                'thumbnail': 're:^http://.*\.(?:jpg|png)$', +            } +        }, +        { +            'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html', +            'md5': '6e9682736e5ccd4eab7f21e855350733', +            'info_dict': { +                'id': '13514', +                'display_id': 'les-reportages-havre-incendie-mortel', +                'ext': 'mp4', +                'title': 'Havré - Incendie mortel - Les reportages', +                'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a', +                'thumbnail': 're:^http://.*\.(?:jpg|png)$', +            } +        }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -          video_id = mobj.group('id') -        webpage_url = 'http://www.telemb.be/' + video_id -        webpage = self._download_webpage(webpage_url, video_id) +        display_id = mobj.group('display_id') +        webpage = self._download_webpage(url, display_id) -        self.report_extraction(video_id) +        formats = [] +        for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage): +            fmt = { +                'url': video_url, +                'format_id': video_url.split(':')[0] +            } +            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) +            if rtmp: +                fmt.update({ +                    'play_path': rtmp.group('playpath'), +                    'app': rtmp.group('app'), +                    'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf', +                    'page_url': 'http://www.telemb.be', +                    'preference': -1, +                }) +            formats.append(fmt) +        self._sort_formats(formats) -        video_url = self._html_search_regex(r'"(http://wowza\.imust\.org/srv/vod/.*\.mp4)"', -            webpage, u'video URL') +        title = remove_start(self._og_search_title(webpage), 'TéléMB : ') +        description = self._html_search_regex( +            r'<meta property="og:description" content="(.+?)" />', +            webpage, 'description', fatal=False) +        thumbnail = self._og_search_thumbnail(webpage) -        return [{ -            'id':        video_id, -            'url':       video_url, -            'ext':       'mp4', -            'title':     self._og_search_title(webpage), -            'thumbnail': self._og_search_thumbnail(webpage), -        }] +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'formats': formats, +        } | 
