diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/discovery.py | 46 | 
2 files changed, 47 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b8d635c30..1e8556124 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -48,6 +48,7 @@ from .depositfiles import DepositFilesIE  from .dotsub import DotsubIE  from .dreisat import DreiSatIE  from .defense import DefenseGouvFrIE +from .discovery import DiscoveryIE  from .dropbox import DropboxIE  from .ebaumsworld import EbaumsWorldIE  from .ehow import EHowIE diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py new file mode 100644 index 000000000..885944c5e --- /dev/null +++ b/youtube_dl/extractor/discovery.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class DiscoveryIE(InfoExtractor): +    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' +    _TEST = { +        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', +        'file': '614784.mp4', +        'md5': 'e12614f9ee303a6ccef415cb0793eba2', +        'info_dict': { +            'title': 'MythBusters: Mission Impossible Outtakes', +            'description': ('Watch Jamie Hyneman and Adam Savage practice being' +                ' each other -- to the point of confusing Jamie\'s dog -- and ' +                'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s' +                ' back.'), +            'duration': 156, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) + +        video_list_json = self._search_regex(r'var videoListJSON = ({.*?});', +            webpage, 'video list', flags=re.DOTALL) +        video_list = json.loads(video_list_json) +        info = video_list['clips'][0] +        formats = [] +        for f in info['mp4']: +            formats.append( +                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])}) + +        return { +            'id': info['contentId'], +            'title': video_list['name'], +            'formats': formats, +            'description': info['videoCaption'], +            'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'), +            'duration': info['duration'], +        } | 
