diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-05-10 17:31:55 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-05-10 17:31:55 +0600 | 
| commit | 98c2c0febc7e686278ceaadc73eb40fee1b46752 (patch) | |
| tree | 265c893a15666392523b9a59cad5be15cb2f7a66 /youtube_dl/extractor/voicerepublic.py | |
| parent | 63cbd19f500eb4d90c1fc7c09f04de5df43a6a04 (diff) | |
| parent | 1dcb52188d3709711b3ea5ae1ff6bdb985e79c62 (diff) | |
Merge branch 'voicerepublic' of https://github.com/duncankl/youtube-dl into duncankl-voicerepublic
Diffstat (limited to 'youtube_dl/extractor/voicerepublic.py')
| -rw-r--r-- | youtube_dl/extractor/voicerepublic.py | 52 | 
1 files changed, 52 insertions, 0 deletions
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ExtractorError


class VoiceRepublicIE(InfoExtractor):
    """Extractor for audio talks published under voicerepublic.com/talks/."""

    # The talk slug (lowercase letters, digits and dashes) is captured as
    # ``id``; the numeric id is only discovered later from the page itself.
    _VALID_URL = r'https?://voicerepublic\.com/talks/(?P<id>[0-9a-z-]+)'
    _TEST = {
        'url': 'https://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
        'md5': '0554a24d1657915aa8e8f84e15dc9353',
        'info_dict': {
            'id': '2296',
            'ext': 'm4a',
            'title': 'Watching the Watchers: Building a Sousveillance State',
            'thumbnail': 'https://voicerepublic.com/system/flyer/2296.png',
            'description': 'md5:715ba964958afa2398df615809cfecb1',
        }
    }

    def _real_extract(self, url):
        """Download the talk page and build the info dict for its audio.

        The numeric talk id is taken from the ``<digits>.png`` component of
        the Open Graph thumbnail URL, and one audio-only format is produced
        for every ``data-<ext>='/path.<ext>'`` attribute found in the page.

        Raises ExtractorError (marked as expected) when the page reports
        that the audio is still queued for processing. A missing numeric
        id is reported through the ``_search_regex`` call.
        """
        display_id = self._match_id(url)

        req = compat_urllib_request.Request(url)
        # Older versions of Firefox get redirected to an "upgrade browser" page
        req.add_header('User-Agent', 'youtube-dl')
        webpage = self._download_webpage(req, display_id)

        # Check for the "not ready yet" page first so the user gets the real
        # reason instead of a failure from one of the lookups below.
        # expected=True: this is a site-side state, not an extractor bug.
        if '<a>Queued for processing, please stand by...</a>' in webpage:
            raise ExtractorError(
                'Audio is still queued for processing', expected=True)

        # NOTE(review): the thumbnail lookup is presumably non-fatal in the
        # base class and may yield None -- confirm against InfoExtractor.
        # Searching an empty string instead of None lets a missing og:image
        # fail inside _search_regex rather than as a TypeError from ``re``.
        thumbnail = self._og_search_thumbnail(webpage)
        video_id = self._search_regex(
            r'/(\d+)\.png', thumbnail or '', 'id')

        # The back-reference \1 requires the file extension of the path to
        # equal the attribute suffix (e.g. data-m4a='/....m4a'), so ``ext``
        # is both the container extension and the format identifier.
        formats = [{
            'url': 'https://voicerepublic.com' + path,
            'ext': ext,
            'format_id': ext,
            'vcodec': 'none',
        } for ext, path in re.findall(r"data-([^=]+)='(/[^']+\.\1)'", webpage)]
        self._sort_formats(formats)

        # No top-level 'url' here: the media URLs live in ``formats``, and
        # og:url is (by Open Graph convention) the page address, not a
        # downloadable resource.
        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'thumbnail': thumbnail,
            'description': self._og_search_description(webpage),
        }
