diff options
| author | Olivier Bilodeau <olivier@bottomlesspit.org> | 2016-12-15 20:14:04 -0500 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-03-03 23:54:21 +0700 | 
| commit | cbb127568a6182df2c5a2d65426de523f1f7b43f (patch) | |
| tree | a7ad43e04a144da3c958674e4c7f0b7eaa010e19 | |
| parent | d02d4fa0a90f3182d65504508105e8d86886c6ec (diff) | |
[vrak] Add extractor
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/vrak.py | 68 | 
2 files changed, 69 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1613a9d3..0ac42138a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE  from .voxmedia import VoxMediaIE  from .vporn import VpornIE  from .vrt import VRTIE +from .vrak import VrakIE  from .vube import VubeIE  from .vuclip import VuClipIE  from .vvvvid import VVVVIDIE diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py new file mode 100644 index 000000000..692e2fcfc --- /dev/null +++ b/youtube_dl/extractor/vrak.py @@ -0,0 +1,68 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from .brightcove import BrightcoveNewIE + + +class VrakIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P<id>[0-9\.]+).*' +    _TEST = { +        'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721', +        'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948', +        'info_dict': { +            'id': '5231040869001', +            'ext': 'mp4', +            'title': 'Référendums américains, animés japonais et hooligans russes', +            'upload_date': '20161201', +            'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.', +            'timestamp': 1480628425, +            'uploader_id': '2890187628001', +        } +    } + +    def _real_extract(self, url): +        url_id = self._match_id(url) +        webpage = self._download_webpage(url, url_id) + +        result = {} +        result['title'] = self._html_search_regex( +            r'<h3 class="videoTitle">(.+?)</h3>', webpage, 'title') + +        # Inspired from BrightcoveNewIE._extract_url() +        entries = [] +        for account_id, player_id, _, video_id in re.findall( +                # account_id, player_id and embed from: +                #   <div class="video-player [...] +                #     data-publisher-id="2890187628001" +                #     data-player-id="VkSnGw3cx" +                # video id is extracted from weird CMS Java/Javascript notation: +                #   RW java.lang.String value = '5231040869001'; +                # Need to use backtrack to pin to a ref since video is in grid +                # layout with others +                r'''(?sx) +                    <div[^>]+ +                        data-publisher-id=["\'](\d+)["\'] +                        [^>]* +                        data-player-id=["\']([^"\']+)["\'] +                        [^>]* +                        refId":"([^&]+)" +                        [^>]* +                        >.*? +                    </div>.*? +                    RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\' +                    [^>]* +                    RW\ java\.lang\.String\ value\ =\ \'(\d+)\' +                ''', webpage): + +            entries.append( +                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' +                % (account_id, player_id, 'default', video_id)) + +        if entries: +            result = self.url_result(entries[0], BrightcoveNewIE.ie_key()) + +        return result | 
