[vrak] Add extractor

author: Olivier Bilodeau <olivier@bottomlesspit.org> 2016-12-15 20:14:04 -0500
committer: Sergey M․ <dstftw@gmail.com> 2017-03-03 23:54:21 +0700
commit: cbb127568a6182df2c5a2d65426de523f1f7b43f (patch)
tree: a7ad43e04a144da3c958674e4c7f0b7eaa010e19 /youtube_dl/extractor
parent: d02d4fa0a90f3182d65504508105e8d86886c6ec (diff)
2 files changed, 69 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b1613a9d3..0ac42138a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE
 from .voxmedia import VoxMediaIE
 from .vporn import VpornIE
 from .vrt import VRTIE
+from .vrak import VrakIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vvvvid import VVVVIDIE
diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py
new file mode 100644
index 000000000..692e2fcfc
--- /dev/null
+++ b/youtube_dl/extractor/vrak.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from .brightcove import BrightcoveNewIE
+
+
+class VrakIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P<id>[0-9\.]+).*'
+    _TEST = {
+        'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721',
+        'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948',
+        'info_dict': {
+            'id': '5231040869001',
+            'ext': 'mp4',
+            'title': 'Référendums américains, animés japonais et hooligans russes',
+            'upload_date': '20161201',
+            'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.',
+            'timestamp': 1480628425,
+            'uploader_id': '2890187628001',
+        }
+    }
+
+    def _real_extract(self, url):
+        url_id = self._match_id(url)
+        webpage = self._download_webpage(url, url_id)
+
+        result = {}
+        result['title'] = self._html_search_regex(
+            r'<h3 class="videoTitle">(.+?)</h3>', webpage, 'title')
+
+        # Inspired from BrightcoveNewIE._extract_url()
+        entries = []
+        for account_id, player_id, _, video_id in re.findall(
+                # account_id, player_id and embed from:
+                #   <div class="video-player [...]
+                #     data-publisher-id="2890187628001"
+                #     data-player-id="VkSnGw3cx"
+                # video id is extracted from weird CMS Java/Javascript notation:
+                #   RW java.lang.String value = '5231040869001';
+                # Need to use backtrack to pin to a ref since video is in grid
+                # layout with others
+                r'''(?sx)
+                    <div[^>]+
+                        data-publisher-id=["\'](\d+)["\']
+                        [^>]*
+                        data-player-id=["\']([^"\']+)["\']
+                        [^>]*
+                        refId&quot;:&quot;([^&]+)&quot;
+                        [^>]*
+                        >.*?
+                    </div>.*?
+                    RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\'
+                    [^>]*
+                    RW\ java\.lang\.String\ value\ =\ \'(\d+)\'
+                ''', webpage):
+
+            entries.append(
+                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+                % (account_id, player_id, 'default', video_id))
+
+        if entries:
+            result = self.url_result(entries[0], BrightcoveNewIE.ie_key())
+
+        return result
author	Olivier Bilodeau <olivier@bottomlesspit.org>	2016-12-15 20:14:04 -0500
committer	Sergey M․ <dstftw@gmail.com>	2017-03-03 23:54:21 +0700
commit	cbb127568a6182df2c5a2d65426de523f1f7b43f (patch)
tree	a7ad43e04a144da3c958674e4c7f0b7eaa010e19 /youtube_dl/extractor
parent	d02d4fa0a90f3182d65504508105e8d86886c6ec (diff)