[ufctv] fix extraction and add support for UFC Arabia(closes #23312)

author: Remita Amine <remitamine@gmail.com> 2019-12-04 17:20:53 +0100
committer: Remita Amine <remitamine@gmail.com> 2019-12-04 17:20:53 +0100
commit: 3ae878605dd28461896e62f56e20bc50336c45bd (patch)
tree: bb1d4b957853b209fb22754f878cd275fc3386c0
parent: 22974a378273bb43641813b0ec97d0dff1248d48 (diff)
3 files changed, 121 insertions, 66 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 465d9d364..74bf58f38 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1234,7 +1234,10 @@ from .udemy import (
     UdemyCourseIE
 )
 from .udn import UDNEmbedIE
-from .ufctv import UFCTVIE
+from .ufctv import (
+    UFCTVIE,
+    UFCArabiaIE,
+)
 from .uktvplay import UKTVPlayIE
 from .digiteka import DigitekaIE
 from .dlive import (
diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py
new file mode 100644
index 000000000..96fddeac0
--- /dev/null
+++ b/youtube_dl/extractor/imggaming.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+)
+
+
+class ImgGamingBaseIE(InfoExtractor):
+    _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
+    _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
+    _HEADERS = None
+    _LOGIN_REQUIRED = True
+    _LOGIN_SUFFIX = ''
+    _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
+    _REALM = None
+    _TOKEN = None
+    _VALID_URL_TEMPL = r'https?://%s/(?P<type>live|video)/(?P<id>\d+)'
+
+    def _real_initialize(self):
+        if not self._LOGIN_REQUIRED:
+            return
+
+        self._HEADERS = {
+            'Realm': 'dce.' + self._REALM,
+            'x-api-key': self._API_KEY,
+        }
+
+        email, password = self._get_login_info()
+        if email is None:
+            self.raise_login_required()
+
+        p_headers = self._HEADERS.copy()
+        p_headers['Content-Type'] = 'application/json'
+        self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
+            self._API_BASE + 'login' + self._LOGIN_SUFFIX,
+            None, 'Logging in', data=json.dumps({
+                'id': email,
+                'secret': password,
+            }).encode(), headers=p_headers)['authorisationToken']
+
+    def _extract_media_id(self, url, display_id):
+        return display_id
+
+    def _extract_dve_api_url(self, media_id, media_type):
+        url = self._API_BASE + 'stream'
+        if media_type == 'video':
+            url += '/vod/' + media_id
+        else:
+            url += '?eventId=' + media_id
+        try:
+            return self._download_json(
+                url, media_id, headers=self._HEADERS)['playerUrlCallback']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                raise ExtractorError(
+                    self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
+                    expected=True)
+            raise
+
+    def _real_extract(self, url):
+        media_type, display_id = re.match(self._VALID_URL, url).groups()
+        media_id = self._extract_media_id(url, display_id)
+        dve_api_url = self._extract_dve_api_url(media_id, media_type)
+        video_data = self._download_json(dve_api_url, media_id)
+        is_live = media_type == 'live'
+        if is_live:
+            title = self._live_title(self._download_json(
+                self._API_BASE + 'event/' + media_id,
+                media_id, headers=self._HEADERS)['title'])
+        else:
+            title = video_data['name']
+
+        formats = []
+        for proto in ('hls', 'dash'):
+            media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
+            if not media_url:
+                continue
+            if proto == 'hls':
+                m3u8_formats = self._extract_m3u8_formats(
+                    media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
+                    m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
+                for f in m3u8_formats:
+                    f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
+                    formats.append(f)
+            else:
+                formats.extend(self._extract_mpd_formats(
+                    media_url, media_id, mpd_id='dash', fatal=False,
+                    headers=self._MANIFEST_HEADERS))
+        self._sort_formats(formats)
+
+        return {
+            'id': media_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': video_data.get('thumbnailUrl'),
+            'description': video_data.get('description'),
+            'duration': int_or_none(video_data.get('duration')),
+            'tags': video_data.get('tags'),
+            'is_live': is_live,
+        }
diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py
index f3eaee6b3..160b0f104 100644
--- a/youtube_dl/extractor/ufctv.py
+++ b/youtube_dl/extractor/ufctv.py
@@ -1,73 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    parse_duration,
-    parse_iso8601,
-    urlencode_postdata,
-)
+from .imggaming import ImgGamingBaseIE
 
 
-class UFCTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
+class UFCTVIE(ImgGamingBaseIE):
+    _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv'
     _NETRC_MACHINE = 'ufctv'
-    _TEST = {
-        'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
-        'info_dict': {
-            'id': '34167',
-            'ext': 'mp4',
-            'title': 'UFC 219 Countdown: Full Episode',
-            'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
-            'timestamp': 1513962360,
-            'upload_date': '20171222',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        }
-    }
+    _REALM = 'ufc'
 
-    def _real_initialize(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
 
-        code = self._download_json(
-            'https://www.ufc.tv/secure/authenticate',
-            None, 'Logging in', data=urlencode_postdata({
-                'username': username,
-                'password': password,
-                'format': 'json',
-            })).get('code')
-        if code and code != 'loginsuccess':
-            raise ExtractorError(code, expected=True)
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        video_data = self._download_json(url, display_id, query={
-            'format': 'json',
-        })
-        video_id = str(video_data['id'])
-        title = video_data['name']
-        m3u8_url = self._download_json(
-            'https://www.ufc.tv/service/publishpoint', video_id, query={
-                'type': 'video',
-                'format': 'json',
-                'id': video_id,
-            }, headers={
-                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
-            })['path']
-        m3u8_url = m3u8_url.replace('_iphone.', '.')
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': video_data.get('description'),
-            'duration': parse_duration(video_data.get('runtime')),
-            'timestamp': parse_iso8601(video_data.get('releaseDate')),
-            'formats': formats,
-        }
+class UFCArabiaIE(ImgGamingBaseIE):
+    _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com'
+    _NETRC_MACHINE = 'ufcarabia'
+    _REALM = 'admufc'
author	Remita Amine <remitamine@gmail.com>	2019-12-04 17:20:53 +0100
committer	Remita Amine <remitamine@gmail.com>	2019-12-04 17:20:53 +0100
commit	3ae878605dd28461896e62f56e20bc50336c45bd (patch)
tree	bb1d4b957853b209fb22754f878cd275fc3386c0
parent	22974a378273bb43641813b0ec97d0dff1248d48 (diff)