[iprima] Improve extraction (closes #25138)

author: Sergey M․ <dstftw@gmail.com> 2020-05-06 23:20:14 +0700
committer: Sergey M․ <dstftw@gmail.com> 2020-05-06 23:20:14 +0700
commit: 30fa5c6087d2e5e7a2bfe395ffbb267d92959356 (patch)
tree: d0cc9bbb860e9a96603e5871242084976c763639 /youtube_dl
parent: 6c907eb33ffd79ef9c4761c6460acb31b2eded46 (diff)
1 file changed, 24 insertions, 8 deletions
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index 11bbeb592..53a550c11 100644
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
     _GEO_BYPASS = False
 
     _TESTS = [{
-        'url': 'http://play.iprima.cz/gondici-s-r-o-33',
+        'url': 'https://prima.iprima.cz/particka/92-epizoda',
         'info_dict': {
-            'id': 'p136534',
+            'id': 'p51388',
             'ext': 'mp4',
-            'title': 'Gondíci s. r. o. (34)',
-            'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
+            'title': 'Partička (92)',
+            'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
+    }, {
+        'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+        'info_dict': {
+            'id': 'p681554',
+            'ext': 'mp4',
+            'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
         },
         'params': {
             'skip_download': True,  # m3u8 download
@@ -68,9 +78,15 @@ class IPrimaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        title = self._og_search_title(
+            webpage, default=None) or self._search_regex(
+            r'<h1>([^<]+)', webpage, 'title')
+
         video_id = self._search_regex(
             (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
-             r'data-product="([^"]+)">'),
+             r'data-product="([^"]+)">',
+             r'id=["\']player-(p\d+)"',
+             r'playerId\s*:\s*["\']player-(p\d+)'),
             webpage, 'real id')
 
         playerpage = self._download_webpage(
@@ -125,8 +141,8 @@ class IPrimaIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
             'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': self._og_search_description(webpage, default=None),
         }
author	Sergey M․ <dstftw@gmail.com>	2020-05-06 23:20:14 +0700
committer	Sergey M․ <dstftw@gmail.com>	2020-05-06 23:20:14 +0700
commit	30fa5c6087d2e5e7a2bfe395ffbb267d92959356 (patch)
tree	d0cc9bbb860e9a96603e5871242084976c763639 /youtube_dl
parent	6c907eb33ffd79ef9c4761c6460acb31b2eded46 (diff)