about summary refs log tree commit diff
path: root/youtube_dl
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-04-08 19:42:09 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-04-08 19:46:42 +0700
commit 28b674ca238fbae29f2ee7bc85202fa2f115a876 (patch)
tree 18a75fe71e0ae043372a06264d806a3cabc7467b /youtube_dl
parent e18f1da97a8840522b0cefac3af0995751733d67 (diff)
[ceskatelevize:porady] Add extractor (closes #7411, closes #12645)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/ceskatelevize.py 45
-rw-r--r-- youtube_dl/extractor/extractors.py 5
2 files changed, 49 insertions, 1 deletions
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 0daee313f..e250de18c 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -12,6 +12,7 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
+ unescapeHTML,
urlencode_postdata,
USER_AGENTS,
)
@@ -232,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
yield line
return '\r\n'.join(_fix_subtitle(subtitles))
+
+
+class CeskaTelevizePoradyIE(InfoExtractor):  # Extractor for ceskatelevize.cz/porady/ pages; hands the embedded player URL to CeskaTelevizeIE
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'  # 'id' group captures the last path segment after /porady/
+ _TESTS = [{
+ # video with 18+ caution trailer
+ 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
+ 'info_dict': {
+ 'id': '215562210900007-bogotart',
+ 'title': 'Queer: Bogotart',
+ 'description': 'Alternativní průvodce současným queer světem',
+ },
+ 'playlist': [{  # two expected entries: the 18+ warning clip, then the full-length video
+ 'info_dict': {
+ 'id': '61924494876844842',
+ 'ext': 'mp4',
+ 'title': 'Queer: Bogotart (Varování 18+)',
+ 'duration': 10.2,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '61924494877068022',
+ 'ext': 'mp4',
+ 'title': 'Queer: Bogotart (Queer)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 1558.3,
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):  # returns a url_result pointing at the player URL found in the page
+ video_id = self._match_id(url)  # the 'id' group of _VALID_URL
+
+ webpage = self._download_webpage(url, video_id)
+
+ data_url = unescapeHTML(self._search_regex(  # HTML entities in the attribute value are unescaped before use
+ r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',  # quoted data-url attribute of a <span>; \1 is the opening quote character
+ webpage, 'iframe player url', group='url'))
+
+ return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())  # delegate the actual extraction to CeskaTelevizeIE
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 2904dd4d1..72728d919 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -165,7 +165,10 @@ from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
-from .ceskatelevize import CeskaTelevizeIE
+from .ceskatelevize import (
+ CeskaTelevizeIE,
+ CeskaTelevizePoradyIE,
+)
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE