diff options
author | Sergey M․ <dstftw@gmail.com> | 2018-10-29 23:53:39 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2018-10-29 23:55:55 +0700 |
commit | 94db1f7f3b7269d5843b815ef2aa5b71d0361e6f (patch) | |
tree | c3bcbfbe577f7c8208e5d51da41d7ffdb4f633bc | |
parent | ffa7b2bfee7b94191ffc20ef00c22f708c97cddf (diff) |
[cnbc] Simplify extraction (closes #14280, closes #17110)
-rw-r--r-- | youtube_dl/extractor/cnbc.py | 29 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 2 |
2 files changed, 11 insertions, 20 deletions
diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 35c0b6124..81b0c9fc4 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -3,10 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - js_to_json, - smuggle_url, -) +from ..utils import smuggle_url class CNBCIE(InfoExtractor): @@ -40,36 +37,30 @@ class CNBCIE(InfoExtractor): } -class CNBCNewIE(InfoExtractor): - IE_NAME = 'CNBC:new' - _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P<id>[^.]+)' +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)' _TEST = { 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'info_dict': { 'id': '7000031301', 'ext': 'mp4', - 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'title': "Trump: I don't necessarily agree with raising rates", 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'timestamp': 1531958400, 'upload_date': '20180719', 'uploader': 'NBCU-CNBC', }, 'params': { - # m3u8 download 'skip_download': True, }, } - CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._parse_json( - self._search_regex( - r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>', - webpage, display_id), - display_id, transform_source=js_to_json - )['content_id'] - - return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') + video_id = self._search_regex( + r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, + 'video id') + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%s' % video_id, + CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 93574907b..d96e23905 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -211,7 +211,7 @@ from .clyp import ClypIE from .cmt import CMTIE from .cnbc import ( CNBCIE, - CNBCNewIE, + CNBCVideoIE, ) from .cnn import ( CNNIE, |