about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-11-21 05:25:43 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-11-21 06:10:39 +0700
commit 6a6d7f064178427d28986884524bd3434f0ca957 (patch)
tree 4e48e116d897357c8a565ee45a557101cc85c73c
parent 05bd5e9c77e0e8acb95f47396be4c970fc9f39c4 (diff)
download youtube-dl-6a6d7f064178427d28986884524bd3434f0ca957.tar.xz
[ciscolive] Fix issues and improve extraction (closes #17984)
-rw-r--r-- youtube_dl/extractor/ciscolive.py 176
-rw-r--r-- youtube_dl/extractor/extractors.py 5
2 files changed, 87 insertions, 94 deletions
diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py
index 2db7aad2c..32f645713 100644
--- a/youtube_dl/extractor/ciscolive.py
+++ b/youtube_dl/extractor/ciscolive.py
@@ -1,84 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
+ compat_parse_qs,
compat_urllib_parse_urlparse,
- compat_parse_qs
)
from ..utils import (
clean_html,
+ float_or_none,
int_or_none,
try_get,
urlencode_postdata,
)
-class CiscoLiveIE(InfoExtractor):
- IE_NAME = 'ciscolive'
- _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P<query>[^#]+)#/(?:session/(?P<id>.+))?$'
- _TESTS = [
- {
- 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
- 'md5': 'c98acf395ed9c9f766941c70f5352e22',
- 'info_dict': {
- 'id': '5803694304001',
- 'ext': 'mp4',
- 'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
- 'description': 'md5:ec4a436019e09a918dec17714803f7cc',
- 'timestamp': 1530305395,
- 'uploader_id': '5647924234001',
- 'upload_date': '20180629',
- 'location': '16B Mezz.',
- },
- },
- {
- 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
- 'md5': '993d4cf051f6174059328b1dce8e94bd',
- 'info_dict': {
- 'upload_date': '20180629',
- 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco',
- 'timestamp': 1530316421,
- 'uploader_id': '5647924234001',
- 'id': '5803751616001',
- 'description': 'md5:5f144575cd6848117fe2f756855b038b',
- 'location': 'WoS, DevNet Theater',
- 'ext': 'mp4',
- },
- },
- {
- 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
- 'md5': '80e0c3b87e373fe3a3316b934b8915bf',
- 'info_dict': {
- 'upload_date': '20180629',
- 'title': 'Beating the CCIE Routing & Switching',
- 'timestamp': 1530311842,
- 'uploader_id': '5647924234001',
- 'id': '5803735679001',
- 'description': 'md5:e71970799e92d7f5ff57ae23f64b0929',
- 'location': 'Tulúm 02',
- 'ext': 'mp4',
- },
- }
- ]
-
+class CiscoLiveBaseIE(InfoExtractor):
# These appear to be constant across all Cisco Live presentations
# and are not tied to any user session or event
RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
- RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
- RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
+ RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
+ RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
+ HEADERS = {
+ 'Origin': 'https://ciscolive.cisco.com',
+ 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
+ 'rfWidgetId': RAINFOCUS_WIDGET_ID,
+ }
+
+ def _call_api(self, ep, rf_id, query, referrer):
+ headers = self.HEADERS.copy()
+ headers['Referer'] = referrer
+ return self._download_json(
+ self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query),
+ headers=headers)
+
def _parse_rf_item(self, rf_item):
- ''' Parses metadata and passes to Brightcove extractor '''
event_name = rf_item.get('eventName')
title = rf_item['title']
description = clean_html(rf_item.get('abstract'))
presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
bc_id = rf_item['videos'][0]['url']
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
- duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
+ duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
location = try_get(rf_item, lambda x: x['times'][0]['room'])
if duration:
@@ -86,51 +51,76 @@ class CiscoLiveIE(InfoExtractor):
return {
'_type': 'url_transparent',
- 'creator': presenter_name,
+ 'url': bc_url,
+ 'ie_key': 'BrightcoveNew',
+ 'title': title,
'description': description,
'duration': duration,
- 'ie_key': 'BrightcoveNew',
+ 'creator': presenter_name,
'location': location,
'series': event_name,
- 'title': title,
- 'url': bc_url,
}
- def _check_bc_id_exists(self, rf_item):
- ''' Checks for the existence of a Brightcove URL in an API result '''
- bc_id = try_get(rf_item, lambda x: x['videos'][0]['url'])
- if bc_id:
- if bc_id.strip().isdigit():
- return rf_item
+
+class CiscoLiveSessionIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
+ _TEST = {
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
+ 'md5': 'c98acf395ed9c9f766941c70f5352e22',
+ 'info_dict': {
+ 'id': '5803694304001',
+ 'ext': 'mp4',
+ 'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
+ 'description': 'md5:ec4a436019e09a918dec17714803f7cc',
+ 'timestamp': 1530305395,
+ 'upload_date': '20180629',
+ 'uploader_id': '5647924234001',
+ 'location': '16B Mezz.',
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118',
+ }
+ }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- HEADERS = {
- 'Origin': 'https://ciscolive.cisco.com',
- 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID,
- 'rfWidgetId': self.RAINFOCUS_WIDGETID,
- 'Referer': url,
+ rf_id = self._match_id(url)
+ rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
+ return self._parse_rf_item(rf_result['items'][0])
+
+
+class CiscoLiveSearchIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
+ 'info_dict': {
+ 'title': 'Filter query',
+ },
+ 'playlist_count': 5,
+ 'params': {
+ 'proxy': '127.0.0.1:8118',
}
- # Single session URL (single video)
- if mobj.group('id'):
- rf_id = mobj.group('id')
- request = self.RAINFOCUS_API_URL % 'session'
- data = urlencode_postdata({'id': rf_id})
- rf_result = self._download_json(request, rf_id, data=data, headers=HEADERS)
- rf_item = self._check_bc_id_exists(rf_result['items'][0])
- return self._parse_rf_item(rf_item)
- else:
- # Filter query URL (multiple videos)
- rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query))
- rf_query['type'] = 'session'
- rf_query['size'] = 1000
- data = urlencode_postdata(rf_query)
- request = self.RAINFOCUS_API_URL % 'search'
- rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS)
- entries = [
- self._parse_rf_item(rf_item)
- for rf_item
- in rf_results['sectionList'][0]['items']
- if self._check_bc_id_exists(rf_item)
- ]
- return self.playlist_result(entries, 'Filter query')
+ }, {
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
+
+ @staticmethod
+ def _check_bc_id_exists(rf_item):
+ return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
+
+ def _real_extract(self, url):
+ rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ rf_query['type'] = 'session'
+ rf_query['size'] = 1000
+ rf_results = self._call_api('search', None, rf_query, url)
+ entries = [
+ self._parse_rf_item(rf_item)
+ for rf_item
+ in rf_results['sectionList'][0]['items']
+ if self._check_bc_id_exists(rf_item)
+ ]
+ return self.playlist_result(entries, playlist_title='Filter query')
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 2c5988a14..60e6175b1 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -194,7 +194,10 @@ from .chirbit import (
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
-from .ciscolive import CiscoLiveIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
from .cjsw import CJSWIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE