diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-04-17 00:23:16 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-04-17 00:23:16 +0700 |
commit | e5d39886ec8e4e40b2b7257d16cc5d8505cc1f69 (patch) | |
tree | 6263ed68d3b6c4b2146636e547abd7ba7c496062 /youtube_dl | |
parent | 751c89a27d68c54375e96789cc90d4c8a3ce3dbc (diff) |
[limelight] Improve embeds extraction (closes #12761)
* Move extraction code to extractor
* Add extraction for LimelightEmbeddedPlayerFlash embeds
* Extract multiple videos
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/generic.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/limelight.py | 37 |
2 files changed, 43 insertions, 0 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6a34c2491..c523abb25 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -85,6 +85,7 @@ from .ustream import UstreamIE
 from .openload import OpenloadIE
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
+from .limelight import LimelightBaseIE
 
 
 class GenericIE(InfoExtractor):
@@ -2483,6 +2484,11 @@ class GenericIE(InfoExtractor):
             return self.url_result(piksel_url, PikselIE.ie_key())
 
         # Look for Limelight embeds
+        limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
+        if limelight_urls:
+            return self.playlist_result(
+                limelight_urls, video_id, video_title, video_description)
+
         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
         if mobj:
             lm = {
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index f52c2e169..0041453af 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    smuggle_url,
     unsmuggle_url,
     ExtractorError,
 )
@@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
     _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
     _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
 
+    @classmethod
+    def _extract_urls(cls, webpage, source_url):
+        lm = {
+            'Media': 'media',
+            'Channel': 'channel',
+            'ChannelList': 'channel_list',
+        }
+        entries = []
+        for kind, video_id in re.findall(
+                r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
+                webpage):
+            print('video_id', video_id)
+            entries.append(cls.url_result(
+                smuggle_url(
+                    'limelight:%s:%s' % (lm[kind], video_id),
+                    {'source_url': source_url}),
+                'Limelight%s' % kind, video_id))
+        for mobj in re.finditer(
+                # As per [1] class attribute should be exactly equal to
+                # LimelightEmbeddedPlayerFlash but numerous examples seen
+                # that don't exactly match it (e.g. [2]).
+                # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
+                # 2. http://www.sedona.com/FacilitatorTraining2017
+                r'''(?sx)
+                    <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
+                        <param[^>]+
+                            name=(["\'])flashVars\2[^>]+
+                            value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
+                ''', webpage):
+            entries.append(cls.url_result(
+                smuggle_url(
+                    'limelight:media:%s' % mobj.group('id'),
+                    {'source_url': source_url}),
+                'LimelightMedia', mobj.group('id')))
+        return entries
+
     def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
         headers = {}
         if referer: