aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2017-04-17 00:23:16 +0700
committerSergey M․ <dstftw@gmail.com>2017-04-17 00:23:16 +0700
commite5d39886ec8e4e40b2b7257d16cc5d8505cc1f69 (patch)
tree6263ed68d3b6c4b2146636e547abd7ba7c496062
parent751c89a27d68c54375e96789cc90d4c8a3ce3dbc (diff)
downloadyoutube-dl-e5d39886ec8e4e40b2b7257d16cc5d8505cc1f69.tar.xz
[limelight] Improve embeds extraction (closes #12761)
* Move extraction code to extractor * Add extraction for LimelightEmbeddedPlayerFlash embeds * Extract multiple videos
-rw-r--r--youtube_dl/extractor/generic.py6
-rw-r--r--youtube_dl/extractor/limelight.py37
2 files changed, 43 insertions, 0 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6a34c2491..c523abb25 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -85,6 +85,7 @@ from .ustream import UstreamIE
from .openload import OpenloadIE
from .videopress import VideoPressIE
from .rutube import RutubeIE
+from .limelight import LimelightBaseIE
class GenericIE(InfoExtractor):
@@ -2483,6 +2484,11 @@ class GenericIE(InfoExtractor):
return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds
+ limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
+ if limelight_urls:
+ return self.playlist_result(
+ limelight_urls, video_id, video_title, video_description)
+
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:
lm = {
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index f52c2e169..0041453af 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -9,6 +9,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
+ smuggle_url,
unsmuggle_url,
ExtractorError,
)
@@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
@classmethod
def _extract_urls(cls, webpage, source_url):
    """Collect every Limelight embed found in ``webpage``.

    Two embed flavours are recognised:

    * JavaScript calls of the form
      ``LimelightPlayer.doLoad(Media|Channel|ChannelList)("<32-char id>"``;
    * ``<object>`` elements whose ``class`` attribute contains
      ``LimelightEmbeddedPlayerFlash`` and whose ``flashVars`` param
      carries ``mediaId=<32-char id>``.

    ``source_url`` is attached to each generated ``limelight:`` URL via
    ``smuggle_url`` under the ``'source_url'`` key.

    Returns a list (empty when nothing matches) of the objects produced
    by ``cls.url_result``, JavaScript embeds first, then ``<object>``
    embeds, each group in page order.
    """
    # Maps the suffix of the doLoad* JS call to the kind segment used in
    # the internal 'limelight:<kind>:<id>' URL scheme.
    lm = {
        'Media': 'media',
        'Channel': 'channel',
        'ChannelList': 'channel_list',
    }
    entries = []
    # The pattern has two groups, so findall yields (kind, id) tuples.
    for kind, video_id in re.findall(
            r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
            webpage):
        entries.append(cls.url_result(
            smuggle_url(
                'limelight:%s:%s' % (lm[kind], video_id),
                {'source_url': source_url}),
            'Limelight%s' % kind, video_id))
    for mobj in re.finditer(
            # As per [1] class attribute should be exactly equal to
            # LimelightEmbeddedPlayerFlash but numerous examples seen
            # that don't exactly match it (e.g. [2]).
            # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
            # 2. http://www.sedona.com/FacilitatorTraining2017
            r'''(?sx)
                <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
                    <param[^>]+
                        name=(["\'])flashVars\2[^>]+
                        value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
            ''', webpage):
        media_id = mobj.group('id')
        entries.append(cls.url_result(
            smuggle_url(
                'limelight:media:%s' % media_id,
                {'source_url': source_url}),
            'LimelightMedia', media_id))
    return entries
+
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
headers = {}
if referer: