about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Entropy <0fiscalentropy@users.noreply.github.com> 2017-04-07 21:52:48 +0200
committer Sergey M․ <dstftw@gmail.com> 2017-04-08 19:54:04 +0700
commit 5f3e0b69efa8ae80f536fb54ace4bb4d9c667d2e (patch)
tree 5f92e5052e119a76241dd20e592b79ede72c6caf
parent 28b674ca238fbae29f2ee7bc85202fa2f115a876 (diff)
[TheSun] Add new extractor
-rw-r--r-- youtube_dl/extractor/extractors.py 1
-rw-r--r-- youtube_dl/extractor/thesun.py 27
2 files changed, 28 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 72728d919..01865479c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -983,6 +983,7 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
+from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
diff --git a/youtube_dl/extractor/thesun.py b/youtube_dl/extractor/thesun.py
new file mode 100644
index 000000000..7f96bf8c9
--- /dev/null
+++ b/youtube_dl/extractor/thesun.py
@@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TheSunIE(InfoExtractor):
+    """Handle thesun.co.uk URLs by locating a `thesun-ooyala-player-<id>` marker in the page and delegating to OoyalaIE."""
+    _VALID_URL = r'https?://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'  # http or https; one path segment, then the numeric id
+    _TEST = {
+        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
+        'md5': '5667123b24f25f43f4c4f381ef34c5c2',
+        'info_dict': {
+            'id': 'h4OXN0NzE6rv6ObkEifKcNA-gYUw4xFf',
+            'ext': 'mp4',
+            'title': 'Katy Perry and Orlando Bloom shut down split rumours with cute Thanksgiving video',
+            'description': 'Still going strong',
+            'duration': 31.28,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Download the page at `url`, read the Ooyala id from its player marker and return OoyalaIE._build_url_result() for it."""
+        video_id = self._match_id(url)  # presumably the `id` group of _VALID_URL; only passed on to the page download
+        webpage = self._download_webpage(url, video_id)
+        ooyala_id = self._search_regex(r'id\s*=\s*"thesun-ooyala-player-([^"]+)"', webpage, 'ooyala id')
+        return OoyalaIE._build_url_result(ooyala_id)