about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 20
-rw-r--r-- youtube_dl/extractor/abc.py 5
-rw-r--r-- youtube_dl/extractor/adultswim.py 6
-rw-r--r-- youtube_dl/extractor/aftenposten.py 23
-rw-r--r-- youtube_dl/extractor/amp.py 84
-rw-r--r-- youtube_dl/extractor/appletrailers.py 86
-rw-r--r-- youtube_dl/extractor/arte.py 20
-rw-r--r-- youtube_dl/extractor/atresplayer.py 123
-rw-r--r-- youtube_dl/extractor/audimedia.py 2
-rw-r--r-- youtube_dl/extractor/audiomack.py 2
-rw-r--r-- youtube_dl/extractor/bleacherreport.py 106
-rw-r--r-- youtube_dl/extractor/bliptv.py 290
-rw-r--r-- youtube_dl/extractor/br.py 127
-rw-r--r-- youtube_dl/extractor/canal13cl.py 48
-rw-r--r-- youtube_dl/extractor/cinemassacre.py 18
-rw-r--r-- youtube_dl/extractor/comcarcoff.py 24
-rw-r--r-- youtube_dl/extractor/daum.py 18
-rw-r--r-- youtube_dl/extractor/dramafever.py 69
-rw-r--r-- youtube_dl/extractor/faz.py 36
-rw-r--r-- youtube_dl/extractor/flickr.py 112
-rw-r--r-- youtube_dl/extractor/foxnews.py 73
-rw-r--r-- youtube_dl/extractor/franceinter.py 7
-rw-r--r-- youtube_dl/extractor/generic.py 18
-rw-r--r-- youtube_dl/extractor/googledrive.py 88
-rw-r--r-- youtube_dl/extractor/imgur.py 51
-rw-r--r-- youtube_dl/extractor/instagram.py 2
-rw-r--r-- youtube_dl/extractor/jwplatform.py 71
-rw-r--r-- youtube_dl/extractor/makertv.py 32
-rw-r--r-- youtube_dl/extractor/pbs.py 2
-rw-r--r-- youtube_dl/extractor/periscope.py 3
-rw-r--r-- youtube_dl/extractor/soompi.py 146
-rw-r--r-- youtube_dl/extractor/tele13.py 81
-rw-r--r-- youtube_dl/extractor/twentyfourvideo.py 39
-rw-r--r-- youtube_dl/extractor/vgtv.py 166
-rw-r--r-- youtube_dl/extractor/viki.py 24
-rw-r--r-- youtube_dl/extractor/xstream.py 14
36 files changed, 1116 insertions, 920 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 042b1e921..365c0b86f 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -15,7 +15,6 @@ from .adobetv import (
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
-from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
@@ -26,7 +25,10 @@ from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
-from .appletrailers import AppleTrailersIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
from .archiveorg import ArchiveOrgIE
from .ard import (
ARDIE,
@@ -61,8 +63,11 @@ from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
from .blinkx import BlinkxIE
-from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
@@ -78,7 +83,6 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
-from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
@@ -232,6 +236,7 @@ from .globo import (
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
+from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE
@@ -282,6 +287,7 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
+from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
@@ -336,6 +342,7 @@ from .lynda import (
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
+from .makertv import MakerTVIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
@@ -586,10 +593,6 @@ from .snagfilms import (
)
from .snotr import SnotrIE
from .sohu import SohuIE
-from .soompi import (
- SoompiIE,
- SoompiShowIE,
-)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@@ -648,6 +651,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
+from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index c0e5d1abf..6a29e587f 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -23,6 +23,7 @@ class ABCIE(InfoExtractor):
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
+ 'skip': 'this video has expired',
}, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
'md5': 'db2a5369238b51f9811ad815b69dc086',
@@ -36,6 +37,7 @@ class ABCIE(InfoExtractor):
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
},
'add_ie': ['Youtube'],
+ 'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
@@ -58,6 +60,9 @@ class ABCIE(InfoExtractor):
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
webpage)
if mobj is None:
+ expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
+ if expired:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 3ae618e71..bf21a6887 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -68,7 +68,7 @@ class AdultSwimIE(InfoExtractor):
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
@@ -79,6 +79,10 @@ class AdultSwimIE(InfoExtractor):
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
}]
@staticmethod
diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py
deleted file mode 100644
index 0c00acfb5..000000000
--- a/youtube_dl/extractor/aftenposten.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class AftenpostenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
- 'md5': 'fd828cd29774a729bf4d4425fe192972',
- 'info_dict': {
- 'id': '21039',
- 'ext': 'mov',
- 'title': 'TRAILER: "Sweatshop" - I can´t take any more',
- 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
- 'timestamp': 1416927969,
- 'upload_date': '20141125',
- }
- }
-
- def _real_extract(self, url):
- return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')
diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py
new file mode 100644
index 000000000..dcc3c97f1
--- /dev/null
+++ b/youtube_dl/extractor/amp.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class AMPIE(InfoExtractor):
+ # parse Akamai Adaptive Media Player feed
+ def _extract_feed_info(self, url):
+ item = self._download_json(
+ url, None, 'Downloading Akamai AMP feed',
+ 'Unable to download Akamai AMP feed')['channel']['item']
+
+ video_id = item['guid']
+
+ def get_media_node(name, default=None):
+ media_name = 'media-%s' % name
+ media_group = item.get('media-group') or item
+ return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
+
+ thumbnails = []
+ media_thumbnail = get_media_node('thumbnail')
+ if media_thumbnail:
+ if isinstance(media_thumbnail, dict):
+ media_thumbnail = [media_thumbnail]
+ for thumbnail_data in media_thumbnail:
+ thumbnail = thumbnail_data['@attributes']
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail['url'], 'http:'),
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ subtitles = {}
+ media_subtitle = get_media_node('subTitle')
+ if media_subtitle:
+ if isinstance(media_subtitle, dict):
+ media_subtitle = [media_subtitle]
+ for subtitle_data in media_subtitle:
+ subtitle = subtitle_data['@attributes']
+ lang = subtitle.get('lang') or 'en'
+ subtitles[lang] = [{'url': subtitle['href']}]
+
+ formats = []
+ media_content = get_media_node('content')
+ if isinstance(media_content, dict):
+ media_content = [media_content]
+ for media_data in media_content:
+ media = media_data['@attributes']
+ media_type = media['type']
+ if media_type == 'video/f4m':
+ f4m_formats = self._extract_f4m_formats(
+ media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
+ video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ elif media_type == 'application/x-mpegURL':
+ m3u8_formats = self._extract_m3u8_formats(
+ media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ else:
+ formats.append({
+ 'format_id': media_data['media-category']['@attributes']['label'],
+ 'url': media['url'],
+ 'tbr': int_or_none(media.get('bitrate')),
+ 'filesize': int_or_none(media.get('fileSize')),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': get_media_node('title'),
+ 'description': get_media_node('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(item.get('pubDate'), ' '),
+ 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index f68dc3236..62ed0c918 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -11,6 +11,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
+ IE_NAME = 'appletrailers'
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
@@ -64,6 +65,12 @@ class AppleTrailersIE(InfoExtractor):
},
]
}, {
+ 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+ 'info_dict': {
+ 'id': 'blackthorn',
+ },
+ 'playlist_mincount': 2,
+ }, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
}]
@@ -79,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
- s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+ s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
@@ -96,6 +103,9 @@ class AppleTrailersIE(InfoExtractor):
trailer_info_json = self._search_regex(self._JSON_RE,
on_click, 'trailer info')
trailer_info = json.loads(trailer_info_json)
+ first_url = trailer_info.get('url')
+ if not first_url:
+ continue
title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src']
@@ -107,7 +117,6 @@ class AppleTrailersIE(InfoExtractor):
if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
- first_url = trailer_info['url']
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
@@ -144,3 +153,76 @@ class AppleTrailersIE(InfoExtractor):
'id': movie,
'entries': playlist,
}
+
+
+class AppleTrailersSectionIE(InfoExtractor):
+ IE_NAME = 'appletrailers:section'
+ _SECTIONS = {
+ 'justadded': {
+ 'feed_path': 'just_added',
+ 'title': 'Just Added',
+ },
+ 'exclusive': {
+ 'feed_path': 'exclusive',
+ 'title': 'Exclusive',
+ },
+ 'justhd': {
+ 'feed_path': 'just_hd',
+ 'title': 'Just HD',
+ },
+ 'mostpopular': {
+ 'feed_path': 'most_pop',
+ 'title': 'Most Popular',
+ },
+ 'moviestudios': {
+ 'feed_path': 'studios',
+ 'title': 'Movie Studios',
+ },
+ }
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/#section=justadded',
+ 'info_dict': {
+ 'title': 'Just Added',
+ 'id': 'justadded',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=exclusive',
+ 'info_dict': {
+ 'title': 'Exclusive',
+ 'id': 'exclusive',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=justhd',
+ 'info_dict': {
+ 'title': 'Just HD',
+ 'id': 'justhd',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=mostpopular',
+ 'info_dict': {
+ 'title': 'Most Popular',
+ 'id': 'mostpopular',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=moviestudios',
+ 'info_dict': {
+ 'title': 'Movie Studios',
+ 'id': 'moviestudios',
+ },
+ 'playlist_mincount': 80,
+ }]
+
+ def _real_extract(self, url):
+ section = self._match_id(url)
+ section_data = self._download_json(
+ 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+ section)
+ entries = [
+ self.url_result('http://trailers.apple.com' + e['location'])
+ for e in section_data]
+ return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 2a00da3ee..10301a8ea 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -68,9 +68,13 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_url_info(cls, url):
mobj = re.match(cls._VALID_URL, url)
lang = mobj.group('lang')
- # This is not a real id, it can be for example AJT for the news
- # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
- video_id = mobj.group('id')
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ if 'vid' in query:
+ video_id = query['vid'][0]
+ else:
+ # This is not a real id, it can be for example AJT for the news
+ # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+ video_id = mobj.group('id')
return video_id, lang
def _real_extract(self, url):
@@ -79,9 +83,15 @@ class ArteTVPlus7IE(InfoExtractor):
return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang):
+ patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
+ ids = (video_id, '')
+ # some pages contain multiple videos (like
+ # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
+ # so we first try to look for json URLs that contain the video id from
+ # the 'vid' parameter.
+ patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
json_url = self._html_search_regex(
- [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
- webpage, 'json vp url', default=None)
+ patterns, webpage, 'json vp url', default=None)
if not json_url:
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index 50e47ba0a..7ac3044c7 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -2,6 +2,8 @@ from __future__ import unicode_literals
import time
import hmac
+import hashlib
+import re
from .common import InfoExtractor
from ..compat import (
@@ -32,6 +34,19 @@ class AtresPlayerIE(InfoExtractor):
'duration': 5527.6,
'thumbnail': 're:^https?://.*\.jpg$',
},
+ 'skip': 'This video is only available for registered users'
+ },
+ {
+ 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
+ 'md5': '0d0e918533bbd4b263f2de4d197d4aac',
+ 'info_dict': {
+ 'id': 'capitulo-112-david-bustamante',
+ 'ext': 'flv',
+ 'title': 'David Bustamante',
+ 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
+ 'duration': 1439.0,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
},
{
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
@@ -50,6 +65,13 @@ class AtresPlayerIE(InfoExtractor):
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
+ _ERRORS = {
+ 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
+ 'DELETED': 'This video has expired and is no longer available for online streaming.',
+ 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
+ # 'PREMIUM': 'PREMIUM',
+ }
+
def _real_initialize(self):
self._login()
@@ -83,58 +105,81 @@ class AtresPlayerIE(InfoExtractor):
episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id')
+ request = sanitized_Request(
+ self._PLAYER_URL_TEMPLATE % episode_id,
+ headers={'User-Agent': self._USER_AGENT})
+ player = self._download_json(request, episode_id, 'Downloading player JSON')
+
+ episode_type = player.get('typeOfEpisode')
+ error_message = self._ERRORS.get(episode_type)
+ if error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
+ formats = []
+ video_url = player.get('urlVideo')
+ if video_url:
+ format_info = {
+ 'url': video_url,
+ 'format_id': 'http',
+ }
+ mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
+ if mobj:
+ format_info.update({
+ 'width': int_or_none(mobj.group('width')),
+ 'height': int_or_none(mobj.group('height')),
+ 'tbr': int_or_none(mobj.group('bitrate')),
+ })
+ formats.append(format_info)
+
+ m3u8_url = player.get('urlVideoHls')
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+
timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new(
self._MAGIC.encode('ascii'),
- (episode_id + timestamp_shifted).encode('utf-8')
+ (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest()
- formats = []
- for fmt in ['windows', 'android_tablet']:
- request = sanitized_Request(
- self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
- request.add_header('User-Agent', self._USER_AGENT)
-
- fmt_json = self._download_json(
- request, video_id, 'Downloading %s video JSON' % fmt)
-
- result = fmt_json.get('resultDes')
- if result.lower() != 'ok':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, result), expected=True)
-
- for format_id, video_url in fmt_json['resultObject'].items():
- if format_id == 'token' or not video_url.startswith('http'):
- continue
- if video_url.endswith('/Manifest'):
- if 'geodeswowsmpra3player' in video_url:
- f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
- f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
- # this videos are protected by DRM, the f4m downloader doesn't support them
- continue
- else:
- f4m_url = video_url[:-9] + '/manifest.f4m'
- formats.extend(self._extract_f4m_formats(f4m_url, video_id))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': 'android-%s' % format_id,
- 'preference': 1,
- })
- self._sort_formats(formats)
+ request = sanitized_Request(
+ self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
+ headers={'User-Agent': self._USER_AGENT})
- player = self._download_json(
- self._PLAYER_URL_TEMPLATE % episode_id,
- episode_id)
+ fmt_json = self._download_json(
+ request, video_id, 'Downloading windows video JSON')
+
+ result = fmt_json.get('resultDes')
+ if result.lower() != 'ok':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, result), expected=True)
+
+ for format_id, video_url in fmt_json['resultObject'].items():
+ if format_id == 'token' or not video_url.startswith('http'):
+ continue
+ if 'geodeswowsmpra3player' in video_url:
+ f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+ f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+ # this videos are protected by DRM, the f4m downloader doesn't support them
+ continue
+ else:
+ f4m_url = video_url[:-9] + '/manifest.f4m'
+ f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ self._sort_formats(formats)
path_data = player.get('pathData')
episode = self._download_xml(
- self._EPISODE_URL_TEMPLATE % path_data,
- video_id, 'Downloading episode XML')
+ self._EPISODE_URL_TEMPLATE % path_data, video_id,
+ 'Downloading episode XML')
duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration'))
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py
index b0b089dee..4382a302b 100644
--- a/youtube_dl/extractor/audimedia.py
+++ b/youtube_dl/extractor/audimedia.py
@@ -15,7 +15,7 @@ class AudiMediaIE(InfoExtractor):
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
- 'id': '1564',
+ 'id': '1565',
'ext': 'mp4',
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
'description': 'md5:60e5d30a78ced725f7b8d34370762941',
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 693ba22c6..3eed91279 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -56,7 +56,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
- raise ExtractorError('Invalid url %s', url)
+ raise ExtractorError('Invalid url %s' % url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py
new file mode 100644
index 000000000..38bda3af5
--- /dev/null
+++ b/youtube_dl/extractor/bleacherreport.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .amp import AMPIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class BleacherReportIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
+ 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
+ 'info_dict': {
+ 'id': '2496438',
+ 'ext': 'mp4',
+ 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
+ 'uploader_id': 3992341,
+ 'description': 'CFB, ACC, Florida State',
+ 'timestamp': 1434380212,
+ 'upload_date': '20150615',
+ 'uploader': 'Team Stream Now ',
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
+ 'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
+ 'info_dict': {
+ 'id': '2586817',
+ 'ext': 'mp4',
+ 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
+ 'timestamp': 1446839961,
+ 'uploader': 'Sean Fay',
+ 'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
+ 'uploader_id': 6466954,
+ 'upload_date': '20151011',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
+
+ thumbnails = []
+ primary_photo = article_data.get('primaryPhoto')
+ if primary_photo:
+ thumbnails = [{
+ 'url': primary_photo['url'],
+ 'width': primary_photo.get('width'),
+ 'height': primary_photo.get('height'),
+ }]
+
+ info = {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'title': article_data['title'],
+ 'uploader': article_data.get('author', {}).get('name'),
+ 'uploader_id': article_data.get('authorId'),
+ 'timestamp': parse_iso8601(article_data.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'comment_count': int_or_none(article_data.get('commentsCount')),
+ 'view_count': int_or_none(article_data.get('hitCount')),
+ }
+
+ video = article_data.get('video')
+ if video:
+ video_type = video['type']
+ if video_type == 'cms.bleacherreport.com':
+ info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
+ elif video_type == 'ooyala.com':
+ info['url'] = 'ooyala:%s' % video['id']
+ elif video_type == 'youtube.com':
+ info['url'] = video['id']
+ elif video_type == 'vine.co':
+ info['url'] = 'https://vine.co/v/%s' % video['id']
+ else:
+ info['url'] = video_type + video['id']
+ return info
+ else:
+ raise ExtractorError('no video in the article', expected=True)
+
+
+class BleacherReportCMSIE(AMPIE):
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'md5': '8c2c12e3af7805152675446c905d159b',
+ 'info_dict': {
+ 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'ext': 'flv',
+ 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
+ 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
+ info['id'] = video_id
+ return info
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
deleted file mode 100644
index 35375f7b1..000000000
--- a/youtube_dl/extractor/bliptv.py
+++ /dev/null
@@ -1,290 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-from ..compat import compat_urlparse
-from ..utils import (
- clean_html,
- int_or_none,
- parse_iso8601,
- sanitized_Request,
- unescapeHTML,
- xpath_text,
- xpath_with_ns,
-)
-
-
-class BlipTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
-
- _TESTS = [
- {
- 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
- 'md5': '80baf1ec5c3d2019037c1c707d676b9f',
- 'info_dict': {
- 'id': '5779306',
- 'ext': 'm4v',
- 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
- 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
- 'timestamp': 1323138843,
- 'upload_date': '20111206',
- 'uploader': 'cbr',
- 'uploader_id': '679425',
- 'duration': 81,
- }
- },
- {
- # https://github.com/rg3/youtube-dl/pull/2274
- 'note': 'Video with subtitles',
- 'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
- 'md5': '309f9d25b820b086ca163ffac8031806',
- 'info_dict': {
- 'id': '6586561',
- 'ext': 'mp4',
- 'title': 'Red vs. Blue Season 11 Episode 1',
- 'description': 'One-Zero-One',
- 'timestamp': 1371261608,
- 'upload_date': '20130615',
- 'uploader': 'redvsblue',
- 'uploader_id': '792887',
- 'duration': 279,
- }
- },
- {
- # https://bugzilla.redhat.com/show_bug.cgi?id=967465
- 'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
- 'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
- 'info_dict': {
- 'id': '6573122',
- 'ext': 'mov',
- 'upload_date': '20130520',
- 'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
- 'title': 'Red vs. Blue Season 11 Trailer',
- 'timestamp': 1369029609,
- 'uploader': 'redvsblue',
- 'uploader_id': '792887',
- }
- },
- {
- 'url': 'http://blip.tv/play/gbk766dkj4Yn',
- 'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
- 'info_dict': {
- 'id': '1749452',
- 'ext': 'mp4',
- 'upload_date': '20090208',
- 'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
- 'title': 'Nostalgia Critic: Transformers',
- 'timestamp': 1234068723,
- 'uploader': 'NostalgiaCritic',
- 'uploader_id': '246467',
- }
- },
- {
- # https://github.com/rg3/youtube-dl/pull/4404
- 'note': 'Audio only',
- 'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
- 'md5': '76c0a56f24e769ceaab21fbb6416a351',
- 'info_dict': {
- 'id': '7103299',
- 'ext': 'flv',
- 'title': 'Weekly Manga Recap: Kingdom',
- 'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
- 'timestamp': 1417660321,
- 'upload_date': '20141204',
- 'uploader': 'The Rollo T',
- 'uploader_id': '407429',
- 'duration': 7251,
- 'vcodec': 'none',
- }
- },
- {
- # missing duration
- 'url': 'http://blip.tv/rss/flash/6700880',
- 'info_dict': {
- 'id': '6684191',
- 'ext': 'm4v',
- 'title': 'Cowboy Bebop: Gateway Shuffle Review',
- 'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
- 'timestamp': 1386639757,
- 'upload_date': '20131210',
- 'uploader': 'sfdebris',
- 'uploader_id': '706520',
- }
- }
- ]
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
- if mobj:
- return 'http://blip.tv/a/a-' + mobj.group(1)
- mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
- if mobj:
- return mobj.group(1)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- lookup_id = mobj.group('lookup_id')
-
- # See https://github.com/rg3/youtube-dl/issues/857 and
- # https://github.com/rg3/youtube-dl/issues/4197
- if lookup_id:
- urlh = self._request_webpage(
- 'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
- url = compat_urlparse.urlparse(urlh.geturl())
- qs = compat_urlparse.parse_qs(url.query)
- mobj = re.match(self._VALID_URL, qs['file'][0])
-
- video_id = mobj.group('id')
-
- rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
-
- def _x(p):
- return xpath_with_ns(p, {
- 'blip': 'http://blip.tv/dtd/blip/1.0',
- 'media': 'http://search.yahoo.com/mrss/',
- 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
- })
-
- item = rss.find('channel/item')
-
- video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
- title = xpath_text(item, 'title', 'title', fatal=True)
- description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
- timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
- uploader = xpath_text(item, _x('blip:user'), 'uploader')
- uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
- duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
- media_thumbnail = item.find(_x('media:thumbnail'))
- thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
- else xpath_text(item, 'image', 'thumbnail'))
- categories = [category.text for category in item.findall('category') if category is not None]
-
- formats = []
- subtitles_urls = {}
-
- media_group = item.find(_x('media:group'))
- for media_content in media_group.findall(_x('media:content')):
- url = media_content.get('url')
- role = media_content.get(_x('blip:role'))
- msg = self._download_webpage(
- url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
- video_id, 'Resolving URL for %s' % role)
- real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
-
- media_type = media_content.get('type')
- if media_type == 'text/srt' or url.endswith('.srt'):
- LANGS = {
- 'english': 'en',
- }
- lang = role.rpartition('-')[-1].strip().lower()
- langcode = LANGS.get(lang, lang)
- subtitles_urls[langcode] = url
- elif media_type.startswith('video/'):
- formats.append({
- 'url': real_url,
- 'format_id': role,
- 'format_note': media_type,
- 'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
- 'acodec': media_content.get(_x('blip:acodec')),
- 'filesize': media_content.get('filesize'),
- 'width': int_or_none(media_content.get('width')),
- 'height': int_or_none(media_content.get('height')),
- })
- self._check_formats(formats, video_id)
- self._sort_formats(formats)
-
- subtitles = self.extract_subtitles(video_id, subtitles_urls)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'categories': categories,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _get_subtitles(self, video_id, subtitles_urls):
- subtitles = {}
- for lang, url in subtitles_urls.items():
- # For some weird reason, blip.tv serves a video instead of subtitles
- # when we request with a common UA
- req = sanitized_Request(url)
- req.add_header('User-Agent', 'youtube-dl')
- subtitles[lang] = [{
- # The extension is 'srt' but it's actually an 'ass' file
- 'ext': 'ass',
- 'data': self._download_webpage(req, None, note=False),
- }]
- return subtitles
-
-
-class BlipTVUserIE(InfoExtractor):
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
- _PAGE_SIZE = 12
- IE_NAME = 'blip.tv:user'
- _TEST = {
- 'url': 'http://blip.tv/actone',
- 'info_dict': {
- 'id': 'actone',
- 'title': 'Act One: The Series',
- },
- 'playlist_count': 5,
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- username = mobj.group(1)
-
- page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
-
- page = self._download_webpage(url, username, 'Downloading user page')
- mobj = re.search(r'data-users-id="([^"]+)"', page)
- page_base = page_base % mobj.group(1)
- title = self._og_search_title(page)
-
- # Download video ids using BlipTV Ajax calls. Result size per
- # query is limited (currently to 12 videos) so we need to query
- # page by page until there are no video ids - it means we got
- # all of them.
-
- video_ids = []
- pagenum = 1
-
- while True:
- url = page_base + "&page=" + str(pagenum)
- page = self._download_webpage(
- url, username, 'Downloading video ids from page %d' % pagenum)
-
- # Extract video identifiers
- ids_in_page = []
-
- for mobj in re.finditer(r'href="/([^"]+)"', page):
- if mobj.group(1) not in ids_in_page:
- ids_in_page.append(unescapeHTML(mobj.group(1)))
-
- video_ids.extend(ids_in_page)
-
- # A little optimization - if current page is not
- # "full", ie. does not contain PAGE_SIZE video ids then
- # we can assume that this page is the last one - there
- # are no more ids on further pages - no need to query
- # again.
-
- if len(ids_in_page) < self._PAGE_SIZE:
- break
-
- pagenum += 1
-
- urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
- url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
- return self.playlist_result(
- url_entries, playlist_title=title, playlist_id=username)
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index 66e394e10..e66854538 100644
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -1,18 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
+ xpath_element,
+ xpath_text,
)
class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
- _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
- _BASE_URL = 'http://www.br.de'
+ _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [
{
@@ -22,7 +25,7 @@ class BRIE(InfoExtractor):
'id': '48f656ef-287e-486f-be86-459122db22cc',
'ext': 'mp4',
'title': 'Die böse Überraschung',
- 'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
+ 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
'duration': 180,
'uploader': 'Reinhard Weber',
'upload_date': '20150422',
@@ -30,23 +33,23 @@ class BRIE(InfoExtractor):
},
{
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
- 'md5': 'a44396d73ab6a68a69a568fae10705bb',
+ 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
'info_dict': {
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Manfred Schreiber ist tot',
- 'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
+ 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
'duration': 26,
}
},
{
- 'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
+ 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
'info_dict': {
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
'ext': 'aac',
'title': 'Kurzweilig und sehr bewegend',
- 'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
+ 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
'duration': 296,
}
},
@@ -57,7 +60,7 @@ class BRIE(InfoExtractor):
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
- 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+ 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
'duration': 116,
}
},
@@ -68,7 +71,7 @@ class BRIE(InfoExtractor):
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
- 'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+ 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893,
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
@@ -77,28 +80,31 @@ class BRIE(InfoExtractor):
]
def _real_extract(self, url):
- display_id = self._match_id(url)
+ base_url, display_id = re.search(self._VALID_URL, url).groups()
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
- xml = self._download_xml(self._BASE_URL + xml_url, None)
+ xml = self._download_xml(base_url + xml_url, display_id)
medias = []
for xml_media in xml.findall('video') + xml.findall('audio'):
+ media_id = xml_media.get('externalId')
media = {
- 'id': xml_media.get('externalId'),
- 'title': xml_media.find('title').text,
- 'duration': parse_duration(xml_media.find('duration').text),
- 'formats': self._extract_formats(xml_media.find('assets')),
- 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
- 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
- 'webpage_url': xml_media.find('permalink').text
+ 'id': media_id,
+ 'title': xpath_text(xml_media, 'title', 'title', True),
+ 'duration': parse_duration(xpath_text(xml_media, 'duration')),
+ 'formats': self._extract_formats(xpath_element(
+ xml_media, 'assets'), media_id),
+ 'thumbnails': self._extract_thumbnails(xpath_element(
+ xml_media, 'teaserImage/variants'), base_url),
+ 'description': xpath_text(xml_media, 'desc'),
+ 'webpage_url': xpath_text(xml_media, 'permalink'),
+ 'uploader': xpath_text(xml_media, 'author'),
}
- if xml_media.find('author').text:
- media['uploader'] = xml_media.find('author').text
- if xml_media.find('broadcastDate').text:
- media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
+ broadcast_date = xpath_text(xml_media, 'broadcastDate')
+ if broadcast_date:
+ media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
medias.append(media)
if len(medias) > 1:
@@ -109,35 +115,58 @@ class BRIE(InfoExtractor):
raise ExtractorError('No media entries found')
return medias[0]
- def _extract_formats(self, assets):
-
- def text_or_none(asset, tag):
- elem = asset.find(tag)
- return None if elem is None else elem.text
-
- formats = [{
- 'url': text_or_none(asset, 'downloadUrl'),
- 'ext': text_or_none(asset, 'mediaType'),
- 'format_id': asset.get('type'),
- 'width': int_or_none(text_or_none(asset, 'frameWidth')),
- 'height': int_or_none(text_or_none(asset, 'frameHeight')),
- 'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
- 'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
- 'vcodec': text_or_none(asset, 'codecVideo'),
- 'acodec': text_or_none(asset, 'codecAudio'),
- 'container': text_or_none(asset, 'mediaType'),
- 'filesize': int_or_none(text_or_none(asset, 'size')),
- } for asset in assets.findall('asset')
- if asset.find('downloadUrl') is not None]
-
+ def _extract_formats(self, assets, media_id):
+ formats = []
+ for asset in assets.findall('asset'):
+ format_url = xpath_text(asset, ['downloadUrl', 'url'])
+ asset_type = asset.get('type')
+ if asset_type == 'HDS':
+ f4m_formats = self._extract_f4m_formats(
+ format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ elif asset_type == 'HLS':
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ else:
+ format_info = {
+ 'ext': xpath_text(asset, 'mediaType'),
+ 'width': int_or_none(xpath_text(asset, 'frameWidth')),
+ 'height': int_or_none(xpath_text(asset, 'frameHeight')),
+ 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
+ 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
+ 'vcodec': xpath_text(asset, 'codecVideo'),
+ 'acodec': xpath_text(asset, 'codecAudio'),
+ 'container': xpath_text(asset, 'mediaType'),
+ 'filesize': int_or_none(xpath_text(asset, 'size')),
+ }
+ format_url = self._proto_relative_url(format_url)
+ if format_url:
+ http_format_info = format_info.copy()
+ http_format_info.update({
+ 'url': format_url,
+ 'format_id': 'http-%s' % asset_type,
+ })
+ formats.append(http_format_info)
+ server_prefix = xpath_text(asset, 'serverPrefix')
+ if server_prefix:
+ rtmp_format_info = format_info.copy()
+ rtmp_format_info.update({
+ 'url': server_prefix,
+ 'play_path': xpath_text(asset, 'fileName'),
+ 'format_id': 'rtmp-%s' % asset_type,
+ })
+ formats.append(rtmp_format_info)
self._sort_formats(formats)
return formats
- def _extract_thumbnails(self, variants):
+ def _extract_thumbnails(self, variants, base_url):
thumbnails = [{
- 'url': self._BASE_URL + variant.find('url').text,
- 'width': int_or_none(variant.find('width').text),
- 'height': int_or_none(variant.find('height').text),
- } for variant in variants.findall('variant')]
+ 'url': base_url + xpath_text(variant, 'url'),
+ 'width': int_or_none(xpath_text(variant, 'width')),
+ 'height': int_or_none(xpath_text(variant, 'height')),
+ } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails
diff --git a/youtube_dl/extractor/canal13cl.py b/youtube_dl/extractor/canal13cl.py
deleted file mode 100644
index 93241fefe..000000000
--- a/youtube_dl/extractor/canal13cl.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class Canal13clIE(InfoExtractor):
- _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
- _TEST = {
- 'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
- 'md5': '4cb1fa38adcad8fea88487a078831755',
- 'info_dict': {
- 'id': '1403022125',
- 'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
- 'ext': 'mp4',
- 'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
- 'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id')
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._html_search_meta(
- 'twitter:title', webpage, 'title', fatal=True)
- description = self._html_search_meta(
- 'twitter:description', webpage, 'description')
- url = self._html_search_regex(
- r'articuloVideo = \"(.*?)\"', webpage, 'url')
- real_id = self._search_regex(
- r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
- thumbnail = self._html_search_regex(
- r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
-
- return {
- 'id': real_id,
- 'display_id': display_id,
- 'url': url,
- 'title': title,
- 'description': description,
- 'ext': 'mp4',
- 'thumbnail': thumbnail,
- }
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index fd1770dac..6d9cd8abd 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
-from .bliptv import BlipTVIE
from .screenwavemedia import ScreenwaveMediaIE
@@ -34,18 +33,17 @@ class CinemassacreIE(InfoExtractor):
},
},
{
- # blip.tv embedded video
+ # Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
- 'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
+ 'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
'info_dict': {
- 'id': '4065369',
- 'ext': 'flv',
+ 'id': 'OEVzPCY2T-g',
+ 'ext': 'mp4',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207',
- 'uploader': 'cinemassacre',
- 'uploader_id': '250778',
- 'timestamp': 1283233867,
- 'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
+ 'uploader': 'Cinemassacre',
+ 'uploader_id': 'JamesNintendoNerd',
+ 'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
}
},
{
@@ -89,8 +87,6 @@ class CinemassacreIE(InfoExtractor):
],
webpage, 'player data URL', default=None, group='url')
if not playerdata_url:
- playerdata_url = BlipTVIE._extract_url(webpage)
- if not playerdata_url:
raise ExtractorError('Unable to find player data')
video_title = self._html_search_regex(
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 81f3d7697..2efa200b5 100644
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -1,10 +1,12 @@
# encoding: utf-8
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+)
class ComCarCoffIE(InfoExtractor):
@@ -16,6 +18,7 @@ class ComCarCoffIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20141127',
'timestamp': 1417107600,
+ 'duration': 1232,
'title': 'Happy Thanksgiving Miranda',
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
@@ -31,9 +34,10 @@ class ComCarCoffIE(InfoExtractor):
display_id = 'comediansincarsgettingcoffee.com'
webpage = self._download_webpage(url, display_id)
- full_data = json.loads(self._search_regex(
- r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
- webpage, 'full data json'))
+ full_data = self._parse_json(
+ self._search_regex(
+ r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
+ display_id)['videoData']
video_id = full_data['activeVideo']['video']
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
@@ -45,12 +49,18 @@ class ComCarCoffIE(InfoExtractor):
formats = self._extract_m3u8_formats(
video_data['mediaUrl'], video_id, ext='mp4')
+ timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
+ video_data.get('pubDate'))
+ duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
+ video_data.get('duration'))
+
return {
'id': video_id,
'display_id': display_id,
'title': video_data['title'],
'description': video_data.get('description'),
- 'timestamp': parse_iso8601(video_data.get('pubDate')),
+ 'timestamp': timestamp,
+ 'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index 934da765e..9a94cf361 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -25,6 +25,18 @@ class DaumIE(InfoExtractor):
'duration': 3868,
},
}, {
+ # Test for https://github.com/rg3/youtube-dl/issues/7949
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290',
+ 'md5': 'c92d78bcee4424451f1667f275c1dc97',
+ 'info_dict': {
+ 'id': '73147290',
+ 'ext': 'mp4',
+ 'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218',
+ 'description': '싸이 - 나팔바지',
+ 'upload_date': '20151219',
+ 'duration': 232,
+ },
+ }, {
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
'only_matching': True,
}, {
@@ -37,9 +49,11 @@ class DaumIE(InfoExtractor):
video_id = mobj.group('id')
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
+ og_url = self._og_search_url(webpage, default=None) or self._search_regex(
+ r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2',
+ webpage, 'canonical url', group='url')
full_id = self._search_regex(
- r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
- webpage, 'full id')
+ r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index d836c1a6c..60ed438f8 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools
-from .common import InfoExtractor
+from .amp import AMPIE
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@@ -12,14 +12,11 @@ from ..compat import (
from ..utils import (
ExtractorError,
clean_html,
- determine_ext,
- int_or_none,
- parse_iso8601,
sanitized_Request,
)
-class DramaFeverBaseIE(InfoExtractor):
+class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
@@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
- }
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
- feed = self._download_json(
- 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
- video_id, 'Downloading episode JSON')['channel']['item']
+ info = self._extract_feed_info(
+ 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
raise ExtractorError(
'Currently unavailable in your country.', expected=True)
raise
- media_group = feed.get('media-group', {})
-
- formats = []
- for media_content in media_group['media-content']:
- src = media_content.get('@attributes', {}).get('url')
- if not src:
- continue
- ext = determine_ext(src)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src, video_id, f4m_id='hds'))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id='hls'))
- else:
- formats.append({
- 'url': src,
- })
- self._sort_formats(formats)
-
- title = media_group.get('media-title')
- description = media_group.get('media-description')
- duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
- thumbnail = self._proto_relative_url(
- media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
- timestamp = parse_iso8601(feed.get('pubDate'), ' ')
-
- subtitles = {}
- for media_subtitle in media_group.get('media-subTitle', []):
- lang = media_subtitle.get('@attributes', {}).get('lang')
- href = media_subtitle.get('@attributes', {}).get('href')
- if not lang or not href:
- continue
- subtitles[lang] = [{
- 'ext': 'ttml',
- 'url': href,
- }]
-
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
@@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
- subtitles.setdefault('English', []).append({
+ info['subtitiles'].setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
+ return info
class DramaFeverSeriesIE(DramaFeverBaseIE):
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index d9a868119..6f9b003c2 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -2,6 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import (
+ xpath_element,
+ xpath_text,
+ int_or_none,
+)
class FazIE(InfoExtractor):
@@ -37,31 +42,32 @@ class FazIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
config_xml_url = self._search_regex(
- r'(?:var\s+)?videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
+ r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
- encodings = config.find('ENCODINGS')
+ encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = []
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
- encoding = encodings.find(code)
- if encoding is None:
- continue
- encoding_url = encoding.find('FILENAME').text
- formats.append({
- 'url': encoding_url,
- 'format_id': code.lower(),
- 'quality': pref,
- })
+ encoding = xpath_element(encodings, code)
+ if encoding:
+ encoding_url = xpath_text(encoding, 'FILENAME')
+ if encoding_url:
+ formats.append({
+ 'url': encoding_url,
+ 'format_id': code.lower(),
+ 'quality': pref,
+ 'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
+ })
self._sort_formats(formats)
- descr = self._html_search_regex(
- r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
- 'description': descr,
- 'thumbnail': config.find('STILL/STILL_BIG').text,
+ 'description': description.strip() if description else None,
+ 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
+ 'duration': int_or_none(xpath_text(config, 'DURATION')),
}
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py
index 91cd46e76..18f439df9 100644
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -1,67 +1,93 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
- find_xpath_attr,
- sanitized_Request,
+ int_or_none,
+ qualities,
)
class FlickrIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
- 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
+ 'md5': '164fe3fa6c22e18d448d4d5af2330f31',
'info_dict': {
'id': '5645318632',
- 'ext': 'mp4',
- "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
- "uploader_id": "forestwander-nature-pictures",
- "title": "Dark Hollow Waterfalls"
+ 'ext': 'mpg',
+ 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
+ 'title': 'Dark Hollow Waterfalls',
+ 'duration': 19,
+ 'timestamp': 1303528740,
+ 'upload_date': '20110423',
+ 'uploader_id': '10922353@N03',
+ 'uploader': 'Forest Wander',
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
}
}
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ _API_BASE_URL = 'https://api.flickr.com/services/rest?'
- video_id = mobj.group('id')
- video_uploader_id = mobj.group('uploader_id')
- webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
- req = sanitized_Request(webpage_url)
- req.add_header(
- 'User-Agent',
- # it needs a more recent version
- 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
- webpage = self._download_webpage(req, video_id)
+ def _call_api(self, method, video_id, api_key, note, secret=None):
+ query = {
+ 'photo_id': video_id,
+ 'method': 'flickr.%s' % method,
+ 'api_key': api_key,
+ 'format': 'json',
+ 'nojsoncallback': 1,
+ }
+ if secret:
+ query['secret'] = secret
+ data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
+ if data['stat'] != 'ok':
+ raise ExtractorError(data['message'])
+ return data
- secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
- first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
- first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
+ api_key = self._download_json(
+ 'https://www.flickr.com/hermes_error_beacon.gne', video_id,
+ 'Downloading api key')['site_key']
- node_id = find_xpath_attr(
- first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
- 'id').text
+ video_info = self._call_api(
+ 'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
+ if video_info['media'] == 'video':
+ streams = self._call_api(
+ 'video.getStreamInfo', video_id, api_key,
+ 'Downloading streams info', video_info['secret'])['streams']
- second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
- second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
+ preference = qualities(
+ ['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
- self.report_extraction(video_id)
+ formats = []
+ for stream in streams['stream']:
+ stream_type = str(stream.get('type'))
+ formats.append({
+ 'format_id': stream_type,
+ 'url': stream['_content'],
+ 'preference': preference(stream_type),
+ })
+ self._sort_formats(formats)
- stream = second_xml.find('.//STREAM')
- if stream is None:
- raise ExtractorError('Unable to extract video url')
- video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
+ owner = video_info.get('owner', {})
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader_id': video_uploader_id,
- }
+ return {
+ 'id': video_id,
+ 'title': video_info['title']['_content'],
+ 'description': video_info.get('description', {}).get('_content'),
+ 'formats': formats,
+ 'timestamp': int_or_none(video_info.get('dateuploaded')),
+ 'duration': int_or_none(video_info.get('video', {}).get('duration')),
+ 'uploader_id': owner.get('nsid'),
+ 'uploader': owner.get('realname'),
+ 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
+ 'view_count': int_or_none(video_info.get('views')),
+ 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
+ }
+ else:
+ raise ExtractorError('not a video', expected=True)
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index 3a4a59135..318ac013d 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
-from ..utils import (
- parse_iso8601,
- int_or_none,
-)
+from .amp import AMPIE
-class FoxNewsIE(InfoExtractor):
+class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
@@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
- 'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+ 'description': '16-year-old girl is size of toddler',
'duration': 265,
- 'timestamp': 1304411491,
- 'upload_date': '20110503',
+ # 'timestamp': 1304411491,
+ # 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
- 'description': "Congressman discusses the president's executive action",
+ 'description': "Congressman discusses president's plan",
'duration': 292,
- 'timestamp': 1417662047,
- 'upload_date': '20141204',
+ # 'timestamp': 1417662047,
+ # 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- host = mobj.group('host')
+ host, video_id = re.match(self._VALID_URL, url).groups()
- video = self._download_json(
- 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
-
- item = video['channel']['item']
- title = item['title']
- description = item['description']
- timestamp = parse_iso8601(item['dc-date'])
-
- media_group = item['media-group']
- duration = None
- formats = []
- for media in media_group['media-content']:
- attributes = media['@attributes']
- video_url = attributes['url']
- if video_url.endswith('.f4m'):
- formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
- elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
- elif not video_url.endswith('.smil'):
- duration = int_or_none(attributes.get('duration'))
- formats.append({
- 'url': video_url,
- 'format_id': media['media-category']['@attributes']['label'],
- 'preference': 1,
- 'vbr': int_or_none(attributes.get('bitrate')),
- 'filesize': int_or_none(attributes.get('fileSize'))
- })
- self._sort_formats(formats)
-
- media_thumbnail = media_group['media-thumbnail']['@attributes']
- thumbnails = [{
- 'url': media_thumbnail['url'],
- 'width': int_or_none(media_thumbnail.get('width')),
- 'height': int_or_none(media_thumbnail.get('height')),
- }] if media_thumbnail else []
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
+ info = self._extract_feed_info(
+ 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
+ info['id'] = video_id
+ return info
diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py
index 6613ee17a..fdc51f44f 100644
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dl/extractor/franceinter.py
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -23,8 +21,7 @@ class FranceInterIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -33,7 +30,7 @@ class FranceInterIE(InfoExtractor):
video_url = 'http://www.franceinter.fr/' + path
title = self._html_search_regex(
- r'<span class="title">(.+?)</span>', webpage, 'title')
+ r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
description = self._html_search_regex(
r'<span class="description">(.*?)</span>',
webpage, 'description', fatal=False)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c2e8f9b62..3c3066e38 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -44,7 +44,6 @@ from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
-from .bliptv import BlipTVIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
@@ -55,6 +54,8 @@ from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
+from .googledrive import GoogleDriveIE
+from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor):
@@ -1440,11 +1441,6 @@ class GenericIE(InfoExtractor):
'id': match.group('id')
}
- # Look for embedded blip.tv player
- bliptv_url = BlipTVIE._extract_url(webpage)
- if bliptv_url:
- return self.url_result(bliptv_url, 'BlipTV')
-
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
if svt_url:
@@ -1769,6 +1765,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
@@ -1796,6 +1797,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url:
return self.url_result(snagfilms_url)
+ # Look for JWPlatform embeds
+ jwplatform_url = JWPlatformIE._extract_url(webpage)
+ if jwplatform_url:
+ return self.url_result(jwplatform_url, 'JWPlatform')
+
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
new file mode 100644
index 000000000..f354c9c7a
--- /dev/null
+++ b/youtube_dl/extractor/googledrive.py
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class GoogleDriveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
+ _TEST = {
+ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
+ 'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+ 'info_dict': {
+ 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny.mp4',
+ 'duration': 46,
+ }
+ }
+ _FORMATS_EXT = {
+ '5': 'flv',
+ '6': 'flv',
+ '13': '3gp',
+ '17': '3gp',
+ '18': 'mp4',
+ '22': 'mp4',
+ '34': 'flv',
+ '35': 'flv',
+ '36': '3gp',
+ '37': 'mp4',
+ '38': 'mp4',
+ '43': 'webm',
+ '44': 'webm',
+ '45': 'webm',
+ '46': 'webm',
+ '59': 'mp4',
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+ webpage)
+ if mobj:
+ return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+
+ reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+ if reason:
+ raise ExtractorError(reason)
+
+ title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+ duration = int_or_none(self._search_regex(
+ r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
+ fmt_stream_map = self._search_regex(
+ r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
+ fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
+
+ formats = []
+ for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
+ fmt_id, fmt_url = fmt_stream.split('|')
+ resolution = fmt.split('/')[1]
+ width, height = resolution.split('x')
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': fmt_id,
+ 'resolution': resolution,
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ 'ext': self._FORMATS_EXT[fmt_id],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index 70c8ca64e..85e9344aa 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -13,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ 'description': 'Imgur: The most awesome images on the Internet.',
},
}, {
'url': 'https://imgur.com/A61SaA1',
@@ -29,8 +29,20 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ 'description': 'Imgur: The most awesome images on the Internet.',
},
+ }, {
+ 'url': 'https://imgur.com/gallery/YcAQlkx',
+ 'info_dict': {
+ 'id': 'YcAQlkx',
+ 'ext': 'mp4',
+ 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+ 'description': 'Imgur: The most awesome images on the Internet.'
+
+ }
+ }, {
+ 'url': 'http://imgur.com/topic/Funny/N8rOudd',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -100,25 +112,38 @@ class ImgurIE(InfoExtractor):
class ImgurAlbumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
- _TEST = {
+ _TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
},
'playlist_count': 25,
- }
+ }, {
+ 'url': 'http://imgur.com/a/j6Orj',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/topic/Aww/ll5Vk',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
album_id = self._match_id(url)
album_images = self._download_json(
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
- album_id)['data']['images']
-
- entries = [
- self.url_result('http://imgur.com/%s' % image['hash'])
- for image in album_images if image.get('hash')]
-
- return self.playlist_result(entries, album_id)
+ album_id, fatal=False)
+
+ if album_images:
+ data = album_images.get('data')
+ if data and isinstance(data, dict):
+ images = data.get('images')
+ if images and isinstance(images, list):
+ entries = [
+ self.url_result('http://imgur.com/%s' % image['hash'])
+ for image in images if image.get('hash')]
+ return self.playlist_result(entries, album_id)
+
+ # Fallback to single video
+ return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index c158f2064..e5e16ca3b 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -47,7 +47,7 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
new file mode 100644
index 000000000..cdc095a79
--- /dev/null
+++ b/youtube_dl/extractor/jwplatform.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class JWPlatformIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _TEST = {
+ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
+ 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
+ 'info_dict': {
+ 'id': 'nPripu9l',
+ 'ext': 'mov',
+ 'title': 'Big Buck Bunny Trailer',
+ 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
+ 'upload_date': '20081127',
+ 'timestamp': 1227796140,
+ }
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
+ video_data = json_data['playlist'][0]
+ subtitles = {}
+ for track in video_data['tracks']:
+ if track['kind'] == 'captions':
+ subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
+
+ formats = []
+ for source in video_data['sources']:
+ source_url = self._proto_relative_url(source['file'])
+ source_type = source.get('type') or ''
+ if source_type == 'application/vnd.apple.mpegurl':
+ m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif source_type.startswith('audio'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': self._proto_relative_url(video_data.get('image')),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
new file mode 100644
index 000000000..3c34d4604
--- /dev/null
+++ b/youtube_dl/extractor/makertv.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MakerTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+ _TEST = {
+ 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
+ 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
+ 'info_dict': {
+ 'id': 'Fh3QgymL9gsc',
+ 'ext': 'mp4',
+ 'title': 'Maze Runner: The Scorch Trials Official Movie Review',
+ 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
+ 'upload_date': '20150918',
+ 'timestamp': 1442549540,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'jwplatform:%s' % jwplatform_id,
+ 'ie_key': 'JWPlatform',
+ }
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 744e4a09a..97e8ffc97 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -16,7 +16,7 @@ from ..utils import (
class PBSIE(InfoExtractor):
_STATIONS = (
- (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
+ (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
(r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
(r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
(r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
index 63cc764bb..514e9b433 100644
--- a/youtube_dl/extractor/periscope.py
+++ b/youtube_dl/extractor/periscope.py
@@ -31,9 +31,8 @@ class PeriscopeIE(InfoExtractor):
}]
def _call_api(self, method, value):
- attribute = 'token' if len(value) > 13 else 'broadcast_id'
return self._download_json(
- 'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
+ 'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
def _real_extract(self, url):
token = self._match_id(url)
diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py
deleted file mode 100644
index 5da66ca9e..000000000
--- a/youtube_dl/extractor/soompi.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .crunchyroll import CrunchyrollIE
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
- ExtractorError,
- int_or_none,
- remove_start,
- xpath_text,
-)
-
-
-class SoompiBaseIE(InfoExtractor):
- def _get_episodes(self, webpage, episode_filter=None):
- episodes = self._parse_json(
- self._search_regex(
- r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
- None)
- return list(filter(episode_filter, episodes))
-
-
-class SoompiIE(SoompiBaseIE, CrunchyrollIE):
- IE_NAME = 'soompi'
- _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://tv.soompi.com/en/watch/29235',
- 'info_dict': {
- 'id': '29235',
- 'ext': 'mp4',
- 'title': 'Episode 1096',
- 'description': '2015-05-20'
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _get_episode(self, webpage, video_id):
- return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
-
- def _get_subtitles(self, config, video_id):
- sub_langs = {}
- for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
- sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
-
- subtitles = {}
- for s in config.findall('./{default}preload/subtitle'):
- lang_code = sub_langs.get(s.attrib['id'])
- if not lang_code:
- continue
- sub_id = s.get('id')
- data = xpath_text(s, './data', 'data')
- iv = xpath_text(s, './iv', 'iv')
- if not id or not iv or not data:
- continue
- subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
- subtitles[lang_code] = self._extract_subtitles(subtitle)
- return subtitles
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- webpage = self._download_webpage(
- url, video_id, 'Downloading episode page')
- except ExtractorError as ee:
- if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
- webpage = ee.cause.read()
- block_message = self._html_search_regex(
- r'(?s)<div class="block-message">(.+?)</div>', webpage,
- 'block message', default=None)
- if block_message:
- raise ExtractorError(block_message, expected=True)
- raise
-
- formats = []
- config = None
- for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
- config = self._download_xml(
- 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
- video_id, 'Downloading %s XML' % format_id)
- m3u8_url = xpath_text(
- config, './{default}preload/stream_info/file',
- '%s m3u8 URL' % format_id)
- if not m3u8_url:
- continue
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', m3u8_id=format_id))
- self._sort_formats(formats)
-
- episode = self._get_episode(webpage, video_id)
-
- title = episode['name']
- description = episode.get('description')
- duration = int_or_none(episode.get('duration'))
-
- thumbnails = [{
- 'id': thumbnail_id,
- 'url': thumbnail_url,
- } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
-
- subtitles = self.extract_subtitles(config, video_id)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles
- }
-
-
-class SoompiShowIE(SoompiBaseIE):
- IE_NAME = 'soompi:show'
- _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
- _TESTS = [{
- 'url': 'http://tv.soompi.com/en/shows/liar-game',
- 'info_dict': {
- 'id': 'liar-game',
- 'title': 'Liar Game',
- 'description': 'md5:52c02bce0c1a622a95823591d0589b66',
- },
- 'playlist_count': 14,
- }]
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, show_id, 'Downloading show page')
-
- title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
- description = self._og_search_description(webpage)
-
- entries = [
- self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
- for episode in self._get_episodes(webpage)]
-
- return self.playlist_result(entries, show_id, title, description)
diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py
new file mode 100644
index 000000000..a363b4d40
--- /dev/null
+++ b/youtube_dl/extractor/tele13.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ js_to_json,
+ qualities,
+ determine_ext,
+)
+
+
+class Tele13IE(InfoExtractor):
+ _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'md5': '4cb1fa38adcad8fea88487a078831755',
+ 'info_dict': {
+ 'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'ext': 'mp4',
+ 'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
+ },
+ 'params': {
+ # HTTP Error 404: Not Found
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
+ 'md5': '867adf6a3b3fef932c68a71d70b70946',
+ 'info_dict': {
+ 'id': 'rOoKv2OMpOw',
+ 'ext': 'mp4',
+ 'title': 'Shooting star seen on 7-Sep-2015',
+ 'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
+ 'uploader': 'Porjai Jaturongkhakun',
+ 'upload_date': '20150906',
+ 'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
+ },
+ 'add_ie': ['Youtube'],
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code')
+ sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json)
+
+ preference = qualities(['Móvil', 'SD', 'HD'])
+ formats = []
+ urls = []
+ for f in sources:
+ format_url = f['file']
+ if format_url and format_url not in urls:
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif YoutubeIE.suitable(format_url):
+ return self.url_result(format_url, 'Youtube')
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': f.get('label'),
+ 'preference': preference(f.get('label')),
+ 'ext': ext,
+ })
+ urls.append(format_url)
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
+ 'description': self._html_search_meta('description', webpage, 'description'),
+ 'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index c1ee1decc..e03e2dbaa 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -5,6 +5,8 @@ from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
+ xpath_attr,
+ xpath_element,
)
@@ -15,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.24video.net/video/view/1044982',
- 'md5': 'd041af8b5b4246ea466226a0d6693345',
+ 'md5': 'e09fc0901d9eaeedac872f154931deeb',
'info_dict': {
'id': '1044982',
'ext': 'mp4',
@@ -64,33 +66,24 @@ class TwentyFourVideoIE(InfoExtractor):
r'<div class="comments-title" id="comments-count">(\d+) комментари',
webpage, 'comment count', fatal=False))
- formats = []
+ # Sets some cookies
+ self._download_xml(
+ r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
+ video_id, 'Downloading init XML')
- pc_video = self._download_xml(
+ video_xml = self._download_xml(
'http://www.24video.net/video/xml/%s?mode=play' % video_id,
- video_id, 'Downloading PC video URL').find('.//video')
+ video_id, 'Downloading video XML')
- formats.append({
- 'url': pc_video.attrib['url'],
- 'format_id': 'pc',
- 'quality': 1,
- })
+ video = xpath_element(video_xml, './/video', 'video', fatal=True)
- like_count = int_or_none(pc_video.get('ratingPlus'))
- dislike_count = int_or_none(pc_video.get('ratingMinus'))
- age_limit = 18 if pc_video.get('adult') == 'true' else 0
+ formats = [{
+ 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+ }]
- mobile_video = self._download_xml(
- 'http://www.24video.net/video/xml/%s' % video_id,
- video_id, 'Downloading mobile video URL').find('.//video')
-
- formats.append({
- 'url': mobile_video.attrib['url'],
- 'format_id': 'mobile',
- 'quality': 0,
- })
-
- self._sort_formats(formats)
+ like_count = int_or_none(video.get('ratingPlus'))
+ dislike_count = int_or_none(video.get('ratingMinus'))
+ age_limit = 18 if video.get('adult') == 'true' else 0
return {
'id': video_id,
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index f38a72fde..811ee197d 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -4,26 +4,48 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from .xstream import XstreamIE
from ..utils import (
ExtractorError,
float_or_none,
)
-class VGTVIE(InfoExtractor):
- IE_DESC = 'VGTV and BTTV'
+class VGTVIE(XstreamIE):
+ IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+
+ _HOST_TO_APPNAME = {
+ 'vgtv.no': 'vgtv',
+ 'bt.no/tv': 'bttv',
+ 'aftenbladet.no/tv': 'satv',
+ 'fvn.no/fvntv': 'fvntv',
+ 'aftenposten.no/webtv': 'aptv',
+ }
+
+ _APP_NAME_TO_VENDOR = {
+ 'vgtv': 'vgtv',
+ 'bttv': 'bt',
+ 'satv': 'sa',
+ 'fvntv': 'fvn',
+ 'aptv': 'ap',
+ }
+
_VALID_URL = r'''(?x)
- (?:
- vgtv:|
- http://(?:www\.)?
+ (?:https?://(?:www\.)?
+ (?P<host>
+ %s
)
- (?P<host>vgtv|bt)
+ /
(?:
- :|
- \.no/(?:tv/)?\#!/(?:video|live)/
- )
- (?P<id>[0-9]+)
- '''
+ \#!/(?:video|live)/|
+ embed?.*id=
+ )|
+ (?P<appname>
+ %s
+ ):)
+ (?P<id>\d+)
+ ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
+
_TESTS = [
{
# streamType: vod
@@ -59,17 +81,18 @@ class VGTVIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'Video is no longer available',
},
{
- # streamType: live
+ # streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
'info_dict': {
'id': '113063',
- 'ext': 'flv',
- 'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'ext': 'mp4',
+ 'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg',
- 'duration': 0,
+ 'duration': 25966,
'timestamp': 1432975582,
'upload_date': '20150530',
'view_count': int,
@@ -80,6 +103,20 @@ class VGTVIE(InfoExtractor):
},
},
{
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mov',
+ 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'duration': 66,
+ 'timestamp': 1417002452,
+ 'upload_date': '20141126',
+ 'view_count': int,
+ }
+ },
+ {
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True,
},
@@ -89,21 +126,27 @@ class VGTVIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
-
- HOST_WEBSITES = {
- 'vgtv': 'vgtv',
- 'bt': 'bttv',
- }
+ appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
+ vendor = self._APP_NAME_TO_VENDOR[appname]
data = self._download_json(
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
- % (host, video_id, HOST_WEBSITES[host]),
+ % (vendor, video_id, appname),
video_id, 'Downloading media JSON')
if data.get('status') == 'inactive':
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
+ info = {
+ 'formats': [],
+ }
+ if len(video_id) == 5:
+ if appname == 'bttv':
+ info = self._extract_video_info('btno', video_id)
+ elif appname == 'aptv':
+ info = self._extract_video_info('ap', video_id)
+
streams = data['streamUrls']
stream_type = data.get('streamType')
@@ -111,48 +154,53 @@ class VGTVIE(InfoExtractor):
hls_url = streams.get('hls')
if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, video_id, 'mp4', m3u8_id='hls'))
+ m3u8_formats = self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
hds_url = streams.get('hds')
# wasLive hds are always 404
if hds_url and stream_type != 'wasLive':
- formats.extend(self._extract_f4m_formats(
- hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
- video_id, f4m_id='hds'))
+ f4m_formats = self._extract_f4m_formats(
+ hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ mp4_urls = streams.get('pseudostreaming') or []
mp4_url = streams.get('mp4')
if mp4_url:
- _url = hls_url or hds_url
- MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
- for mp4_format in _url.split(','):
- m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
- if not m:
- continue
- width = int(m.group('width'))
- height = int(m.group('height'))
- vbr = int(m.group('vbr'))
- formats.append({
- 'url': MP4_URL_TEMPLATE % mp4_format,
- 'format_id': 'mp4-%s' % vbr,
- 'width': width,
- 'height': height,
- 'vbr': vbr,
- 'preference': 1,
+ mp4_urls.append(mp4_url)
+ for mp4_url in mp4_urls:
+ format_info = {
+ 'url': mp4_url,
+ }
+ mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
+ if mobj:
+ tbr = int(mobj.group(3))
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': tbr,
+ 'format_id': 'mp4-%s' % tbr,
})
- self._sort_formats(formats)
+ formats.append(format_info)
+
+ info['formats'].extend(formats)
+
+ self._sort_formats(info['formats'])
- return {
+ info.update({
'id': video_id,
- 'title': self._live_title(data['title']),
+ 'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'],
- 'formats': formats,
'is_live': True if stream_type == 'live' else False,
- }
+ })
+ return info
class BTArticleIE(InfoExtractor):
@@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
_TEST = {
'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
- 'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
+ 'md5': '2acbe8ad129b3469d5ae51b1158878df',
'info_dict': {
'id': '23199',
'ext': 'mp4',
@@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, self._match_id(url))
video_id = self._search_regex(
- r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
- return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
+ r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
+ return self.url_result('bttv:%s' % video_id, 'VGTV')
class BTVestlendingenIE(InfoExtractor):
IE_NAME = 'bt:vestlendingen'
IE_DESC = 'Bergens Tidende - Vestlendingen'
_VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
'info_dict': {
@@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
'timestamp': 1430473209,
'upload_date': '20150501',
},
- }
+ 'skip': '404 Error',
+ }, {
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
+ 'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
+ 'info_dict': {
+ 'id': '86255',
+ 'ext': 'mov',
+ 'title': 'Du må tåle å fryse og være sulten',
+ 'description': 'md5:b8046f4d022d5830ddab04865791d063',
+ 'upload_date': '20150321',
+ 'timestamp': 1426942023,
+ },
+ }]
def _real_extract(self, url):
- return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
+ return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index a63c23617..ca3f20a3d 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -30,6 +30,12 @@ class VikiBaseIE(InfoExtractor):
_token = None
+ _ERRORS = {
+ 'geo': 'Sorry, this content is not available in your region.',
+ 'upcoming': 'Sorry, this content is not yet available.',
+ # 'paywall': 'paywall',
+ }
+
def _prepare_call(self, path, timestamp=None, post_data=None):
path += '?' if '?' not in path else '&'
if not timestamp:
@@ -67,6 +73,12 @@ class VikiBaseIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error),
expected=True)
+ def _check_errors(self, data):
+ for reason, status in data.get('blocking', {}).items():
+ if status and reason in self._ERRORS:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, self._ERRORS[reason]), expected=True)
+
def _real_initialize(self):
self._login()
@@ -193,6 +205,7 @@ class VikiIE(VikiBaseIE):
'timestamp': 1321985454,
'description': 'md5:44b1e46619df3a072294645c770cef36',
'title': 'Love In Magic',
+ 'age_limit': 13,
},
}]
@@ -202,6 +215,8 @@ class VikiIE(VikiBaseIE):
video = self._call_api(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
+ self._check_errors(video)
+
title = self.dict_selection(video.get('titles', {}), 'en')
if not title:
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
@@ -262,8 +277,11 @@ class VikiIE(VikiBaseIE):
r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items():
if format_id == 'm3u8':
- formats = self._extract_m3u8_formats(
- format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
+ m3u8_formats = self._extract_m3u8_formats(
+ format_dict['url'], video_id, 'mp4', 'm3u8_native',
+ m3u8_id='m3u8-%s' % protocol, fatal=None)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
else:
formats.append({
'url': format_dict['url'],
@@ -315,6 +333,8 @@ class VikiChannelIE(VikiBaseIE):
'containers/%s.json' % channel_id, channel_id,
'Downloading channel JSON')
+ self._check_errors(channel)
+
title = self.dict_selection(channel['titles'], 'en')
description = self.dict_selection(channel['descriptions'], 'en')
diff --git a/youtube_dl/extractor/xstream.py b/youtube_dl/extractor/xstream.py
index 71584c291..76c91bd92 100644
--- a/youtube_dl/extractor/xstream.py
+++ b/youtube_dl/extractor/xstream.py
@@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
'only_matching': True,
}]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- partner_id = mobj.group('partner_id')
- video_id = mobj.group('id')
-
+ def _extract_video_info(self, partner_id, video_id):
data = self._download_xml(
'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
% (partner_id, video_id),
@@ -97,6 +93,7 @@ class XstreamIE(InfoExtractor):
formats.append({
'url': link.get('href'),
'format_id': link.get('rel'),
+ 'preference': 1,
})
thumbnails = [{
@@ -113,3 +110,10 @@ class XstreamIE(InfoExtractor):
'formats': formats,
'thumbnails': thumbnails,
}
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ partner_id = mobj.group('partner_id')
+ video_id = mobj.group('id')
+
+ return self._extract_video_info(partner_id, video_id)