aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py17
-rw-r--r--youtube_dl/extractor/adultswim.py44
-rw-r--r--youtube_dl/extractor/aftenposten.py11
-rw-r--r--youtube_dl/extractor/ard.py3
-rw-r--r--youtube_dl/extractor/arte.py1
-rw-r--r--youtube_dl/extractor/beatportpro.py103
-rw-r--r--youtube_dl/extractor/breakcom.py2
-rw-r--r--youtube_dl/extractor/cloudy.py3
-rw-r--r--youtube_dl/extractor/common.py12
-rw-r--r--youtube_dl/extractor/dailymotion.py10
-rw-r--r--youtube_dl/extractor/eagleplatform.py98
-rw-r--r--youtube_dl/extractor/eighttracks.py14
-rw-r--r--youtube_dl/extractor/footyroom.py41
-rw-r--r--youtube_dl/extractor/funnyordie.py9
-rw-r--r--youtube_dl/extractor/gazeta.py38
-rw-r--r--youtube_dl/extractor/generic.py57
-rw-r--r--youtube_dl/extractor/globo.py2
-rw-r--r--youtube_dl/extractor/jeuxvideo.py36
-rw-r--r--youtube_dl/extractor/kanalplay.py96
-rw-r--r--youtube_dl/extractor/letv.py9
-rw-r--r--youtube_dl/extractor/livestream.py41
-rw-r--r--youtube_dl/extractor/niconico.py5
-rw-r--r--youtube_dl/extractor/npo.py4
-rw-r--r--youtube_dl/extractor/nrk.py7
-rw-r--r--youtube_dl/extractor/orf.py94
-rw-r--r--youtube_dl/extractor/pladform.py90
-rw-r--r--youtube_dl/extractor/primesharetv.py69
-rw-r--r--youtube_dl/extractor/rtve.py41
-rw-r--r--youtube_dl/extractor/ssa.py58
-rw-r--r--youtube_dl/extractor/teamcoco.py6
-rw-r--r--youtube_dl/extractor/twitch.py24
-rw-r--r--youtube_dl/extractor/vidme.py15
-rw-r--r--youtube_dl/extractor/viewster.py129
-rw-r--r--youtube_dl/extractor/vimeo.py76
-rw-r--r--youtube_dl/extractor/yam.py37
-rw-r--r--youtube_dl/extractor/yandexmusic.py127
-rw-r--r--youtube_dl/extractor/youtube.py2
37 files changed, 1284 insertions, 147 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 5ca534cdf..bceed92e1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
@@ -116,6 +117,7 @@ from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE
+from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .ehow import EHowIE
@@ -150,6 +152,7 @@ from .fktv import (
)
from .flickr import FlickrIE
from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
from .fourtube import FourTubeIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
@@ -174,6 +177,7 @@ from .gameone import (
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .giantbomb import GiantBombIE
@@ -228,6 +232,7 @@ from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
@@ -354,6 +359,7 @@ from .orf import (
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
+ ORFIPTVIE,
)
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
@@ -361,6 +367,7 @@ from .pbs import PBSIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
@@ -373,6 +380,7 @@ from .pornhub import (
)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
@@ -398,7 +406,7 @@ from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
@@ -456,6 +464,7 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
@@ -551,6 +560,7 @@ from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
@@ -607,6 +617,11 @@ from .yahoo import (
YahooSearchIE,
)
from .yam import YamIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+)
from .yesjapan import YesJapanIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 34b8b0115..39335b827 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -2,13 +2,12 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- xpath_text,
float_or_none,
+ xpath_text,
)
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
+ }, {
+ 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+ 'playlist': [
+ {
+ 'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+ 'ext': 'flv',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+ },
+ }
+ ],
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+ },
}]
@staticmethod
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
+ return None, None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
webpage = self._download_webpage(url, episode_path)
# Extract the value of `bootstrappedData` from the Javascript in the page.
- bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
- try:
- bootstrappedData = json.loads(bootstrappedDataJS)
- except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % episode_path
- raise ExtractorError(errmsg, cause=ve)
+ bootstrapped_data = self._parse_json(self._search_regex(
+ r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
- collections = bootstrappedData['playlists']['collections']
+ collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
else:
- collections = bootstrappedData['show']['collections']
+ collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
- show = bootstrappedData['show']
+ # Video wasn't found in the collections, let's try `slugged_video`.
+ if video_info is None:
+ if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+ video_info = bootstrapped_data['slugged_video']
+ else:
+ raise ExtractorError('Unable to find video info')
+
+ show = bootstrapped_data['show']
show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py
index 2b257ede7..e15c015fb 100644
--- a/youtube_dl/extractor/aftenposten.py
+++ b/youtube_dl/extractor/aftenposten.py
@@ -14,10 +14,10 @@ from ..utils import (
class AftenpostenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+ _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
@@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor):
}
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._html_search_regex(
- r'data-xs-id="(\d+)"', webpage, 'video id')
+ video_id = self._match_id(url)
data = self._download_xml(
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 783b53e23..6a35ea463 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+ if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
+ raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+
if re.search(r'[\?&]rss($|[=&])', url):
doc = parse_xml(webpage)
if doc.tag == 'rss':
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 929dd3cc5..8273bd6c9 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
formats.append(format)
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats
diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py
new file mode 100644
index 000000000..3c7775d3e
--- /dev/null
+++ b/youtube_dl/extractor/beatportpro.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportProIE(InfoExtractor):
+ _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+ 'md5': 'b3c34d8639a2f6a7f734382358478887',
+ 'info_dict': {
+ 'id': '5379371',
+ 'display_id': 'synesthesia-original-mix',
+ 'ext': 'mp4',
+ 'title': 'Froxic - Synesthesia (Original Mix)',
+ },
+ }, {
+ 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+ 'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+ 'info_dict': {
+ 'id': '3756896',
+ 'display_id': 'love-and-war-original-mix',
+ 'ext': 'mp3',
+ 'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+ },
+ }, {
+ 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+ 'md5': 'a1fd8e8046de3950fd039304c186c05f',
+ 'info_dict': {
+ 'id': '4991738',
+ 'display_id': 'birds-original-mix',
+ 'ext': 'mp4',
+ 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ track_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playables = self._parse_json(
+ self._search_regex(
+ r'window\.Playables\s*=\s*({.+?});', webpage,
+ 'playables info', flags=re.DOTALL),
+ track_id)
+
+ track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+ title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
+ if track['mix']:
+ title += ' (' + track['mix'] + ')'
+
+ formats = []
+ for ext, info in track['preview'].items():
+ if not info['url']:
+ continue
+ fmt = {
+ 'url': info['url'],
+ 'ext': ext,
+ 'format_id': ext,
+ 'vcodec': 'none',
+ }
+ if ext == 'mp3':
+ fmt['preference'] = 0
+ fmt['acodec'] = 'mp3'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ elif ext == 'mp4':
+ fmt['preference'] = 1
+ fmt['acodec'] = 'aac'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ images = []
+ for name, info in track['images'].items():
+ image_url = info.get('url')
+ if name == 'dynamic' or not image_url:
+ continue
+ image = {
+ 'id': name,
+ 'url': image_url,
+ 'height': int_or_none(info.get('height')),
+ 'width': int_or_none(info.get('width')),
+ }
+ images.append(image)
+
+ return {
+ 'id': compat_str(track.get('id')) or track_id,
+ 'display_id': track.get('slug') or display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': images,
+ }
diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py
index 4bcc897c9..809287d14 100644
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
'tbr': media['bitRate'],
'width': media['width'],
'height': media['height'],
- } for media in info['media']]
+ } for media in info['media'] if media.get('mediaPurpose') == 'play']
if not formats:
formats.append({
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
index abf8cc280..0fa720ee8 100644
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dl/extractor/cloudy.py
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
file_key = self._search_regex(
- r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+ [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
+ webpage, 'file_key')
return self._extract_video(video_host, video_id, file_key)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index cf39c0c21..e5245ec3f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -839,7 +839,7 @@ class InfoExtractor(object):
m3u8_id=None):
formats = [{
- 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
@@ -883,8 +883,13 @@ class InfoExtractor(object):
formats.append({'url': format_url(line)})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+ format_id = []
+ if m3u8_id:
+ format_id.append(m3u8_id)
+ last_media_name = last_media.get('NAME') if last_media else None
+ format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
f = {
- 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
+ 'format_id': '-'.join(format_id),
'url': format_url(line.strip()),
'tbr': tbr,
'ext': ext,
@@ -1057,6 +1062,9 @@ class InfoExtractor(object):
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
+ def _subtitles_timecode(self, seconds):
+ return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 42b20a46d..4f67c3aac 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -46,13 +46,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
_TESTS = [
{
- 'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
- 'md5': '392c4b85a60a90dc4792da41ce3144eb',
+ 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+ 'md5': '2137c41a8e78554bb09225b8eb322406',
'info_dict': {
- 'id': 'x33vw9',
+ 'id': 'x2iuewm',
'ext': 'mp4',
- 'uploader': 'Amphora Alex and Van .',
- 'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
+ 'uploader': 'IGN',
+ 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
}
},
# Vevo video
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
new file mode 100644
index 000000000..7173371ee
--- /dev/null
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ eagleplatform:(?P<custom_host>[^/]+):|
+ https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://lenta.ru/news/2015/03/06/navalny/
+ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+ 'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # http://muz-tv.ru/play/7129/
+ # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+ 'url': 'eagleplatform:media.clipyou.ru:12820',
+ 'md5': '6c2ebeab03b739597ce8d86339d5a905',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ }]
+
+ def _handle_error(self, response):
+ status = int_or_none(response.get('status', 200))
+ if status != 200:
+ raise ExtractorError(' '.join(response['errors']), expected=True)
+
+ def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+ self._handle_error(response)
+ return response
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+ player_data = self._download_json(
+ 'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+
+ media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+ title = media['title']
+ description = media.get('description')
+ thumbnail = media.get('snapshot')
+ duration = int_or_none(media.get('duration'))
+ view_count = int_or_none(media.get('views'))
+
+ age_restriction = media.get('age_restriction')
+ age_limit = None
+ if age_restriction:
+ age_limit = 0 if age_restriction == 'allow_all' else 18
+
+ m3u8_data = self._download_json(
+ media['sources']['secure_m3u8']['auto'],
+ video_id, 'Downloading m3u8 JSON')
+
+ formats = self._extract_m3u8_formats(
+ m3u8_data['data'][0], video_id,
+ 'mp4', entry_protocol='m3u8_native')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py
index fb5dbbe2b..0b61ea0ba 100644
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import json
import random
-import re
from .common import InfoExtractor
from ..compat import (
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
- json_like = self._search_regex(
- r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
- data = json.loads(json_like)
+ data = self._parse_json(
+ self._search_regex(
+ r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+ playlist_id)
session = str(random.randint(0, 1000000000))
mix_id = data['id']
track_count = data['tracks_count']
duration = data['duration']
avg_song_duration = float(duration) / track_count
+ # duration is sometimes negative, use predefined avg duration
+ if avg_song_duration <= 0:
+ avg_song_duration = 300
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
entries = []
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
new file mode 100644
index 000000000..2b4691ae8
--- /dev/null
+++ b/youtube_dl/extractor/footyroom.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FootyRoomIE(InfoExtractor):
+ _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+ 'info_dict': {
+ 'id': 'schalke-04-0-2-real-madrid-2015-02',
+ 'title': 'Schalke 04 0 – 2 Real Madrid',
+ },
+ 'playlist_count': 3,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+ playlist_id)
+
+ playlist_title = self._og_search_title(webpage)
+
+ entries = []
+ for video in playlist:
+ payload = video.get('payload')
+ if not payload:
+ continue
+ playwire_url = self._search_regex(
+ r'data-config="([^"]+)"', payload,
+ 'playwire url', default=None)
+ if playwire_url:
+ entries.append(self.url_result(playwire_url, 'Playwire'))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index a49fc1151..dd87257c4 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
bitrates.sort()
formats = []
-
for bitrate in bitrates:
for link in links:
formats.append({
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
'vbr': bitrate,
})
+ subtitles = {}
+ for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
+ subtitles[src_lang] = [{
+ 'ext': src.split('/')[-1],
+ 'url': 'http://www.funnyordie.com%s' % src,
+ }]
+
post_json = self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
post = json.loads(post_json)
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
'description': post.get('description'),
'thumbnail': post.get('picture'),
'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
new file mode 100644
index 000000000..ea32b621c
--- /dev/null
+++ b/youtube_dl/extractor/gazeta.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+ _TESTS = [{
+ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+ 'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+ 'info_dict': {
+ 'id': '205566',
+ 'ext': 'mp4',
+ 'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+ 'description': 'md5:38617526050bd17b234728e7f9620a71',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ display_id = mobj.group('id')
+ embed_url = '%s?p=embed' % mobj.group('url')
+ embed_page = self._download_webpage(
+ embed_url, display_id, 'Downloading embed page')
+
+ video_id = self._search_regex(
+ r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+ return self.url_result(
+ 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 5dc53685c..4e6927b08 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -570,6 +570,45 @@ class GenericIE(InfoExtractor):
'title': 'John Carlson Postgame 2/25/15',
},
},
+ # Eagle.Platform embed (generic URL)
+ {
+ 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ },
+ # ClipYou (Eagle.Platform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ },
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -1155,6 +1194,24 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+ # Look for Eagle.Platform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'EaglePlatform')
+
+ # Look for ClipYou (uses Eagle.Platform) embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+ # Look for Pladform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Pladform')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
index 29638a194..8a95793ca 100644
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
- _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
+ _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
_VIDEOID_REGEXES = [
r'\bdata-video-id="(\d+)"',
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index 8094cc2e4..d0720ff56 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
'info_dict': {
- 'id': '5182',
+ 'id': '114765',
'ext': 'mp4',
- 'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
- 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+ 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+ 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
},
}
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = mobj.group(1)
webpage = self._download_webpage(url, title)
- xml_link = self._html_search_regex(
- r'<param name="flashvars" value="config=(.*?)" />',
+ title = self._html_search_meta('name', webpage)
+ config_url = self._html_search_regex(
+ r'data-src="(/contenu/medias/video.php.*?)"',
webpage, 'config URL')
+ config_url = 'http://www.jeuxvideo.com' + config_url
video_id = self._search_regex(
- r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
- xml_link, 'video ID')
+ r'id=(\d+)',
+ config_url, 'video ID')
- config = self._download_xml(
- xml_link, title, 'Downloading XML config')
- info_json = config.find('format.json').text
- info = json.loads(info_json)['versions'][0]
+ config = self._download_json(
+ config_url, title, 'Downloading JSON config')
- video_url = 'http://video720.jeuxvideo.com/' + info['file']
+ formats = [{
+ 'url': source['file'],
+ 'format_id': source['label'],
+ 'resolution': source['label'],
+ } for source in reversed(config['sources'])]
return {
'id': video_id,
- 'title': config.find('titre_video').text,
- 'ext': 'mp4',
- 'url': video_url,
+ 'title': title,
+ 'formats': formats,
'description': self._og_search_description(webpage),
- 'thumbnail': config.find('image').text,
+ 'thumbnail': config.get('image'),
}
diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py
new file mode 100644
index 000000000..2bb078036
--- /dev/null
+++ b/youtube_dl/extractor/kanalplay.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+)
+
+
+class KanalPlayIE(InfoExtractor):
+ IE_DESC = 'Kanal 5/9/11 Play'
+ _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
+ 'info_dict': {
+ 'id': '3270012277',
+ 'ext': 'flv',
+ 'title': 'Saknar både dusch och avlopp',
+ 'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
+ 'duration': 2636.36,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
+ 'only_matching': True,
+ }]
+
+ def _fix_subtitles(self, subs):
+ return '\r\n\r\n'.join(
+ '%s\r\n%s --> %s\r\n%s'
+ % (
+ num,
+ self._subtitles_timecode(item['startMillis'] / 1000.0),
+ self._subtitles_timecode(item['endMillis'] / 1000.0),
+ item['text'],
+ ) for num, item in enumerate(subs, 1))
+
+ def _get_subtitles(self, channel_id, video_id):
+ subs = self._download_json(
+ 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
+ video_id, 'Downloading subtitles JSON', fatal=False)
+ return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ channel_id = mobj.group('channel_id')
+
+ video = self._download_json(
+ 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
+ video_id)
+
+ reasons_for_no_streams = video.get('reasonsForNoStreams')
+ if reasons_for_no_streams:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
+ expected=True)
+
+ title = video['title']
+ description = video.get('description')
+ duration = float_or_none(video.get('length'), 1000)
+ thumbnail = video.get('posterUrl')
+
+ stream_base_url = video['streamBaseUrl']
+
+ formats = [{
+ 'url': stream_base_url,
+ 'play_path': stream['source'],
+ 'ext': 'flv',
+ 'tbr': float_or_none(stream.get('bitrate'), 1000),
+ 'rtmp_real_time': True,
+ } for stream in video['streams']]
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if video.get('hasSubtitle'):
+ subtitles = self.extract_subtitles(channel_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 85eee141b..1484ac0d2 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -88,12 +88,13 @@ class LetvIE(InfoExtractor):
play_json_req = compat_urllib_request.Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
)
- play_json_req.add_header(
- 'Ytdl-request-proxy',
- self._downloader.params.get('cn_verification_proxy'))
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
play_json = self._download_json(
play_json_req,
- media_id, 'playJson data')
+ media_id, 'Downloading playJson data')
# Check for errors
playstatus = play_json['playstatus']
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 3642089f7..2467f8bdd 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
import re
import json
+import itertools
from .common import InfoExtractor
from ..compat import (
@@ -41,6 +42,13 @@ class LivestreamIE(InfoExtractor):
},
'playlist_mincount': 4,
}, {
+ 'url': 'http://new.livestream.com/chess24/tatasteelchess',
+ 'info_dict': {
+ 'title': 'Tata Steel Chess',
+ 'id': '3705884',
+ },
+ 'playlist_mincount': 60,
+ }, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
}]
@@ -117,6 +125,30 @@ class LivestreamIE(InfoExtractor):
'view_count': video_data.get('views'),
}
+ def _extract_event(self, info):
+ event_id = compat_str(info['id'])
+ account = compat_str(info['owner_account_id'])
+ root_url = (
+ 'https://new.livestream.com/api/accounts/{account}/events/{event}/'
+ 'feed.json'.format(account=account, event=event_id))
+
+ def _extract_videos():
+ last_video = None
+ for i in itertools.count(1):
+ if last_video is None:
+ info_url = root_url
+ else:
+ info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+ root=root_url, id=last_video)
+ videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data']
+ videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+ if not videos_info:
+ break
+ for v in videos_info:
+ yield self._extract_video_info(v)
+ last_video = videos_info[-1]['id']
+ return self.playlist_result(_extract_videos(), event_id, info['full_name'])
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@@ -144,14 +176,13 @@ class LivestreamIE(InfoExtractor):
result = result and compat_str(vdata['data']['id']) == vid
return result
- videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data']
- if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
- return self.playlist_result(
- videos, '%s' % info['id'], info['full_name'])
+ return self._extract_event(info)
else:
+ videos = [self._extract_video_info(video_data['data'])
+ for video_data in info['feed']['data']
+ if is_relevant(video_data, video_id)]
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
return videos[0]
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 4c1890416..7fb4e57df 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -41,7 +41,7 @@ class NiconicoIE(InfoExtractor):
},
}
- _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
# Determine whether the downloader used authentication to download video
_AUTHENTICATED = False
@@ -76,8 +76,7 @@ class NiconicoIE(InfoExtractor):
return True
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 9c01eb0af..557dffa46 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
if streams:
for stream in streams:
stream_type = stream.get('type').lower()
- if stream_type == 'ss':
+ # smooth streaming is not supported
+ if stream_type in ['ss', 'ms']:
continue
stream_info = self._download_json(
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
@@ -242,6 +243,7 @@ class NPOLiveIE(NPOBaseIE):
else:
formats.append({
'url': stream_url,
+ 'preference': -10,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 1e4cfa2e7..bff36f9d3 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -149,9 +149,6 @@ class NRKTVIE(InfoExtractor):
}
]
- def _seconds2str(self, s):
- return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
-
def _debug_print(self, txt):
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
@@ -168,8 +165,8 @@ class NRKTVIE(InfoExtractor):
for pos, p in enumerate(ps):
begin = parse_duration(p.get('begin'))
duration = parse_duration(p.get('dur'))
- starttime = self._seconds2str(begin)
- endtime = self._seconds2str(begin + duration)
+ starttime = self._subtitles_timecode(begin)
+ endtime = self._subtitles_timecode(begin + duration)
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
return {lang: [
{'ext': 'ttml', 'url': url},
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 4e293392b..ca1a5bb3c 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -11,6 +11,11 @@ from ..utils import (
HEADRequest,
unified_strdate,
ExtractorError,
+ strip_jsonp,
+ int_or_none,
+ float_or_none,
+ determine_ext,
+ remove_end,
)
@@ -197,3 +202,92 @@ class ORFFM4IE(InfoExtractor):
'description': data['subtitle'],
'entries': entries
}
+
+
+class ORFIPTVIE(InfoExtractor):
+ IE_NAME = 'orf:iptv'
+ IE_DESC = 'iptv.ORF.at'
+ _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://iptv.orf.at/stories/2267952',
+ 'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
+ 'info_dict': {
+ 'id': '339775',
+ 'ext': 'flv',
+ 'title': 'Kreml-Kritiker Nawalny wieder frei',
+ 'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
+ 'duration': 84.729,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20150306',
+ },
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+ video_id = self._search_regex(
+ r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['default']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
new file mode 100644
index 000000000..abde34b94
--- /dev/null
+++ b/youtube_dl/extractor/pladform.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ xpath_text,
+ qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ out\.pladform\.ru/player|
+ static\.pladform\.ru/player\.swf
+ )
+ \?.*\bvideoid=|
+ video\.pladform\.ru/catalog/video/videoid/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://muz-tv.ru/kinozal/view/7400/
+ 'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+ 'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_xml(
+ 'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+ video_id)
+
+ if video.tag == 'error':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, video.text),
+ expected=True)
+
+ quality = qualities(('ld', 'sd', 'hd'))
+
+ formats = [{
+ 'url': src.text,
+ 'format_id': src.get('quality'),
+ 'quality': quality(src.get('quality')),
+ } for src in video.findall('./src')]
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+ video_id)
+
+ title = self._og_search_title(webpage, fatal=False) or xpath_text(
+ video, './/title', 'title', fatal=True)
+ description = self._search_regex(
+ r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+ video, './/cover', 'cover')
+
+ duration = int_or_none(xpath_text(video, './/time', 'duration'))
+ age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
new file mode 100644
index 000000000..01cc3d9ea
--- /dev/null
+++ b/youtube_dl/extractor/primesharetv.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import ExtractorError
+
+
+class PrimeShareTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+ _TEST = {
+ 'url': 'http://primeshare.tv/download/238790B611',
+ 'md5': 'b92d9bf5461137c36228009f31533fbc',
+ 'info_dict': {
+ 'id': '238790B611',
+ 'ext': 'mp4',
+ 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>File not exist<' in webpage:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', webpage))
+
+ headers = {
+ 'Referer': url,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ }
+
+ wait_time = int(self._search_regex(
+ r'var\s+cWaitTime\s*=\s*(\d+)',
+ webpage, 'wait time', default=7)) + 1
+ self._sleep(wait_time, video_id)
+
+ req = compat_urllib_request.Request(
+ url, compat_urllib_parse.urlencode(fields), headers)
+ video_page = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ video_url = self._search_regex(
+ r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+ video_page, 'video url')
+
+ title = self._html_search_regex(
+ r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
+ video_page, 'title')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'ext': 'mp4',
+ }
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index b42442d12..13f071077 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -127,6 +127,47 @@ class RTVEALaCartaIE(InfoExtractor):
for s in subs)
+class RTVEInfantilIE(InfoExtractor):
+ IE_NAME = 'rtve.es:infantil'
+ IE_DESC = 'RTVE infantil'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+ 'md5': '915319587b33720b8e0357caaa6617e6',
+ 'info_dict': {
+ 'id': '3040283',
+ 'ext': 'mp4',
+ 'title': 'Maneras de vivir',
+ 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+ 'duration': 357.958,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+ video_id)['page']['items'][0]
+
+ webpage = self._download_webpage(url, video_id)
+ vidplayer_id = self._search_regex(
+ r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
+
+ png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
+ png = self._download_webpage(png_url, video_id, 'Downloading url information')
+ video_url = _decrypt_url(png)
+
+ return {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'title': info['title'],
+ 'url': video_url,
+ 'thumbnail': info.get('image'),
+ 'duration': float_or_none(info.get('duration'), scale=1000),
+ }
+
+
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py
new file mode 100644
index 000000000..13101c714
--- /dev/null
+++ b/youtube_dl/extractor/ssa.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unescapeHTML,
+ parse_duration,
+)
+
+
+class SSAIE(InfoExtractor):
+ _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://ssa.nls.uk/film/3561',
+ 'info_dict': {
+ 'id': '3561',
+ 'ext': 'flv',
+ 'title': 'SHETLAND WOOL',
+ 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+ 'duration': 900,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ streamer = self._search_regex(
+ r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
+ play_path = self._search_regex(
+ r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+
+ def search_field(field_name, fatal=False):
+ return self._search_regex(
+ r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+ webpage, 'title', fatal=fatal)
+
+ title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+ description = unescapeHTML(search_field('Description'))
+ duration = parse_duration(search_field('Running time'))
+ thumbnail = self._search_regex(
+ r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': streamer,
+ 'play_path': play_path,
+ 'ext': 'flv',
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 5793dbc10..7cb06f351 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor):
embed = self._download_webpage(
embed_url, video_id, 'Downloading embed page')
- encoded_data = self._search_regex(
- r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+ player_data = self._parse_json(self._search_regex(
+ r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
data = self._parse_json(
- base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
+ base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 8af136147..cbdaf9c7a 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -85,6 +85,14 @@ class TwitchBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login: %s' % m.group('msg').strip(), expected=True)
+ def _prefer_source(self, formats):
+ try:
+ source = next(f for f in formats if f['format_id'] == 'Source')
+ source['preference'] = 10
+ except StopIteration:
+ pass # No Source stream present
+ self._sort_formats(formats)
+
class TwitchItemBaseIE(TwitchBaseIE):
def _download_info(self, item, item_id):
@@ -209,6 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'%s/vod/%s?nauth=%s&nauthsig=%s'
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
item_id, 'mp4')
+ self._prefer_source(formats)
info['formats'] = formats
return info
@@ -349,21 +358,14 @@ class TwitchStreamIE(TwitchBaseIE):
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
'segment_preference': '4',
- 'sig': access_token['sig'],
- 'token': access_token['token'],
+ 'sig': access_token['sig'].encode('utf-8'),
+ 'token': access_token['token'].encode('utf-8'),
}
-
formats = self._extract_m3u8_formats(
'%s/api/channel/hls/%s.m3u8?%s'
- % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+ % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
channel_id, 'mp4')
-
- # prefer the 'source' stream, the others are limited to 30 fps
- def _sort_source(f):
- if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
- return 1
- return 0
- formats = sorted(formats, key=_sort_source)
+ self._prefer_source(formats)
view_count = stream.get('viewers')
timestamp = parse_iso8601(stream.get('created_at'))
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index 5c89824c1..bd953fb4c 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -28,12 +26,11 @@ class VidmeIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+ video_url = self._html_search_regex(
+ r'<source src="([^"]+)"', webpage, 'video URL')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default='')
@@ -44,13 +41,10 @@ class VidmeIE(InfoExtractor):
duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
- r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+ r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
webpage, 'like count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
- webpage, 'comment count', fatal=False))
return {
'id': video_id,
@@ -64,5 +58,4 @@ class VidmeIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'like_count': like_count,
- 'comment_count': comment_count,
}
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
new file mode 100644
index 000000000..1742e66f4
--- /dev/null
+++ b/youtube_dl/extractor/viewster.py
@@ -0,0 +1,129 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+
+
+class ViewsterIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+ _TESTS = [{
+ # movielink, paymethod=fre
+ 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
+ 'playlist': [{
+ 'md5': '8f9d94b282d80c42b378dffdbb11caf3',
+ 'info_dict': {
+ 'id': '1293-19341-000-movie',
+ 'ext': 'flv',
+ 'title': "'Hout' (Wood) - Movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1293-19341-000',
+ 'title': "'Hout' (Wood)",
+ 'description': 'md5:925733185a9242ef96f436937683f33b',
+ }
+ }, {
+ # movielink, paymethod=adv
+ 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
+ 'playlist': [{
+ 'md5': '77a005453ca7396cbe3d35c9bea30aef',
+ 'info_dict': {
+ 'id': '1140-11855-000-movie',
+ 'ext': 'flv',
+ 'title': "THE LISTENING PROJECT - Movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1140-11855-000',
+ 'title': "THE LISTENING PROJECT",
+ 'description': 'md5:714421ae9957e112e672551094bf3b08',
+ }
+ }, {
+ # direct links, no movielink
+ 'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
+ 'playlist': [{
+ 'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
+ 'info_dict': {
+ 'id': '1198-56411-000-trailer',
+ 'ext': 'mp4',
+ 'title': "Sinister - Trailer",
+ },
+ }, {
+ 'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
+ 'info_dict': {
+ 'id': '1198-56411-000-behind-scenes',
+ 'ext': 'mp4',
+ 'title': "Sinister - Behind Scenes",
+ },
+ }, {
+ 'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
+ 'info_dict': {
+ 'id': '1198-56411-000-scene-from-movie',
+ 'ext': 'mp4',
+ 'title': "Sinister - Scene from movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1198-56411-000',
+ 'title': "Sinister",
+ 'description': 'md5:014c40b0488848de9683566a42e33372',
+ }
+ }]
+
+ _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ request = compat_urllib_request.Request(
+ 'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+ request.add_header('Accept', self._ACCEPT_HEADER)
+
+ movie = self._download_json(
+ request, video_id, 'Downloading movie metadata JSON')
+
+ title = movie.get('title') or movie['original_title']
+ description = movie.get('synopsis')
+ thumbnail = movie.get('large_artwork') or movie.get('artwork')
+
+ entries = []
+ for clip in movie['play_list']:
+ entry = None
+
+ # movielink api
+ link_request = clip.get('link_request')
+ if link_request:
+ request = compat_urllib_request.Request(
+ 'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
+ % link_request)
+ request.add_header('Accept', self._ACCEPT_HEADER)
+
+ movie_link = self._download_json(
+ request, video_id, 'Downloading movie link JSON', fatal=False)
+
+ if movie_link:
+ formats = self._extract_f4m_formats(
+ movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
+ self._sort_formats(formats)
+ entry = {
+ 'formats': formats,
+ }
+
+ # direct link
+ clip_url = clip.get('clip_data', {}).get('url')
+ if clip_url:
+ entry = {
+ 'url': clip_url,
+ 'ext': 'mp4',
+ }
+
+ if entry:
+ entry.update({
+ 'id': '%s-%s' % (video_id, clip['canonical_title']),
+ 'title': '%s - %s' % (title, clip['title']),
+ })
+ entries.append(entry)
+
+ playlist = self.playlist_result(entries, video_id, title, description)
+ playlist['thumbnail'] = thumbnail
+ return playlist
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 8f540f578..bd09652cd 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import json
import re
import itertools
-import hashlib
from .common import InfoExtractor
from ..compat import (
@@ -20,6 +19,7 @@ from ..utils import (
RegexNotFoundError,
smuggle_url,
std_headers,
+ unified_strdate,
unsmuggle_url,
urlencode_postdata,
)
@@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
data = urlencode_postdata({
'email': username,
'password': password,
@@ -140,6 +140,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
+ 'upload_date': '20130927',
'duration': 187,
},
},
@@ -176,17 +177,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
- data = compat_urllib_parse.urlencode({
+ token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
+ data = urlencode_postdata({
'password': password,
'token': token,
})
- # I didn't manage to use the password with https
- if url.startswith('https'):
- pass_url = url.replace('https', 'http')
- else:
- pass_url = url
- password_request = compat_urllib_request.Request(pass_url + '/password', data)
+ if url.startswith('http://'):
+ # vimeo only supports https now, but the user can give an http url
+ url = url.replace('http://', 'https://')
+ password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
return self._download_webpage(
@@ -223,12 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
video_id = mobj.group('id')
orig_url = url
if mobj.group('pro') or mobj.group('player'):
- url = 'http://player.vimeo.com/video/' + video_id
-
- password = self._downloader.params.get('videopassword', None)
- if password:
- headers['Cookie'] = '%s_password=%s' % (
- video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+ url = 'https://player.vimeo.com/video/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
@@ -323,9 +317,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Extract upload date
video_upload_date = None
- mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
+ mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
if mobj is not None:
- video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
+ video_upload_date = unified_strdate(mobj.group(1))
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
@@ -379,7 +373,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
for tt in text_tracks:
subtitles[tt['lang']] = [{
'ext': 'vtt',
- 'url': 'http://vimeo.com' + tt['url'],
+ 'url': 'https://vimeo.com' + tt['url'],
}]
return {
@@ -402,11 +396,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
class VimeoChannelIE(InfoExtractor):
IE_NAME = 'vimeo:channel'
- _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+ _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
_TESTS = [{
- 'url': 'http://vimeo.com/channels/tributes',
+ 'url': 'https://vimeo.com/channels/tributes',
'info_dict': {
'id': 'tributes',
'title': 'Vimeo Tributes',
@@ -435,10 +429,10 @@ class VimeoChannelIE(InfoExtractor):
name="([^"]+)"\s+
value="([^"]*)"
''', login_form))
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
fields['token'] = token
fields['password'] = password
- post = compat_urllib_parse.urlencode(fields)
+ post = urlencode_postdata(fields)
password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path)
@@ -465,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
- entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+ entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
return {'_type': 'playlist',
'id': list_id,
@@ -476,15 +470,15 @@ class VimeoChannelIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
- return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
+ return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user'
- _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
+ _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{
- 'url': 'http://vimeo.com/nkistudio/videos',
+ 'url': 'https://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
'id': 'nkistudio',
@@ -495,15 +489,15 @@ class VimeoUserIE(VimeoChannelIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
- return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+ return self._extract_videos(name, 'https://vimeo.com/%s' % name)
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
- _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
+ _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
- 'url': 'http://vimeo.com/album/2632481',
+ 'url': 'https://vimeo.com/album/2632481',
'info_dict': {
'id': '2632481',
'title': 'Staff Favorites: November 2013',
@@ -527,14 +521,14 @@ class VimeoAlbumIE(VimeoChannelIE):
def _real_extract(self, url):
album_id = self._match_id(url)
- return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+ return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
- _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
_TESTS = [{
- 'url': 'http://vimeo.com/groups/rolexawards',
+ 'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': {
'id': 'rolexawards',
'title': 'Rolex Awards for Enterprise',
@@ -548,13 +542,13 @@ class VimeoGroupsIE(VimeoAlbumIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
- return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
+ return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
class VimeoReviewIE(InfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
- _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -566,7 +560,7 @@ class VimeoReviewIE(InfoExtractor):
}
}, {
'note': 'video player needs Referer',
- 'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+ 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
'info_dict': {
'id': '91613211',
@@ -588,11 +582,11 @@ class VimeoReviewIE(InfoExtractor):
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
IE_NAME = 'vimeo:watchlater'
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
- _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
+ _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
_TESTS = [{
- 'url': 'http://vimeo.com/home/watchlater',
+ 'url': 'https://vimeo.com/home/watchlater',
'only_matching': True,
}]
@@ -612,7 +606,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
class VimeoLikesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
_TEST = {
@@ -640,8 +634,8 @@ class VimeoLikesIE(InfoExtractor):
description = self._html_search_meta('description', webpage)
def _get_page(idx):
- page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
- self.http_scheme(), user_id, idx + 1)
+ page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+ user_id, idx + 1)
webpage = self._download_webpage(
page_url, user_id,
note='Downloading page %d/%d' % (idx + 1, page_count))
diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py
index b294767c5..19f8762ae 100644
--- a/youtube_dl/extractor/yam.py
+++ b/youtube_dl/extractor/yam.py
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
from ..utils import (
float_or_none,
month_by_abbreviation,
+ ExtractorError,
)
@@ -28,23 +29,45 @@ class YamIE(InfoExtractor):
}
}, {
# An external video hosted on YouTube
- 'url': 'http://mymedia.yam.com/m/3598173',
- 'md5': '0238ceec479c654e8c2f1223755bf3e9',
+ 'url': 'http://mymedia.yam.com/m/3599430',
+ 'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
'info_dict': {
- 'id': 'pJ2Deys283c',
+ 'id': 'CNpEoQlrIgA',
'ext': 'mp4',
- 'upload_date': '20150202',
+ 'upload_date': '20150306',
'uploader': '新莊社大瑜伽社',
- 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+ 'description': 'md5:11e2e405311633ace874f2e6226c8b17',
'uploader_id': '2323agoy',
- 'title': '外婆的澎湖灣KTV-潘安邦',
- }
+ 'title': '20090412陽明山二子坪-1',
+ },
+ 'skip': 'Video does not exist',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/3598173',
+ 'info_dict': {
+ 'id': '3598173',
+ 'ext': 'mp4',
+ },
+ 'skip': 'cause Yam system error',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/3599437',
+ 'info_dict': {
+ 'id': '3599437',
+ 'ext': 'mp4',
+ },
+ 'skip': 'invalid YouTube URL',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
+ # Check for errors
+ system_msg = self._html_search_regex(
+ r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
+ default=None)
+ if system_msg:
+ raise ExtractorError(system_msg, expected=True)
+
# Is it hosted externally on YouTube?
youtube_url = self._html_search_regex(
r'<embed src="(http://www.youtube.com/[^"]+)"',
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
new file mode 100644
index 000000000..f4c0f5702
--- /dev/null
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -0,0 +1,127 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ float_or_none,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+ def _get_track_url(self, storage_dir, track_id):
+ data = self._download_json(
+ 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
+ % storage_dir,
+ track_id, 'Downloading track location JSON')
+
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
+ storage = storage_dir.split('.')
+
+ return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
+ % (data['host'], key, data['ts'] + data['path'], storage[1]))
+
+ def _get_track_info(self, track):
+ return {
+ 'id': track['id'],
+ 'ext': 'mp3',
+ 'url': self._get_track_url(track['storageDir'], track['id']),
+ 'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
+ 'filesize': int_or_none(track.get('fileSize')),
+ 'duration': float_or_none(track.get('durationMs'), 1000),
+ }
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:track'
+ IE_DESC = 'Яндекс.Музыка - Трек'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/album/540508/track/4878838',
+ 'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+ 'info_dict': {
+ 'id': '4878838',
+ 'ext': 'mp3',
+ 'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+ 'filesize': 4628061,
+ 'duration': 193.04,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id, track_id = mobj.group('album_id'), mobj.group('id')
+
+ track = self._download_json(
+ 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+ track_id, 'Downloading track JSON')['track']
+
+ return self._get_track_info(track)
+
+
+class YandexMusicAlbumIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:album'
+ IE_DESC = 'Яндекс.Музыка - Альбом'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/album/540508',
+ 'info_dict': {
+ 'id': '540508',
+ 'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
+ },
+ 'playlist_count': 50,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ album = self._download_json(
+ 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
+ album_id, 'Downloading album JSON')
+
+ entries = [self._get_track_info(track) for track in album['volumes'][0]]
+
+ title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+ year = album.get('year')
+ if year:
+ title += ' (%s)' % year
+
+ return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:playlist'
+ IE_DESC = 'Яндекс.Музыка - Плейлист'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'Что слушают Enter Shikari',
+ 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+ },
+ 'playlist_count': 6,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
+ playlist_id)['pageData']['playlist']
+
+ entries = [self._get_track_info(track) for track in playlist['tracks']]
+
+ return self.playlist_result(
+ entries, compat_str(playlist_id),
+ playlist['title'], playlist.get('description'))
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3690f8021..27c8c4453 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1532,7 +1532,7 @@ class YoutubeSearchURLIE(InfoExtractor):
webpage = self._download_webpage(url, query)
result_code = self._search_regex(
- r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
+ r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)