aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-xyoutube_dl/YoutubeDL.py4
-rw-r--r--youtube_dl/extractor/__init__.py12
-rw-r--r--youtube_dl/extractor/baidu.py55
-rw-r--r--youtube_dl/extractor/bbc.py37
-rw-r--r--youtube_dl/extractor/canalplus.py59
-rw-r--r--youtube_dl/extractor/cbsnews.py4
-rw-r--r--youtube_dl/extractor/ccc.py35
-rw-r--r--youtube_dl/extractor/common.py25
-rw-r--r--youtube_dl/extractor/discovery.py39
-rw-r--r--youtube_dl/extractor/dreisat.py59
-rw-r--r--youtube_dl/extractor/einthusan.py33
-rw-r--r--youtube_dl/extractor/espn.py37
-rw-r--r--youtube_dl/extractor/fox.py39
-rw-r--r--youtube_dl/extractor/ivi.py95
-rw-r--r--youtube_dl/extractor/mtv.py21
-rw-r--r--youtube_dl/extractor/nextmovie.py30
-rw-r--r--youtube_dl/extractor/nick.py63
-rw-r--r--youtube_dl/extractor/nowtv.py1
-rw-r--r--youtube_dl/extractor/ora.py75
-rw-r--r--youtube_dl/extractor/pandoratv.py78
-rw-r--r--youtube_dl/extractor/qqmusic.py35
-rw-r--r--youtube_dl/extractor/regiotv.py62
-rw-r--r--youtube_dl/extractor/revision3.py127
-rw-r--r--youtube_dl/extractor/ruutu.py5
-rw-r--r--youtube_dl/extractor/testtube.py90
-rw-r--r--youtube_dl/extractor/tlc.py23
-rw-r--r--youtube_dl/extractor/tvland.py64
-rw-r--r--youtube_dl/extractor/twentymin.py73
-rw-r--r--youtube_dl/extractor/udemy.py25
-rw-r--r--youtube_dl/extractor/videomore.py56
-rw-r--r--youtube_dl/extractor/yahoo.py34
-rw-r--r--youtube_dl/extractor/youku.py10
-rw-r--r--youtube_dl/extractor/zdf.py15
-rw-r--r--youtube_dl/utils.py31
-rw-r--r--youtube_dl/version.py2
35 files changed, 1080 insertions, 373 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 50425b8d7..3b2be3159 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1791,6 +1791,10 @@ class YoutubeDL(object):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
res += '(unsupported) '
+ if fdict.get('language'):
+ if res:
+ res += ' '
+ res += '[%s]' % fdict['language']
if fdict.get('format_note') is not None:
res += fdict['format_note'] + ' '
if fdict.get('tbr') is not None:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index fe3c32874..19cea5e99 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -203,6 +203,7 @@ from .flickr import FlickrIE
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .fourtube import FourTubeIE
+from .fox import FOXIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
from .foxsports import FoxSportsIE
@@ -433,6 +434,7 @@ from .nextmedia import (
NextMediaActionNewsIE,
AppleDailyIE,
)
+from .nextmovie import NextMovieIE
from .nfb import NFBIE
from .nfl import NFLIE
from .nhl import (
@@ -440,6 +442,7 @@ from .nhl import (
NHLNewsIE,
NHLVideocenterIE,
)
+from .nick import NickIE
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninegag import NineGagIE
from .noco import NocoIE
@@ -489,12 +492,14 @@ from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
)
+from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
ORFIPTVIE,
)
+from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
@@ -549,8 +554,10 @@ from .rai import (
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
from .restudy import RestudyIE
from .reverbnation import ReverbNationIE
+from .revision3 import Revision3IE
from .ringtv import RingTVIE
from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
@@ -680,7 +687,6 @@ from .telemb import TeleMBIE
from .teletask import TeleTaskIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
-from .testtube import TestTubeIE
from .tf1 import TF1IE
from .theintercept import TheInterceptIE
from .theonion import TheOnionIE
@@ -692,7 +698,7 @@ from .thesixtyone import TheSixtyOneIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
-from .tlc import TlcIE, TlcDeIE
+from .tlc import TlcDeIE
from .tmz import (
TMZIE,
TMZArticleIE,
@@ -735,10 +741,12 @@ from .tvc import (
TVCArticleIE,
)
from .tvigle import TvigleIE
+from .tvland import TVLandIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
from .twentytwotracks import (
TwentyTwoTracksIE,
TwentyTwoTracksGenreIE
diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py
index e37ee4440..76b21e596 100644
--- a/youtube_dl/extractor/baidu.py
+++ b/youtube_dl/extractor/baidu.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..utils import unescapeHTML
class BaiduVideoIE(InfoExtractor):
@@ -14,8 +14,8 @@ class BaiduVideoIE(InfoExtractor):
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
'info_dict': {
'id': '1069',
- 'title': '中华小当家 TV版 (全52集)',
- 'description': 'md5:395a419e41215e531c857bb037bbaf80',
+ 'title': '中华小当家 TV版国语',
+ 'description': 'md5:51be07afe461cf99fa61231421b5397c',
},
'playlist_count': 52,
}, {
@@ -25,45 +25,32 @@ class BaiduVideoIE(InfoExtractor):
'title': 're:^奔跑吧兄弟',
'description': 'md5:1bf88bad6d850930f542d51547c089b8',
},
- 'playlist_mincount': 3,
+ 'playlist_mincount': 12,
}]
+ def _call_api(self, path, category, playlist_id, note):
+ return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
+ path, category, playlist_id), playlist_id, note)
+
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
- category = category2 = mobj.group('type')
+ category, playlist_id = re.match(self._VALID_URL, url).groups()
if category == 'show':
- category2 = 'tvshow'
-
- webpage = self._download_webpage(url, playlist_id)
-
- playlist_title = self._html_search_regex(
- r'title\s*:\s*(["\'])(?P<title>[^\']+)\1', webpage,
- 'playlist title', group='title')
- playlist_description = self._html_search_regex(
- r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage,
- playlist_id, 'playlist description')
+ category = 'tvshow'
+ if category == 'tv':
+ category = 'tvplay'
- site = self._html_search_regex(
- r'filterSite\s*:\s*["\']([^"]*)["\']', webpage,
- 'primary provider site')
- api_result = self._download_json(
- 'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % (
- category, category2, playlist_id, site),
- playlist_id, 'Get playlist links')
+ playlist_detail = self._call_api(
+ 'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
- entries = []
- for episode in api_result[0]['episodes']:
- episode_id = '%s_%s' % (playlist_id, episode['episode'])
+ playlist_title = playlist_detail['title']
+ playlist_description = unescapeHTML(playlist_detail.get('intro'))
- redirect_page = self._download_webpage(
- compat_urlparse.urljoin(url, episode['url']), episode_id,
- note='Download Baidu redirect page')
- real_url = self._html_search_regex(
- r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL')
+ episodes_detail = self._call_api(
+ 'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
- entries.append(self.url_result(
- real_url, video_title=episode['single_title']))
+ entries = [self.url_result(
+ episode['url'], video_title=episode['title']
+ ) for episode in episodes_detail['videos']]
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 923273fb2..ce99a34ab 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -23,7 +23,17 @@ class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pb][\da-z]{7}'
- _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>%s)' % _ID_REGEX
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?bbc\.co\.uk/
+ (?:
+ programmes/(?!articles/)|
+ iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
+ music/clips[/#]|
+ radio/player/
+ )
+ (?P<id>%s)
+ ''' % _ID_REGEX
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails
@@ -114,14 +124,14 @@ class BBCCoUkIE(InfoExtractor):
},
'skip': 'Episode is no longer available on BBC iPlayer Radio',
}, {
- 'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
+ 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
'note': 'Audio',
'info_dict': {
- 'id': 'p02frcch',
+ 'id': 'p022h44j',
'ext': 'flv',
- 'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
- 'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
- 'duration': 3507,
+ 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
+ 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
+ 'duration': 227,
},
'params': {
# rtmp download
@@ -172,13 +182,12 @@ class BBCCoUkIE(InfoExtractor):
}, {
# iptv-all mediaset fails with geolocation however there is no geo restriction
# for this programme at all
- 'url': 'http://www.bbc.co.uk/programmes/b06bp7lf',
+ 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
'info_dict': {
- 'id': 'b06bp7kf',
+ 'id': 'b06rkms3',
'ext': 'flv',
- 'title': "Annie Mac's Friday Night, B.Traits sits in for Annie",
- 'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.',
- 'duration': 10800,
+ 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
+ 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
},
'params': {
# rtmp download
@@ -193,6 +202,9 @@ class BBCCoUkIE(InfoExtractor):
}, {
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
+ 'only_matching': True,
}
]
@@ -469,7 +481,8 @@ class BBCCoUkIE(InfoExtractor):
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
- title = self._og_search_title(webpage)
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')
description = self._search_regex(
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
webpage, 'description', default=None)
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 004372f8d..25b2d4efe 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -10,13 +10,14 @@ from ..utils import (
unified_strdate,
url_basename,
qualities,
+ int_or_none,
)
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
- _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
+ _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
_SITE_ID_MAP = {
'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon',
@@ -26,10 +27,10 @@ class CanalplusIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
- 'md5': 'b3481d7ca972f61e37420798d0a9d934',
+ 'md5': '12164a6f14ff6df8bd628e8ba9b10b78',
'info_dict': {
'id': '1263092',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Le Zapping - 13/05/15',
'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
'upload_date': '20150513',
@@ -56,10 +57,10 @@ class CanalplusIE(InfoExtractor):
'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
- 'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
+ 'md5': '38b8f7934def74f0d6f3ba6c036a5f82',
'info_dict': {
'id': '1213714',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
'upload_date': '20150211',
@@ -82,15 +83,16 @@ class CanalplusIE(InfoExtractor):
webpage, 'video id', group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
- doc = self._download_xml(info_url, video_id, 'Downloading video XML')
+ video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
- video_info = [video for video in doc if video.find('ID').text == video_id][0]
- media = video_info.find('MEDIA')
- infos = video_info.find('INFOS')
+ if isinstance(video_data, list):
+ video_data = [video for video in video_data if video.get('ID') == video_id][0]
+ media = video_data['MEDIA']
+ infos = video_data['INFOS']
- preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
+ preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
- fmt_url = next(iter(media.find('VIDEOS'))).text
+ fmt_url = next(iter(media.get('VIDEOS')))
if '/geo' in fmt_url.lower():
response = self._request_webpage(
HEADRequest(fmt_url), video_id,
@@ -101,35 +103,42 @@ class CanalplusIE(InfoExtractor):
expected=True)
formats = []
- for fmt in media.find('VIDEOS'):
- format_url = fmt.text
+ for format_id, format_url in media['VIDEOS'].items():
if not format_url:
continue
- format_id = fmt.tag
if format_id == 'HLS':
formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', preference=preference(format_id)))
+ format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
elif format_id == 'HDS':
formats.extend(self._extract_f4m_formats(
- format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id)))
+ format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
else:
formats.append({
- 'url': format_url,
+ # the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js
+ 'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
'format_id': format_id,
'preference': preference(format_id),
})
self._sort_formats(formats)
+ thumbnails = [{
+ 'id': image_id,
+ 'url': image_url,
+ } for image_id, image_url in media.get('images', {}).items()]
+
+ titrage = infos['TITRAGE']
+
return {
'id': video_id,
'display_id': display_id,
- 'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
- infos.find('TITRAGE/SOUS_TITRE').text),
- 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
- 'thumbnail': media.find('IMAGES/GRAND').text,
- 'description': infos.find('DESCRIPTION').text,
- 'view_count': int(infos.find('NB_VUES').text),
- 'like_count': int(infos.find('NB_LIKES').text),
- 'comment_count': int(infos.find('NB_COMMENTS').text),
+ 'title': '%s - %s' % (titrage['TITRE'],
+ titrage['SOUS_TITRE']),
+ 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')),
+ 'thumbnails': thumbnails,
+ 'description': infos.get('DESCRIPTION'),
+ 'duration': int_or_none(infos.get('DURATION')),
+ 'view_count': int_or_none(infos.get('NB_VUES')),
+ 'like_count': int_or_none(infos.get('NB_LIKES')),
+ 'comment_count': int_or_none(infos.get('NB_COMMENTS')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index f9a64a0a2..d211ec23b 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -5,6 +5,7 @@ import re
import json
from .common import InfoExtractor
+from ..utils import remove_start
class CBSNewsIE(InfoExtractor):
@@ -62,6 +63,7 @@ class CBSNewsIE(InfoExtractor):
uri = item.get('media' + format_id + 'URI')
if not uri:
continue
+ uri = remove_start(uri, '{manifest:none}')
fmt = {
'url': uri,
'format_id': format_id,
@@ -70,6 +72,8 @@ class CBSNewsIE(InfoExtractor):
play_path = re.sub(
r'{slistFilePath}', '',
uri.split('<break>')[-1].split('{break}')[-1])
+ play_path = re.sub(
+ r'{manifest:.+}.*$', '', play_path)
fmt.update({
'app': 'ondemand?auth=cbs',
'play_path': 'mp4:' + play_path,
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index 6924eac70..e94b1e35b 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ parse_duration,
qualities,
unified_strdate,
)
@@ -12,21 +13,25 @@ from ..utils import (
class CCCIE(InfoExtractor):
IE_NAME = 'media.ccc.de'
- _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
'md5': '3a1eda8f3a29515d27f5adb967d7e740',
'info_dict': {
- 'id': '20131228183',
+ 'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
'ext': 'mp4',
'title': 'Introduction to Processor Design',
- 'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
+ 'description': 'md5:80be298773966f66d56cb11260b879af',
'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
- 'upload_date': '20131229',
+ 'upload_date': '20131228',
+ 'duration': 3660,
}
- }
+ }, {
+ 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -40,21 +45,25 @@ class CCCIE(InfoExtractor):
title = self._html_search_regex(
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
description = self._html_search_regex(
- r"(?s)<p class='description'>(.*?)</p>",
+ r"(?s)<h3>About</h3>(.+?)<h3>",
webpage, 'description', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
- r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
+ r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
webpage, 'upload date', fatal=False))
view_count = int_or_none(self._html_search_regex(
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
webpage, 'view count', fatal=False))
+ duration = parse_duration(self._html_search_regex(
+ r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
+ webpage, 'duration', fatal=False, group='duration'))
matches = re.finditer(r'''(?xs)
- <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
+ <(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
+ <(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
(?:
.*?
- <a\s+href='(?P<torrent_url>[^']+\.torrent)'
+ <a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
)?''', webpage)
formats = []
for m in matches:
@@ -62,12 +71,15 @@ class CCCIE(InfoExtractor):
format_id = self._search_regex(
r'.*/([a-z0-9_-]+)/[^/]*$',
m.group('http_url'), 'format id', default=None)
+ if format_id:
+ format_id = m.group('lang') + '-' + format_id
vcodec = 'h264' if 'h264' in format_id else (
'none' if format_id in ('mp3', 'opus') else None
)
formats.append({
'format_id': format_id,
'format': format,
+ 'language': m.group('lang'),
'url': m.group('http_url'),
'vcodec': vcodec,
'preference': preference(format_id),
@@ -95,5 +107,6 @@ class CCCIE(InfoExtractor):
'thumbnail': thumbnail,
'view_count': view_count,
'upload_date': upload_date,
+ 'duration': duration,
'formats': formats,
}
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 655207447..0719c7bcd 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -108,8 +108,9 @@ class InfoExtractor(object):
-2 or smaller for less than default.
< -1000 to hide the format (if there is
another one which is strictly better)
- * language_preference Is this in the correct requested
- language?
+ * language Language code, e.g. "de" or "en-US".
+ * language_preference Is this in the language mentioned in
+ the URL?
10 if it's what the URL is about,
-1 for default (don't know),
-10 otherwise, other values reserved for now.
@@ -200,6 +201,26 @@ class InfoExtractor(object):
end_time: Time in seconds where the reproduction should end, as
specified in the URL.
+ The following fields should only be used when the video belongs to some logical
+ chapter or section:
+
+ chapter: Name or title of the chapter the video belongs to.
+ chapter_number: Number of the chapter the video belongs to, as an integer.
+ chapter_id: Id of the chapter the video belongs to, as a unicode string.
+
+ The following fields should only be used when the video is an episode of some
+ series or programme:
+
+ series: Title of the series or programme the video episode belongs to.
+ season: Title of the season the video episode belongs to.
+ season_number: Number of the season the video episode belongs to, as an integer.
+ season_id: Id of the season the video episode belongs to, as a unicode string.
+ episode: Title of the video episode. Unlike mandatory video title field,
+ this field should denote the exact title of the video episode
+ without any kind of decoration.
+ episode_number: Number of the video episode within a season, as an integer.
+ episode_id: Id of the video episode, as a unicode string.
+
Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information.
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index d6723ecf2..ce680a9f3 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -9,7 +9,17 @@ from ..compat import compat_str
class DiscoveryIE(InfoExtractor):
- _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
+ _VALID_URL = r'''(?x)http://(?:www\.)?(?:
+ discovery|
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc|
+ velocity
+ )\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''
_TESTS = [{
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'info_dict': {
@@ -21,8 +31,8 @@ class DiscoveryIE(InfoExtractor):
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
' back.'),
'duration': 156,
- 'timestamp': 1303099200,
- 'upload_date': '20110418',
+ 'timestamp': 1302032462,
+ 'upload_date': '20110405',
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -33,27 +43,38 @@ class DiscoveryIE(InfoExtractor):
'id': 'mythbusters-the-simpsons',
'title': 'MythBusters: The Simpsons',
},
- 'playlist_count': 9,
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
+ 'info_dict': {
+ 'id': '78326',
+ 'ext': 'mp4',
+ 'title': 'Longfin Eels: Maneaters?',
+ 'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',
+ 'upload_date': '20140725',
+ 'timestamp': 1406246400,
+ 'duration': 116,
+ },
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- info = self._download_json(url + '?flat=1', video_id)
+ display_id = self._match_id(url)
+ info = self._download_json(url + '?flat=1', display_id)
video_title = info.get('playlist_title') or info.get('video_title')
entries = [{
'id': compat_str(video_info['id']),
'formats': self._extract_m3u8_formats(
- video_info['src'], video_id, ext='mp4',
+ video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
note='Download m3u8 information for video %d' % (idx + 1)),
'title': video_info['title'],
'description': video_info.get('description'),
'duration': parse_duration(video_info.get('video_length')),
- 'webpage_url': video_info.get('href'),
+ 'webpage_url': video_info.get('href') or video_info.get('url'),
'thumbnail': video_info.get('thumbnailURL'),
'alt_title': video_info.get('secondary_title'),
'timestamp': parse_iso8601(video_info.get('publishedDate')),
} for idx, video_info in enumerate(info['playlist'])]
- return self.playlist_result(entries, video_id, video_title)
+ return self.playlist_result(entries, display_id, video_title)
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 8ac8587be..028144f20 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- unified_strdate,
-)
+from .zdf import ZDFIE
-class DreiSatIE(InfoExtractor):
+class DreiSatIE(ZDFIE):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TESTS = [
@@ -35,53 +31,4 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
- details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
-
- status_code = details_doc.find('./status/statuscode')
- if status_code is not None and status_code.text != 'ok':
- code = status_code.text
- if code == 'notVisibleAnymore':
- message = 'Video %s is not available' % video_id
- else:
- message = '%s returned error: %s' % (self.IE_NAME, code)
- raise ExtractorError(message, expected=True)
-
- thumbnail_els = details_doc.findall('.//teaserimage')
- thumbnails = [{
- 'width': int(te.attrib['key'].partition('x')[0]),
- 'height': int(te.attrib['key'].partition('x')[2]),
- 'url': te.text,
- } for te in thumbnail_els]
-
- information_el = details_doc.find('.//information')
- video_title = information_el.find('./title').text
- video_description = information_el.find('./detail').text
-
- details_el = details_doc.find('.//details')
- video_uploader = details_el.find('./channel').text
- upload_date = unified_strdate(details_el.find('./airtime').text)
-
- format_els = details_doc.findall('.//formitaet')
- formats = [{
- 'format_id': fe.attrib['basetype'],
- 'width': int(fe.find('./width').text),
- 'height': int(fe.find('./height').text),
- 'url': fe.find('./url').text,
- 'filesize': int(fe.find('./filesize').text),
- 'video_bitrate': int(fe.find('./videoBitrate').text),
- } for fe in format_els
- if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
-
- self._sort_formats(formats)
-
- return {
- '_type': 'video',
- 'id': video_id,
- 'title': video_title,
- 'formats': formats,
- 'description': video_description,
- 'thumbnails': thumbnails,
- 'thumbnail': thumbnails[-1]['url'],
- 'uploader': video_uploader,
- 'upload_date': upload_date,
- }
+ return self.extract_from_xml_url(video_id, details_url)
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
index 5dfea0d39..f7339702c 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dl/extractor/einthusan.py
@@ -1,9 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ remove_start,
+ sanitized_Request,
+)
class EinthusanIE(InfoExtractor):
@@ -34,27 +37,33 @@ class EinthusanIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
+ video_id = self._match_id(url)
+
+ request = sanitized_Request(url)
+ request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
+ webpage = self._download_webpage(request, video_id)
+
+ title = self._html_search_regex(
+ r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
+ webpage, 'title')
- video_title = self._html_search_regex(
- r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
+ video_id = self._search_regex(
+ r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
- video_url = self._html_search_regex(
- r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
- webpage, 'video url')
+ video_url = self._download_webpage(
+ 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
+ % video_id, video_id)
description = self._html_search_meta('description', webpage)
thumbnail = self._html_search_regex(
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
webpage, "thumbnail url", fatal=False)
if thumbnail is not None:
- thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
+ thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
return {
'id': video_id,
- 'title': video_title,
+ 'title': title,
'url': video_url,
'thumbnail': thumbnail,
'description': description,
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py
index e6f8f0337..3762d8748 100644
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -1,18 +1,30 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import remove_end
class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
- _WORKING = False
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4',
- 'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
- 'description': '',
+ 'title': '30 for 30 Shorts: Judging Jewell',
+ 'description': None,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
+ 'url': 'http://espn.go.com/video/clip?id=2743663',
+ 'info_dict': {
+ 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
+ 'ext': 'mp4',
+ 'title': 'Must-See Moments: Best of the MLS season',
},
'params': {
# m3u8 download
@@ -44,12 +56,23 @@ class ESPNIE(InfoExtractor):
r'class="video-play-button"[^>]+data-id="(\d+)',
webpage, 'video id')
+ cms = 'espn'
+ if 'data-source="intl"' in webpage:
+ cms = 'intl'
+ player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
player = self._download_webpage(
- 'https://espn.go.com/video/iframe/twitter/?id=%s' % video_id, video_id)
+ player_url, video_id)
pcode = self._search_regex(
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
- return self.url_result(
- 'ooyalaexternal:espn:%s:%s' % (video_id, pcode),
- 'OoyalaExternal')
+ title = remove_end(
+ self._og_search_title(webpage),
+ '- ESPN Video').strip()
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
+ 'ie_key': 'OoyalaExternal',
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py
new file mode 100644
index 000000000..ab97b3196
--- /dev/null
+++ b/youtube_dl/extractor/fox.py
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class FOXIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.fox.com/watch/255180355939/7684182528',
+ 'info_dict': {
+ 'id': '255180355939',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer: Gotham',
+ 'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
+ 'duration': 129,
+ },
+ 'add_ie': ['ThePlatform'],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ release_url = self._parse_json(self._search_regex(
+ r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
+ video_id)['release_url'] + '&manifest=m3u'
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(release_url, {'force_smil_url': True}),
+ 'id': video_id,
+ }
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 029878d24..d0f00cdea 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -7,6 +7,7 @@ import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ int_or_none,
sanitized_Request,
)
@@ -27,7 +28,7 @@ class IviIE(InfoExtractor):
'title': 'Иван Васильевич меняет профессию',
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
'duration': 5498,
- 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
},
@@ -38,33 +39,25 @@ class IviIE(InfoExtractor):
'info_dict': {
'id': '9549',
'ext': 'mp4',
- 'title': 'Двое из ларца - Серия 1',
+ 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
+ 'series': 'Двое из ларца',
+ 'season': 'Сезон 1',
+ 'season_number': 1,
+ 'episode': 'Дело Гольдберга (1 часть)',
+ 'episode_number': 1,
'duration': 2655,
- 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
}
]
# Sorted by quality
- _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
-
- # Sorted by size
- _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
-
- def _extract_description(self, html):
- m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
- return m.group('description') if m is not None else None
-
- def _extract_comment_count(self, html):
- m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
- return int(m.group('commentcount')) if m is not None else 0
+ _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
def _real_extract(self, url):
video_id = self._match_id(url)
- api_url = 'http://api.digitalaccess.ru/api/json/'
-
data = {
'method': 'da.content.get',
'params': [
@@ -76,11 +69,10 @@ class IviIE(InfoExtractor):
]
}
- request = sanitized_Request(api_url, json.dumps(data))
-
- video_json_page = self._download_webpage(
+ request = sanitized_Request(
+ 'http://api.digitalaccess.ru/api/json/', json.dumps(data))
+ video_json = self._download_json(
request, video_id, 'Downloading video JSON')
- video_json = json.loads(video_json_page)
if 'error' in video_json:
error = video_json['error']
@@ -95,35 +87,51 @@ class IviIE(InfoExtractor):
formats = [{
'url': x['url'],
'format_id': x['content_format'],
- 'preference': self._known_formats.index(x['content_format']),
- } for x in result['files'] if x['content_format'] in self._known_formats]
+ 'preference': self._KNOWN_FORMATS.index(x['content_format']),
+ } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS]
self._sort_formats(formats)
- if not formats:
- raise ExtractorError('No media links available for %s' % video_id)
-
- duration = result['duration']
- compilation = result['compilation']
title = result['title']
+ duration = int_or_none(result.get('duration'))
+ compilation = result.get('compilation')
+ episode = title if compilation else None
+
title = '%s - %s' % (compilation, title) if compilation is not None else title
- previews = result['preview']
- previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
- thumbnail = previews[-1]['url'] if len(previews) > 0 else None
+ thumbnails = [{
+ 'url': preview['url'],
+ 'id': preview.get('content_format'),
+ } for preview in result.get('preview', []) if preview.get('url')]
+
+ webpage = self._download_webpage(url, video_id)
+
+ season = self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
+ webpage, 'season', default=None)
+ season_number = int_or_none(self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
+ webpage, 'season number', default=None))
+
+ episode_number = int_or_none(self._search_regex(
+ r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
+ webpage, 'episode number', default=None))
- video_page = self._download_webpage(url, video_id, 'Downloading video page')
- description = self._extract_description(video_page)
- comment_count = self._extract_comment_count(video_page)
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
return {
'id': video_id,
'title': title,
- 'thumbnail': thumbnail,
+ 'series': compilation,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
'description': description,
'duration': duration,
- 'comment_count': comment_count,
'formats': formats,
}
@@ -149,8 +157,11 @@ class IviCompilationIE(InfoExtractor):
}]
def _extract_entries(self, html, compilation_id):
- return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
- for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
+ return [
+ self.url_result(
+ 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
+ for serie in re.findall(
+ r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -158,7 +169,8 @@ class IviCompilationIE(InfoExtractor):
season_id = mobj.group('seasonid')
if season_id is not None: # Season link
- season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
+ season_page = self._download_webpage(
+ url, compilation_id, 'Downloading season %s web page' % season_id)
playlist_id = '%s/season%s' % (compilation_id, season_id)
playlist_title = self._html_search_meta('title', season_page, 'title')
entries = self._extract_entries(season_page, compilation_id)
@@ -166,8 +178,9 @@ class IviCompilationIE(InfoExtractor):
compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
playlist_id = compilation_id
playlist_title = self._html_search_meta('title', compilation_page, 'title')
- seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
- if len(seasons) == 0: # No seasons in this compilation
+ seasons = re.findall(
+ r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
+ if not seasons: # No seasons in this compilation
entries = self._extract_entries(compilation_page, compilation_id)
else:
entries = []
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index d887583e6..e8bb527b8 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -167,14 +167,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
'description': description,
}
+ def _get_feed_query(self, uri):
+ data = {'uri': uri}
+ if self._LANG:
+ data['lang'] = self._LANG
+ return compat_urllib_parse.urlencode(data)
+
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
- data = compat_urllib_parse.urlencode({'uri': uri})
- info_url = feed_url + '?'
- if self._LANG:
- info_url += 'lang=%s&' % self._LANG
- info_url += data
+ info_url = feed_url + '?' + self._get_feed_query(uri)
return self._get_videos_info_from_url(info_url, video_id)
def _get_videos_info_from_url(self, url, video_id):
@@ -184,9 +186,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
- def _real_extract(self, url):
- title = url_basename(url)
- webpage = self._download_webpage(url, title)
+ def _extract_mgid(self, webpage):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@@ -207,7 +207,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex(
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
+ return mgid
+ def _real_extract(self, url):
+ title = url_basename(url)
+ webpage = self._download_webpage(url, title)
+ mgid = self._extract_mgid(webpage)
videos_info = self._get_videos_info(mgid)
return videos_info
diff --git a/youtube_dl/extractor/nextmovie.py b/youtube_dl/extractor/nextmovie.py
new file mode 100644
index 000000000..657ae77a0
--- /dev/null
+++ b/youtube_dl/extractor/nextmovie.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+from ..compat import compat_urllib_parse
+
+
+class NextMovieIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nextmovie.com'
+ _VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
+ _FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm'
+ _TESTS = [{
+ 'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/',
+ 'md5': '09a9199f2f11f10107d04fcb153218aa',
+ 'info_dict': {
+ 'id': '961726',
+ 'ext': 'mp4',
+ 'title': 'The Muppets\' Gravity',
+ },
+ }]
+
+ def _get_feed_query(self, uri):
+ return compat_urllib_parse.urlencode({
+ 'feed': '1505',
+ 'mgid': uri,
+ })
+
+ def _real_extract(self, url):
+ mgid = self._match_id(url)
+ return self._get_videos_info(mgid)
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
new file mode 100644
index 000000000..b62819ae5
--- /dev/null
+++ b/youtube_dl/extractor/nick.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+from ..compat import compat_urllib_parse
+
+
+class NickIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nick.com'
+ _VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
+ _TESTS = [{
+ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
+ 'playlist': [
+ {
+ 'md5': '6e5adc1e28253bbb1b28ab05403dd4d4',
+ 'info_dict': {
+ 'id': 'be6a17b0-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S1',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+
+ }
+ },
+ {
+ 'md5': 'd7be441fc53a1d4882fa9508a1e5b3ce',
+ 'info_dict': {
+ 'id': 'be6b8f96-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S2',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+
+ }
+ },
+ {
+ 'md5': 'efffe1728a234b2b0d2f2b343dd1946f',
+ 'info_dict': {
+ 'id': 'be6cf7e6-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S3',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ }
+ },
+ {
+ 'md5': '1ec6690733ab9f41709e274a1d5c7556',
+ 'info_dict': {
+ 'id': 'be6e3354-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S4',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ }
+ },
+ ],
+ }]
+
+ def _get_feed_query(self, uri):
+ return compat_urllib_parse.urlencode({
+ 'feed': 'nick_arc_player_prime',
+ 'mgid': uri,
+ })
+
+ def _extract_mgid(self, webpage):
+ return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index fd107aca2..916a102bf 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -71,6 +71,7 @@ class NowTVBaseIE(InfoExtractor):
class NowTVIE(NowTVBaseIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py
new file mode 100644
index 000000000..9c4255a2d
--- /dev/null
+++ b/youtube_dl/extractor/ora.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ get_element_by_attribute,
+ qualities,
+ unescapeHTML,
+)
+
+
+class OraTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)'
+ _TEST = {
+ 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
+ 'md5': 'fa33717591c631ec93b04b0e330df786',
+ 'info_dict': {
+ 'id': '50178',
+ 'ext': 'mp4',
+ 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
+ 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
+ 'duration': 1477,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_data = self._search_regex(
+ r'"current"\s*:\s*({[^}]+?})', webpage, 'current video')
+ m3u8_url = self._search_regex(
+ r'"hls_stream"\s*:\s*"([^"]+)', video_data, 'm3u8 url', None)
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
 +            # similar to GameSpotIE
+ m3u8_path = compat_urlparse.urlparse(m3u8_url).path
+ QUALITIES_RE = r'((,[a-z]+\d+)+,?)'
+ available_qualities = self._search_regex(
+ QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',')
+ http_path = m3u8_path[1:].split('/', 1)[1]
+ http_template = re.sub(QUALITIES_RE, r'%s', http_path)
+ http_template = http_template.replace('.csmil/master.m3u8', '')
+ http_template = compat_urlparse.urljoin(
+ 'http://videocdn-pmd.ora.tv/', http_template)
+ preference = qualities(
+ ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080'])
+ for q in available_qualities:
+ formats.append({
+ 'url': http_template % q,
+ 'format_id': q,
+ 'preference': preference(q),
+ })
+ self._sort_formats(formats)
+ else:
+ return self.url_result(self._search_regex(
+ r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube')
+
+ return {
+ 'id': self._search_regex(
+ r'"video_id"\s*:\s*(\d+)', video_data, 'video id'),
+ 'display_id': display_id,
+ 'title': unescapeHTML(self._og_search_title(webpage)),
+ 'description': get_element_by_attribute(
+ 'class', 'video_txt_decription', webpage),
+ 'thumbnail': self._proto_relative_url(self._search_regex(
+ r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)),
+ 'duration': int(self._search_regex(
+ r'"duration"\s*:\s*(\d+)', video_data, 'duration')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py
new file mode 100644
index 000000000..8d49f5c4a
--- /dev/null
+++ b/youtube_dl/extractor/pandoratv.py
@@ -0,0 +1,78 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ parse_duration,
+ str_to_int,
+)
+
+
+class PandoraTVIE(InfoExtractor):
+ IE_NAME = 'pandora.tv'
+ IE_DESC = '판도라TV'
+ _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
+ _TEST = {
+ 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
+ 'info_dict': {
+ 'id': '53294230',
+ 'ext': 'flv',
+ 'title': '頭を撫でてくれる?',
+ 'description': '頭を撫でてくれる?',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 39,
+ 'upload_date': '20151218',
+ 'uploader': 'カワイイ動物まとめ',
+ 'uploader_id': 'mikakim',
+ 'view_count': int,
+ 'like_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('prgid', [None])[0]
+ user_id = qs.get('ch_userid', [None])[0]
+ if any(not f for f in (video_id, user_id,)):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ data = self._download_json(
+ 'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s'
+ % (user_id, video_id), video_id)
+
+ info = data['data']['rows']['vod_play_info']['result']
+
+ formats = []
+ for format_id, format_url in info.items():
+ if not format_url:
+ continue
+ height = self._search_regex(
+ r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
+ if not height:
+ continue
+ formats.append({
+ 'format_id': '%sp' % height,
+ 'url': format_url,
+ 'height': int(height),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['subject'],
+ 'description': info.get('body'),
+ 'thumbnail': info.get('thumbnail') or info.get('poster'),
+ 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
+ 'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
+ 'uploader': info.get('nickname'),
+ 'uploader_id': info.get('upload_userid'),
+ 'view_count': str_to_int(info.get('hit')),
+ 'like_count': str_to_int(info.get('likecnt')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index 1ba3bbddf..45a3c41c5 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -11,6 +11,7 @@ from ..utils import (
strip_jsonp,
unescapeHTML,
clean_html,
+ ExtractorError,
)
@@ -177,7 +178,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
'info_dict': {
'id': '001BLpXF2DyJe2',
'title': '林俊杰',
- 'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+ 'description': 'md5:870ec08f7d8547c29c93010899103751',
},
'playlist_count': 12,
}
@@ -272,7 +273,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
'url': 'http://y.qq.com/#type=toplist&p=top_3',
'info_dict': {
'id': 'top_3',
- 'title': 'QQ音乐巅峰榜·欧美',
+ 'title': '巅峰榜·欧美',
'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统'
'计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据'
'歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:'
@@ -315,7 +316,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
IE_DESC = 'QQ音乐 - 歌单'
_VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://y.qq.com/#type=taoge&id=3462654915',
'info_dict': {
'id': '3462654915',
@@ -323,7 +324,16 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
},
'playlist_count': 40,
- }
+ 'skip': 'playlist gone',
+ }, {
+ 'url': 'http://y.qq.com/#type=taoge&id=1374105607',
+ 'info_dict': {
+ 'id': '1374105607',
+ 'title': '易入人心的华语民谣',
+ 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。',
+ },
+ 'playlist_count': 20,
+ }]
def _real_extract(self, url):
list_id = self._match_id(url)
@@ -331,14 +341,21 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
list_json = self._download_json(
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
% list_id, list_id, 'Download list page',
- transform_source=strip_jsonp)['cdlist'][0]
-
+ transform_source=strip_jsonp)
+ if not len(list_json.get('cdlist', [])):
+ if list_json.get('code'):
+ raise ExtractorError(
+ 'QQ Music said: error %d in fetching playlist info' % list_json['code'],
+ expected=True)
+ raise ExtractorError('Unable to get playlist info')
+
+ cdlist = list_json['cdlist'][0]
entries = [
self.url_result(
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
- ) for song in list_json['songlist']
+ ) for song in cdlist['songlist']
]
- list_name = list_json.get('dissname')
- list_description = clean_html(unescapeHTML(list_json.get('desc')))
+ list_name = cdlist.get('dissname')
+ list_description = clean_html(unescapeHTML(cdlist.get('desc')))
return self.playlist_result(entries, list_id, list_name, list_description)
diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dl/extractor/regiotv.py
new file mode 100644
index 000000000..e250a52f0
--- /dev/null
+++ b/youtube_dl/extractor/regiotv.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ sanitized_Request,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class RegioTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.regio-tv.de/video/395808.html',
+ 'info_dict': {
+ 'id': '395808',
+ 'ext': 'mp4',
+ 'title': 'Wir in Ludwigsburg',
+ 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!',
+ }
+ }, {
+ 'url': 'http://www.regio-tv.de/video/395808',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ key = self._search_regex(
+ r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key')
+ title = self._og_search_title(webpage)
+
+ SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>'
+
+ request = sanitized_Request(
+ 'http://v.telvi.de/',
+ SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8'))
+ video_data = self._download_xml(request, video_id, 'Downloading video XML')
+
+ NS_MAP = {
+ 'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
+ 'soap': 'http://schemas.xmlsoap.org/soap/envelope/',
+ }
+
+ video_url = xpath_text(
+ video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True)
+ thumbnail = xpath_text(
+ video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail')
+ description = self._og_search_description(
+ webpage) or self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py
new file mode 100644
index 000000000..b1b8800b9
--- /dev/null
+++ b/youtube_dl/extractor/revision3.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ unescapeHTML,
+ qualities,
+)
+
+
+class Revision3IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
+ _TESTS = [{
+ 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
+ 'md5': 'd94a72d85d0a829766de4deb8daaf7df',
+ 'info_dict': {
+ 'id': '73034',
+ 'display_id': 'technobuffalo/5-google-predictions-for-2016',
+ 'ext': 'webm',
+ 'title': '5 Google Predictions for 2016',
+ 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
+ 'upload_date': '20151228',
+ 'timestamp': 1451325600,
+ 'duration': 187,
+ 'uploader': 'TechnoBuffalo',
+ 'uploader_id': 'technobuffalo',
+ }
+ }, {
+ 'url': 'http://testtube.com/brainstuff',
+ 'info_dict': {
+ 'id': '251',
+ 'title': 'BrainStuff',
+ 'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
+ },
+ 'playlist_mincount': 93,
+ }, {
+ 'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
+ 'info_dict': {
+ 'id': '60163',
+ 'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
+ 'duration': 275,
+ 'ext': 'webm',
+ 'title': '5 Weird Ways Plants Can Eat Animals',
+ 'description': 'Why have some plants evolved to eat meat?',
+ 'upload_date': '20150120',
+ 'timestamp': 1421763300,
+ 'uploader': 'DNews',
+ 'uploader_id': 'dnews',
+ },
+ }]
+ _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
+ _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
+
+ def _real_extract(self, url):
+ domain, display_id = re.match(self._VALID_URL, url).groups()
+ page_info = self._download_json(
+ self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
+
+ if page_info['data']['type'] == 'episode':
+ episode_data = page_info['data']
+ video_id = compat_str(episode_data['video']['data']['id'])
+ video_data = self._download_json(
+ 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
+ video_id)['items'][0]
+
+ formats = []
+ for vcodec, media in video_data['media'].items():
+ for quality_id, quality in media.items():
+ if quality_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ quality['url'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': quality['url'],
+ 'format_id': '%s-%s' % (vcodec, quality_id),
+ 'tbr': int_or_none(quality.get('bitrate')),
+ 'vcodec': vcodec,
+ })
+ self._sort_formats(formats)
+
+ preference = qualities(['mini', 'small', 'medium', 'large'])
+ thumbnails = [{
+ 'url': image_url,
+ 'id': image_id,
+ 'preference': preference(image_id)
+ } for image_id, image_url in video_data.get('images', {}).items()]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': unescapeHTML(video_data['title']),
+ 'description': unescapeHTML(video_data.get('summary')),
+ 'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
+ 'author': episode_data.get('author'),
+ 'uploader': video_data.get('show', {}).get('name'),
+ 'uploader_id': video_data.get('show', {}).get('slug'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+ else:
+ show_data = page_info['show']['data']
+ episodes_data = page_info['episodes']['data']
+ num_episodes = page_info['meta']['totalEpisodes']
+ processed_episodes = 0
+ entries = []
+ page_num = 1
+ while True:
+ entries.extend([self.url_result(
+ 'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+ processed_episodes += len(episodes_data)
+ if processed_episodes == num_episodes:
+ break
+ page_num += 1
+ episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
+ domain, display_id + '/' + compat_str(page_num), domain),
+ display_id)['episodes']['data']
+
+ return self.playlist_result(
+ entries, compat_str(show_data['id']),
+ show_data.get('name'), show_data.get('summary'))
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index 41fddc375..ffea438cc 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -75,9 +75,12 @@ class RuutuIE(InfoExtractor):
preference = -1 if proto == 'rtmp' else 1
label = child.get('label')
tbr = int_or_none(child.get('bitrate'))
+ format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
formats.append({
- 'format_id': '%s-%s' % (proto, label if label else tbr),
+ 'format_id': format_id,
'url': video_url,
'width': width,
'height': height,
diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py
deleted file mode 100644
index 26655d690..000000000
--- a/youtube_dl/extractor/testtube.py
+++ /dev/null
@@ -1,90 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- qualities,
-)
-
-
-class TestTubeIE(InfoExtractor):
- _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
- _TESTS = [{
- 'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
- 'info_dict': {
- 'id': '60163',
- 'display_id': '5-weird-ways-plants-can-eat-animals',
- 'duration': 275,
- 'ext': 'webm',
- 'title': '5 Weird Ways Plants Can Eat Animals',
- 'description': 'Why have some plants evolved to eat meat?',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'uploader': 'DNews',
- 'uploader_id': 'dnews',
- },
- }, {
- 'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
- 'info_dict': {
- 'id': 'fAGfJ4YjVus',
- 'ext': 'mp4',
- 'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
- 'uploader': 'Science Channel',
- 'uploader_id': 'ScienceChannel',
- 'upload_date': '20150203',
- 'description': 'md5:e61374030015bae1d2e22f096d4769d6',
- }
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- youtube_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
- webpage, 'youtube iframe', default=None)
- if youtube_url:
- return self.url_result(youtube_url, 'Youtube', video_id=display_id)
-
- video_id = self._search_regex(
- r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
- webpage, 'video ID')
-
- all_info = self._download_json(
- 'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
- video_id)
- info = all_info['items'][0]
-
- formats = []
- for vcodec, fdatas in info['media'].items():
- for name, fdata in fdatas.items():
- formats.append({
- 'format_id': '%s-%s' % (vcodec, name),
- 'url': fdata['url'],
- 'vcodec': vcodec,
- 'tbr': fdata.get('bitrate'),
- })
- self._sort_formats(formats)
-
- duration = int_or_none(info.get('duration'))
- images = info.get('images')
- thumbnails = None
- preference = qualities(['mini', 'small', 'medium', 'large'])
- if images:
- thumbnails = [{
- 'id': thumbnail_id,
- 'url': img_url,
- 'preference': preference(thumbnail_id)
- } for thumbnail_id, img_url in images.items()]
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': info['title'],
- 'description': info.get('summary'),
- 'thumbnails': thumbnails,
- 'uploader': info.get('show', {}).get('name'),
- 'uploader_id': info.get('show', {}).get('slug'),
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py
index d6d038a8d..adc05ed5f 100644
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -4,32 +4,9 @@ import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
-from .discovery import DiscoveryIE
from ..compat import compat_urlparse
-class TlcIE(DiscoveryIE):
- IE_NAME = 'tlc.com'
- _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
-
- # DiscoveryIE has _TESTS
- _TESTS = [{
- 'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
- 'info_dict': {
- 'id': '104493',
- 'ext': 'mp4',
- 'title': 'Too Big to Fly',
- 'description': 'Buddy has taken on a high flying task.',
- 'duration': 119,
- 'timestamp': 1393365060,
- 'upload_date': '20140225',
- },
- 'params': {
- 'skip_download': True, # requires ffmpef
- },
- }]
-
-
class TlcDeIE(InfoExtractor):
IE_NAME = 'tlc.de'
_VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py
new file mode 100644
index 000000000..b73279dec
--- /dev/null
+++ b/youtube_dl/extractor/tvland.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class TVLandIE(MTVServicesInfoExtractor):
+ IE_NAME = 'tvland.com'
+ _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
+ _TESTS = [{
+ 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
+ 'playlist': [
+ {
+ 'md5': '227e9723b9669c05bf51098b10287aa7',
+ 'info_dict': {
+ 'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd',
+ 'ext': 'mp4',
+ 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5',
+ }
+ },
+ {
+ 'md5': '9fa2b764ec0e8194fb3ebb01a83df88b',
+ 'info_dict': {
+ 'id': 'f4279548-6e13-40dd-92e8-860d27289197',
+ 'ext': 'mp4',
+ 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5',
+ }
+ },
+ {
+ 'md5': 'fde4c3bccd7cc7e3576b338734153cec',
+ 'info_dict': {
+ 'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334',
+ 'ext': 'mp4',
+ 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5',
+ }
+ },
+ {
+ 'md5': '247f6780cda6891f2e49b8ae2b10e017',
+ 'info_dict': {
+ 'id': '9146ecf5-b15a-4d78-879c-6679b77f4960',
+ 'ext': 'mp4',
+ 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5',
+ }
+ },
+ {
+ 'md5': 'fd269f33256e47bad5eb6c40de089ff6',
+ 'info_dict': {
+ 'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7',
+ 'ext': 'mp4',
+ 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5',
+ }
+ }
+ ],
+ }, {
+ 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
+ 'md5': 'e2c6389401cf485df26c79c247b08713',
+ 'info_dict': {
+ 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
+ 'ext': 'mp4',
+ 'title': 'Younger|Younger: Hilary Duff - Little Lies',
+ 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269'
+ },
+ }]
diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
new file mode 100644
index 000000000..ca7d953b8
--- /dev/null
+++ b/youtube_dl/extractor/twentymin.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class TwentyMinutenIE(InfoExtractor):
+ IE_NAME = '20min'
+ _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
+ _TESTS = [{
+ # regular video
+ 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
+ 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
+ 'info_dict': {
+ 'id': '469148',
+ 'ext': 'flv',
+ 'title': '85 000 Franken für 15 perfekte Minuten',
+ 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
+ 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
+ }
+ }, {
+ # news article with video
+ 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
+ 'md5': 'cd4cbb99b94130cff423e967cd275e5e',
+ 'info_dict': {
+ 'id': '469408',
+ 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
+ 'ext': 'flv',
+ 'title': '«Wir müssen mutig nach vorne schauen»',
+ 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
+ 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
+ }
+ }, {
+ 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<h1>.*?<span>(.+?)</span></h1>',
+ webpage, 'title', default=None)
+ if not title:
+ title = remove_end(re.sub(
+ r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
index ce7e211fc..1df636779 100644
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -244,10 +244,25 @@ class UdemyCourseIE(UdemyIE):
'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
course_id, 'Downloading course curriculum')
- entries = [
- self.url_result(
- 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
- for asset in response if asset.get('assetType') or asset.get('asset_type') == 'Video'
- ]
+ entries = []
+ chapter, chapter_number = None, None
+ for asset in response:
+ asset_type = asset.get('assetType') or asset.get('asset_type')
+ if asset_type == 'Video':
+ asset_id = asset.get('id')
+ if asset_id:
+ entry = {
+ '_type': 'url_transparent',
+ 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
+ 'ie_key': UdemyIE.ie_key(),
+ }
+ if chapter_number:
+ entry['chapter_number'] = chapter_number
+ if chapter:
+ entry['chapter'] = chapter
+ entries.append(entry)
+ elif asset.get('type') == 'chapter':
+ chapter_number = asset.get('index') or asset.get('object_index')
+ chapter = asset.get('title')
return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py
index 16078ac19..a66d6de23 100644
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dl/extractor/videomore.py
@@ -23,12 +23,57 @@ class VideomoreIE(InfoExtractor):
'ext': 'flv',
'title': 'В гостях Алексей Чумаков и Юлия Ковальчук',
'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.',
+ 'series': 'Кино в деталях',
+ 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
+ 'episode_number': None,
+ 'season': 'Сезон 2015',
+ 'season_number': 5,
'thumbnail': 're:^https?://.*\.jpg',
'duration': 2910,
'age_limit': 16,
'view_count': int,
},
}, {
+ 'url': 'http://videomore.ru/embed/259974',
+ 'info_dict': {
+ 'id': '259974',
+ 'ext': 'flv',
+ 'title': '80 серия',
+ 'description': '«Медведей» ждет решающий матч. Макеев выясняет отношения со Стрельцовым. Парни узнают подробности прошлого Макеева.',
+ 'series': 'Молодежка',
+ 'episode': '80 серия',
+ 'episode_number': 40,
+ 'season': '2 сезон',
+ 'season_number': 2,
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 2809,
+ 'age_limit': 16,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
+ 'info_dict': {
+ 'id': '341073',
+ 'ext': 'flv',
+ 'title': 'Команда проиграла из-за Бакина?',
+ 'description': 'Молодежка 3 сезон скоро',
+ 'series': 'Молодежка',
+ 'episode': 'Команда проиграла из-за Бакина?',
+ 'episode_number': None,
+ 'season': 'Промо',
+ 'season_number': 99,
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 29,
+ 'age_limit': 16,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://videomore.ru/elki_3?track_id=364623',
'only_matching': True,
}, {
@@ -81,10 +126,21 @@ class VideomoreIE(InfoExtractor):
'url': thumbnail,
} for thumbnail in data.get('big_thumbnail_urls', [])]
+ series = data.get('project_title')
+ episode = data.get('title')
+ episode_number = int_or_none(data.get('episode_of_season') or None)
+ season = data.get('season_title')
+ season_number = int_or_none(data.get('season_pos') or None)
+
return {
'id': video_id,
'title': title,
'description': description,
+ 'series': series,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'season': season,
+ 'season_number': season_number,
'thumbnails': thumbnails,
'timestamp': timestamp,
'duration': duration,
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index fca5ddc69..4a492f784 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -155,7 +155,16 @@ class YahooIE(InfoExtractor):
'description': 'md5:8fc39608213295748e1e289807838c97',
'duration': 1646,
},
- }
+ }, {
+ # it uses an alias to get the video_id
+ 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html',
+ 'info_dict': {
+ 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737',
+ 'ext': 'mp4',
+ 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking',
+ 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
+ },
+ },
]
def _real_extract(self, url):
@@ -199,13 +208,22 @@ class YahooIE(InfoExtractor):
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
default=None)
if items_json is None:
- CONTENT_ID_REGEXES = [
- r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
- r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
- r'"first_videoid"\s*:\s*"([^"]+)"',
- r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
- ]
- video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
+ alias = self._search_regex(
+ r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
+ if alias is not None:
+ alias_info = self._download_json(
+ 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
+ display_id, 'Downloading alias info')
+ video_id = alias_info[0]['id']
+ else:
+ CONTENT_ID_REGEXES = [
+ r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+ r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+ r'"first_videoid"\s*:\s*"([^"]+)"',
+ r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
+ ]
+ video_id = self._search_regex(
+ CONTENT_ID_REGEXES, webpage, 'content ID')
else:
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 3a3432be8..f767fa15f 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -2,6 +2,9 @@
from __future__ import unicode_literals
import base64
+import random
+import string
+import time
from .common import InfoExtractor
from ..compat import (
@@ -141,6 +144,11 @@ class YoukuIE(InfoExtractor):
return video_urls_dict
+ @staticmethod
+ def get_ysuid():
+ return '%d%s' % (int(time.time()), ''.join([
+ random.choice(string.ascii_letters) for i in range(3)]))
+
def get_hd(self, fm):
hd_id_dict = {
'3gp': '0',
@@ -189,6 +197,8 @@ class YoukuIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+ self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
+
def retrieve_data(req_url, note):
headers = {
'Referer': req_url,
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 2a1f2f6d1..c619a75e2 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -13,6 +13,7 @@ from ..utils import (
determine_ext,
qualities,
float_or_none,
+ ExtractorError,
)
@@ -59,7 +60,6 @@ class ZDFIE(InfoExtractor):
'ext': 'flv',
'format_id': '%s-%d' % (proto, bitrate),
'tbr': bitrate,
- 'protocol': proto,
})
self._sort_formats(formats)
return formats
@@ -70,6 +70,15 @@ class ZDFIE(InfoExtractor):
note='Downloading video info',
errnote='Failed to download video info')
+ status_code = doc.find('./status/statuscode')
+ if status_code is not None and status_code.text != 'ok':
+ code = status_code.text
+ if code == 'notVisibleAnymore':
+ message = 'Video %s is not available' % video_id
+ else:
+ message = '%s returned error: %s' % (self.IE_NAME, code)
+ raise ExtractorError(message, expected=True)
+
title = doc.find('.//information/title').text
description = xpath_text(doc, './/information/detail', 'description')
duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
@@ -129,10 +138,10 @@ class ZDFIE(InfoExtractor):
video_url, video_id, fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id='hds', fatal=False))
+ video_url, video_id, f4m_id=format_id, fatal=False))
else:
proto = format_m.group('proto').lower()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 0ed6c45c8..da4ec7f20 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -70,6 +70,21 @@ ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
+KNOWN_EXTENSIONS = (
+ 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
+ 'flv', 'f4v', 'f4a', 'f4b',
+ 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
+ 'mkv', 'mka', 'mk3d',
+ 'avi', 'divx',
+ 'mov',
+ 'asf', 'wmv', 'wma',
+ '3gp', '3g2',
+ 'mp3',
+ 'flac',
+ 'ape',
+ 'wav',
+ 'f4f', 'f4m', 'm3u8', 'smil')
+
def preferredencoding():
"""Get preferred encoding.
@@ -942,20 +957,8 @@ def determine_ext(url, default_ext='unknown_video'):
guess = url.partition('?')[0].rpartition('.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return guess
- elif guess.rstrip('/') in (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil'):
+ # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
+ elif guess.rstrip('/') in KNOWN_EXTENSIONS:
return guess.rstrip('/')
else:
return default_ext
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index a62baa305..790bd5b3b 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.12.29'
+__version__ = '2016.01.01'