about summary refs log tree commit diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 22
-rw-r--r-- youtube_dl/downloader/common.py 4
-rw-r--r-- youtube_dl/downloader/f4m.py 6
-rw-r--r-- youtube_dl/extractor/__init__.py 26
-rw-r--r-- youtube_dl/extractor/acast.py 70
-rw-r--r-- youtube_dl/extractor/adobetv.py 165
-rw-r--r-- youtube_dl/extractor/audimedia.py 4
-rw-r--r-- youtube_dl/extractor/bbc.py 13
-rw-r--r-- youtube_dl/extractor/beeg.py 24
-rw-r--r-- youtube_dl/extractor/bilibili.py 31
-rw-r--r-- youtube_dl/extractor/brightcove.py 17
-rw-r--r-- youtube_dl/extractor/byutv.py 5
-rw-r--r-- youtube_dl/extractor/canal13cl.py 48
-rw-r--r-- youtube_dl/extractor/clipfish.py 64
-rw-r--r-- youtube_dl/extractor/cliphunter.py 43
-rw-r--r-- youtube_dl/extractor/cnet.py 69
-rw-r--r-- youtube_dl/extractor/common.py 6
-rw-r--r-- youtube_dl/extractor/dailymotion.py 27
-rw-r--r-- youtube_dl/extractor/facebook.py 4
-rw-r--r-- youtube_dl/extractor/faz.py 36
-rw-r--r-- youtube_dl/extractor/fc2.py 4
-rw-r--r-- youtube_dl/extractor/fktv.py 34
-rw-r--r-- youtube_dl/extractor/flickr.py 106
-rw-r--r-- youtube_dl/extractor/footyroom.py 1
-rw-r--r-- youtube_dl/extractor/funimation.py 193
-rw-r--r-- youtube_dl/extractor/gameinformer.py 43
-rw-r--r-- youtube_dl/extractor/generic.py 23
-rw-r--r-- youtube_dl/extractor/googledrive.py 88
-rw-r--r-- youtube_dl/extractor/gputechconf.py 55
-rw-r--r-- youtube_dl/extractor/groupon.py 2
-rw-r--r-- youtube_dl/extractor/howcast.py 1
-rw-r--r-- youtube_dl/extractor/hypem.py 16
-rw-r--r-- youtube_dl/extractor/infoq.py 85
-rw-r--r-- youtube_dl/extractor/iqiyi.py 5
-rw-r--r-- youtube_dl/extractor/keezmovies.py 38
-rw-r--r-- youtube_dl/extractor/metacafe.py 6
-rw-r--r-- youtube_dl/extractor/mixcloud.py 3
-rw-r--r-- youtube_dl/extractor/movshare.py 27
-rw-r--r-- youtube_dl/extractor/nba.py 93
-rw-r--r-- youtube_dl/extractor/nbc.py 11
-rw-r--r-- youtube_dl/extractor/ndr.py 4
-rw-r--r-- youtube_dl/extractor/noco.py 34
-rw-r--r-- youtube_dl/extractor/novamov.py 108
-rw-r--r-- youtube_dl/extractor/nowness.py 14
-rw-r--r-- youtube_dl/extractor/nowtv.py 5
-rw-r--r-- youtube_dl/extractor/nowvideo.py 28
-rw-r--r-- youtube_dl/extractor/ooyala.py 175
-rw-r--r-- youtube_dl/extractor/pbs.py 174
-rw-r--r-- youtube_dl/extractor/pladform.py 9
-rw-r--r-- youtube_dl/extractor/pluralsight.py 28
-rw-r--r-- youtube_dl/extractor/rutube.py 9
-rw-r--r-- youtube_dl/extractor/safari.py 4
-rw-r--r-- youtube_dl/extractor/skynewsarabia.py 6
-rw-r--r-- youtube_dl/extractor/sohu.py 1
-rw-r--r-- youtube_dl/extractor/srf.py 26
-rw-r--r-- youtube_dl/extractor/teachingchannel.py 1
-rw-r--r-- youtube_dl/extractor/tele13.py 81
-rw-r--r-- youtube_dl/extractor/tf1.py 16
-rw-r--r-- youtube_dl/extractor/theplatform.py 14
-rw-r--r-- youtube_dl/extractor/toggle.py 194
-rw-r--r-- youtube_dl/extractor/trilulilu.py 127
-rw-r--r-- youtube_dl/extractor/vevo.py 76
-rw-r--r-- youtube_dl/extractor/vice.py 1
-rw-r--r-- youtube_dl/extractor/videoweed.py 26
-rw-r--r-- youtube_dl/extractor/vimeo.py 34
-rw-r--r-- youtube_dl/extractor/vk.py 11
-rw-r--r-- youtube_dl/extractor/wdr.py 58
-rw-r--r-- youtube_dl/extractor/wimp.py 19
-rw-r--r-- youtube_dl/extractor/youku.py 122
-rw-r--r-- youtube_dl/extractor/youtube.py 44
-rw-r--r-- youtube_dl/jsinterp.py 2
-rw-r--r-- youtube_dl/postprocessor/ffmpeg.py 3
-rw-r--r-- youtube_dl/update.py 16
-rw-r--r-- youtube_dl/utils.py 37
-rw-r--r-- youtube_dl/version.py 2
75 files changed, 2188 insertions, 839 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 9a8c7da05..50425b8d7 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,7 +47,9 @@ from .utils import (
DEFAULT_OUTTMPL,
determine_ext,
DownloadError,
+ encode_compat_str,
encodeFilename,
+ error_to_compat_str,
ExtractorError,
format_bytes,
formatSeconds,
@@ -495,7 +497,7 @@ class YoutubeDL(object):
tb = ''
if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
- tb += compat_str(traceback.format_exc())
+ tb += encode_compat_str(traceback.format_exc())
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
@@ -674,14 +676,14 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
- except ExtractorError as de: # An error we somewhat expected
- self.report_error(compat_str(de), de.format_traceback())
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
- self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
+ self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else:
raise
@@ -1110,6 +1112,12 @@ class YoutubeDL(object):
'contain the video, try using '
'"-f %s+%s"' % (format_2, format_1))
return
+ # Formats must be opposite (video+audio)
+ if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
+ self.report_error(
+ 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
+ % (format_1, format_2))
+ return
output_ext = (
formats_info[0]['ext']
if self.params.get('merge_output_format') is None
@@ -1453,7 +1461,7 @@ class YoutubeDL(object):
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + compat_str(err))
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
return
if self.params.get('writedescription', False):
@@ -1504,7 +1512,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, compat_str(err.cause)))
+ (sub_lang, error_to_compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@@ -2033,4 +2041,4 @@ class YoutubeDL(object):
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
- (t['url'], compat_str(err)))
+ (t['url'], error_to_compat_str(err)))
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index b8bf8daf8..beae8c4d0 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -5,9 +5,9 @@ import re
import sys
import time
-from ..compat import compat_str
from ..utils import (
encodeFilename,
+ error_to_compat_str,
decodeArgument,
format_bytes,
timeconvert,
@@ -186,7 +186,7 @@ class FileDownloader(object):
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
- self.report_error('unable to rename file: %s' % compat_str(err))
+ self.report_error('unable to rename file: %s' % error_to_compat_str(err))
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 6170cc155..aaf0c49c8 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -15,6 +15,7 @@ from ..compat import (
)
from ..utils import (
encodeFilename,
+ fix_xml_ampersands,
sanitize_open,
struct_pack,
struct_unpack,
@@ -288,7 +289,10 @@ class F4mFD(FragmentFD):
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(man_url)
man_url = urlh.geturl()
- manifest = urlh.read()
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/rg3/youtube-dl/issues/7823)
+ manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 5b842204f..add1df023 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -3,9 +3,15 @@ from __future__ import unicode_literals
from .abc import ABCIE
from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
from .addanime import AddAnimeIE
from .adobetv import (
AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
@@ -71,7 +77,6 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
-from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
@@ -199,7 +204,9 @@ from .francetv import (
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
+from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
+from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
from .gameone import (
GameOneIE,
@@ -223,9 +230,11 @@ from .globo import (
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
+from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE
+from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
from .hearthisat import HearThisAtIE
@@ -351,7 +360,6 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
-from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
@@ -417,7 +425,13 @@ from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nova import NovaIE
-from .novamov import NovaMovIE
+from .novamov import (
+ NovaMovIE,
+ WholeCloudIE,
+ NowVideoIE,
+ VideoWeedIE,
+ CloudTimeIE,
+)
from .nowness import (
NownessIE,
NownessPlaylistIE,
@@ -427,7 +441,6 @@ from .nowtv import (
NowTVIE,
NowTVListIE,
)
-from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
NPOLiveIE,
@@ -636,6 +649,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
+from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
@@ -664,6 +678,7 @@ from .tnaflix import (
EMPFlixIE,
MovieFapIE,
)
+from .toggle import ToggleIE
from .thvideo import (
THVideoIE,
THVideoPlaylistIE
@@ -738,7 +753,6 @@ from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
-from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
@@ -840,7 +854,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
- YoutubeUserPlaylistsIE,
+ YoutubePlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE
diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py
new file mode 100644
index 000000000..be7913bc7
--- /dev/null
+++ b/youtube_dl/extractor/acast.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class ACastBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://www.acast.com/api/'
+
+
+class ACastIE(ACastBaseIE):
+ IE_NAME = 'acast'
+ _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
+ _TEST = {
+ 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
+ 'md5': 'ada3de5a1e3a2a381327d749854788bb',
+ 'info_dict': {
+ 'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
+ 'ext': 'mp3',
+ 'title': '"Where Are You?": Taipei 101, Taiwan',
+ 'timestamp': 1196172000000,
+ 'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
+ 'duration': 211,
+ }
+ }
+
+ def _real_extract(self, url):
+ channel, display_id = re.match(self._VALID_URL, url).groups()
+ cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
+
+ return {
+ 'id': compat_str(cast_data['id']),
+ 'display_id': display_id,
+ 'url': cast_data['blings'][0]['audio'],
+ 'title': cast_data['name'],
+ 'description': cast_data.get('description'),
+ 'thumbnail': cast_data.get('image'),
+ 'timestamp': int_or_none(cast_data.get('publishingDate')),
+ 'duration': int_or_none(cast_data.get('duration')),
+ }
+
+
+class ACastChannelIE(ACastBaseIE):
+ IE_NAME = 'acast:channel'
+ _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
+ _TEST = {
+ 'url': 'https://www.acast.com/condenasttraveler',
+ 'info_dict': {
+ 'id': '50544219-29bb-499e-a083-6087f4cb7797',
+ 'title': 'Condé Nast Traveler Podcast',
+ 'description': 'md5:98646dee22a5b386626ae31866638fbd',
+ },
+ 'playlist_mincount': 20,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
+ casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
+ entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
+
+ return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 5e43adc51..8753ee2cf 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -1,23 +1,32 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
+ int_or_none,
float_or_none,
ISO639Utils,
+ determine_ext,
)
-class AdobeTVIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
+class AdobeTVBaseIE(InfoExtractor):
+ _API_BASE_URL = 'http://tv.adobe.com/api/v4/'
+
+
+class AdobeTVIE(AdobeTVBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TEST = {
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
'info_dict': {
- 'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
+ 'id': '10981',
'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
@@ -29,50 +38,106 @@ class AdobeTVIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- player = self._parse_json(
- self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
- video_id)
+ language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
- title = player.get('title') or self._search_regex(
- r'data-title="([^"]+)"', webpage, 'title')
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- upload_date = unified_strdate(
- self._html_search_meta('datepublished', webpage, 'upload date'))
-
- duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration') or
- self._search_regex(
- r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
- webpage, 'duration', fatal=False))
-
- view_count = str_to_int(self._search_regex(
- r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
- webpage, 'view count'))
+ video_data = self._download_json(
+ self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
+ urlname)['data'][0]
formats = [{
- 'url': source['src'],
- 'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
- 'tbr': source.get('bitrate'),
- } for source in player['sources']]
+ 'url': source['url'],
+ 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ } for source in video_data['videos']]
self._sort_formats(formats)
return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'duration': duration,
- 'view_count': view_count,
+ 'id': compat_str(video_data['id']),
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
'formats': formats,
}
+class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
+ def _parse_page_data(self, page_data):
+ return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
+
+ def _extract_playlist_entries(self, url, display_id):
+ page = self._download_json(url, display_id)
+ entries = self._parse_page_data(page['data'])
+ for page_num in range(2, page['paging']['pages'] + 1):
+ entries.extend(self._parse_page_data(
+ self._download_json(url + '&page=%d' % page_num, display_id)['data']))
+ return entries
+
+
+class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
+ 'info_dict': {
+ 'id': '36',
+ 'title': 'The Complete Picture with Julieanne Kost',
+ 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
+ },
+ 'playlist_mincount': 136,
+ }
+
+ def _get_element_url(self, element_data):
+ return element_data['urls'][0]
+
+ def _real_extract(self, url):
+ language, show_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = 'language=%s&show_urlname=%s' % (language, show_urlname)
+
+ show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
+
+ return self.playlist_result(
+ self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
+ compat_str(show_data['id']),
+ show_data['show_name'],
+ show_data['show_description'])
+
+
+class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/channel/development',
+ 'info_dict': {
+ 'id': 'development',
+ },
+ 'playlist_mincount': 96,
+ }
+
+ def _get_element_url(self, element_data):
+ return element_data['url']
+
+ def _real_extract(self, url):
+ language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
+ if category_urlname:
+ query += '&category_urlname=%s' % category_urlname
+
+ return self.playlist_result(
+ self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
+ channel_urlname)
+
+
class AdobeTVVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
@@ -91,28 +156,25 @@ class AdobeTVVideoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- player_params = self._parse_json(self._search_regex(
- r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
- video_id)
+ video_data = self._download_json(url + '?format=json', video_id)
formats = [{
+ 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
'url': source['src'],
- 'width': source.get('width'),
- 'height': source.get('height'),
- 'tbr': source.get('bitrate'),
- } for source in player_params['sources']]
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('bitrate')),
+ } for source in video_data['sources']]
+ self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000)
- for source in player_params['sources']]))
+ for source in video_data['sources']]))
subtitles = {}
- for translation in player_params.get('translations', []):
+ for translation in video_data.get('translations', []):
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
if lang_id not in subtitles:
subtitles[lang_id] = []
@@ -124,8 +186,9 @@ class AdobeTVVideoIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
- 'title': player_params['title'],
- 'description': self._og_search_description(webpage),
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data['video'].get('poster'),
'duration': duration,
'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py
index cad503510..b0b089dee 100644
--- a/youtube_dl/extractor/audimedia.py
+++ b/youtube_dl/extractor/audimedia.py
@@ -31,8 +31,8 @@ class AudiMediaIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
-
- raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload');
+
+ raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
_, stage_mode, video_id, lang = raw_payload.split('-')
# TODO: handle s and e stage_mode (live streams and ended live streams)
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 7fb80aa38..691aecc0d 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -47,9 +47,8 @@ class BBCCoUkIE(InfoExtractor):
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
- 'title': 'Kaleidoscope, Leonard Cohen',
+ 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
'description': 'The Canadian poet and songwriter reflects on his musical career.',
- 'duration': 1740,
},
'params': {
# rtmp download
@@ -112,7 +111,8 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
- }
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
}, {
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
'note': 'Audio',
@@ -454,6 +454,7 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page')
programme_id = None
+ duration = None
tviplayer = self._search_regex(
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
@@ -473,7 +474,9 @@ class BBCCoUkIE(InfoExtractor):
title = self._og_search_title(webpage)
description = self._search_regex(
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
- webpage, 'description', fatal=False)
+ webpage, 'description', default=None)
+ if not description:
+ description = self._html_search_meta('description', webpage)
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
@@ -587,6 +590,7 @@ class BBCIE(BBCCoUkIE):
'ext': 'mp4',
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
'duration': 56,
+ 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
},
'params': {
'skip_download': True,
@@ -729,6 +733,7 @@ class BBCIE(BBCCoUkIE):
# article with multiple videos embedded with playlist.sxml (e.g.
# http://www.bbc.com/sport/0/football/34475836)
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
+ playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
if playlists:
entries = [
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index e63c2ac00..c8d921daf 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -34,15 +34,29 @@ class BeegIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
- 'http://beeg.com/api/v3/video/%s' % video_id, video_id)
+ 'http://beeg.com/api/v5/video/%s' % video_id, video_id)
+
+ def split(o, e):
+ def cut(s, x):
+ n.append(s[:x])
+ return s[x:]
+ n = []
+ r = len(o) % e
+ if r > 0:
+ o = cut(o, r)
+ while len(o) > e:
+ o = cut(o, e)
+ n.append(o)
+ return n
def decrypt_key(key):
- # Reverse engineered from http://static.beeg.com/cpl/1067.js
- a = '8RPUUCS35ZWp3ADnKcSmpH71ZusrROo'
+ # Reverse engineered from http://static.beeg.com/cpl/1105.js
+ a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
e = compat_urllib_parse_unquote(key)
- return ''.join([
- compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 25)
+ o = ''.join([
+ compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
for n in range(len(e))])
+ return ''.join(split(o, 3)[::-1])
def decrypt_url(encrypted_url):
encrypted_url = self._proto_relative_url(
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 1c3644587..59beb11bc 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -2,16 +2,14 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_etree_fromstring,
-)
+from ..compat import compat_str
from ..utils import (
int_or_none,
unescapeHTML,
ExtractorError,
+ xpath_text,
)
@@ -58,27 +56,22 @@ class BiliBiliIE(InfoExtractor):
cid = view_data['cid']
title = unescapeHTML(view_data['title'])
- page = self._download_webpage(
+ doc = self._download_xml(
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
cid,
'Downloading page %s/%s' % (page_num, view_data['pages'])
)
- try:
- err_info = json.loads(page)
- raise ExtractorError(
- 'BiliBili said: ' + err_info['error_text'], expected=True)
- except ValueError:
- pass
- doc = compat_etree_fromstring(page)
+ if xpath_text(doc, './result') == 'error':
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
entries = []
for durl in doc.findall('./durl'):
- size = durl.find('./filesize|./size')
+ size = xpath_text(durl, ['./filesize', './size'])
formats = [{
'url': durl.find('./url').text,
- 'filesize': int_or_none(size.text) if size else None,
+ 'filesize': int_or_none(size),
'ext': 'flv',
}]
backup_urls = durl.find('./backup_url')
@@ -88,21 +81,21 @@ class BiliBiliIE(InfoExtractor):
formats.reverse()
entries.append({
- 'id': '%s_part%s' % (cid, durl.find('./order').text),
+ 'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
'title': title,
- 'duration': int_or_none(durl.find('./length').text) // 1000,
+ 'duration': int_or_none(xpath_text(durl, './length'), 1000),
'formats': formats,
})
info = {
- 'id': str(cid),
+ 'id': compat_str(cid),
'title': title,
'description': view_data.get('description'),
'thumbnail': view_data.get('pic'),
'uploader': view_data.get('author'),
'timestamp': int_or_none(view_data.get('created')),
- 'view_count': view_data.get('play'),
- 'duration': int_or_none(doc.find('./timelength').text),
+ 'view_count': int_or_none(view_data.get('play')),
+ 'duration': int_or_none(xpath_text(doc, './timelength')),
}
if len(entries) == 1:
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index f5ebae1e6..03a4f446e 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -355,7 +355,7 @@ class BrightcoveLegacyIE(InfoExtractor):
class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new'
- _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
_TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
@@ -387,14 +387,24 @@ class BrightcoveNewIE(InfoExtractor):
'params': {
'skip_download': True,
}
+ }, {
+ # ref: prefixed video id
+ 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
+ 'only_matching': True,
}]
@staticmethod
+ def _extract_url(webpage):
+ urls = BrightcoveNewIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
def _extract_urls(webpage):
# Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
- # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
+ # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
+ # 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
entries = []
@@ -407,9 +417,10 @@ class BrightcoveNewIE(InfoExtractor):
for video_id, account_id, player_id, embed in re.findall(
# According to examples from [3] it's unclear whether video id
# may be optional and what to do when it is
+ # According to [4] data-video-id may be prefixed with ref:
r'''(?sx)
<video[^>]+
- data-video-id=["\'](\d+)["\'][^>]*>.*?
+ data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
</video>.*?
<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 3b2de517e..dda98059e 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -14,9 +14,10 @@ class BYUtvIE(InfoExtractor):
'info_dict': {
'id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
- 'description': 'md5:5438d33774b6bdc662f9485a340401cc',
+ 'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'title': 'Season 5 Episode 5',
- 'thumbnail': 're:^https?://.*\.jpg$'
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 1486.486,
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/canal13cl.py b/youtube_dl/extractor/canal13cl.py
deleted file mode 100644
index 93241fefe..000000000
--- a/youtube_dl/extractor/canal13cl.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class Canal13clIE(InfoExtractor):
- _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
- _TEST = {
- 'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
- 'md5': '4cb1fa38adcad8fea88487a078831755',
- 'info_dict': {
- 'id': '1403022125',
- 'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
- 'ext': 'mp4',
- 'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
- 'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id')
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._html_search_meta(
- 'twitter:title', webpage, 'title', fatal=True)
- description = self._html_search_meta(
- 'twitter:description', webpage, 'description')
- url = self._html_search_regex(
- r'articuloVideo = \"(.*?)\"', webpage, 'url')
- real_id = self._search_regex(
- r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
- thumbnail = self._html_search_regex(
- r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
-
- return {
- 'id': real_id,
- 'display_id': display_id,
- 'url': url,
- 'title': title,
- 'description': description,
- 'ext': 'mp4',
- 'thumbnail': thumbnail,
- }
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
index 7af903571..3a47f6fa4 100644
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -1,14 +1,9 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
- determine_ext,
int_or_none,
- js_to_json,
- parse_iso8601,
- remove_end,
+ unified_strdate,
)
@@ -21,48 +16,47 @@ class ClipfishIE(InfoExtractor):
'id': '3966754',
'ext': 'mp4',
'title': 'FIFA 14 - E3 2013 Trailer',
- 'timestamp': 1370938118,
+ 'description': 'Video zu FIFA 14: E3 2013 Trailer',
'upload_date': '20130611',
'duration': 82,
+ 'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_info = self._parse_json(
- js_to_json(self._html_search_regex(
- '(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
- video_id)
+ video_info = self._download_json(
+ 'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id,
+ video_id)['items'][0]
formats = []
- for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
- ext = determine_ext(video_url)
- if ext == 'm3u8':
- formats.append({
- 'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
- 'ext': 'mp4',
- 'format_id': 'hls',
- })
- else:
- formats.append({
- 'url': video_url,
- 'format_id': ext,
- })
- self._sort_formats(formats)
- title = remove_end(self._og_search_title(webpage), ' - Video')
- thumbnail = self._og_search_thumbnail(webpage)
- duration = int_or_none(video_info.get('length'))
- timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))
+ m3u8_url = video_info.get('media_videourl_hls')
+ if m3u8_url:
+ formats.append({
+ 'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
+ 'ext': 'mp4',
+ 'format_id': 'hls',
+ })
+
+ mp4_url = video_info.get('media_videourl')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ 'format_id': 'mp4',
+ 'width': int_or_none(video_info.get('width')),
+ 'height': int_or_none(video_info.get('height')),
+ 'tbr': int_or_none(video_info.get('bitrate')),
+ })
return {
'id': video_id,
- 'title': title,
+ 'title': video_info['title'],
+ 'description': video_info.get('descr'),
'formats': formats,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
+ 'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
+ 'duration': int_or_none(video_info.get('media_length')),
+ 'upload_date': unified_strdate(video_info.get('pubDate')),
+ 'view_count': int_or_none(video_info.get('media_views'))
}
diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py
index d46592cc5..2996b6b09 100644
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -1,7 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import determine_ext
+from ..utils import int_or_none
_translation_table = {
@@ -42,31 +42,26 @@ class CliphunterIE(InfoExtractor):
video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title')
- fmts = {}
- for fmt in ('mp4', 'flv'):
- fmt_list = self._parse_json(self._search_regex(
- r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id)
- for f in fmt_list:
- fmts[f['fname']] = _decode(f['sUrl'])
-
- qualities = self._parse_json(self._search_regex(
- r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id)
+ gexo_files = self._parse_json(
+ self._search_regex(
+ r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'),
+ video_id)
formats = []
- for fname, url in fmts.items():
- f = {
- 'url': url,
- }
- if fname in qualities:
- qual = qualities[fname]
- f.update({
- 'format_id': '%s_%sp' % (determine_ext(url), qual['h']),
- 'width': qual['w'],
- 'height': qual['h'],
- 'tbr': qual['br'],
- })
- formats.append(f)
-
+ for format_id, f in gexo_files.items():
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ fmt = f.get('fmt')
+ height = f.get('h')
+ format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
+ formats.append({
+ 'url': _decode(video_url),
+ 'format_id': format_id,
+ 'width': int_or_none(f.get('w')),
+ 'height': int_or_none(height),
+ 'tbr': int_or_none(f.get('br')),
+ })
self._sort_formats(formats)
thumbnail = self._search_regex(
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
index 5dd69bff7..5c3908f72 100644
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -1,15 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
+from .theplatform import ThePlatformIE
+from ..utils import int_or_none
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
-
-class CNETIE(InfoExtractor):
+class CNETIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
_TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
@@ -18,25 +14,20 @@ class CNETIE(InfoExtractor):
'ext': 'flv',
'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
- 'thumbnail': 're:^http://.*/flmswindows8.jpg$',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
'uploader': 'Sarah Mitroff',
+ 'duration': 70,
},
- 'params': {
- 'skip_download': 'requires rtmpdump',
- }
}, {
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
'info_dict': {
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
'ext': 'flv',
+ 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
'uploader': 'Ashley Esqueda',
- 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
- },
- 'params': {
- 'skip_download': True, # requires rtmpdump
+ 'duration': 1482,
},
}]
@@ -45,26 +36,13 @@ class CNETIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
- r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
+ r"data-cnet-video(?:-uvp)?-options='([^']+)'",
webpage, 'data json')
- data = json.loads(data_json)
- vdata = data['video']
- if not vdata:
- vdata = data['videos'][0]
- if not vdata:
- raise ExtractorError('Cannot find video data')
-
- mpx_account = data['config']['players']['default']['mpx_account']
- vid = vdata['files'].get('rtmp', vdata['files']['hds'])
- tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
+ data = self._parse_json(data_json, display_id)
+ vdata = data.get('video') or data['videos'][0]
video_id = vdata['id']
- title = vdata.get('headline')
- if title is None:
- title = vdata.get('title')
- if title is None:
- raise ExtractorError('Cannot find title!')
- thumbnail = vdata.get('image', {}).get('path')
+ title = vdata['title']
author = vdata.get('author')
if author:
uploader = '%s %s' % (author['firstName'], author['lastName'])
@@ -73,13 +51,34 @@ class CNETIE(InfoExtractor):
uploader = None
uploader_id = None
+ mpx_account = data['config']['uvpConfig']['default']['mpx_account']
+
+ metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
+ description = vdata.get('description') or metadata.get('description')
+ duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
+
+ formats = []
+ subtitles = {}
+ for (fkey, vid) in vdata['files'].items():
+ if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
+ continue
+ release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
+ if fkey == 'hds':
+ release_url += '&manifest=f4m'
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ self._sort_formats(formats)
+
return {
- '_type': 'url_transparent',
- 'url': tp_link,
'id': video_id,
'display_id': display_id,
'title': title,
+ 'description': description,
+ 'thumbnail': metadata.get('thumbnail'),
+ 'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
- 'thumbnail': thumbnail,
+ 'subtitles': subtitles,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6ab2d68d6..828f58f12 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -30,6 +30,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
determine_ext,
+ error_to_compat_str,
ExtractorError,
fix_xml_ampersands,
float_or_none,
@@ -332,7 +333,8 @@ class InfoExtractor(object):
return False
if errnote is None:
errnote = 'Unable to download webpage'
- errmsg = '%s: %s' % (errnote, compat_str(err))
+
+ errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
@@ -622,7 +624,7 @@ class InfoExtractor(object):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
+ self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
return (username, password)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index ab7f3aec4..0c5b6617f 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -7,10 +7,10 @@ import itertools
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
- ExtractorError,
determine_ext,
+ error_to_compat_str,
+ ExtractorError,
int_or_none,
parse_iso8601,
sanitized_Request,
@@ -99,6 +99,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
{
'url': 'http://www.dailymotion.com/video/xhza0o',
'only_matching': True,
+ },
+ # with subtitles
+ {
+ 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
+ 'only_matching': True,
}
]
@@ -122,7 +127,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
webpage, 'comment count', fatal=False))
player_v5 = self._search_regex(
- [r'buildPlayer\(({.+?})\);', r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);'],
+ [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
+ r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
+ r'buildPlayer\(({.+?})\);'],
webpage, 'player v5', default=None)
if player_v5:
player = self._parse_json(player_v5, video_id)
@@ -172,11 +179,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
uploader_id = metadata.get('owner', {}).get('id')
subtitles = {}
- for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
- subtitles[subtitle_lang] = [{
- 'ext': determine_ext(subtitle_url),
- 'url': subtitle_url,
- } for subtitle_url in subtitle.get('urls', [])]
+ subtitles_data = metadata.get('subtitles', {}).get('data', {})
+ if subtitles_data and isinstance(subtitles_data, dict):
+ for subtitle_lang, subtitle in subtitles_data.items():
+ subtitles[subtitle_lang] = [{
+ 'ext': determine_ext(subtitle_url),
+ 'url': subtitle_url,
+ } for subtitle_url in subtitle.get('urls', [])]
return {
'id': video_id,
@@ -269,7 +278,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
+ self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 321eec59e..39c481068 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -7,11 +7,11 @@ import socket
from .common import InfoExtractor
from ..compat import (
compat_http_client,
- compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
)
from ..utils import (
+ error_to_compat_str,
ExtractorError,
limit_length,
sanitized_Request,
@@ -116,7 +116,7 @@ class FacebookIE(InfoExtractor):
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning('unable to log in: %s' % compat_str(err))
+ self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
return
def _real_initialize(self):
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index cebdd0193..6f9b003c2 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -2,6 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import (
+ xpath_element,
+ xpath_text,
+ int_or_none,
+)
class FazIE(InfoExtractor):
@@ -37,31 +42,32 @@ class FazIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
config_xml_url = self._search_regex(
- r'writeFLV\(\'(.+?)\',', webpage, 'config xml url')
+ r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
- encodings = config.find('ENCODINGS')
+ encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = []
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
- encoding = encodings.find(code)
- if encoding is None:
- continue
- encoding_url = encoding.find('FILENAME').text
- formats.append({
- 'url': encoding_url,
- 'format_id': code.lower(),
- 'quality': pref,
- })
+ encoding = xpath_element(encodings, code)
+ if encoding:
+ encoding_url = xpath_text(encoding, 'FILENAME')
+ if encoding_url:
+ formats.append({
+ 'url': encoding_url,
+ 'format_id': code.lower(),
+ 'quality': pref,
+ 'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
+ })
self._sort_formats(formats)
- descr = self._html_search_regex(
- r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
- 'description': descr,
- 'thumbnail': config.find('STILL/STILL_BIG').text,
+ 'description': description.strip() if description else None,
+ 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
+ 'duration': int_or_none(xpath_text(config, 'DURATION')),
}
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index 92e8c571f..4c81271d3 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -37,8 +37,8 @@ class FC2IE(InfoExtractor):
'params': {
'username': 'ytdl@yt-dl.org',
'password': '(snip)',
- 'skip': 'requires actual password'
- }
+ },
+ 'skip': 'requires actual password',
}, {
'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
'only_matching': True,
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index 40ea27895..5f6e65dae 100644
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -1,12 +1,10 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
- ExtractorError,
+ js_to_json,
)
@@ -32,24 +30,22 @@ class FKTVIE(InfoExtractor):
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
- matches = re.search(
- r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
- webpage)
- if matches is None:
- raise ExtractorError('Unable to extract the video')
-
- poster, sources = matches.groups()
- if poster is None:
- self.report_warning('unable to extract thumbnail')
-
- urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
- formats = [{
- 'url': furl,
- 'format_id': determine_ext(furl),
- } for furl in urls]
+ thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
+ sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
+
+ formats = []
+ for source in sources:
+ furl = source.get('src')
+ if furl:
+ formats.append({
+ 'url': furl,
+ 'format_id': determine_ext(furl),
+ })
+ self._sort_formats(formats)
+
return {
'id': episode,
'title': title,
'formats': formats,
- 'thumbnail': poster,
+ 'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py
index 91cd46e76..452b27b26 100644
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -1,67 +1,87 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
- find_xpath_attr,
- sanitized_Request,
+ int_or_none,
+ qualities,
)
class FlickrIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
- 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
+ 'md5': '164fe3fa6c22e18d448d4d5af2330f31',
'info_dict': {
'id': '5645318632',
- 'ext': 'mp4',
- "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
- "uploader_id": "forestwander-nature-pictures",
- "title": "Dark Hollow Waterfalls"
+ 'ext': 'mpg',
+ 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
+ 'title': 'Dark Hollow Waterfalls',
+ 'duration': 19,
+ 'timestamp': 1303528740,
+ 'upload_date': '20110423',
+ 'uploader_id': '10922353@N03',
+ 'uploader': 'Forest Wander',
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
}
}
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ _API_BASE_URL = 'https://api.flickr.com/services/rest?'
- video_id = mobj.group('id')
- video_uploader_id = mobj.group('uploader_id')
- webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
- req = sanitized_Request(webpage_url)
- req.add_header(
- 'User-Agent',
- # it needs a more recent version
- 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
- webpage = self._download_webpage(req, video_id)
+ def _call_api(self, method, video_id, api_key, note, secret=None):
+ query = {
+ 'photo_id': video_id,
+ 'method': 'flickr.%s' % method,
+ 'api_key': api_key,
+ 'format': 'json',
+ 'nojsoncallback': 1,
+ }
+ if secret:
+ query['secret'] = secret
+ data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
+ if data['stat'] != 'ok':
+ raise ExtractorError(data['message'])
+ return data
- secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
- first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
- first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
+ api_key = self._download_json('https://www.flickr.com/hermes_error_beacon.gne', video_id, 'Downloading api key',)['site_key']
- node_id = find_xpath_attr(
- first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
- 'id').text
+ video_info = self._call_api('photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
+ if video_info['media'] == 'video':
+ streams = self._call_api('video.getStreamInfo', video_id, api_key, 'Downloading streams info', video_info['secret'])['streams']
- second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
- second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
+ preference = qualities(['iphone_wifi', '700', 'appletv', 'orig'])
- self.report_extraction(video_id)
+ formats = []
+ for stream in streams['stream']:
+ stream_type = str(stream.get('type'))
+ formats.append({
+ 'format_id': stream_type,
+ 'url': stream['_content'],
+ 'preference': preference(stream_type),
+ })
+ self._sort_formats(formats)
- stream = second_xml.find('.//STREAM')
- if stream is None:
- raise ExtractorError('Unable to extract video url')
- video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
+ owner = video_info.get('owner', {})
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader_id': video_uploader_id,
- }
+ return {
+ 'id': video_id,
+ 'title': video_info['title']['_content'],
+ 'description': video_info.get('description', {}).get('_content'),
+ 'formats': formats,
+ 'timestamp': int_or_none(video_info.get('dateuploaded')),
+ 'duration': int_or_none(video_info.get('video', {}).get('duration')),
+ 'uploader_id': owner.get('nsid'),
+ 'uploader': owner.get('realname'),
+ 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
+ 'view_count': int_or_none(video_info.get('views')),
+ 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
+ }
+ else:
+ raise ExtractorError('not a video', expected=True)
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
index 4c7dbca40..370fd006f 100644
--- a/youtube_dl/extractor/footyroom.py
+++ b/youtube_dl/extractor/footyroom.py
@@ -13,6 +13,7 @@ class FootyRoomIE(InfoExtractor):
'title': 'Schalke 04 0 – 2 Real Madrid',
},
'playlist_count': 3,
+ 'skip': 'Video for this match is not available',
}, {
'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
'info_dict': {
diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py
new file mode 100644
index 000000000..d1a95d87f
--- /dev/null
+++ b/youtube_dl/extractor/funimation.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ encode_dict,
+ int_or_none,
+ sanitized_Request,
+ ExtractorError,
+ urlencode_postdata
+)
+
+
+class FunimationIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
+
+ _NETRC_MACHINE = 'funimation'
+
+ _TESTS = [{
+ 'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
+ 'info_dict': {
+ 'id': '658',
+ 'display_id': 'breeze',
+ 'ext': 'mp4',
+ 'title': 'Air - 1 - Breeze',
+ 'description': 'md5:1769f43cd5fc130ace8fd87232207892',
+ 'thumbnail': 're:https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
+ 'info_dict': {
+ 'id': '31128',
+ 'display_id': 'role-play',
+ 'ext': 'mp4',
+ 'title': '.hack//SIGN - 1 - Role Play',
+ 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
+ 'thumbnail': 're:https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
+ 'info_dict': {
+ 'id': '9635',
+ 'display_id': 'broadcast-dub-preview',
+ 'ext': 'mp4',
+ 'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
+ 'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
+ 'thumbnail': 're:https?://.*\.(?:jpg|png)',
+ },
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+ data = urlencode_postdata(encode_dict({
+ 'email_field': username,
+ 'password_field': password,
+ }))
+ login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ login_page = self._download_webpage(
+ login_request, None, 'Logging in as %s' % username)
+ if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')):
+ return
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>',
+ login_page, 'error messages', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ errors = []
+ formats = []
+
+ ERRORS_MAP = {
+ 'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
+ 'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
+ 'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
+ 'ERROR_VIDEO_EXPIRED': 'videoExpired',
+ 'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
+ 'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
+ 'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
+ 'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
+ 'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
+ 'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
+ }
+
+ USER_AGENTS = (
+ # PC UA is served with m3u8 that provides some bonus lower quality formats
+ ('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
+ # Mobile UA allows to extract direct links and also does not fail when
+ # PC UA fails with hulu error (e.g.
+ # http://www.funimation.com/shows/hacksign/videos/official/role-play)
+ ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
+ )
+
+ for kind, user_agent in USER_AGENTS:
+ request = sanitized_Request(url)
+ request.add_header('User-Agent', user_agent)
+ webpage = self._download_webpage(
+ request, display_id, 'Downloading %s webpage' % kind)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+playersData\s*=\s*(\[.+?\]);\n',
+ webpage, 'players data'),
+ display_id)[0]['playlist']
+
+ items = next(item['items'] for item in playlist if item.get('items'))
+ item = next(item for item in items if item.get('itemAK') == display_id)
+
+ error_messages = {}
+ video_error_messages = self._search_regex(
+ r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
+ webpage, 'error messages', default=None)
+ if video_error_messages:
+ error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
+ if error_messages_json:
+ for _, error in error_messages_json.items():
+ type_ = error.get('type')
+ description = error.get('description')
+ content = error.get('content')
+ if type_ == 'text' and description and content:
+ error_message = ERRORS_MAP.get(description)
+ if error_message:
+ error_messages[error_message] = content
+
+ for video in item.get('videoSet', []):
+ auth_token = video.get('authToken')
+ if not auth_token:
+ continue
+ funimation_id = video.get('FUNImationID') or video.get('videoId')
+ preference = 1 if video.get('languageMode') == 'dub' else 0
+ if not auth_token.startswith('?'):
+ auth_token = '?%s' % auth_token
+ for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
+ format_url = video.get('%sUrl' % quality)
+ if not format_url:
+ continue
+ if not format_url.startswith(('http', '//')):
+ errors.append(format_url)
+ continue
+ if determine_ext(format_url) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
+ preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ else:
+ tbr = int_or_none(self._search_regex(
+ r'-(\d+)[Kk]', format_url, 'tbr', default=None))
+ formats.append({
+ 'url': format_url + auth_token,
+ 'format_id': '%s-http-%dp' % (funimation_id, height),
+ 'height': height,
+ 'tbr': tbr,
+ 'preference': preference,
+ })
+
+ if not formats and errors:
+ raise ExtractorError(
+ '%s returned error: %s'
+ % (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
+ expected=True)
+
+ self._sort_formats(formats)
+
+ title = item['title']
+ artist = item.get('artist')
+ if artist:
+ title = '%s - %s' % (artist, title)
+ description = self._og_search_description(webpage) or item.get('description')
+ thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
+ video_id = item.get('itemId') or display_id
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py
new file mode 100644
index 000000000..25870c131
--- /dev/null
+++ b/youtube_dl/extractor/gameinformer.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class GameInformerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
+ _TEST = {
+ 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
+ 'info_dict': {
+ 'id': '4515472681001',
+ 'ext': 'm3u8',
+ 'title': 'Replay - Animal Crossing',
+ 'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
+ 'timestamp': 1443457610706,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
+ json_data = self._download_json(
+ bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
+ display_id)
+
+ return {
+ 'id': compat_str(json_data['id']),
+ 'display_id': display_id,
+ 'url': json_data['IOSRenditions'][0]['url'],
+ 'title': json_data['name'],
+ 'description': json_data.get('shortDescription'),
+ 'timestamp': int_or_none(json_data.get('publishedDate')),
+ 'duration': int_or_none(json_data.get('length')),
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f5dd88f54..3c3066e38 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -53,6 +53,8 @@ from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
+from .pladform import PladformIE
+from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
@@ -339,6 +341,7 @@ class GenericIE(InfoExtractor):
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
},
'add_ie': ['Ooyala'],
},
@@ -350,6 +353,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
},
'params': {
'skip_download': True,
@@ -960,8 +964,9 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
- 'description': 'VIDEO: Index/Match versus VLOOKUP.',
+ 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
+ 'duration': 191.933,
},
'params': {
# m3u8 downloads
@@ -1496,7 +1501,7 @@ class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(mobj.group('ec'))
+ return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@ -1504,7 +1509,7 @@ class GenericIE(InfoExtractor):
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
- embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+ embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -1733,10 +1738,9 @@ class GenericIE(InfoExtractor):
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
# Look for Pladform embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Pladform')
+ pladform_url = PladformIE._extract_url(webpage)
+ if pladform_url:
+ return self.url_result(pladform_url)
# Look for Playwire embeds
mobj = re.search(
@@ -1761,6 +1765,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
new file mode 100644
index 000000000..f354c9c7a
--- /dev/null
+++ b/youtube_dl/extractor/googledrive.py
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class GoogleDriveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
+ _TEST = {
+ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
+ 'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+ 'info_dict': {
+ 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny.mp4',
+ 'duration': 46,
+ }
+ }
+ _FORMATS_EXT = {
+ '5': 'flv',
+ '6': 'flv',
+ '13': '3gp',
+ '17': '3gp',
+ '18': 'mp4',
+ '22': 'mp4',
+ '34': 'flv',
+ '35': 'flv',
+ '36': '3gp',
+ '37': 'mp4',
+ '38': 'mp4',
+ '43': 'webm',
+ '44': 'webm',
+ '45': 'webm',
+ '46': 'webm',
+ '59': 'mp4',
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+ webpage)
+ if mobj:
+ return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+
+ reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+ if reason:
+ raise ExtractorError(reason)
+
+ title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+ duration = int_or_none(self._search_regex(
+ r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
+ fmt_stream_map = self._search_regex(
+ r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
+ fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
+
+ formats = []
+ for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
+ fmt_id, fmt_url = fmt_stream.split('|')
+ resolution = fmt.split('/')[1]
+ width, height = resolution.split('x')
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': fmt_id,
+ 'resolution': resolution,
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ 'ext': self._FORMATS_EXT[fmt_id],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/gputechconf.py b/youtube_dl/extractor/gputechconf.py
new file mode 100644
index 000000000..145b55bf3
--- /dev/null
+++ b/youtube_dl/extractor/gputechconf.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_element,
+ xpath_text,
+ int_or_none,
+ parse_duration,
+)
+
+
+class GPUTechConfIE(InfoExtractor):
+ _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
+ 'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
+ 'info_dict': {
+ 'id': '5156',
+ 'ext': 'mp4',
+ 'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
+ 'duration': 1219,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
+ xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
+
+ doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
+
+ metadata = xpath_element(doc, 'metadata')
+ http_host = xpath_text(metadata, 'httpHost', 'http host', True)
+ mbr_videos = xpath_element(metadata, 'MBRVideos')
+
+ formats = []
+ for mbr_video in mbr_videos.findall('MBRVideo'):
+ stream_name = xpath_text(mbr_video, 'streamName')
+ if stream_name:
+ formats.append({
+ 'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
+ 'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': xpath_text(metadata, 'title'),
+ 'duration': parse_duration(xpath_text(metadata, 'endTime')),
+ 'creator': xpath_text(metadata, 'speaker'),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py
index 8b9e0e2f8..63c05b6a6 100644
--- a/youtube_dl/extractor/groupon.py
+++ b/youtube_dl/extractor/groupon.py
@@ -18,6 +18,8 @@ class GrouponIE(InfoExtractor):
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
'ext': 'mp4',
'title': 'Bikram Yoga Huntington Beach | Orange County',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 44.961,
},
}],
'params': {
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py
index 16677f179..e8f51e545 100644
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -16,6 +16,7 @@ class HowcastIE(InfoExtractor):
'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
'timestamp': 1276081287,
'upload_date': '20100609',
+ 'duration': 56.823,
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py
index cca3dd498..b3706fe6d 100644
--- a/youtube_dl/extractor/hypem.py
+++ b/youtube_dl/extractor/hypem.py
@@ -28,15 +28,12 @@ class HypemIE(InfoExtractor):
track_id = self._match_id(url)
data = {'ax': 1, 'ts': time.time()}
- data_encoded = compat_urllib_parse.urlencode(data)
- complete_url = url + "?" + data_encoded
- request = sanitized_Request(complete_url)
+ request = sanitized_Request(url + '?' + compat_urllib_parse.urlencode(data))
response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url')
- cookie = urlh.headers.get('Set-Cookie', '')
html_tracks = self._html_search_regex(
- r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
+ r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',
response, 'tracks')
try:
track_list = json.loads(html_tracks)
@@ -46,15 +43,14 @@ class HypemIE(InfoExtractor):
key = track['key']
track_id = track['id']
- artist = track['artist']
title = track['song']
- serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = sanitized_Request(
- serve_url, '', {'Content-Type': 'application/json'})
- request.add_header('cookie', cookie)
+ 'http://hypem.com/serve/source/%s/%s' % (track_id, key),
+ '', {'Content-Type': 'application/json'})
song_data = self._download_json(request, track_id, 'Downloading metadata')
- final_url = song_data["url"]
+ final_url = song_data['url']
+ artist = track.get('artist')
return {
'id': track_id,
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index 71cfd12c5..016af2084 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -1,3 +1,5 @@
+# coding: utf-8
+
from __future__ import unicode_literals
import base64
@@ -5,8 +7,9 @@ import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
- compat_urlparse,
+ compat_parse_qs,
)
+from ..utils import determine_ext
class InfoQIE(InfoExtractor):
@@ -16,7 +19,7 @@ class InfoQIE(InfoExtractor):
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
'info_dict': {
- 'id': '12-jan-pythonthings',
+ 'id': 'A-Few-of-My-Favorite-Python-Things',
'ext': 'mp4',
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
'title': 'A Few of My Favorite [Python] Things',
@@ -24,40 +27,84 @@ class InfoQIE(InfoExtractor):
}, {
'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
'only_matching': True,
+ }, {
+ 'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
+ 'md5': '4918d0cca1497f2244572caf626687ef',
+ 'info_dict': {
+ 'id': 'openstack-continued-delivery',
+ 'title': 'OpenStack持续交付之路',
+ 'ext': 'flv',
+ 'description': 'md5:308d981fb28fa42f49f9568322c683ff',
+ },
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ def _extract_bokecc_videos(self, webpage, video_id):
+ # TODO: bokecc.com is a Chinese video cloud platform
+ # It should have an independent extractor but I don't have other
+ # examples using bokecc
+ player_params_str = self._html_search_regex(
+ r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
+ webpage, 'player params', default=None)
- video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
- video_description = self._html_search_meta('description', webpage, 'description')
+ player_params = compat_parse_qs(player_params_str)
+
+ info_xml = self._download_xml(
+ 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
+ player_params['siteid'][0], player_params['vid'][0]), video_id)
+
+ return [{
+ 'format_id': 'bokecc',
+ 'url': quality.find('./copy').attrib['playurl'],
+ 'preference': int(quality.attrib['value']),
+ } for quality in info_xml.findall('./video/quality')]
+ def _extract_rtmp_videos(self, webpage):
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
# Extract video URL
encoded_id = self._search_regex(
- r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
+ r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
+
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
playpath = 'mp4:' + real_id
- video_filename = playpath.split('/')[-1]
- video_id, extension = video_filename.split('.')
-
- http_base = self._search_regex(
- r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
- 'HTTP base URL')
-
- formats = [{
+ return [{
'format_id': 'rtmp',
'url': video_url,
- 'ext': extension,
+ 'ext': determine_ext(playpath),
'play_path': playpath,
- }, {
+ }]
+
+ def _extract_http_videos(self, webpage):
+ http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
+
+ policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
+ signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
+ key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
+
+ return [{
'format_id': 'http',
- 'url': compat_urlparse.urljoin(url, http_base) + real_id,
+ 'url': http_video_url,
+ 'http_headers': {
+ 'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
+ policy, signature, key_pair_id),
+ },
}]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+ video_description = self._html_search_meta('description', webpage, 'description')
+
+ if '/cn/' in url:
+ # for China videos, HTTP video URL exists but always fails with 403
+ formats = self._extract_bokecc_videos(webpage, video_id)
+ else:
+ formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
+
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
index 2df1da3f0..c3731a110 100644
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -205,9 +205,8 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, swf_url, video_id):
# TODO: automatic key extraction
- # last update at 2015-10-22 for Zombie::bite
- # '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2]
- enc_key = '2c76de15dcb44bd28ff0927d50d31620'
+ # last update at 2015-12-18 for Zombie::bite
+ enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1]
return enc_key
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py
index d79261bb5..126ca13df 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -1,23 +1,25 @@
from __future__ import unicode_literals
-import os
import re
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
-from ..utils import sanitized_Request
+from ..utils import (
+ sanitized_Request,
+ url_basename,
+)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = {
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
- 'md5': '6e297b7e789329923fcf83abb67c9289',
+ 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
'info_dict': {
'id': '1214711',
'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18,
+ 'thumbnail': 're:^https?://.*\.jpg$',
}
}
@@ -36,21 +38,29 @@ class KeezMoviesIE(InfoExtractor):
video_title = self._html_search_regex(
r'<h1 [^>]*>([^<]+)', webpage, 'title')
- video_url = self._html_search_regex(
- r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
- path = compat_urllib_parse_urlparse(video_url).path
- extension = os.path.splitext(path)[1][1:]
- format = path.split('/')[4].split('_')[:2]
- format = "-".join(format)
+ flashvars = self._parse_json(self._search_regex(
+ r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id)
+
+ formats = []
+ for height in (180, 240, 480):
+ if flashvars.get('quality_%dp' % height):
+ video_url = flashvars['quality_%dp' % height]
+ a_format = {
+ 'url': video_url,
+ 'height': height,
+ 'format_id': '%dp' % height,
+ }
+ filename_parts = url_basename(video_url).split('_')
+ if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]):
+ a_format['tbr'] = int(filename_parts[1][:-1])
+ formats.append(a_format)
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'title': video_title,
- 'url': video_url,
- 'ext': extension,
- 'format': format,
- 'format_id': format,
+ 'formats': formats,
'age_limit': age_limit,
+ 'thumbnail': flashvars.get('image_url')
}
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 3c786a36d..67d6271e1 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -154,10 +154,10 @@ class MetacafeIE(InfoExtractor):
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
video_url = None
- mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
+ mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage)
if mobj is not None:
mediaURL = compat_urllib_parse_unquote(mobj.group(1))
- video_ext = mediaURL[-3:]
+ video_ext = determine_ext(mediaURL)
# Extract gdaKey if available
mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
@@ -229,7 +229,7 @@ class MetacafeIE(InfoExtractor):
age_limit = (
18
- if re.search(r'"contentRating":"restricted"', webpage)
+ if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
else 0)
if isinstance(video_url, list):
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index d47aeceda..c2b7ed9ab 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -64,7 +64,8 @@ class MixcloudIE(InfoExtractor):
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
- song_url = preview_url.replace('/previews/', '/c/originals/')
+ song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
+ song_url = song_url.replace('/previews/', '/c/originals/')
if not self._check_url(song_url, track_id, 'mp3'):
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
if not self._check_url(song_url, track_id, 'm4a'):
diff --git a/youtube_dl/extractor/movshare.py b/youtube_dl/extractor/movshare.py
deleted file mode 100644
index 6101063f2..000000000
--- a/youtube_dl/extractor/movshare.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from __future__ import unicode_literals
-
-from .novamov import NovaMovIE
-
-
-class MovShareIE(NovaMovIE):
- IE_NAME = 'movshare'
- IE_DESC = 'MovShare'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
-
- _HOST = 'www.movshare.net'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
- _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
-
- _TEST = {
- 'url': 'http://www.movshare.net/video/559e28be54d96',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': '559e28be54d96',
- 'ext': 'flv',
- 'title': 'dissapeared image',
- 'description': 'optical illusion dissapeared image magic illusion',
- }
- }
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index 944096e1c..7c6b7841d 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -1,63 +1,102 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
- remove_end,
parse_duration,
+ int_or_none,
+ xpath_text,
+ xpath_attr,
)
class NBAIE(InfoExtractor):
- _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+ _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
- 'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
+ 'md5': '9e7729d3010a9c71506fd1248f74e4f4',
'info_dict': {
- 'id': '0021200253-okc-bkn-recap.nba',
- 'ext': 'mp4',
+ 'id': '0021200253-okc-bkn-recap',
+ 'ext': 'flv',
'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
+ 'timestamp': 1354638466,
+ 'upload_date': '20121204',
},
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
}, {
- 'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
'info_dict': {
- 'id': '0041400301-cle-atl-recap.nba',
+ 'id': '0041400301-cle-atl-recap',
'ext': 'mp4',
- 'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
+ 'title': 'Hawks vs. Cavaliers Game 1',
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
'duration': 228,
- },
- 'params': {
- 'skip_download': True,
+ 'timestamp': 1432134543,
+ 'upload_date': '20150520',
}
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ if path.startswith('nba/'):
+ path = path[3:]
+ video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
+ video_id = xpath_text(video_info, 'slug')
+ title = xpath_text(video_info, 'headline')
+ description = xpath_text(video_info, 'description')
+ duration = parse_duration(xpath_text(video_info, 'length'))
+ timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
- shortened_video_id = video_id.rpartition('/')[2]
- title = remove_end(
- self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
+ thumbnails = []
+ for image in video_info.find('images'):
+ thumbnails.append({
+ 'id': image.attrib.get('cut'),
+ 'url': image.text,
+ 'width': int_or_none(image.attrib.get('width')),
+ 'height': int_or_none(image.attrib.get('height')),
+ })
- description = self._og_search_description(webpage)
- duration_str = self._html_search_meta(
- 'duration', webpage, 'duration', default=None)
- if not duration_str:
- duration_str = self._html_search_regex(
- r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False)
- duration = parse_duration(duration_str)
+ formats = []
+ for video_file in video_info.findall('.//file'):
+ video_url = video_file.text
+ if video_url.startswith('/'):
+ continue
+ if video_url.endswith('.m3u8'):
+ m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif video_url.endswith('.f4m'):
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ else:
+ key = video_file.attrib.get('bitrate')
+ format_info = {
+ 'format_id': key,
+ 'url': video_url,
+ }
+ mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
+ if mobj:
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': int_or_none(mobj.group(3)),
+ })
+ formats.append(format_info)
+ self._sort_formats(formats)
return {
- 'id': shortened_video_id,
- 'url': video_url,
+ 'id': video_id,
'title': title,
'description': description,
'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index e683d24c4..340c922bd 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -3,14 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_HTTPError,
-)
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
find_xpath_attr,
lowercase_escape,
+ smuggle_url,
unescapeHTML,
)
@@ -62,12 +60,13 @@ class NBCIE(InfoExtractor):
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+ r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
r'"embedURL"\s*:\s*"([^"]+)"'
],
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
- return self.url_result(theplatform_url)
+ return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
class NBCSportsVPlayerIE(InfoExtractor):
@@ -187,7 +186,7 @@ class NBCNewsIE(InfoExtractor):
'title': info.find('headline').text,
'ext': 'flv',
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
- 'description': compat_str(info.find('caption').text),
+ 'description': info.find('caption').text,
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index 16213eed9..894c51399 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -88,10 +88,10 @@ class NDRIE(NDRBaseIE):
'embedURL', webpage, 'embed URL', fatal=True)
description = self._search_regex(
r'<p[^>]+itemprop="description">([^<]+)</p>',
- webpage, 'description', fatal=False)
+ webpage, 'description', default=None) or self._og_search_description(webpage)
timestamp = parse_iso8601(
self._search_regex(
- r'<span itemprop="datePublished" content="([^"]+)">',
+ r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
webpage, 'upload date', fatal=False))
return {
'_type': 'url_transparent',
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py
index 76bd21e6d..d440313d5 100644
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -9,6 +9,7 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
+ compat_urlparse,
)
from ..utils import (
clean_html,
@@ -82,14 +83,21 @@ class NocoIE(InfoExtractor):
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
+ @staticmethod
+ def _ts():
+ return int(time.time() * 1000)
+
def _call_api(self, path, video_id, note, sub_lang=None):
- ts = compat_str(int(time.time() * 1000))
+ ts = compat_str(self._ts() + self._ts_offset)
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk)
if sub_lang:
url += self._SUB_LANG_TEMPLATE % sub_lang
- resp = self._download_json(url, video_id, note)
+ request = sanitized_Request(url)
+ request.add_header('Referer', self._referer)
+
+ resp = self._download_json(request, video_id, note)
if isinstance(resp, dict) and resp.get('error'):
self._raise_error(resp['error'], resp['description'])
@@ -102,8 +110,22 @@ class NocoIE(InfoExtractor):
expected=True)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+
+ # Timestamp adjustment offset between server time and local time
+ # must be calculated in order to use timestamps closest to server's
+ # in all API requests (see https://github.com/rg3/youtube-dl/issues/7864)
+ webpage = self._download_webpage(url, video_id)
+
+ player_url = self._search_regex(
+ r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
+ webpage, 'noco player', group='player',
+ default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
+
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
+ ts = int_or_none(qs.get('ts', [None])[0])
+ self._ts_offset = ts - self._ts() if ts else 0
+ self._referer = player_url
medias = self._call_api(
'shows/%s/medias' % video_id,
@@ -155,8 +177,8 @@ class NocoIE(InfoExtractor):
'format_id': format_id_extended,
'width': int_or_none(fmt.get('res_width')),
'height': int_or_none(fmt.get('res_lines')),
- 'abr': int_or_none(fmt.get('audiobitrate')),
- 'vbr': int_or_none(fmt.get('videobitrate')),
+ 'abr': int_or_none(fmt.get('audiobitrate'), 1000),
+ 'vbr': int_or_none(fmt.get('videobitrate'), 1000),
'filesize': int_or_none(fmt.get('filesize')),
'format_note': qualities[format_id].get('quality_name'),
'quality': qualities[format_id].get('priority'),
diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py
index 6163e8855..d68c1ad79 100644
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -17,15 +17,16 @@ class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov'
IE_DESC = 'NovaMov'
- _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
+ _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
_HOST = 'www.novamov.com'
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
- _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
+ _FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
_TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
+ _URL_TEMPLATE = 'http://%s/video/%s'
_TEST = {
'url': 'http://www.novamov.com/video/4rurhn9x446jj',
@@ -39,20 +40,28 @@ class NovaMovIE(InfoExtractor):
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
}
+ def _check_existence(self, webpage, video_id):
+ if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
def _real_extract(self, url):
video_id = self._match_id(url)
- url = 'http://%s/video/%s' % (self._HOST, video_id)
+ url = self._URL_TEMPLATE % (self._HOST, video_id)
webpage = self._download_webpage(
url, video_id, 'Downloading video page')
- if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+ self._check_existence(webpage, video_id)
def extract_filekey(default=NO_DEFAULT):
- return self._search_regex(
+ filekey = self._search_regex(
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
+ if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
+ return self._search_regex(
+ r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
+ else:
+ return filekey
filekey = extract_filekey(default=None)
@@ -69,6 +78,7 @@ class NovaMovIE(InfoExtractor):
request.add_header('Referer', post_url)
webpage = self._download_webpage(
request, video_id, 'Downloading continue to the video page')
+ self._check_existence(webpage, video_id)
filekey = extract_filekey()
@@ -92,3 +102,89 @@ class NovaMovIE(InfoExtractor):
'title': title,
'description': description
}
+
+
+class WholeCloudIE(NovaMovIE):
+ IE_NAME = 'wholecloud'
+ IE_DESC = 'WholeCloud'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': '(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
+
+ _HOST = 'www.wholecloud.net'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
+ _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
+
+ _TEST = {
+ 'url': 'http://www.wholecloud.net/video/559e28be54d96',
+ 'md5': 'abd31a2132947262c50429e1d16c1bfd',
+ 'info_dict': {
+ 'id': '559e28be54d96',
+ 'ext': 'flv',
+ 'title': 'dissapeared image',
+ 'description': 'optical illusion dissapeared image magic illusion',
+ }
+ }
+
+
+class NowVideoIE(NovaMovIE):
+ IE_NAME = 'nowvideo'
+ IE_DESC = 'NowVideo'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
+
+ _HOST = 'www.nowvideo.to'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r'<h4>([^<]+)</h4>'
+ _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
+
+ _TEST = {
+ 'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
+ 'md5': '12c82cad4f2084881d8bc60ee29df092',
+ 'info_dict': {
+ 'id': 'f1d6fce9a968b',
+ 'ext': 'flv',
+ 'title': 'youtubedl test video BaWjenozKc',
+ 'description': 'Description',
+ },
+ }
+
+
+class VideoWeedIE(NovaMovIE):
+ IE_NAME = 'videoweed'
+ IE_DESC = 'VideoWeed'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
+
+ _HOST = 'www.videoweed.es'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
+ _URL_TEMPLATE = 'http://%s/file/%s'
+
+ _TEST = {
+ 'url': 'http://www.videoweed.es/file/b42178afbea14',
+ 'md5': 'abd31a2132947262c50429e1d16c1bfd',
+ 'info_dict': {
+ 'id': 'b42178afbea14',
+ 'ext': 'flv',
+ 'title': 'optical illusion dissapeared image magic illusion',
+ 'description': ''
+ },
+ }
+
+
+class CloudTimeIE(NovaMovIE):
+ IE_NAME = 'cloudtime'
+ IE_DESC = 'CloudTime'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'cloudtime\.to'}
+
+ _HOST = 'www.cloudtime.to'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
+
+ _TEST = None
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py
index d480fb58c..446f5901c 100644
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -1,7 +1,10 @@
# encoding: utf-8
from __future__ import unicode_literals
-from .brightcove import BrightcoveLegacyIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -23,9 +26,12 @@ class NownessBaseIE(InfoExtractor):
note='Downloading player JavaScript',
errnote='Unable to download player JavaScript')
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
- if bc_url is None:
- raise ExtractorError('Could not find player definition')
- return self.url_result(bc_url, 'BrightcoveLegacy')
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+ bc_url = BrightcoveNewIE._extract_url(player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+ raise ExtractorError('Could not find player definition')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index 67e34b294..fd107aca2 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -71,7 +71,7 @@ class NowTVBaseIE(InfoExtractor):
class NowTVIE(NowTVBaseIE):
- _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
+ _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
# rtl
@@ -190,6 +190,9 @@ class NowTVIE(NowTVBaseIE):
}, {
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py
deleted file mode 100644
index 57ee3d366..000000000
--- a/youtube_dl/extractor/nowvideo.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from __future__ import unicode_literals
-
-from .novamov import NovaMovIE
-
-
-class NowVideoIE(NovaMovIE):
- IE_NAME = 'nowvideo'
- IE_DESC = 'NowVideo'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
-
- _HOST = 'www.nowvideo.to'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
- _TITLE_REGEX = r'<h4>([^<]+)</h4>'
- _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
-
- _TEST = {
- 'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
- 'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
- 'info_dict': {
- 'id': '0mw0yow7b6dxa',
- 'ext': 'flv',
- 'title': 'youtubedl test video _BaW_jenozKc.mp4',
- 'description': 'Description',
- }
- }
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index a262a9f6d..8603fd692 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -1,108 +1,78 @@
from __future__ import unicode_literals
import re
-import json
import base64
from .common import InfoExtractor
from ..utils import (
- unescapeHTML,
- ExtractorError,
- determine_ext,
int_or_none,
+ float_or_none,
+ ExtractorError,
+ unsmuggle_url,
)
+from ..compat import compat_urllib_parse
class OoyalaBaseIE(InfoExtractor):
- def _extract_result(self, info, more_info):
- embedCode = info['embedCode']
- video_url = info.get('ipad_url') or info['url']
-
- if determine_ext(video_url) == 'm3u8':
- formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
- else:
- formats = [{
- 'url': video_url,
- 'ext': 'mp4',
- }]
-
- return {
- 'id': embedCode,
- 'title': unescapeHTML(info['title']),
- 'formats': formats,
- 'description': unescapeHTML(more_info['description']),
- 'thumbnail': more_info['promo'],
+ def _extract(self, content_tree_url, video_id, domain='example.org'):
+ content_tree = self._download_json(content_tree_url, video_id)['content_tree']
+ metadata = content_tree[list(content_tree)[0]]
+ embed_code = metadata['embed_code']
+ pcode = metadata.get('asset_pcode') or embed_code
+ video_info = {
+ 'id': embed_code,
+ 'title': metadata['title'],
+ 'description': metadata.get('description'),
+ 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+ 'duration': float_or_none(metadata.get('duration'), 1000),
}
- def _extract(self, player_url, video_id):
- player = self._download_webpage(player_url, video_id)
- mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
- player, 'mobile player url')
- # Looks like some videos are only available for particular devices
- # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
- # is only available for ipad)
- # Working around with fetching URLs for all the devices found starting with 'unknown'
- # until we succeed or eventually fail for each device.
- devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
- devices.remove('unknown')
- devices.insert(0, 'unknown')
- for device in devices:
- mobile_player = self._download_webpage(
- '%s&device=%s' % (mobile_url, device), video_id,
- 'Downloading mobile player JS for %s device' % device)
- videos_info = self._search_regex(
- r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
- mobile_player, 'info', fatal=False, default=None)
- if videos_info:
- break
-
- if not videos_info:
- formats = []
+ urls = []
+ formats = []
+ for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
auth_data = self._download_json(
- 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id),
- video_id)
-
- cur_auth_data = auth_data['authorization_data'][video_id]
-
- for stream in cur_auth_data['streams']:
- formats.append({
- 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
- 'ext': stream.get('delivery_type'),
- 'format': stream.get('video_codec'),
- 'format_id': stream.get('profile'),
- 'width': int_or_none(stream.get('width')),
- 'height': int_or_none(stream.get('height')),
- 'abr': int_or_none(stream.get('audio_bitrate')),
- 'vbr': int_or_none(stream.get('video_bitrate')),
- })
- if formats:
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': 'Ooyala video',
- }
-
- if not cur_auth_data['authorized']:
- raise ExtractorError(cur_auth_data['message'], expected=True)
-
- if not videos_info:
- raise ExtractorError('Unable to extract info')
- videos_info = videos_info.replace('\\"', '"')
- videos_more_info = self._search_regex(
- r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
- videos_info = json.loads(videos_info)
- videos_more_info = json.loads(videos_more_info)
-
- if videos_more_info.get('lineup'):
- videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'title': unescapeHTML(videos_more_info['title']),
- 'entries': videos,
- }
- else:
- return self._extract_result(videos_info[0], videos_more_info)
+ 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}),
+ video_id, 'Downloading %s JSON' % supported_format)
+
+ cur_auth_data = auth_data['authorization_data'][embed_code]
+
+ if cur_auth_data['authorized']:
+ for stream in cur_auth_data['streams']:
+ url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
+ if url in urls:
+ continue
+ urls.append(url)
+ delivery_type = stream['delivery_type']
+ if delivery_type == 'hls' or '.m3u8' in url:
+ m3u8_formats = self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif delivery_type == 'hds' or '.f4m' in url:
+ f4m_formats = self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ elif '.smil' in url:
+ smil_formats = self._extract_smil_formats(url, embed_code, fatal=False)
+ if smil_formats:
+ formats.extend(smil_formats)
+ else:
+ formats.append({
+ 'url': url,
+ 'ext': stream.get('delivery_type'),
+ 'vcodec': stream.get('video_codec'),
+ 'format_id': delivery_type,
+ 'width': int_or_none(stream.get('width')),
+ 'height': int_or_none(stream.get('height')),
+ 'abr': int_or_none(stream.get('audio_bitrate')),
+ 'vbr': int_or_none(stream.get('video_bitrate')),
+ 'fps': float_or_none(stream.get('framerate')),
+ })
+ else:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True)
+ self._sort_formats(formats)
+
+ video_info['formats'] = formats
+ return video_info
class OoyalaIE(OoyalaBaseIE):
@@ -117,6 +87,7 @@ class OoyalaIE(OoyalaBaseIE):
'ext': 'mp4',
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ 'duration': 853.386,
},
}, {
# Only available for ipad
@@ -125,7 +96,7 @@ class OoyalaIE(OoyalaBaseIE):
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'ext': 'mp4',
'title': 'Simulation Overview - Levels of Simulation',
- 'description': '',
+ 'duration': 194.948,
},
},
{
@@ -136,7 +107,8 @@ class OoyalaIE(OoyalaBaseIE):
'info_dict': {
'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
'ext': 'mp4',
- 'title': 'Ooyala video',
+ 'title': 'Divide Tool Path.mp4',
+ 'duration': 204.405,
}
}
]
@@ -151,9 +123,11 @@ class OoyalaIE(OoyalaBaseIE):
ie=cls.ie_key())
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url)
- player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
- return self._extract(player_url, embed_code)
+ domain = smuggled_data.get('domain')
+ content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code)
+ return self._extract(content_tree_url, embed_code, domain)
class OoyalaExternalIE(OoyalaBaseIE):
@@ -170,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
.*?&pcode=
)
(?P<pcode>.+?)
- (&|$)
+ (?:&|$)
'''
_TEST = {
@@ -179,7 +153,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4',
'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
- 'description': '',
+ 'duration': 1302000,
},
'params': {
# m3u8 download
@@ -188,9 +162,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- partner_id = mobj.group('partner_id')
- video_id = mobj.group('id')
- pcode = mobj.group('pcode')
- player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode)
- return self._extract(player_url, video_id)
+ partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
+ content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id)
+ return self._extract(content_tree_url, video_id)
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index b787e2a73..744e4a09a 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -15,16 +15,181 @@ from ..utils import (
class PBSIE(InfoExtractor):
+ _STATIONS = (
+ (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
+ (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
+ (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
+ (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
+ (r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
+ (r'video\.wfsu\.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
+ (r'video\.wsre\.org', 'WSRE (WSRE)'), # http://www.wsre.org
+ (r'video\.wtcitv\.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
+ (r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
+ (r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
+ # (r'kuac\.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
+ # (r'ktoo\.org', '360 North (KTOO)'), # http://www.ktoo.org/
+ # (r'azpm\.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
+ (r'video\.azpbs\.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
+ (r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
+ (r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
+ (r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
+ (r'video\.ket\.org', 'KET (WKLE)'), # http://www.ket.org/
+ (r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
+ (r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
+ (r'videos\.oeta\.tv', 'OETA (KETA)'), # http://www.oeta.tv
+ (r'video\.optv\.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
+ (r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
+ (r'video\.keet\.org', 'KEET TV (KEET)'), # http://www.keet.org
+ (r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
+ (r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
+ (r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
+ (r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
+ (r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
+ (r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
+ (r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
+ (r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
+ (r'video\.soptv\.org', 'SOPTV (KSYS)'), # http://www.soptv.org
+ # (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
+ # (r'krcb\.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
+ # (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
+ (r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
+ (r'video\.kenw\.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
+ (r'video\.kued\.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
+ (r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
+ (r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
+ (r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
+ (r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
+ (r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
+ (r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
+ (r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
+ # (r'ripbs\.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
+ (r'watch\.wliw\.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
+ (r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
+ (r'watch\.weta\.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
+ (r'video\.whyy\.org', 'WHYY (WHYY)'), # http://www.whyy.org
+ (r'video\.wlvt\.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
+ (r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
+ (r'video\.whut\.org', 'Howard University Television (WHUT)'), # http://www.whut.org
+ (r'video\.wedu\.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
+ (r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
+ # (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
+ (r'video\.wpbt2\.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
+ (r'video\.wucftv\.org', 'WUCF TV (WUCF)'), # http://wucftv.org
+ (r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
+ (r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
+ (r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
+ (r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
+ (r'video\.scetv\.org', 'ETV (WRLK)'), # http://www.scetv.org
+ (r'video\.unctv\.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
+ # (r'pbsguam\.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
+ (r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
+ (r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
+ (r'video\.ksps\.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
+ (r'watch\.opb\.org', 'OPB (KOPB)'), # http://www.opb.org
+ (r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
+ (r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
+ (r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
+ (r'video\.wttw\.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
+ # (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
+ (r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
+ (r'video\.ninenet\.org', 'Nine Network (KETC)'), # http://www.ninenet.org
+ (r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
+ (r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
+ (r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
+ (r'video\.wnin\.org', 'WNIN (WNIN)'), # http://www.wnin.org/
+ (r'video\.wnit\.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
+ (r'video\.wpt\.org', 'WPT (WPNE)'), # http://www.wpt.org/
+ (r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
+ (r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
+ (r'video\.wqpt\.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
+ (r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
+ # (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
+ (r'video\.wipb\.org', 'WIPB-TV (WIPB)'), # http://wipb.org
+ (r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
+ (r'watch\.cetconnect\.org', 'CET (WCET)'), # http://www.cetconnect.org
+ (r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
+ (r'video\.wbgu\.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
+ (r'video\.wgvu\.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
+ (r'video\.netnebraska\.org', 'NET1 (KUON)'), # http://netnebraska.org
+ (r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
+ (r'watch\.sdpb\.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
+ (r'video\.tpt\.org', 'TPT (KTCA)'), # http://www.tpt.org
+ (r'watch\.ksmq\.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
+ (r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
+ (r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
+ # (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
+ # (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
+ # (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
+ (r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
+ (r'video\.wcte\.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
+ (r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
+ (r'video\.wosu\.org', 'WOSU TV (WOSU)'), # http://wosu.org/
+ (r'video\.woub\.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
+ (r'video\.wvpublic\.org', 'WVPB (WVPB)'), # http://wvpublic.org/
+ (r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
+ # (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
+ (r'video\.kera\.org', 'KERA 13 (KERA)'), # http://www.kera.org/
+ (r'video\.mpbn\.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
+ (r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
+ (r'video\.nhptv\.org', 'NHPTV (WENH)'), # http://nhptv.org/
+ (r'video\.vpt\.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
+ (r'video\.witf\.org', 'witf (WITF)'), # http://www.witf.org
+ (r'watch\.wqed\.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
+ (r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
+ (r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
+ (r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
+ (r'video\.wcmu\.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
+ (r'video\.wkar\.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
+ (r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
+ (r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
+ (r'video\.wgte\.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
+ (r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
+ # (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
+ (r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
+ (r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
+ (r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
+ (r'video\.kacvtv\.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
+ (r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
+ (r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
+ (r'video\.wned\.org', 'WNED (WNED)'), # http://www.wned.org/
+ (r'watch\.wpbstv\.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
+ (r'video\.wskg\.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
+ (r'video\.wxxi\.org', 'WXXI (WXXI)'), # http://wxxi.org
+ (r'video\.wpsu\.org', 'WPSU (WPSU)'), # http://www.wpsu.org
+ # (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
+ (r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
+ (r'video\.wtvi\.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
+ # (r'whro\.org', 'WHRO (WHRO)'), # http://whro.org
+ (r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
+ (r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
+ (r'video\.kcts9\.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
+ (r'video\.basinpbs\.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
+ (r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
+ # (r'tamu\.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
+ # (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
+ (r'video\.klrn\.org', 'KLRN (KLRN)'), # http://www.klrn.org
+ (r'video\.klru\.tv', 'KLRU (KLRU)'), # http://www.klru.org
+ # (r'kmbh\.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
+ # (r'knct\.org', 'KNCT (KNCT)'), # http://www.knct.org
+ # (r'ktxt\.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
+ (r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
+ (r'video\.ideastations\.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
+ (r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
+ )
+
+ IE_NAME = 'pbs'
+ IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1])
+
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
- video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
+ (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
- '''
+ ''' % '|'.join(list(zip(*_STATIONS))[0])
_TESTS = [
{
@@ -174,6 +339,10 @@ class PBSIE(InfoExtractor):
{
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True,
+ },
+ {
+ 'url': 'http://watch.knpb.org/video/2365616055/',
+ 'only_matching': True,
}
]
_ERRORS = {
@@ -204,6 +373,7 @@ class PBSIE(InfoExtractor):
MEDIA_ID_REGEXES = [
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
r'class="coveplayerid">([^<]+)<', # coveplayer
+ r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
]
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
index 551c8c9f0..bc559d1df 100644
--- a/youtube_dl/extractor/pladform.py
+++ b/youtube_dl/extractor/pladform.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -44,6 +46,13 @@ class PladformIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage)
+ if mobj:
+ return mobj.group('url')
+
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index aa7dbcb63..55c11b3bf 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import re
import json
import random
import collections
@@ -14,6 +15,7 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
+ qualities,
sanitized_Request,
)
@@ -140,15 +142,28 @@ class PluralsightIE(PluralsightBaseIE):
'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640},
'high': {'width': 1024, 'height': 768},
+ 'high-widescreen': {'width': 1280, 'height': 720},
}
+ QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+ quality_key = qualities(QUALITIES_PREFERENCE)
+
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = (
- AllowedQuality('webm', ('high',)),
- AllowedQuality('mp4', ('low', 'medium', 'high',)),
+ AllowedQuality('webm', ['high', ]),
+ AllowedQuality('mp4', ['low', 'medium', 'high', ]),
)
+ # Some courses also offer widescreen resolution for high quality (see
+ # https://github.com/rg3/youtube-dl/issues/7766)
+ widescreen = True if re.search(
+ r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage) else False
+ best_quality = 'high-widescreen' if widescreen else 'high'
+ if widescreen:
+ for allowed_quality in ALLOWED_QUALITIES:
+ allowed_quality.qualities.append(best_quality)
+
# In order to minimize the number of calls to ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight we will request
# only single format until formats listing was explicitly requested.
@@ -157,19 +172,19 @@ class PluralsightIE(PluralsightBaseIE):
else:
def guess_allowed_qualities():
req_format = self._downloader.params.get('format') or 'best'
- req_format_split = req_format.split('-')
+ req_format_split = req_format.split('-', 1)
if len(req_format_split) > 1:
req_ext, req_quality = req_format_split
for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), )
req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
- return (AllowedQuality(req_ext, ('high', )), )
+ return (AllowedQuality(req_ext, (best_quality, )), )
allowed_qualities = guess_allowed_qualities()
formats = []
- for ext, qualities in allowed_qualities:
- for quality in qualities:
+ for ext, qualities_ in allowed_qualities:
+ for quality in qualities_:
f = QUALITIES[quality].copy()
clip_post = {
'a': author,
@@ -205,6 +220,7 @@ class PluralsightIE(PluralsightBaseIE):
'url': clip_url,
'ext': ext,
'format_id': format_id,
+ 'quality': quality_key(quality),
})
formats.append(f)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 6b09550b0..9db62adb1 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -17,9 +17,9 @@ from ..utils import (
class RutubeIE(InfoExtractor):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
- _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})'
- _TEST = {
+ _TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
@@ -36,7 +36,10 @@ class RutubeIE(InfoExtractor):
# It requires ffmpeg (m3u8 download)
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 919704261..7de7b7273 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -6,12 +6,12 @@ import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
smuggle_url,
std_headers,
+ urlencode_postdata,
)
@@ -57,7 +57,7 @@ class SafariBaseIE(InfoExtractor):
}
request = sanitized_Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
+ self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dl/extractor/skynewsarabia.py
index f09fee102..05e1b02ad 100644
--- a/youtube_dl/extractor/skynewsarabia.py
+++ b/youtube_dl/extractor/skynewsarabia.py
@@ -9,7 +9,7 @@ from ..utils import (
)
-class SkyNewArabiaBaseIE(InfoExtractor):
+class SkyNewsArabiaBaseIE(InfoExtractor):
_IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images'
def _call_api(self, path, value):
@@ -40,7 +40,7 @@ class SkyNewArabiaBaseIE(InfoExtractor):
}
-class SkyNewsArabiaIE(SkyNewArabiaBaseIE):
+class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
IE_NAME = 'skynewsarabia:video'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)'
_TEST = {
@@ -66,7 +66,7 @@ class SkyNewsArabiaIE(SkyNewArabiaBaseIE):
return self._extract_video_info(video_data)
-class SkyNewsArabiaArticleIE(SkyNewArabiaBaseIE):
+class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
IE_NAME = 'skynewsarabia:video'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
_TESTS = [{
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index daf6ad555..ea8fc258d 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -158,6 +158,7 @@ class SohuIE(InfoExtractor):
'file': clips_url[i],
'new': su[i],
'prod': 'flash',
+ 'rb': 1,
}
if cdnId is not None:
diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py
index 77eec0bc7..16e1bf2d6 100644
--- a/youtube_dl/extractor/srf.py
+++ b/youtube_dl/extractor/srf.py
@@ -11,7 +11,7 @@ from ..utils import (
class SrfIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+ _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/(?:tv|radio)/[^/]+/(?P<media_type>video|audio)/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'md5': '4cd93523723beff51bb4bee974ee238d',
@@ -36,6 +36,20 @@ class SrfIE(InfoExtractor):
'timestamp': 1373493600,
},
}, {
+ 'url': 'http://www.srf.ch/play/radio/hoerspielarchiv-srf-musikwelle/audio/saegel-ohni-wind-von-jakob-stebler?id=415bf3d3-6429-4de7-968d-95866e37cfbc',
+ 'md5': '',
+ 'info_dict': {
+ 'id': '415bf3d3-6429-4de7-968d-95866e37cfbc',
+ 'display_id': 'saegel-ohni-wind-von-jakob-stebler',
+ 'ext': 'mp3',
+ 'upload_date': '20080518',
+ 'title': '«Sägel ohni Wind» von Jakob Stebler',
+ 'timestamp': 1211112000,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }, {
'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'only_matching': True,
}, {
@@ -44,11 +58,13 @@ class SrfIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ media_type = mobj.group('media_type')
+ display_id = mobj.group('display_id') or video_id
video_data = self._download_xml(
- 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+ 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/%s/play/%s.xml' % (media_type, video_id),
display_id)
title = xpath_text(
@@ -64,7 +80,7 @@ class SrfIE(InfoExtractor):
for url_node in item.findall('url'):
quality = url_node.attrib['quality']
full_url = url_node.text
- original_ext = determine_ext(full_url)
+ original_ext = determine_ext(full_url).lower()
format_id = '%s-%s' % (quality, item.attrib['protocol'])
if original_ext == 'f4m':
formats.extend(self._extract_f4m_formats(
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
index 117afa9bf..e0477382c 100644
--- a/youtube_dl/extractor/teachingchannel.py
+++ b/youtube_dl/extractor/teachingchannel.py
@@ -16,6 +16,7 @@ class TeachingChannelIE(InfoExtractor):
'ext': 'mp4',
'title': 'A History of Teaming',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422.255,
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py
new file mode 100644
index 000000000..a363b4d40
--- /dev/null
+++ b/youtube_dl/extractor/tele13.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ js_to_json,
+ qualities,
+ determine_ext,
+)
+
+
+class Tele13IE(InfoExtractor):
+    """Extractor for t13.cl video pages; YouTube-hosted sources are handed to YoutubeIE."""
+    _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+        'md5': '4cb1fa38adcad8fea88487a078831755',
+        'info_dict': {
+            'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+            'ext': 'mp4',
+            'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
+        },
+        'params': {
+            # HTTP Error 404: Not Found
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
+        'md5': '867adf6a3b3fef932c68a71d70b70946',
+        'info_dict': {
+            'id': 'rOoKv2OMpOw',
+            'ext': 'mp4',
+            'title': 'Shooting star seen on 7-Sep-2015',
+            'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
+            'uploader': 'Porjai Jaturongkhakun',
+            'upload_date': '20150906',
+            'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
+        },
+        'add_ie': ['Youtube'],
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict from the page's jwplayer setup object, or return a YouTube url_result."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\)\.setup\((\{.*?\})\)", webpage, 'setup code')
+        sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json)
+
+        preference = qualities(['Móvil', 'SD', 'HD'])
+        formats = []
+        seen_urls = set()
+        for source in sources:
+            format_url = source.get('file')
+            # Ignore sources without a URL and URLs that were already processed.
+            if not format_url or format_url in seen_urls:
+                continue
+            seen_urls.add(format_url)
+            ext = determine_ext(format_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) or [])
+            elif YoutubeIE.suitable(format_url):
+                return self.url_result(format_url, 'Youtube')
+            else:
+                formats.append({
+                    'url': format_url,
+                    'format_id': source.get('label'),
+                    'preference': preference(source.get('label')),
+                    'ext': ext,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': display_id,
+            'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
+            'description': self._html_search_meta('description', webpage, 'description'),
+            'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index 3a68eaa80..6890021cf 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
- _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+ _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
_TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
@@ -22,7 +22,7 @@ class TF1IE(InfoExtractor):
}, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': {
- 'id': '12043945',
+ 'id': 'le-grand-mysterioso-chuggington-7085291-739',
'ext': 'mp4',
'title': 'Le grand Mystérioso - Chuggington',
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
@@ -32,22 +32,24 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
+ 'skip': 'HTTP Error 410: Gone',
}, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True,
}, {
'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
'only_matching': True,
+ }, {
+ 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- embed_url = self._html_search_regex(
- r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed player page')
- wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
+ wat_id = self._html_search_regex(
+ r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
+ webpage, 'wat id', group='id')
wat_info = self._download_json(
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
return self.url_result(wat_info['media']['url'], 'Wat')
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 1555aa77c..0bf6726b5 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -16,11 +16,12 @@ from ..compat import (
from ..utils import (
determine_ext,
ExtractorError,
- xpath_with_ns,
- unsmuggle_url,
+ float_or_none,
int_or_none,
+ sanitized_Request,
+ unsmuggle_url,
url_basename,
- float_or_none,
+ xpath_with_ns,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -204,7 +205,12 @@ class ThePlatformIE(ThePlatformBaseIE):
smil_url = url
# Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
elif '/guid/' in url:
- webpage = self._download_webpage(url, video_id)
+ headers = {}
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ headers['Referer'] = source_url
+ request = sanitized_Request(url, headers=headers)
+ webpage = self._download_webpage(request, video_id)
smil_url = self._search_regex(
r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
webpage, 'smil url', group='url')
diff --git a/youtube_dl/extractor/toggle.py b/youtube_dl/extractor/toggle.py
new file mode 100644
index 000000000..a47239952
--- /dev/null
+++ b/youtube_dl/extractor/toggle.py
@@ -0,0 +1,194 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class ToggleIE(InfoExtractor):
+    """Extractor for video.toggle.sg pages; media data comes from the tvinci GetMediaInfo gateway."""
+    IE_NAME = 'toggle'
+    _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
+        'info_dict': {
+            'id': '343115',
+            'ext': 'mp4',
+            'title': 'Lion Moms Premiere',
+            'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
+            'upload_date': '20150910',
+            'timestamp': 1441858274,
+        },
+        'params': {
+            'skip_download': 'm3u8 download',
+        }
+    }, {
+        'note': 'DRM-protected video',
+        'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
+        'info_dict': {
+            'id': '341413',
+            'ext': 'wvm',
+            'title': 'Dug\'s Special Mission',
+            'description': 'md5:e86c6f4458214905c1772398fabc93e0',
+            'upload_date': '20150827',
+            'timestamp': 1440644006,
+        },
+        'params': {
+            'skip_download': 'DRM-protected wvm download',
+        }
+    }, {
+        # this also tests correct video id extraction
+        'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
+        'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
+        'info_dict': {
+            'id': '332861',
+            'ext': 'mp4',
+            'title': '28th SEA Games (5 Show) - Episode 11',
+            'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
+            'upload_date': '20150605',
+            'timestamp': 1433480166,
+        },
+        'params': {
+            'skip_download': 'DRM-protected wvm download',
+        },
+        'skip': 'm3u8 links are geo-restricted'
+    }, {
+        'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
+        'only_matching': True,
+    }]
+
+    _FORMAT_PREFERENCES = {
+        'wvm-STBMain': -10,
+        'wvm-iPadMain': -20,
+        'wvm-iPhoneMain': -30,
+        'wvm-Android': -40,
+    }
+    # Fallback API credentials, used when the page does not expose apiUser/apiPass.
+    _API_USER = 'tvpapi_147'
+    _API_PASS = '11111'
+
+    def _real_extract(self, url):
+        """Query GetMediaInfo for the video id in *url* and return the resulting info dict."""
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            url, video_id, note='Downloading video page')
+
+        api_user = self._search_regex(
+            r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
+            default=self._API_USER, group='user')
+        api_pass = self._search_regex(
+            r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
+            default=self._API_PASS, group='pass')
+
+        params = {
+            'initObj': {
+                'Locale': {
+                    'LocaleLanguage': '',
+                    'LocaleCountry': '',
+                    'LocaleDevice': '',
+                    'LocaleUserState': 0
+                },
+                'Platform': 0,
+                'SiteGuid': 0,
+                'DomainID': '0',
+                'UDID': '',
+                'ApiUser': api_user,
+                'ApiPass': api_pass
+            },
+            'MediaID': video_id,
+            'mediaType': 0,
+        }
+
+        req = sanitized_Request(
+            'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
+            json.dumps(params).encode('utf-8'))
+        info = self._download_json(req, video_id, 'Downloading video info json')
+
+        title = info['MediaName']
+
+        formats = []
+        # 'Files' may be present but null, so fall back to an empty list.
+        for video_file in info.get('Files') or []:
+            video_url, vid_format = video_file.get('URL'), video_file.get('Format')
+            if not video_url or not vid_format:
+                continue
+            ext = determine_ext(video_url)
+            vid_format = vid_format.replace(' ', '')
+            # if geo-restricted, m3u8 is inaccessible, but mp4 is okay
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, ext='mp4', m3u8_id=vid_format,
+                    note='Downloading %s m3u8 information' % vid_format,
+                    errnote='Failed to download %s m3u8 information' % vid_format,
+                    fatal=False) or [])
+            elif ext in ('mp4', 'wvm'):
+                # wvm are drm-protected files
+                formats.append({
+                    'ext': ext,
+                    'url': video_url,
+                    'format_id': vid_format,
+                    'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
+                    'format_note': 'DRM-protected video' if ext == 'wvm' else None
+                })
+        if not formats:
+            # Most likely because geo-blocked
+            raise ExtractorError('No downloadable videos found', expected=True)
+        self._sort_formats(formats)
+
+        duration = int_or_none(info.get('Duration'))
+        description = info.get('Description')
+        created_at = parse_iso8601(info.get('CreationDate') or None)
+
+        average_rating = float_or_none(info.get('Rating'))
+        view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
+        like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
+
+        thumbnails = []
+        for picture in info.get('Pictures') or []:
+            if not isinstance(picture, dict):
+                continue
+            pic_url = picture.get('URL')
+            if not pic_url:
+                continue
+            thumbnail = {'url': pic_url}
+            # A null PicSize must not reach re.search, so substitute an empty string.
+            m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', picture.get('PicSize') or '')
+            if m:
+                thumbnail.update({
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                })
+            thumbnails.append(thumbnail)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': created_at,
+            'average_rating': average_rating,
+            'view_count': view_count,
+            'like_count': like_count,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
index 185accc4b..a800449e9 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -1,80 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
class TriluliluIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
- _TEST = {
- 'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
- 'md5': 'c1450a00da251e2769b74b9005601cac',
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)'
+ _TESTS = [{
+ 'url': 'http://www.trilulilu.ro/big-buck-bunny-1',
+ 'md5': '68da087b676a6196a413549212f60cc6',
'info_dict': {
'id': 'ae2899e124140b',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': ':) pentru copilul din noi',
+ 'uploader_id': 'chipy',
+ 'upload_date': '20120304',
+ 'timestamp': 1330830647,
+ 'uploader': 'chipy',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
},
- }
+ }, {
+ 'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta',
+ 'md5': '929dfb8729dc71750463af88bbbbf4a4',
+ 'info_dict': {
+ 'id': 'f299710e3c91c5',
+ 'ext': 'mp4',
+ 'title': 'Adena ft. Morreti - Inocenta',
+ 'description': 'pop music',
+ 'uploader_id': 'VEVOmixt',
+ 'upload_date': '20151204',
+ 'uploader': 'VEVOmixt',
+ 'timestamp': 1449187937,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id)
- if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
- raise ExtractorError(
- 'This video is not available in your country.', expected=True)
- elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
+ age_limit = 0
+ errors = media_info.get('errors', {})
+ if errors.get('friends'):
raise ExtractorError('This video is private.', expected=True)
+ elif errors.get('geoblock'):
+ raise ExtractorError('This video is not available in your country.', expected=True)
+ elif errors.get('xxx_unlogged'):
+ age_limit = 18
- flashvars_str = self._search_regex(
- r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
+ media_class = media_info.get('class')
+ if media_class not in ('video', 'audio'):
+ raise ExtractorError('not a video or an audio')
- if flashvars_str:
- flashvars = self._parse_json(flashvars_str, display_id)
- else:
- raise ExtractorError(
- 'This page does not contain videos', expected=True)
+ user = media_info.get('user', {})
- if flashvars['isMP3'] == 'true':
- raise ExtractorError(
- 'Audio downloads are currently not supported', expected=True)
+ thumbnail = media_info.get('cover_url')
+ if thumbnail:
+ thumbnail = thumbnail.format(width='1600', height='1200')  # str.format returns a new string, so keep the result
- video_id = flashvars['hash']
- title = self._og_search_title(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage, default=None)
-
- format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
- 'video-formats2' % flashvars)
- format_doc = self._download_xml(
- format_url, video_id,
- note='Downloading formats',
- errnote='Error while downloading formats')
-
- video_url_template = (
- 'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
- '&source=site&hash=%(hash)s&username=%(userid)s&'
- 'key=ministhebest&format=%%s&sig=&exp=' %
- flashvars)
- formats = [
- {
- 'format_id': fnode.text.partition('-')[2],
- 'url': video_url_template % fnode.text,
- 'ext': fnode.text.partition('-')[0]
- }
-
- for fnode in format_doc.findall('./formats/format')
- ]
+ # TODO: get correct ext for audio files
+ stream_type = media_info.get('stream_type')
+ formats = [{
+ 'url': media_info['href'],
+ 'ext': stream_type,
+ }]
+ if media_info.get('is_hd'):
+ formats.append({
+ 'format_id': 'hd',
+ 'url': media_info['hrefhd'],
+ 'ext': stream_type,
+ })
+ if media_class == 'audio':
+ formats[0]['vcodec'] = 'none'
+ else:
+ formats[0]['format_id'] = 'sd'
return {
- 'id': video_id,
+ 'id': media_info['identifier'].split('|')[1],
'display_id': display_id,
'formats': formats,
- 'title': title,
- 'description': description,
+ 'title': media_info['title'],
+ 'description': media_info.get('description'),
'thumbnail': thumbnail,
+ 'uploader_id': user.get('username'),
+ 'uploader': user.get('fullname'),
+ 'timestamp': parse_iso8601(media_info.get('published'), ' '),
+ 'duration': int_or_none(media_info.get('duration')),
+ 'view_count': int_or_none(media_info.get('count_views')),
+ 'like_count': int_or_none(media_info.get('count_likes')),
+ 'comment_count': int_or_none(media_info.get('count_comments')),
+ 'age_limit': age_limit,
}
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 571289421..02dfd36f4 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_etree_fromstring
+from ..compat import (
+ compat_etree_fromstring,
+ compat_urlparse,
+)
from ..utils import (
ExtractorError,
int_or_none,
@@ -67,6 +70,17 @@ class VevoIE(InfoExtractor):
'params': {
'skip_download': 'true',
}
+ }, {
+ 'note': 'No video_info',
+ 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
+ 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
+ 'info_dict': {
+ 'id': 'USUV71503000',
+ 'ext': 'mp4',
+ 'title': 'Till I Die - K Camp ft. T.I.',
+ 'duration': 193,
+ },
+ 'expected_warnings': ['Unable to download SMIL file'],
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
@@ -81,11 +95,17 @@ class VevoIE(InfoExtractor):
if webpage is False:
self._oauth_token = None
else:
+ if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
+ raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
+
self._oauth_token = self._search_regex(
r'access_token":\s*"([^"]+)"',
webpage, 'access token', fatal=False)
def _formats_from_json(self, video_info):
+ if not video_info:
+ return []
+
last_version = {'version': -1}
for version in video_info['videoVersions']:
# These are the HTTP downloads, other types are for different manifests
@@ -110,9 +130,8 @@ class VevoIE(InfoExtractor):
})
return formats
- def _formats_from_smil(self, smil_xml):
+ def _formats_from_smil(self, smil_doc):
formats = []
- smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
for el in els:
src = el.attrib['src']
@@ -145,14 +164,14 @@ class VevoIE(InfoExtractor):
})
return formats
- def _download_api_formats(self, video_id):
+ def _download_api_formats(self, video_id, video_url):
if not self._oauth_token:
self._downloader.report_warning(
'No oauth token available, skipping API HLS download')
return []
- api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
- video_id, self._oauth_token)
+ api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
+ video_id, self._oauth_token))
api_data = self._download_json(
api_url, video_id,
note='Downloading HLS formats',
@@ -166,18 +185,26 @@ class VevoIE(InfoExtractor):
preference=0)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+
+ webpage = None
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
response = self._download_json(json_url, video_id)
- video_info = response['video']
+ video_info = response['video'] or {}
- if not video_info:
+ if not video_info and response.get('statusCode') != 909:
if 'statusMessage' in response:
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
raise ExtractorError('Unable to extract videos')
+ if not video_info:
+ if url.startswith('vevo:'):
+ raise ExtractorError('Please specify full Vevo URL for downloading', expected=True)
+ webpage = self._download_webpage(url, video_id)
+
+ title = video_info.get('title') or self._og_search_title(webpage)
+
formats = self._formats_from_json(video_info)
is_explicit = video_info.get('isExplicit')
@@ -189,11 +216,11 @@ class VevoIE(InfoExtractor):
age_limit = None
# Download via HLS API
- formats.extend(self._download_api_formats(video_id))
+ formats.extend(self._download_api_formats(video_id, url))
# Download SMIL
smil_blocks = sorted((
- f for f in video_info['videoVersions']
+ f for f in video_info.get('videoVersions', [])
if f['sourceType'] == 13),
key=lambda f: f['version'])
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
@@ -205,23 +232,26 @@ class VevoIE(InfoExtractor):
if smil_url_m is not None:
smil_url = smil_url_m
if smil_url:
- smil_xml = self._download_webpage(
- smil_url, video_id, 'Downloading SMIL info', fatal=False)
- if smil_xml:
- formats.extend(self._formats_from_smil(smil_xml))
+ smil_doc = self._download_smil(smil_url, video_id, fatal=False)
+ if smil_doc:
+ formats.extend(self._formats_from_smil(smil_doc))
self._sort_formats(formats)
- timestamp_ms = int_or_none(self._search_regex(
+ timestamp = int_or_none(self._search_regex(
r'/Date\((\d+)\)/',
- video_info['launchDate'], 'launch date', fatal=False))
+ video_info['launchDate'], 'launch date', fatal=False),
+ scale=1000) if video_info else None
+
+ duration = video_info.get('duration') or int_or_none(
+ self._html_search_meta('video:duration', webpage))
return {
'id': video_id,
- 'title': video_info['title'],
+ 'title': title,
'formats': formats,
- 'thumbnail': video_info['imageUrl'],
- 'timestamp': timestamp_ms // 1000,
- 'uploader': video_info['mainArtists'][0]['artistName'],
- 'duration': video_info['duration'],
+ 'thumbnail': video_info.get('imageUrl'),
+ 'timestamp': timestamp,
+ 'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None,
+ 'duration': duration,
'age_limit': age_limit,
}
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 01af7a995..3db6286e4 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -15,6 +15,7 @@ class ViceIE(InfoExtractor):
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
'ext': 'mp4',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ 'duration': 725.983,
},
'params': {
# Requires ffmpeg (m3u8 manifest)
diff --git a/youtube_dl/extractor/videoweed.py b/youtube_dl/extractor/videoweed.py
deleted file mode 100644
index ca2e50935..000000000
--- a/youtube_dl/extractor/videoweed.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from __future__ import unicode_literals
-
-from .novamov import NovaMovIE
-
-
-class VideoWeedIE(NovaMovIE):
- IE_NAME = 'videoweed'
- IE_DESC = 'VideoWeed'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
-
- _HOST = 'www.videoweed.es'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
-
- _TEST = {
- 'url': 'http://www.videoweed.es/file/b42178afbea14',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': 'b42178afbea14',
- 'ext': 'flv',
- 'title': 'optical illusion dissapeared image magic illusion',
- 'description': ''
- },
- }
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index f392ccf1c..ce08e6955 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -23,6 +23,7 @@ from ..utils import (
unsmuggle_url,
urlencode_postdata,
unescapeHTML,
+ parse_filesize,
)
@@ -185,6 +186,20 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
},
{
+ # contains original format
+ 'url': 'https://vimeo.com/33951933',
+ 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
+ 'info_dict': {
+ 'id': '33951933',
+ 'ext': 'mp4',
+ 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
+ 'uploader': 'The DMCI',
+ 'uploader_id': 'dmci',
+ 'upload_date': '20111220',
+ 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
+ },
+ },
+ {
'url': 'https://vimeo.com/109815029',
'note': 'Video not completely processed, "failed" seed status',
'only_matching': True,
@@ -392,6 +407,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
comment_count = None
formats = []
+ download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
+ 'X-Requested-With': 'XMLHttpRequest'})
+ download_data = self._download_json(download_request, video_id, fatal=False)
+ if download_data:
+ source_file = download_data.get('source_file')
+ if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
+ formats.append({
+ 'url': source_file['download_url'],
+ 'ext': source_file['extension'].lower(),
+ 'width': int_or_none(source_file.get('width')),
+ 'height': int_or_none(source_file.get('height')),
+ 'filesize': parse_filesize(source_file.get('size')),
+ 'format_id': source_file.get('public_name', 'Original'),
+ 'preference': 1,
+ })
config_files = config['video'].get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []):
video_url = f.get('url')
@@ -408,12 +438,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting.
- self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
subtitles = {}
text_tracks = config['request'].get('text_tracks')
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index d99a42a9f..90557fa61 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -18,6 +18,7 @@ from ..utils import (
unified_strdate,
)
from .vimeo import VimeoIE
+from .pladform import PladformIE
class VKIE(InfoExtractor):
@@ -164,6 +165,11 @@ class VKIE(InfoExtractor):
# vk wrapper
'url': 'http://www.biqle.ru/watch/847655_160197695',
'only_matching': True,
+ },
+ {
+ # pladform embed
+ 'url': 'https://vk.com/video-76116461_171554880',
+ 'only_matching': True,
}
]
@@ -254,10 +260,13 @@ class VKIE(InfoExtractor):
if vimeo_url is not None:
return self.url_result(vimeo_url)
+ pladform_url = PladformIE._extract_url(info_page)
+ if pladform_url:
+ return self.url_result(pladform_url)
+
m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
if m_rutube is not None:
- self.to_screen('rutube video detected')
rutube_url = self._proto_relative_url(
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index b46802306..ef096cbd2 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -10,8 +10,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- determine_ext,
unified_strdate,
+ qualities,
)
@@ -33,6 +33,7 @@ class WDRIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': 'Page Not Found',
},
{
'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
@@ -47,6 +48,7 @@ class WDRIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': 'Page Not Found',
},
{
'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
@@ -71,6 +73,7 @@ class WDRIE(InfoExtractor):
'upload_date': '20140717',
'is_live': False
},
+ 'skip': 'Page Not Found',
},
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
@@ -83,10 +86,10 @@ class WDRIE(InfoExtractor):
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
'info_dict': {
'id': 'mdb-103364',
- 'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'ext': 'flv',
- 'upload_date': '20150212',
+ 'upload_date': '20150101',
'is_live': True
},
'params': {
@@ -150,25 +153,52 @@ class WDRIE(InfoExtractor):
if upload_date:
upload_date = unified_strdate(upload_date)
+ formats = []
+ preference = qualities(['S', 'M', 'L', 'XL'])
+
if video_url.endswith('.f4m'):
- video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
- ext = 'flv'
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
elif video_url.endswith('.smil'):
- fmt = self._extract_smil_formats(video_url, page_id)[0]
- video_url = fmt['url']
- sep = '&' if '?' in video_url else '?'
- video_url += sep
- video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
- ext = fmt['ext']
+ smil_formats = self._extract_smil_formats(video_url, page_id, False, {
+ 'hdcore': '3.3.0',
+ 'plugin': 'aasp-3.3.0.99.43',
+ })
+ if smil_formats:
+ formats.extend(smil_formats)
else:
- ext = determine_ext(video_url)
+ formats.append({
+ 'url': video_url,
+ 'http_headers': {
+ 'User-Agent': 'mobile',
+ },
+ })
+
+ m3u8_url = self._search_regex(r'rel="adaptiv"[^>]+href="([^"]+)"', webpage, 'm3u8 url', default=None)
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+
+ direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage)
+ if direct_urls:
+ for quality, video_url in direct_urls:
+ formats.append({
+ 'url': video_url,
+ 'preference': preference(quality),
+ 'http_headers': {
+ 'User-Agent': 'mobile',
+ },
+ })
+
+ self._sort_formats(formats)
description = self._html_search_meta('Description', webpage, 'description')
return {
'id': page_id,
- 'url': video_url,
- 'ext': ext,
+ 'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index e4f50e64c..041ff6c55 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -5,7 +5,7 @@ from .youtube import YoutubeIE
class WimpIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)/'
+ _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'ee21217ffd66d058e8b16be340b74883',
@@ -28,18 +28,23 @@ class WimpIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
- webpage, 'video URL')
- if YoutubeIE.suitable(video_url):
- self.to_screen('Found YouTube video')
+
+ youtube_id = self._search_regex(
+ r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
+ webpage, 'video URL', default=None)
+ if youtube_id:
return {
'_type': 'url',
- 'url': video_url,
+ 'url': youtube_id,
'ie_key': YoutubeIE.ie_key(),
}
+ video_url = self._search_regex(
+ r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1',
+ webpage, 'video URL', group='url')
+
return {
'id': video_id,
'url': video_url,
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 69ecc837a..3a3432be8 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -25,8 +25,8 @@ class YoukuIE(InfoExtractor):
'''
_TESTS = [{
+ # MD5 is unstable
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
- 'md5': '5f3af4192eabacc4501508d54a8cabd7',
'info_dict': {
'id': 'XMTc1ODE5Njcy_part1',
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
@@ -42,6 +42,7 @@ class YoukuIE(InfoExtractor):
'title': '武媚娘传奇 85',
},
'playlist_count': 11,
+ 'skip': 'Available in China only',
}, {
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
'info_dict': {
@@ -49,7 +50,6 @@ class YoukuIE(InfoExtractor):
'title': '花千骨 04',
},
'playlist_count': 13,
- 'skip': 'Available in China only',
}, {
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
'note': 'Video protected with password',
@@ -63,7 +63,7 @@ class YoukuIE(InfoExtractor):
},
}]
- def construct_video_urls(self, data1, data2):
+ def construct_video_urls(self, data):
# get sid, token
def yk_t(s1, s2):
ls = list(range(256))
@@ -81,34 +81,24 @@ class YoukuIE(InfoExtractor):
return bytes(s)
sid, token = yk_t(
- b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
+ b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
).decode('ascii').split('_')
# get oip
- oip = data2['ip']
-
- # get fileid
- string_ls = list(
- 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
- shuffled_string_ls = []
- seed = data1['seed']
- N = len(string_ls)
- for ii in range(N):
- seed = (seed * 0xd3 + 0x754f) % 0x10000
- idx = seed * len(string_ls) // 0x10000
- shuffled_string_ls.append(string_ls[idx])
- del string_ls[idx]
+ oip = data['security']['ip']
fileid_dict = {}
- for format in data1['streamtypes']:
- streamfileid = [
- int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
- fileid = ''.join(
- [shuffled_string_ls[i] for i in streamfileid])
- fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
+ for stream in data['stream']:
+ format = stream.get('stream_type')
+ fileid = stream['stream_fileid']
+ fileid_dict[format] = fileid
def get_fileid(format, n):
- fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
+ number = hex(int(str(n), 10))[2:].upper()
+ if len(number) == 1:
+ number = '0' + number
+ streamfileids = fileid_dict[format]
+ fileid = streamfileids[0:8] + number + streamfileids[10:]
return fileid
# get ep
@@ -123,15 +113,15 @@ class YoukuIE(InfoExtractor):
# generate video_urls
video_urls_dict = {}
- for format in data1['streamtypes']:
+ for stream in data['stream']:
+ format = stream.get('stream_type')
video_urls = []
- for dt in data1['segs'][format]:
- n = str(int(dt['no']))
+ for dt in stream['segs']:
+ n = str(stream['segs'].index(dt))
param = {
- 'K': dt['k'],
+ 'K': dt['key'],
'hd': self.get_hd(format),
'myp': 0,
- 'ts': dt['seconds'],
'ypp': 0,
'ctype': 12,
'ev': 1,
@@ -142,7 +132,7 @@ class YoukuIE(InfoExtractor):
video_url = \
'http://k.youku.com/player/getFlvPath/' + \
'sid/' + sid + \
- '_' + str(int(n) + 1).zfill(2) + \
+ '_00' + \
'/st/' + self.parse_ext_l(format) + \
'/fileid/' + get_fileid(format, n) + '?' + \
compat_urllib_parse.urlencode(param)
@@ -153,23 +143,31 @@ class YoukuIE(InfoExtractor):
def get_hd(self, fm):
hd_id_dict = {
+ '3gp': '0',
+ '3gphd': '1',
'flv': '0',
+ 'flvhd': '0',
'mp4': '1',
+ 'mp4hd': '1',
+ 'mp4hd2': '1',
+ 'mp4hd3': '1',
'hd2': '2',
'hd3': '3',
- '3gp': '0',
- '3gphd': '1'
}
return hd_id_dict[fm]
def parse_ext_l(self, fm):
ext_dict = {
+ '3gp': 'flv',
+ '3gphd': 'mp4',
'flv': 'flv',
+ 'flvhd': 'flv',
'mp4': 'mp4',
+ 'mp4hd': 'mp4',
+ 'mp4hd2': 'flv',
+ 'mp4hd3': 'flv',
'hd2': 'flv',
'hd3': 'flv',
- '3gp': 'flv',
- '3gphd': 'mp4'
}
return ext_dict[fm]
@@ -178,9 +176,13 @@ class YoukuIE(InfoExtractor):
'3gp': 'h6',
'3gphd': 'h5',
'flv': 'h4',
+ 'flvhd': 'h4',
'mp4': 'h3',
+ 'mp4hd': 'h3',
+ 'mp4hd2': 'h4',
+ 'mp4hd3': 'h4',
'hd2': 'h2',
- 'hd3': 'h1'
+ 'hd3': 'h1',
}
return _dict[fm]
@@ -188,45 +190,46 @@ class YoukuIE(InfoExtractor):
video_id = self._match_id(url)
def retrieve_data(req_url, note):
- req = sanitized_Request(req_url)
+ headers = {
+ 'Referer': req_url,
+ }
+ self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
+ req = sanitized_Request(req_url, headers=headers)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
raw_data = self._download_json(req, video_id, note=note)
- return raw_data['data'][0]
+
+ return raw_data['data']
video_password = self._downloader.params.get('videopassword', None)
# request basic data
- basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
+ basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
if video_password:
- basic_data_url += '?password=%s' % video_password
-
- data1 = retrieve_data(
- basic_data_url,
- 'Downloading JSON metadata 1')
- data2 = retrieve_data(
- 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
- 'Downloading JSON metadata 2')
-
- error_code = data1.get('error_code')
- if error_code:
- error = data1.get('error')
- if error is not None and '因版权原因无法观看此视频' in error:
+ basic_data_url += '&pwd=%s' % video_password
+
+ data = retrieve_data(basic_data_url, 'Downloading JSON metadata')
+
+ error = data.get('error')
+ if error:
+ error_note = error.get('note')
+ if error_note is not None and '因版权原因无法观看此视频' in error_note:
raise ExtractorError(
'Youku said: Sorry, this video is available in China only', expected=True)
else:
- msg = 'Youku server reported error %i' % error_code
- if error is not None:
- msg += ': ' + error
+ msg = 'Youku server reported error %i' % error.get('code')
+ if error_note is not None:
+ msg += ': ' + error_note
raise ExtractorError(msg)
- title = data1['title']
+ # get video title
+ title = data['video']['title']
# generate video_urls_dict
- video_urls_dict = self.construct_video_urls(data1, data2)
+ video_urls_dict = self.construct_video_urls(data)
# construct info
entries = [{
@@ -235,10 +238,11 @@ class YoukuIE(InfoExtractor):
'formats': [],
# some formats are not available for all parts, we have to detect
# which one has all
- } for i in range(max(len(v) for v in data1['segs'].values()))]
- for fm in data1['streamtypes']:
+ } for i in range(max(len(v.get('segs')) for v in data['stream']))]
+ for stream in data['stream']:
+ fm = stream.get('stream_type')
video_urls = video_urls_dict[fm]
- for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries):
+ for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
entry['formats'].append({
'url': video_url,
'format_id': self.get_format_name(fm),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9b39505ba..4aac2cc03 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -26,6 +26,7 @@ from ..compat import (
from ..utils import (
clean_html,
encode_dict,
+ error_to_compat_str,
ExtractorError,
float_or_none,
get_element_by_attribute,
@@ -33,6 +34,7 @@ from ..utils import (
int_or_none,
orderedSet,
parse_duration,
+ remove_quotes,
remove_start,
sanitized_Request,
smuggle_url,
@@ -395,12 +397,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
+ 'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:782e8651347686cba06e58f71ab51773',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
+ 'creator': 'Icona Pop',
}
},
{
@@ -411,9 +415,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20130703',
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'alt_title': 'Tunnel Vision',
'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
+ 'creator': 'Justin Timberlake',
'age_limit': 18,
}
},
@@ -492,10 +498,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
+ 'alt_title': 'Shake It Off',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
+ 'creator': 'Taylor Swift',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -551,9 +559,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20100430',
'uploader_id': 'deadmau5',
+ 'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'Some Chords',
},
'expected_warnings': [
'DASH manifest missing',
@@ -701,10 +711,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+ 'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf',
+ 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
},
'params': {
'skip_download': True,
@@ -892,7 +904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False)
except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
+ self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {}
sub_lang_list = {}
@@ -1308,6 +1320,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
+ m_music = re.search(
+ r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
+ video_webpage)
+ if m_music:
+ video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+ video_creator = clean_html(m_music.group('creator'))
+ else:
+ video_alt_title = video_creator = None
+
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
@@ -1537,7 +1558,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
+ 'creator': video_creator,
'title': video_title,
+ 'alt_title': video_alt_title,
'thumbnail': video_thumbnail,
'description': video_description,
'categories': video_categories,
@@ -1752,6 +1775,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
},
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
+
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -1825,10 +1852,10 @@ class YoutubeUserIE(YoutubeChannelIE):
return super(YoutubeUserIE, cls).suitable(url)
-class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com user playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'
- IE_NAME = 'youtube:user:playlists'
+class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
+ IE_DESC = 'YouTube.com user/channel playlists'
+ _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
+ IE_NAME = 'youtube:playlists'
_TESTS = [{
'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
@@ -1845,6 +1872,13 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'id': 'igorkle1',
'title': 'Игорь Клейнер',
},
+ }, {
+ 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
+ 'playlist_mincount': 17,
+ 'info_dict': {
+ 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
+ 'title': 'Chem Player',
+ },
}]
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 2191e8b89..a7440c582 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -232,7 +232,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
r'''(?x)
- (?:function\s+%s|[{;]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+ (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)),
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 5ed723bc6..daca5d814 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -52,7 +52,7 @@ class FFmpegPostProcessor(PostProcessor):
def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
+ prefer_ffmpeg = False
self.basename = None
self.probe_basename = None
@@ -60,6 +60,7 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = None
self._versions = None
if self._downloader:
+ prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
location = self._downloader.params.get('ffmpeg_location')
if location is not None:
if not os.path.exists(location):
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 074eb64a7..995b8ed96 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -9,7 +9,7 @@ import subprocess
import sys
from zipimport import zipimporter
-from .compat import compat_str
+from .utils import encode_compat_str
from .version import __version__
@@ -61,7 +61,7 @@ def update_self(to_screen, verbose, opener):
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
except Exception:
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
return
if newversion == __version__:
@@ -74,7 +74,7 @@ def update_self(to_screen, verbose, opener):
versions_info = json.loads(versions_info)
except Exception:
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
if 'signature' not in versions_info:
@@ -123,7 +123,7 @@ def update_self(to_screen, verbose, opener):
urlh.close()
except (IOError, OSError):
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
return
@@ -137,7 +137,7 @@ def update_self(to_screen, verbose, opener):
outf.write(newcontent)
except (IOError, OSError):
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to write the new version')
return
@@ -157,7 +157,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
return # Do not show premature success messages
except (IOError, OSError):
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version')
return
@@ -169,7 +169,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh.close()
except (IOError, OSError):
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
return
@@ -183,7 +183,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
outf.write(newcontent)
except (IOError, OSError):
if verbose:
- to_screen(compat_str(traceback.format_exc()))
+ to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version')
return
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d0606b4bc..1737ac5f6 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1406,6 +1406,15 @@ def remove_end(s, end):
return s
+def remove_quotes(s):
+ if s is None or len(s) < 2:
+ return s
+ for quote in ('"', "'", ):
+ if s[0] == quote and s[-1] == quote:
+ return s[1:-1]
+ return s
+
+
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip('/').split('/')[-1]
@@ -1703,6 +1712,10 @@ def encode_dict(d, encoding='utf-8'):
return dict((encode(k), encode(v)) for k, v in d.items())
+def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
+ return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+
+
US_RATINGS = {
'G': 0,
'PG': 10,
@@ -1797,6 +1810,15 @@ def args_to_str(args):
return ' '.join(shlex_quote(a) for a in args)
+def error_to_compat_str(err):
+ err_str = str(err)
+ # On python 2 error byte string must be decoded with proper
+ # encoding rather than ascii
+ if sys.version_info[0] < 3:
+ err_str = err_str.decode(preferredencoding())
+ return err_str
+
+
def mimetype2ext(mt):
_, _, res = mt.rpartition('/')
@@ -1967,15 +1989,15 @@ def match_filter_func(filter_str):
def parse_dfxp_time_expr(time_expr):
if not time_expr:
- return 0.0
+ return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj:
return float(mobj.group('time_offset'))
- mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
if mobj:
- return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
@@ -2011,10 +2033,15 @@ def dfxp2srt(dfxp_data):
raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)):
- begin_time = parse_dfxp_time_expr(para.attrib['begin'])
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
if not end_time:
- end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
+ if not dur:
+ continue
+ end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % (
index,
srt_subtitles_timecode(begin_time),
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index bd0de9f53..01607693e 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.11.27.1'
+__version__ = '2015.12.18'