about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 4
-rw-r--r-- youtube_dl/compat.py 2
-rw-r--r-- youtube_dl/downloader/common.py 4
-rw-r--r-- youtube_dl/downloader/http.py 2
-rw-r--r-- youtube_dl/downloader/rtmp.py 2
-rw-r--r-- youtube_dl/extractor/__init__.py 9
-rw-r--r-- youtube_dl/extractor/cnn.py 5
-rw-r--r-- youtube_dl/extractor/dailymotion.py 8
-rw-r--r-- youtube_dl/extractor/dhm.py 73
-rw-r--r-- youtube_dl/extractor/douyutv.py 57
-rw-r--r-- youtube_dl/extractor/dumpert.py 56
-rw-r--r-- youtube_dl/extractor/eroprofile.py 52
-rw-r--r-- youtube_dl/extractor/generic.py 32
-rw-r--r-- youtube_dl/extractor/nbc.py 53
-rw-r--r-- youtube_dl/extractor/phoenix.py 40
-rw-r--r-- youtube_dl/extractor/pornhub.py 17
-rw-r--r-- youtube_dl/extractor/safari.py 157
-rw-r--r-- youtube_dl/extractor/slideshare.py 2
-rw-r--r-- youtube_dl/extractor/soundcloud.py 4
-rw-r--r-- youtube_dl/extractor/teamcoco.py 2
-rw-r--r-- youtube_dl/extractor/theplatform.py 2
-rw-r--r-- youtube_dl/extractor/ultimedia.py 5
-rw-r--r-- youtube_dl/extractor/vessel.py 127
-rw-r--r-- youtube_dl/extractor/vimeo.py 10
-rw-r--r-- youtube_dl/extractor/xuite.py 14
-rw-r--r-- youtube_dl/extractor/yahoo.py 15
-rw-r--r-- youtube_dl/extractor/youporn.py 2
-rw-r--r-- youtube_dl/extractor/youtube.py 60
-rw-r--r-- youtube_dl/postprocessor/ffmpeg.py 15
-rw-r--r-- youtube_dl/update.py 4
-rw-r--r-- youtube_dl/utils.py 6
-rw-r--r-- youtube_dl/version.py 2
32 files changed, 743 insertions, 100 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b5ef5e009..640b8c99d 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1701,10 +1701,10 @@ class YoutubeDL(object):
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')
- except:
+ except Exception:
try:
sys.exc_clear()
- except:
+ except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index b2bf149ef..973bcd320 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -389,7 +389,7 @@ else:
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
lines, columns = map(int, out.split())
- except:
+ except Exception:
pass
return _terminal_size(columns, lines)
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 8ed5c19a6..a0fc5ead0 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -204,7 +204,7 @@ class FileDownloader(object):
return
try:
os.utime(filename, (time.time(), filetime))
- except:
+ except Exception:
pass
return filetime
@@ -318,7 +318,7 @@ class FileDownloader(object):
)
continuedl_and_exists = (
- self.params.get('continuedl', False) and
+ self.params.get('continuedl', True) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 4047d7167..d136bebd1 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
open_mode = 'wb'
if resume_len != 0:
- if self.params.get('continuedl', False):
+ if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index 89e98ae61..ddf5724ae 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
protocol = info_dict.get('rtmp_protocol', None)
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
- continue_dl = info_dict.get('continuedl', False)
+ continue_dl = info_dict.get('continuedl', True)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3011b784d..f3ebe4ee2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -106,6 +106,7 @@ from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
+from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
@@ -114,6 +115,7 @@ from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
+from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
@@ -309,6 +311,8 @@ from .nba import NBAIE
from .nbc import (
NBCIE,
NBCNewsIE,
+ NBCSportsIE,
+ NBCSportsVPlayerIE,
)
from .ndr import NDRIE
from .ndtv import NDTVIE
@@ -420,6 +424,10 @@ from .rutube import (
)
from .rutv import RUTVIE
from .sandia import SandiaIE
+from .safari import (
+ SafariIE,
+ SafariCourseIE,
+)
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
@@ -552,6 +560,7 @@ from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
+from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 90ea07438..0a77e951c 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,9 @@ class CNNIE(InfoExtractor):
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
}
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 4f67c3aac..47d58330b 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
- request.add_header('Cookie', 'family_filter=off')
- request.add_header('Cookie', 'ff=off')
+ request.add_header('Cookie', 'family_filter=off; ff=off')
return request
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
+ embed_request = self._build_request(embed_url)
+ embed_page = self._download_webpage(
+ embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py
new file mode 100644
index 000000000..3ed1f1663
--- /dev/null
+++ b/youtube_dl/extractor/dhm.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+ """Extractor for film pages under dhm.de/filmarchiv/ (Deutsches Historisches Museum)."""
+ IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+ # The id is the last non-empty path segment below /filmarchiv/.
+ _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+ 'md5': '11c475f670209bf6acca0b2b7ef51827',
+ 'info_dict': {
+ 'id': 'the-marshallplan-at-work-in-west-germany',
+ 'ext': 'flv',
+ 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+ 'description': 'md5:1fabd480c153f97b07add61c44407c82',
+ 'duration': 660,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+ 'md5': '09890226332476a3e3f6f2cb74734aa5',
+ 'info_dict': {
+ 'id': 'rolle-1',
+ 'ext': 'flv',
+ 'title': 'ROLLE 1',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ """Return the info dict (id, url, title, description, duration, thumbnail) for a film page."""
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ # The page references its playlist through a "file: '<url>'" setting.
+ playlist_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+ playlist = self._download_xml(playlist_url, video_id)
+
+ # First <track> of the XSPF playlist (namespace http://xspf.org/ns/0/).
+ # NOTE(review): find() yields None when the playlist has no track and the
+ # result is handed to xpath_text unchecked - confirm this cannot happen.
+ track = playlist.find(
+ './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+ # The media location is mandatory (fatal=True); the image is optional.
+ video_url = xpath_text(
+ track, './{http://xspf.org/ns/0/}location',
+ 'video url', fatal=True)
+ thumbnail = xpath_text(
+ track, './{http://xspf.org/ns/0/}image',
+ 'thumbnail')
+
+ # Title: dc:title attribute first, the page <title> as second pattern.
+ title = self._search_regex(
+ [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+ webpage, 'title').strip()
+ # Description and duration are looked up with default=None, i.e. optional.
+ description = self._html_search_regex(
+ r'<p><strong>Description:</strong>(.+?)</p>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+ webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
index d7956e6e4..479430c51 100644
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -1,19 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
+import time
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
class DouyuTVIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'info_dict': {
- 'id': 'iseven',
+ 'id': '17732',
+ 'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 'md5:9e525642c25a0a24302869937cf69d17',
+ 'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
@@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor):
'params': {
'skip_download': True,
}
- }
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'douyu小漠',
+ 'uploader_id': '3769985',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if video_id.isdigit():
+ room_id = video_id
+ else:
+ page = self._download_webpage(url, video_id)
+ room_id = self._html_search_regex(
+ r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+ prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+ room_id, int(time.time()))
+
+ auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
config = self._download_json(
- 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
+ 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+ video_id)
data = config['data']
error_code = config.get('error', 0)
- show_status = data.get('show_status')
if error_code is not 0:
- raise ExtractorError(
- 'Server reported error %i' % error_code, expected=True)
+ error_desc = 'Server reported error %i' % error_code
+ if isinstance(data, (compat_str, compat_basestring)):
+ error_desc += ': ' + data
+ raise ExtractorError(error_desc, expected=True)
+ show_status = data.get('show_status')
# 1 = live, 2 = offline
if show_status == '2':
raise ExtractorError(
@@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor):
base_url = data['rtmp_url']
live_path = data['rtmp_live']
- title = self._live_title(data['room_name'])
+ title = self._live_title(unescapeHTML(data['room_name']))
description = data.get('show_details')
thumbnail = data.get('room_src')
@@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor):
self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': room_id,
+ 'display_id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
new file mode 100644
index 000000000..e43bc81b2
--- /dev/null
+++ b/youtube_dl/extractor/dumpert.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+ """Extractor for dumpert.nl mediabase pages."""
+ # The id keeps both path components, e.g. '6646981/951bc60f'.
+ _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+ _TEST = {
+ 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ """Return the info dict (id, title, description, thumbnail, formats) for a mediabase page."""
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # The data-files attribute holds base64-encoded JSON.
+ files_base64 = self._search_regex(
+ r'data-files="([^"]+)"', webpage, 'data files')
+
+ files = self._parse_json(
+ base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+ video_id)
+
+ # presumably ranks format ids by their position in this list - confirm against utils.qualities
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+ # Every key except 'still' is treated as a format id mapping to a media URL.
+ formats = [{
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ } for format_id, video_url in files.items() if format_id != 'still']
+ self._sort_formats(formats)
+
+ # <meta> tags are tried first, Open Graph values are the fallback.
+ title = self._html_search_meta(
+ 'title', webpage) or self._og_search_title(webpage)
+ description = self._html_search_meta(
+ 'description', webpage) or self._og_search_description(webpage)
+ # The 'still' entry of the decoded JSON doubles as the thumbnail.
+ thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats
+ }
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
index 79e2fbd39..0cbca90b0 100644
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dl/extractor/eroprofile.py
@@ -1,11 +1,17 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
class EroProfileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
- _TEST = {
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
'info_dict': {
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _login(self):
+ """Log in with the configured credentials; do nothing when no username is set.
+
+ Raises ExtractorError (expected) when the response reports wrong credentials.
+ """
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ # NOTE(review): the credentials are sent in the query string of an
+ # http:// URL (_LOGIN_URL ends in '?') - confirm the site offers nothing safer.
+ query = compat_urllib_parse.urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ # Third positional argument (False) is the download note.
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
+ # The login response references a script URL, which is fetched next.
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
+
+ def _real_initialize(self):
+ """Initialization hook: attempt the login (a no-op without a username)."""
+ self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ raise ExtractorError(
+ 'This video requires login. Please specify a username and password and try again.', expected=True)
+
video_id = self._search_regex(
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 8a49b0b54..2ff002643 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -29,6 +29,7 @@ from ..utils import (
xpath_text,
)
from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
@@ -620,6 +621,16 @@ class GenericIE(InfoExtractor):
'age_limit': 0,
},
},
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ },
+ },
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -629,6 +640,16 @@ class GenericIE(InfoExtractor):
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ },
}
]
@@ -1236,6 +1257,17 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'Pladform')
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 3645d3033..ecd0ac8b1 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -14,7 +14,7 @@ from ..utils import (
class NBCIE(InfoExtractor):
- _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+ _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
return self.url_result(theplatform_url)
+class NBCSportsVPlayerIE(InfoExtractor):
+ """Extractor for vplayer.nbcsports.com player pages."""
+ _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
+ }, {
+ 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ """Return the src of the first vplayer.nbcsports.com iframe in webpage.
+
+ Falls through (returning None) when no such iframe is present.
+ """
+ iframe_m = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
+ def _real_extract(self, url):
+ """Hand the page's Open Graph video URL over to the 'ThePlatform' extractor."""
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._og_search_video_url(webpage)
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+ """Extractor for www.nbcsports.com article pages that embed the NBC Sports vplayer."""
+ # Does not include https because its certificate is invalid
+ _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'flv',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ }
+ }
+
+ def _real_extract(self, url):
+ """Delegate to 'NBCSportsVPlayer' with the vplayer iframe URL found in the page."""
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ # NOTE(review): _extract_url returns None when the page has no vplayer
+ # iframe, and that None is passed to url_result unchecked.
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py
index a20672c0c..46cebc0d7 100644
--- a/youtube_dl/extractor/phoenix.py
+++ b/youtube_dl/extractor/phoenix.py
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
class PhoenixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.phoenix.de/content/884301',
- 'md5': 'ed249f045256150c92e72dbb70eadec6',
- 'info_dict': {
- 'id': '884301',
- 'ext': 'mp4',
- 'title': 'Michael Krons mit Hans-Werner Sinn',
- 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
- 'upload_date': '20141025',
- 'uploader': 'Im Dialog',
- }
- }
+ _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+ (?:
+ phoenix/die_sendungen/(?:[^/]+/)?
+ )?
+ (?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.phoenix.de/content/884301',
+ 'md5': 'ed249f045256150c92e72dbb70eadec6',
+ 'info_dict': {
+ 'id': '884301',
+ 'ext': 'mp4',
+ 'title': 'Michael Krons mit Hans-Werner Sinn',
+ 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+ 'upload_date': '20141025',
+ 'uploader': 'Im Dialog',
+ }
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3a27e3789..0c8b731cf 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
}
def _extract_count(self, pattern, webpage, name):
- count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
- if count:
- count = str_to_int(count)
- return count
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
- view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
- like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
- dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(
+ r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(
+ r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count(
- r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
new file mode 100644
index 000000000..10251f29e
--- /dev/null
+++ b/youtube_dl/extractor/safari.py
@@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+    """Base class holding the safaribooksonline.com login logic."""
+
+    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+    _NETRC_MACHINE = 'safari'
+
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_FORMAT = 'json'
+
+    # Set on SafariBaseIE itself once a login has succeeded, so every
+    # subclass instance sees it.
+    LOGGED_IN = False
+
+    def _real_initialize(self):
+        """Log in unless a previous extractor instance already did."""
+        # We only need to log in once for courses or individual videos
+        if not self.LOGGED_IN:
+            self._login()
+            SafariBaseIE.LOGGED_IN = True
+
+    def _login(self):
+        """Submit the login form with the configured credentials.
+
+        Raises ExtractorError (expected) when no username is configured or
+        when the response lacks the "Sign Out" link.
+        """
+        (username, password) = self._get_login_info()
+        if username is None:
+            raise ExtractorError(
+                self._ACCOUNT_CREDENTIALS_HINT,
+                expected=True)
+
+        # Work on a copy: std_headers is an object imported from ..utils, so
+        # adding Referer in place would change it for every other user of
+        # that dict, not just for this login request.
+        headers = std_headers.copy()
+        if 'Referer' not in headers:
+            headers['Referer'] = self._LOGIN_URL
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None,
+            'Downloading login form')
+
+        # The form carries a CSRF token that must be echoed back.
+        csrf = self._html_search_regex(
+            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+            login_page, 'csrf token')
+
+        login_form = {
+            'csrfmiddlewaretoken': csrf,
+            'email': username,
+            'password1': password,
+            'login': 'Sign In',
+            'next': '',
+        }
+
+        # urlencode() yields text; a POST body has to be bytes on Python 3.
+        post_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, post_data, headers=headers)
+        login_page = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+            raise ExtractorError(
+                'Login failed; make sure your credentials are correct and try again.',
+                expected=True)
+
+        self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+ """Extractor for a single safaribooksonline.com video part (partNN.html)."""
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ # Accepts both library/view/<slug>/ and api/v1/book/ style URLs.
+ _VALID_URL = r'''(?x)https?://
+ (?:www\.)?safaribooksonline\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book
+ )/
+ (?P<course_id>\d+)/
+ (?:chapter(?:-content)?/)?
+ (?P<part>part\d+)\.html
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+ 'info_dict': {
+ 'id': '2842601850001',
+ 'ext': 'mp4',
+ 'title': 'Introduction',
+ },
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ """Delegate to 'Brightcove' with the Brightcove URL embedded in the chapter content.
+
+ Raises ExtractorError (expected) when no Brightcove URL is found.
+ """
+ mobj = re.match(self._VALID_URL, url)
+ course_id = mobj.group('course_id')
+ part = mobj.group('part')
+
+ # Whatever URL form was given, the chapter-content API page is fetched.
+ webpage = self._download_webpage(
+ '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+ part)
+
+ bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if not bc_url:
+ raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+ # The original URL travels along as a smuggled 'Referer' entry.
+ return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+ """Extractor turning a safaribooksonline.com course into a playlist of its chapters."""
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ # Matches the course root only: the numeric id must be followed by an
+ # optional slash and then '#', '?' or the end of the URL.
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ """Return a playlist result with one 'Safari' entry per chapter of the course.
+
+ Raises ExtractorError (expected) when the course JSON has no 'chapters' key.
+ """
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ # presumably each chapter is a URL string handled by SafariIE - confirm against the API response
+ entries = [
+ self.url_result(chapter, 'Safari')
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py
index 9f79ff5c1..0b717a1e4 100644
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
- r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+ r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 9d4505972..316b2c90f 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -242,7 +242,7 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
- _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -287,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE):
- _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 7cb06f351..a46a7ecba 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -54,7 +54,7 @@ class TeamcocoIE(InfoExtractor):
embed_url, video_id, 'Downloading embed page')
player_data = self._parse_json(self._search_regex(
- r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
+ r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id)
data = self._parse_json(
base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index feac666f7..0e3e627f4 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -92,7 +92,7 @@ class ThePlatformIE(InfoExtractor):
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
- if n.attrib.get('title') == 'Geographic Restriction')
+ if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
index 06554a1be..96c809eaf 100644
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -42,7 +42,6 @@ class UltimediaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
deliver_url = self._search_regex(
@@ -81,8 +80,8 @@ class UltimediaIE(InfoExtractor):
title = clean_html((
self._html_search_regex(
r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
- webpage, 'title', default=None)
- or self._search_regex(
+ webpage, 'title', default=None) or
+ self._search_regex(
r"var\s+nameVideo\s*=\s*'([^']+)'",
deliver_page, 'title')))
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
new file mode 100644
index 000000000..6215f0642
--- /dev/null
+++ b/youtube_dl/extractor/vessel.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+ _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+ _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+ _NETRC_MACHINE = 'vessel'
+ _TEST = {
+ 'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+ 'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+ 'info_dict': {
+ 'id': 'HDN7G5UMs',
+ 'ext': 'mp4',
+ 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20150317',
+ 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+ 'timestamp': int,
+ },
+ }
+
+ @staticmethod
+ def make_json_request(url, data):
+ payload = json.dumps(data).encode('utf-8')
+ req = compat_urllib_request.Request(url, payload)
+ req.add_header('Content-Type', 'application/json; charset=utf-8')
+ return req
+
+ @staticmethod
+ def find_assets(data, asset_type):
+ for asset in data.get('assets', []):
+ if asset.get('type') == asset_type:
+ yield asset
+
+ def _check_access_rights(self, data):
+ access_info = data.get('__view', {})
+ if not access_info.get('allow_access', True):
+ err_code = access_info.get('error_code') or ''
+ if err_code == 'ITEM_PAID_ONLY':
+ raise ExtractorError(
+ 'This video requires subscription.', expected=True)
+ else:
+ raise ExtractorError(
+ 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+ self.report_login()
+ data = {
+ 'client_id': 'web',
+ 'type': 'password',
+ 'user_key': username,
+ 'password': password,
+ }
+ login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+ self._download_webpage(login_request, None, False, 'Wrong login info')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ data = self._parse_json(self._search_regex(
+ r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+ asset_id = data['model']['data']['id']
+
+ req = VesselIE.make_json_request(
+ self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+ data = self._download_json(req, video_id)
+
+ self._check_access_rights(data)
+
+ try:
+ video_asset = next(VesselIE.find_assets(data, 'video'))
+ except StopIteration:
+ raise ExtractorError('No video assets found')
+
+ formats = []
+ for f in video_asset.get('sources', []):
+ if f['name'] == 'hls-index':
+ formats.extend(self._extract_m3u8_formats(
+ f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+ else:
+ formats.append({
+ 'format_id': f['name'],
+ 'tbr': f.get('bitrate'),
+ 'height': f.get('height'),
+ 'width': f.get('width'),
+ 'url': f['location'],
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for im_asset in VesselIE.find_assets(data, 'image'):
+ thumbnails.append({
+ 'url': im_asset['location'],
+ 'width': im_asset.get('width', 0),
+ 'height': im_asset.get('height', 0),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': data.get('short_description'),
+ 'duration': data.get('duration'),
+ 'comment_count': data.get('comment_count'),
+ 'like_count': data.get('like_count'),
+ 'view_count': data.get('view_count'),
+ 'timestamp': parse_iso8601(data.get('released_at')),
+ }
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index bd09652cd..28bcc89cd 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -244,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*({.+?});', webpage,
+ 'vimeo config', default=None)
+ if vimeo_config:
+ seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
# Extract the config JSON
try:
try:
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index 4971965f9..81d885fdc 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def base64_decode_utf8(data):
+ return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+ @staticmethod
+ def base64_encode_utf8(data):
+ return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
def _extract_flv_config(self, media_id):
- base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+ base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
- prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+ prop_id = self.base64_decode_utf8(prop.attrib['id'])
# CDATA may be empty in flv config
if not prop.text:
continue
- encoded_content = base64.b64decode(prop.text).decode('utf-8')
+ encoded_content = self.base64_decode_utf8(prop.text)
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
return prop_dict
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 97dbac4cc..b777159c5 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -17,6 +17,8 @@ from ..utils import (
int_or_none,
)
+from .nbc import NBCSportsVPlayerIE
+
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
@@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
}
]
@@ -151,6 +162,10 @@ class YahooIE(InfoExtractor):
items = json.loads(items_json)
video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage)
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index e4c855ee0..6abe72f73 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -52,7 +52,7 @@ class YouPornIE(InfoExtractor):
webpage, 'JSON parameters')
try:
params = json.loads(json_params)
- except:
+ except ValueError:
raise ExtractorError('Invalid JSON')
self.report_extraction(video_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 27c8c4453..5488101e1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1263,27 +1263,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, title)
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- if 'v' in query_dict:
- video_id = query_dict['v'][0]
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
- if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
+ def _extract_playlist(self, playlist_id):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
@@ -1327,6 +1307,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ if 'v' in query_dict:
+ video_id = query_dict['v'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ return self._extract_playlist(playlist_id)
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
@@ -1643,21 +1646,26 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:recommended'
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
- _FEED_NAME = 'watch_later'
- _PLAYLIST_TITLE = 'Youtube Watch Later'
- _PERSONAL_FEED = True
+ _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+ _TESTS = [] # override PlaylistIE tests
+
+ def _real_extract(self, url):
+ return self._extract_playlist('WL')
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:history'
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index b6f51cfd5..55adf9685 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import io
import os
import subprocess
-import sys
import time
@@ -269,19 +268,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
else:
self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
- except:
- etype, e, tb = sys.exc_info()
- if isinstance(e, AudioConversionError):
- msg = 'audio conversion failed: ' + e.msg
- else:
- msg = 'error running ' + self.basename
- raise PostProcessingError(msg)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
- except:
+ except Exception:
self._downloader.report_warning('Cannot update utime of audio file')
information['filepath'] = new_path
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index d8be4049f..de3169eef 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -65,7 +65,7 @@ def update_self(to_screen, verbose):
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
@@ -78,7 +78,7 @@ def update_self(to_screen, verbose):
try:
versions_info = opener.open(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 472d4df41..90e0ed9ab 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -75,7 +75,7 @@ def preferredencoding():
try:
pref = locale.getpreferredencoding()
'TEST'.encode(pref)
- except:
+ except Exception:
pref = 'UTF-8'
return pref
@@ -127,7 +127,7 @@ def write_json_file(obj, fn):
except OSError:
pass
os.rename(tf.name, fn)
- except:
+ except Exception:
try:
os.remove(tf.name)
except OSError:
@@ -348,7 +348,7 @@ def _htmlentity_transform(entity):
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 039ceadf2..dd93e295a 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.03.24'
+__version__ = '2015.03.28'