aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/FileDownloader.py89
-rw-r--r--youtube_dl/YoutubeDL.py106
-rw-r--r--youtube_dl/__init__.py4
-rw-r--r--youtube_dl/extractor/__init__.py11
-rw-r--r--youtube_dl/extractor/addanime.py2
-rw-r--r--youtube_dl/extractor/appletrailers.py2
-rw-r--r--youtube_dl/extractor/archiveorg.py2
-rw-r--r--youtube_dl/extractor/arte.py39
-rw-r--r--youtube_dl/extractor/auengine.py2
-rw-r--r--youtube_dl/extractor/bambuser.py2
-rw-r--r--youtube_dl/extractor/bliptv.py39
-rw-r--r--youtube_dl/extractor/bloomberg.py2
-rw-r--r--youtube_dl/extractor/comedycentral.py2
-rw-r--r--youtube_dl/extractor/common.py33
-rw-r--r--youtube_dl/extractor/cspan.py2
-rw-r--r--youtube_dl/extractor/dailymotion.py5
-rw-r--r--youtube_dl/extractor/dreisat.py2
-rw-r--r--youtube_dl/extractor/eighttracks.py2
-rw-r--r--youtube_dl/extractor/exfm.py2
-rw-r--r--youtube_dl/extractor/faz.py2
-rw-r--r--youtube_dl/extractor/fktv.py4
-rw-r--r--youtube_dl/extractor/francetv.py106
-rw-r--r--youtube_dl/extractor/gamekings.py2
-rw-r--r--youtube_dl/extractor/gametrailers.py3
-rw-r--r--youtube_dl/extractor/generic.py31
-rw-r--r--youtube_dl/extractor/hotnewhiphop.py2
-rw-r--r--youtube_dl/extractor/ign.py2
-rw-r--r--youtube_dl/extractor/imdb.py4
-rw-r--r--youtube_dl/extractor/instagram.py2
-rw-r--r--youtube_dl/extractor/jukebox.py2
-rw-r--r--youtube_dl/extractor/liveleak.py2
-rw-r--r--youtube_dl/extractor/livestream.py2
-rw-r--r--youtube_dl/extractor/metacafe.py48
-rw-r--r--youtube_dl/extractor/mixcloud.py11
-rw-r--r--youtube_dl/extractor/muzu.py2
-rw-r--r--youtube_dl/extractor/myspass.py2
-rw-r--r--youtube_dl/extractor/ninegag.py43
-rw-r--r--youtube_dl/extractor/orf.py2
-rw-r--r--youtube_dl/extractor/pbs.py2
-rw-r--r--youtube_dl/extractor/pyvideo.py51
-rw-r--r--youtube_dl/extractor/rutube.py2
-rw-r--r--youtube_dl/extractor/slashdot.py2
-rw-r--r--youtube_dl/extractor/smotri.py108
-rw-r--r--youtube_dl/extractor/soundcloud.py4
-rw-r--r--youtube_dl/extractor/space.py2
-rw-r--r--youtube_dl/extractor/stanfordoc.py22
-rw-r--r--youtube_dl/extractor/tf1.py2
-rw-r--r--youtube_dl/extractor/theplatform.py68
-rw-r--r--youtube_dl/extractor/unistra.py2
-rw-r--r--youtube_dl/extractor/veehd.py2
-rw-r--r--youtube_dl/extractor/vevo.py4
-rw-r--r--youtube_dl/extractor/vice.py2
-rw-r--r--youtube_dl/extractor/viddler.py5
-rw-r--r--youtube_dl/extractor/videofyme.py2
-rw-r--r--youtube_dl/extractor/vimeo.py56
-rw-r--r--youtube_dl/extractor/wat.py2
-rw-r--r--youtube_dl/extractor/wimp.py23
-rw-r--r--youtube_dl/extractor/wistia.py55
-rw-r--r--youtube_dl/extractor/xhamster.py4
-rw-r--r--youtube_dl/extractor/yahoo.py2
-rw-r--r--youtube_dl/extractor/youjizz.py2
-rw-r--r--youtube_dl/extractor/youtube.py173
-rw-r--r--youtube_dl/utils.py14
-rw-r--r--youtube_dl/version.py2
64 files changed, 938 insertions, 295 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 3ff9716b3..47124932f 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -204,11 +204,27 @@ class FileDownloader(object):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
+ def _report_progress_status(self, msg, is_last_line=False):
+ fullmsg = u'[download] ' + msg
+ if self.params.get('progress_with_newline', False):
+ self.to_screen(fullmsg)
+ else:
+ if os.name == 'nt':
+ prev_len = getattr(self, '_report_progress_prev_line_length',
+ 0)
+ if prev_len > len(fullmsg):
+ fullmsg += u' ' * (prev_len - len(fullmsg))
+ self._report_progress_prev_line_length = len(fullmsg)
+ clear_line = u'\r'
+ else:
+ clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
+ self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
+ self.to_console_title(u'youtube-dl ' + msg)
+
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
- clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
if eta is not None:
eta_str = self.format_eta(eta)
else:
@@ -218,14 +234,29 @@ class FileDownloader(object):
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
- if self.params.get('progress_with_newline', False):
- self.to_screen(u'[download] %s of %s at %s ETA %s' %
- (percent_str, data_len_str, speed_str, eta_str))
+
+ msg = (u'%s of %s at %s ETA %s' %
+ (percent_str, data_len_str, speed_str, eta_str))
+ self._report_progress_status(msg)
+
+ def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
+ if self.params.get('noprogress', False):
+ return
+ downloaded_str = format_bytes(downloaded_data_len)
+ speed_str = self.format_speed(speed)
+ elapsed_str = FileDownloader.format_seconds(elapsed)
+ msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
+ self._report_progress_status(msg)
+
+ def report_finish(self, data_len_str, tot_time):
+ """Report download finished."""
+ if self.params.get('noprogress', False):
+ self.to_screen(u'[download] Download completed')
else:
- self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
- (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
- self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
- (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
+ self._report_progress_status(
+ (u'100%% of %s in %s' %
+ (data_len_str, self.format_seconds(tot_time))),
+ is_last_line=True)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@@ -246,16 +277,7 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
- def report_finish(self, data_len_str, tot_time):
- """Report download finished."""
- if self.params.get('noprogress', False):
- self.to_screen(u'[download] Download completed')
- else:
- clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
- self.to_screen(u'\r%s[download] 100%% of %s in %s' %
- (clear_line, data_len_str, self.format_seconds(tot_time)))
-
- def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
+ def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
@@ -301,11 +323,27 @@ class FileDownloader(object):
'eta': eta,
'speed': speed,
})
- elif self.params.get('verbose', False):
- if not cursor_in_new_line:
- self.to_screen(u'')
- cursor_in_new_line = True
- self.to_screen(u'[rtmpdump] '+line)
+ else:
+ # no percent for live streams
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1))*1024)
+ time_now = time.time()
+ speed = self.calc_speed(start, time_now, downloaded_data_len)
+ self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
+ cursor_in_new_line = False
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ 'speed': speed,
+ })
+ elif self.params.get('verbose', False):
+ if not cursor_in_new_line:
+ self.to_screen(u'')
+ cursor_in_new_line = True
+ self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
@@ -338,6 +376,8 @@ class FileDownloader(object):
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
+ if conn:
+ basic_args += ['--conn', conn]
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
@@ -479,7 +519,8 @@ class FileDownloader(object):
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None),
- info_dict.get('rtmp_live', False))
+ info_dict.get('rtmp_live', False),
+ info_dict.get('rtmp_conn', None))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 77339dddf..79d5c7e5e 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -22,7 +22,6 @@ if os.name == 'nt':
from .utils import (
compat_cookiejar,
compat_http_client,
- compat_print,
compat_str,
compat_urllib_error,
compat_urllib_request,
@@ -133,6 +132,8 @@ class YoutubeDL(object):
nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
+ bidi_workaround: Work around buggy terminals without bidirectional text
+ support, using fribidi
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -156,8 +157,45 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+ self._err_file = sys.stderr
self.params = {} if params is None else params
+ # Pipe messages through fribidi
+ if params.get('bidi_workaround', False):
+ # fribidi does not support ungetting, so force newlines
+ params['progress_with_newline'] = True
+
+ for fid in ['_screen_file', '_err_file']:
+ class FribidiOut(object):
+ def __init__(self, outfile, errfile):
+ self.outfile = outfile
+ self.process = subprocess.Popen(
+ ['fribidi'],
+ stdin=subprocess.PIPE,
+ stdout=outfile,
+ stderr=errfile)
+
+ def write(self, s):
+ res = self.process.stdin.write(s)
+ self.flush()
+ return res
+
+ def flush(self):
+ return self.process.stdin.flush()
+
+ def isatty(self):
+ return self.outfile.isatty()
+
+ try:
+ vout = FribidiOut(getattr(self, fid), self._err_file)
+ setattr(self, fid, vout)
+ except OSError as ose:
+ if ose.errno == 2:
+ self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ break
+ else:
+ raise
+
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
and not params['restrictfilenames']):
@@ -207,9 +245,13 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
+ return self.to_stdout(message, skip_eol, check_quiet=True)
+
+ def to_stdout(self, message, skip_eol=False, check_quiet=False):
+ """Print message to stdout if not in quiet mode."""
if self.params.get('logger'):
self.params['logger'].debug(message)
- elif not self.params.get('quiet', False):
+ elif not check_quiet or not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
@@ -221,9 +263,7 @@ class YoutubeDL(object):
self.params['logger'].error(message)
else:
output = message + u'\n'
- if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
- output = output.encode(preferredencoding())
- sys.stderr.write(output)
+ write_string(output, self._err_file)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -294,7 +334,7 @@ class YoutubeDL(object):
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
- if sys.stderr.isatty() and os.name != 'nt':
+ if self._err_file.isatty() and os.name != 'nt':
_msg_header = u'\033[0;33mWARNING:\033[0m'
else:
_msg_header = u'WARNING:'
@@ -306,7 +346,7 @@ class YoutubeDL(object):
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- if sys.stderr.isatty() and os.name != 'nt':
+ if self._err_file.isatty() and os.name != 'nt':
_msg_header = u'\033[0;31mERROR:\033[0m'
else:
_msg_header = u'ERROR:'
@@ -405,7 +445,8 @@ class YoutubeDL(object):
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+ def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ process=True):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
@@ -441,7 +482,10 @@ class YoutubeDL(object):
'webpage_url': url,
'extractor_key': ie.ie_key(),
})
- return self.process_ie_result(ie_result, download, extra_info)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
+ else:
+ return ie_result
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
@@ -474,8 +518,33 @@ class YoutubeDL(object):
download,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
+ elif result_type == 'url_transparent':
+ # Use the information from the embedding page
+ info = self.extract_info(
+ ie_result['url'], ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ def make_result(embedded_info):
+ new_result = ie_result.copy()
+ for f in ('_type', 'url', 'ext', 'player_url', 'formats',
+ 'entries', 'urlhandle', 'ie_key', 'duration',
+ 'subtitles', 'annotations', 'format',
+ 'thumbnail', 'thumbnails'):
+ if f in new_result:
+ del new_result[f]
+ if f in embedded_info:
+ new_result[f] = embedded_info[f]
+ return new_result
+ new_result = make_result(info)
+
+ assert new_result.get('_type') != 'url_transparent'
+ if new_result.get('_type') == 'compat_list':
+ new_result['entries'] = [
+ make_result(e) for e in new_result['entries']]
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist':
-
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -666,22 +735,23 @@ class YoutubeDL(object):
# Forced printings
if self.params.get('forcetitle', False):
- compat_print(info_dict['fulltitle'])
+ self.to_stdout(info_dict['fulltitle'])
if self.params.get('forceid', False):
- compat_print(info_dict['id'])
+ self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
- compat_print(info_dict['url'] + info_dict.get('play_path', u''))
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
- compat_print(info_dict['thumbnail'])
+ self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
- compat_print(info_dict['description'])
+ self.to_stdout(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
- compat_print(filename)
+ self.to_stdout(filename)
if self.params.get('forceformat', False):
- compat_print(info_dict['format'])
+ self.to_stdout(info_dict['format'])
if self.params.get('forcejson', False):
- compat_print(json.dumps(info_dict))
+ info_dict['_filename'] = filename
+ self.to_stdout(json.dumps(info_dict))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index b0d9a6763..6e9dd68c4 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -204,6 +204,9 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=optparse.SUPPRESS_HELP)
+ general.add_option(
+ '--bidi-workaround', dest='bidi_workaround', action='store_true',
+ help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
selection.add_option('--playlist-start',
@@ -687,6 +690,7 @@ def _real_main(argv=None):
'nocheckcertificate': opts.no_check_certificate,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
+ 'bidi_workaround': opts.bidi_workaround,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bd996483b..3f740baa1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -8,6 +8,7 @@ from .arte import (
ArteTVPlus7IE,
ArteTVCreativeIE,
ArteTVFutureIE,
+ ArteTVDDCIE,
)
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
@@ -56,7 +57,7 @@ from .flickr import FlickrIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
- France2IE,
+ FranceTVIE,
GenerationQuoiIE
)
from .freesound import FreesoundIE
@@ -102,6 +103,7 @@ from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
+from .ninegag import NineGagIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
@@ -110,6 +112,7 @@ from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
+from .pyvideo import PyvideoIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
@@ -125,6 +128,7 @@ from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
+ SmotriBroadcastIE,
)
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
@@ -144,6 +148,7 @@ from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tf1 import TF1IE
+from .theplatform import ThePlatformIE
from .thisav import ThisAVIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
@@ -168,6 +173,8 @@ from .vimeo import (
VimeoIE,
VimeoChannelIE,
VimeoUserIE,
+ VimeoAlbumIE,
+ VimeoGroupsIE,
)
from .vine import VineIE
from .viki import VikiIE
@@ -176,6 +183,7 @@ from .wat import WatIE
from .websurg import WeBSurgIE
from .weibo import WeiboIE
from .wimp import WimpIE
+from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
@@ -203,6 +211,7 @@ from .youtube import (
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
+ YoutubeTopListIE,
)
from .zdf import ZDFIE
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
index b99d4b966..a3a1b999d 100644
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -13,7 +13,7 @@ from ..utils import (
class AddAnimeIE(InfoExtractor):
- _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+ _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'AddAnime'
_TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 4befff394..a527f10de 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -10,7 +10,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 3ae0aebb1..8bb546410 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -11,7 +11,7 @@ from ..utils import (
class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos'
- _VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = {
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 8b62ee774..4b7bef775 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -10,6 +10,7 @@ from ..utils import (
determine_ext,
get_element_by_id,
compat_str,
+ get_element_by_attribute,
)
# There are different sources of video in arte.tv, the extraction process
@@ -17,8 +18,8 @@ from ..utils import (
# add tests.
class ArteTvIE(InfoExtractor):
- _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
- _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
+ _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
+ _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
@@ -142,7 +143,9 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+ return self._extract_from_json_url(json_url, video_id, lang)
+ def _extract_from_json_url(self, json_url, video_id, lang):
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
self.report_extraction(video_id)
info = json.loads(json_info)
@@ -257,3 +260,35 @@ class ArteTVFutureIE(ArteTVPlus7IE):
webpage = self._download_webpage(url, anchor_id)
row = get_element_by_id(anchor_id, webpage)
return self._extract_from_webpage(row, anchor_id, lang)
+
+
+class ArteTVDDCIE(ArteTVPlus7IE):
+ IE_NAME = u'arte.tv:ddc'
+ _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
+
+ _TEST = {
+ u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
+ u'file': u'049881-009_PLUS7-D.flv',
+ u'info_dict': {
+ u'title': u'Mit offenen Karten',
+ u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
+ u'upload_date': u'20131207',
+ },
+ u'params': {
+ # rtmp download
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id, lang = self._extract_url_info(url)
+ if lang == 'folge':
+ lang = 'de'
+ elif lang == 'emission':
+ lang = 'fr'
+ webpage = self._download_webpage(url, video_id)
+ scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
+ script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
+ javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
+ json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
+ return self._extract_from_json_url(json_url, video_id, lang)
diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py
index 95c038003..bcccc0b7a 100644
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
}
}
- _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
+ _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
index b80508efe..d48c0c38d 100644
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@@ -54,7 +54,7 @@ class BambuserIE(InfoExtractor):
class BambuserChannelIE(InfoExtractor):
IE_NAME = u'bambuser:channel'
- _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+ _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 493504f75..5e33a69df 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor):
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
urlp = compat_urllib_parse_urlparse(url)
if urlp.path.startswith('/play/'):
- request = compat_urllib_request.Request(url)
- response = compat_urllib_request.urlopen(request)
+ response = self._request_webpage(url, None, False)
redirecturl = response.geturl()
rurlp = compat_urllib_parse_urlparse(redirecturl)
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
@@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor):
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
info = None
- try:
- urlh = compat_urllib_request.urlopen(request)
- if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
- basename = url.split('/')[-1]
- title,ext = os.path.splitext(basename)
- title = title.decode('UTF-8')
- ext = ext.replace('.', '')
- self.report_direct_download(title)
- info = {
- 'id': title,
- 'url': url,
- 'uploader': None,
- 'upload_date': None,
- 'title': title,
- 'ext': ext,
- 'urlhandle': urlh
- }
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
+ urlh = self._request_webpage(request, None, False,
+ u'unable to download video info webpage')
+ if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
+ basename = url.split('/')[-1]
+ title,ext = os.path.splitext(basename)
+ title = title.decode('UTF-8')
+ ext = ext.replace('.', '')
+ self.report_direct_download(title)
+ info = {
+ 'id': title,
+ 'url': url,
+ 'uploader': None,
+ 'upload_date': None,
+ 'title': title,
+ 'ext': ext,
+ 'urlhandle': urlh
+ }
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 3666a780b..755d9c9ef 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
- _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
+ _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_TEST = {
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 53579aa27..a54ce3ee7 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -12,7 +12,7 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+ _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1b049082d..534908a2b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -55,6 +55,9 @@ class InfoExtractor(object):
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
view_count: How many users have watched the video on the platform.
+ like_count: Number of positive ratings of the video
+ dislike_count: Number of negative ratings of the video
+ comment_count: Number of comments on the video
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
@@ -151,27 +154,38 @@ class InfoExtractor(object):
def IE_NAME(self):
return type(self).__name__[:-2]
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the response handle """
if note is None:
self.report_download_webpage(video_id)
elif note is not False:
- self.to_screen(u'%s: %s' % (video_id, note))
+ if video_id is None:
+ self.to_screen(u'%s' % (note,))
+ else:
+ self.to_screen(u'%s: %s' % (video_id, note))
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
- raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
+ errmsg = u'%s: %s' % (errnote, compat_str(err))
+ if fatal:
+ raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+ else:
+ self._downloader.report_warning(errmsg)
+ return False
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns a tuple (page content as string, URL handle) """
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
- urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
+ if urlh is False:
+ assert not fatal
+ return False
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -206,9 +220,14 @@ class InfoExtractor(object):
content = webpage_bytes.decode(encoding, 'replace')
return (content, urlh)
- def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
+ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the data of the page as a string """
- return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
+ res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+ if res is False:
+ return res
+ else:
+ content, _ = res
+ return content
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML'):
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index 7bf03c584..d5730684d 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -6,7 +6,7 @@ from ..utils import (
)
class CSpanIE(InfoExtractor):
- _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
+ _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
_TEST = {
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
u'file': u'315139.flv',
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 71f5e03ee..3bd0b862c 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -11,6 +11,7 @@ from ..utils import (
get_element_by_attribute,
get_element_by_id,
orderedSet,
+ str_to_int,
ExtractorError,
)
@@ -146,6 +147,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
self._list_available_subtitles(video_id, webpage)
return
+ view_count = str_to_int(self._search_regex(
+ r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
+
return {
'id': video_id,
'formats': formats,
@@ -155,6 +159,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
+ 'view_count': view_count,
}
def _get_available_subtitles(self, video_id, webpage):
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 24ce79425..cb7226f82 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -11,7 +11,7 @@ from ..utils import (
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
- _VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+ _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.webm',
diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py
index f21ef8853..88f5526b8 100644
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -10,7 +10,7 @@ from ..utils import (
class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks'
- _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+ _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
_TEST = {
u"name": u"EightTracks",
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py
index a51d79b08..682901d16 100644
--- a/youtube_dl/extractor/exfm.py
+++ b/youtube_dl/extractor/exfm.py
@@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor):
IE_NAME = u'exfm'
IE_DESC = u'ex.fm'
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
- _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
+ _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
_TESTS = [
{
u'url': u'http://ex.fm/song/eh359',
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index d0dfde694..c6ab6952e 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -9,7 +9,7 @@ from ..utils import (
class FazIE(InfoExtractor):
IE_NAME = u'faz.net'
- _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
+ _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
_TEST = {
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index dba1a8dc2..d7048c8c1 100644
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -12,7 +12,7 @@ from ..utils import (
class FKTVIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv'
- _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+ _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
_TEST = {
u'url': u'http://fernsehkritik.tv/folge-1',
@@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor):
class FKTVPosteckeIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv:postecke'
- _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
+ _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_TEST = {
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
u'file': u'0120.flv',
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 6e1971043..ad85bc16d 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -21,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
thumbnail_path = info.find('image').text
return {'id': video_id,
- 'ext': 'mp4',
+ 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
'url': video_url,
'title': info.find('titre').text,
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
@@ -45,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetvinfo.fr'
- _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
+ _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = {
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -66,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
return self._extract_video(video_id)
-class France2IE(FranceTVBaseInfoExtractor):
- IE_NAME = u'france2.fr'
- _VALID_URL = r'''(?x)https?://www\.france2\.fr/
+class FranceTVIE(FranceTVBaseInfoExtractor):
+ IE_NAME = u'francetv'
+ IE_DESC = u'France 2, 3, 4, 5 and Ô'
+ _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?:
- emissions/.*?/videos/(?P<id>\d+)
- | emission/(?P<key>[^/?]+)
+ emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
+ | (emissions?|jt)/(?P<key>[^/?]+)
)'''
- _TEST = {
- u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
- u'file': u'75540104.mp4',
- u'info_dict': {
- u'title': u'13h15, le samedi...',
- u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+ _TESTS = [
+ # france2
+ {
+ u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+ u'file': u'75540104.mp4',
+ u'info_dict': {
+ u'title': u'13h15, le samedi...',
+ u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+ },
+ u'params': {
+ # m3u8 download
+ u'skip_download': True,
+ },
},
- u'params': {
- u'skip_download': True,
+ # france3
+ {
+ u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
+ u'info_dict': {
+ u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
+ u'ext': u'flv',
+ u'title': u'Le scandale du prix des médicaments',
+ u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
+ },
+ u'params': {
+ # rtmp download
+ u'skip_download': True,
+ },
},
- }
+ # france4
+ {
+ u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ u'info_dict': {
+ u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ u'ext': u'flv',
+ u'title': u'Hero Corp Making of - Extrait 1',
+ u'description': u'md5:c87d54871b1790679aec1197e73d650a',
+ },
+ u'params': {
+ # rtmp download
+ u'skip_download': True,
+ },
+ },
+ # france5
+ {
+ u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
+ u'info_dict': {
+ u'id': u'92837968',
+ u'ext': u'mp4',
+ u'title': u'C à dire ?!',
+ u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
+ },
+ u'params': {
+ # m3u8 download
+ u'skip_download': True,
+ },
+ },
+ # franceo
+ {
+ u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
+ u'info_dict': {
+ u'id': u'92327925',
+ u'ext': u'mp4',
+ u'title': u'Infô-Afrique',
+ u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
+ },
+ u'params': {
+ # m3u8 download
+ u'skip_download': True,
+ },
+ u'skip': u'The id changes frequently',
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('key'):
webpage = self._download_webpage(url, mobj.group('key'))
- video_id = self._html_search_regex(
- r'''(?x)<div\s+class="video-player">\s*
+ id_res = [
+ (r'''(?x)<div\s+class="video-player">\s*
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
- class="francetv-video-player">''',
- webpage, u'video ID')
+ class="francetv-video-player">'''),
+ (r'<a id="player_direct" href="http://info\.francetelevisions'
+ '\.fr/\?id-video=([^"/&]+)'),
+ (r'<a class="video" id="ftv_player_(.+?)"'),
+ ]
+ video_id = self._html_search_regex(id_res, webpage, u'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)
diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py
index c91669b0e..a3a5251fe 100644
--- a/youtube_dl/extractor/gamekings.py
+++ b/youtube_dl/extractor/gamekings.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
- _VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
+ _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',
diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py
index 3a8bef250..d82a5d4b2 100644
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -4,8 +4,7 @@ from .mtv import MTVServicesInfoExtractor
class GametrailersIE(MTVServicesInfoExtractor):
- _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
-
+ _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 10ae06263..216e03218 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
- video_title = self._html_search_regex(r'<title>(.*)</title>',
- webpage, u'video title', default=u'video', flags=re.DOTALL)
+ video_title = self._html_search_regex(
+ r'(?s)<title>(.*?)</title>', webpage, u'video title',
+ default=u'video')
+
+ # video uploader is domain name
+ video_uploader = self._search_regex(
+ r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
# Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
@@ -188,7 +193,7 @@ class GenericIE(InfoExtractor):
# Look for embedded YouTube player
matches = re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
@@ -197,13 +202,26 @@ class GenericIE(InfoExtractor):
# Look for embedded Dailymotion player
matches = re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
+ # Look for embedded Wistia player
+ match = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+ if match:
+ return {
+ '_type': 'url_transparent',
+ 'url': unescapeHTML(match.group('url')),
+ 'ie_key': 'Wistia',
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'id': video_id,
+ }
+
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
@@ -247,14 +265,9 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
- # video uploader is domain name
- video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
- url, u'video uploader')
-
return {
'id': video_id,
'url': video_url,
'uploader': video_uploader,
- 'upload_date': None,
'title': video_title,
}
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py
index 3798118a7..0ee74fb38 100644
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
u'file': u'1435540.mp3',
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
u'info_dict': {
- u"title": u"Freddie Gibbs - Lay It Down"
+ u"title": u'Freddie Gibbs "Lay It Down"'
}
}
diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py
index c52146f7d..57b79a336 100644
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system."""
- _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
+ _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index d8e9712a7..6fb373db2 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -21,7 +21,6 @@ class ImdbIE(InfoExtractor):
u'ext': u'mp4',
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
- u'duration': 151,
}
}
@@ -35,6 +34,7 @@ class ImdbIE(InfoExtractor):
flags=re.MULTILINE)
formats = []
for f_id, f_path in available_formats:
+ f_path = f_path.strip()
format_page = self._download_webpage(
compat_urlparse.urljoin(url, f_path),
u'Downloading info for %s format' % f_id)
@@ -46,7 +46,6 @@ class ImdbIE(InfoExtractor):
formats.append({
'format_id': f_id,
'url': format_info['url'],
- 'height': int(info['titleObject']['encoding']['selected'][:-1]),
})
return {
@@ -55,5 +54,4 @@ class ImdbIE(InfoExtractor):
'formats': formats,
'description': descr,
'thumbnail': format_info['slate'],
- 'duration': int(info['titleObject']['title']['duration_seconds']),
}
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 213aac428..660573d02 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor
class InstagramIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
+ _VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
_TEST = {
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
u'file': u'aye83DjauH.mp4',
diff --git a/youtube_dl/extractor/jukebox.py b/youtube_dl/extractor/jukebox.py
index c7bb234fe..592c64e1d 100644
--- a/youtube_dl/extractor/jukebox.py
+++ b/youtube_dl/extractor/jukebox.py
@@ -8,7 +8,7 @@ from ..utils import (
)
class JukeboxIE(InfoExtractor):
- _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
+ _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py
index dd062a14e..5ae57a77c 100644
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -8,7 +8,7 @@ from ..utils import (
class LiveLeakIE(InfoExtractor):
- _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
+ _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'liveleak'
_TEST = {
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 9bc35b115..1dcd1fb2d 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -11,7 +11,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor):
IE_NAME = u'livestream'
- _VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
+ _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
u'file': u'4719370.mp4',
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 91480ba87..99d3c83a5 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -1,14 +1,10 @@
import re
-import socket
from .common import InfoExtractor
from ..utils import (
- compat_http_client,
compat_parse_qs,
- compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
- compat_str,
determine_ext,
ExtractorError,
)
@@ -69,6 +65,21 @@ class MetacafeIE(InfoExtractor):
u'age_limit': 18,
},
},
+ # cbs video
+ {
+ u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
+ u'info_dict': {
+ u'id': u'0rOxMBabDXN6',
+ u'ext': u'flv',
+ u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
+ u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
+ u'duration': 129,
+ },
+ u'params': {
+ # rtmp download
+ u'skip_download': True,
+ },
+ },
]
@@ -78,12 +89,8 @@ class MetacafeIE(InfoExtractor):
def _real_initialize(self):
# Retrieve disclaimer
- request = compat_urllib_request.Request(self._DISCLAIMER)
- try:
- self.report_disclaimer()
- compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
+ self.report_disclaimer()
+ self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
# Confirm age
disclaimer_form = {
@@ -92,11 +99,8 @@ class MetacafeIE(InfoExtractor):
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- try:
- self.report_age_confirmation()
- compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+ self.report_age_confirmation()
+ self._download_webpage(request, None, False, u'Unable to confirm age')
def _real_extract(self, url):
# Extract id and simplified title from URL
@@ -106,10 +110,16 @@ class MetacafeIE(InfoExtractor):
video_id = mobj.group(1)
- # Check if video comes from YouTube
- mobj2 = re.match(r'^yt-(.*)$', video_id)
- if mobj2 is not None:
- return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
+ # the video may come from an external site
+ m_external = re.match('^(\w{2})-(.*)$', video_id)
+ if m_external is not None:
+ prefix, ext_id = m_external.groups()
+ # Check if video comes from YouTube
+ if prefix == 'yt':
+ return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
+ # CBS videos use theplatform.com
+ if prefix == 'cb':
+ return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index e2baf44d7..04fa3ac7a 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,13 +1,10 @@
import json
import re
-import socket
from .common import InfoExtractor
from ..utils import (
- compat_http_client,
- compat_urllib_error,
- compat_urllib_request,
unified_strdate,
+ ExtractorError,
)
@@ -31,9 +28,11 @@ class MixcloudIE(InfoExtractor):
"""Returns 1st active url from list"""
for url in url_list:
try:
- compat_urllib_request.urlopen(url)
+ # We only want to know if the request succeed
+ # don't download the whole file
+ self._request_webpage(url, None, False)
return url
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
+ except ExtractorError:
url = None
return None
diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py
index 03e31ea1c..1772b7f9a 100644
--- a/youtube_dl/extractor/muzu.py
+++ b/youtube_dl/extractor/muzu.py
@@ -9,7 +9,7 @@ from ..utils import (
class MuzuTVIE(InfoExtractor):
- _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
+ _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
IE_NAME = u'muzu.tv'
_TEST = {
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 0067bf134..4becddee6 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -9,7 +9,7 @@ from ..utils import (
class MySpassIE(InfoExtractor):
- _VALID_URL = r'http://www.myspass.de/.*'
+ _VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
u'file': u'11741.mp4',
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py
new file mode 100644
index 000000000..ea986c00e
--- /dev/null
+++ b/youtube_dl/extractor/ninegag.py
@@ -0,0 +1,43 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class NineGagIE(InfoExtractor):
+ IE_NAME = '9gag'
+ _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+
+ _TEST = {
+ u"url": u"http://9gag.tv/v/1912",
+ u"file": u"1912.mp4",
+ u"info_dict": {
+ u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+ u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+ },
+ u'add_ie': [u'Youtube']
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ data_json = self._html_search_regex(r'''(?x)
+ <div\s*id="tv-video"\s*data-video-source="youtube"\s*
+ data-video-meta="([^"]+)"''', webpage, u'video metadata')
+
+ data = json.loads(data_json)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': data['youtubeVideoId'],
+ 'ie_key': 'Youtube',
+ 'id': video_id,
+ 'title': data['title'],
+ 'description': data['description'],
+ 'view_count': int(data['view_count']),
+ 'like_count': int(data['statistic']['like']),
+ 'dislike_count': int(data['statistic']['dislike']),
+ 'thumbnail': data['thumbnail_url'],
+ }
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index cfca2a063..b42eae89a 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -12,7 +12,7 @@ from ..utils import (
)
class ORFIE(InfoExtractor):
- _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
+ _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 65462d867..25f019231 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class PBSIE(InfoExtractor):
- _VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
+ _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
_TEST = {
u'url': u'http://video.pbs.org/video/2365006249/',
diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py
new file mode 100644
index 000000000..33054591b
--- /dev/null
+++ b/youtube_dl/extractor/pyvideo.py
@@ -0,0 +1,51 @@
+import re
+import os
+
+from .common import InfoExtractor
+
+
+class PyvideoIE(InfoExtractor):
+ _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+ _TESTS = [{
+ u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+ u'file': u'24_4WWkSmNo.mp4',
+ u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
+ u'info_dict': {
+ u"title": u"Become a logging expert in 30 minutes",
+ u"description": u"md5:9665350d466c67fb5b1598de379021f7",
+ u"upload_date": u"20130320",
+ u"uploader": u"NextDayVideo",
+ u"uploader_id": u"NextDayVideo",
+ },
+ u'add_ie': ['Youtube'],
+ },
+ {
+ u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+ u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
+ u'info_dict': {
+ u'id': u'2542',
+ u'ext': u'm4v',
+ u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
+
+ if m_youtube is not None:
+ return self.url_result(m_youtube.group(1), 'Youtube')
+
+ title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
+ webpage, u'title', flags=re.DOTALL)
+ video_url = self._search_regex([r'<source src="(.*?)"',
+ r'<dt>Download</dt>.*?<a href="(.+?)"'],
+ webpage, u'video url', flags=re.DOTALL)
+ return {
+ 'id': video_id,
+ 'title': os.path.splitext(title)[0],
+ 'url': video_url,
+ }
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index a18034fe2..e3e9bc07f 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -11,7 +11,7 @@ from ..utils import (
class RutubeIE(InfoExtractor):
- _VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
+ _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
_TEST = {
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py
index f5003c7f9..d68646d24 100644
--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor
class SlashdotIE(InfoExtractor):
- _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
+ _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
u'add_ie': ['Ooyala'],
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index f035a3214..4ea89bf85 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -3,11 +3,13 @@
import re
import json
import hashlib
+import uuid
from .common import InfoExtractor
from ..utils import (
- determine_ext,
- ExtractorError
+ compat_urllib_parse,
+ compat_urllib_request,
+ ExtractorError,
)
@@ -250,3 +252,105 @@ class SmotriUserIE(InfoExtractor):
u'user nickname')
return self.playlist_result(entries, user_id, user_nickname)
+
+
+class SmotriBroadcastIE(InfoExtractor):
+ IE_DESC = u'Smotri.com broadcasts'
+ IE_NAME = u'smotri:broadcast'
+ _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ broadcast_id = mobj.group('broadcastid')
+
+ broadcast_url = 'http://' + mobj.group('url')
+ broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page')
+
+ if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
+ raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True)
+
+ # Adult content
+ if re.search(u'EroConfirmText">', broadcast_page) is not None:
+
+ (username, password) = self._get_login_info()
+ if username is None:
+ raise ExtractorError(u'Erotic broadcasts allowed only for registered users, '
+ u'use --username and --password options to provide account credentials.', expected=True)
+
+ # Log in
+ login_form_strs = {
+ u'login-hint53': '1',
+ u'confirm_erotic': '1',
+ u'login': username,
+ u'password': password,
+ }
+ # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+ # chokes on unicode
+ login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+ login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+ login_url = broadcast_url + '/?no_redirect=1'
+ request = compat_urllib_request.Request(login_url, login_data)
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ broadcast_page = self._download_webpage(
+ request, broadcast_id, note=u'Logging in and confirming age')
+
+ if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None:
+ raise ExtractorError(u'Unable to log in: bad username or password', expected=True)
+
+ adult_content = True
+ else:
+ adult_content = False
+
+ ticket = self._html_search_regex(
+ u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+ broadcast_page, u'broadcast ticket')
+
+ url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
+
+ broadcast_password = self._downloader.params.get('videopassword', None)
+ if broadcast_password:
+ url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
+
+ broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON')
+
+ try:
+ broadcast_json = json.loads(broadcast_json_page)
+
+ protected_broadcast = broadcast_json['_pass_protected'] == 1
+ if protected_broadcast and not broadcast_password:
+ raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True)
+
+ broadcast_offline = broadcast_json['is_play'] == 0
+ if broadcast_offline:
+ raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True)
+
+ rtmp_url = broadcast_json['_server']
+ if not rtmp_url.startswith('rtmp://'):
+ raise ExtractorError(u'Unexpected broadcast rtmp URL')
+
+ broadcast_playpath = broadcast_json['_streamName']
+ broadcast_thumbnail = broadcast_json['_imgURL']
+ broadcast_title = broadcast_json['title']
+ broadcast_description = broadcast_json['description']
+ broadcaster_nick = broadcast_json['nick']
+ broadcaster_login = broadcast_json['login']
+ rtmp_conn = 'S:%s' % uuid.uuid4().hex
+ except KeyError:
+ if protected_broadcast:
+ raise ExtractorError(u'Bad broadcast password', expected=True)
+ raise ExtractorError(u'Unexpected broadcast JSON')
+
+ return {
+ 'id': broadcast_id,
+ 'url': rtmp_url,
+ 'title': broadcast_title,
+ 'thumbnail': broadcast_thumbnail,
+ 'description': broadcast_description,
+ 'uploader': broadcaster_nick,
+ 'uploader_id': broadcaster_login,
+ 'age_limit': 18 if adult_content else 0,
+ 'ext': 'flv',
+ 'play_path': broadcast_playpath,
+ 'rtmp_live': True,
+ 'rtmp_conn': rtmp_conn
+ }
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 3a19ab172..cb6dedab7 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''^(?:https?://)?
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
- |(?P<widget>w.soundcloud.com/player/?.*?url=.*)
+ |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
)
'''
IE_NAME = u'soundcloud'
@@ -217,7 +217,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE):
- _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
+ _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
IE_NAME = u'soundcloud:user'
# it's in tests/test_playlists.py
diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py
index 0d32a0688..11455e0fa 100644
--- a/youtube_dl/extractor/space.py
+++ b/youtube_dl/extractor/space.py
@@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
- _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
+ _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = {
u'add_ie': ['Brightcove'],
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py
index b27838bf9..44c52c718 100644
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@@ -1,14 +1,7 @@
import re
-import socket
-import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
- compat_http_client,
- compat_str,
- compat_urllib_error,
- compat_urllib_request,
-
ExtractorError,
orderedSet,
unescapeHTML,
@@ -18,7 +11,7 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor):
IE_NAME = u'stanfordoc'
IE_DESC = u'Stanford Open ClassRoom'
- _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+ _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
_TEST = {
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
u'file': u'PracticalUnix_intro-environment.mp4',
@@ -45,11 +38,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
self.report_extraction(info['id'])
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
xmlUrl = baseUrl + video + '.xml'
- try:
- metaXml = compat_urllib_request.urlopen(xmlUrl).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
- mdoc = xml.etree.ElementTree.fromstring(metaXml)
+ mdoc = self._download_xml(xmlUrl, info['id'])
try:
info['title'] = mdoc.findall('./title')[0].text
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
@@ -95,12 +84,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
- self.report_download_webpage(info['id'])
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
- try:
- rootpage = compat_urllib_request.urlopen(rootURL).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
+ rootpage = self._download_webpage(rootURL, info['id'],
+ errnote=u'Unable to download course info page')
info['title'] = info['id']
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index 772134a12..2c5c88be8 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
- _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
+ _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
_TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
u'file': u'10635995.mp4',
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
new file mode 100644
index 000000000..61452e47d
--- /dev/null
+++ b/youtube_dl/extractor/theplatform.py
@@ -0,0 +1,68 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_with_ns,
+)
+
+_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
+
+
+class ThePlatformIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'
+
+ _TEST = {
+ # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
+ u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+ u'info_dict': {
+ u'id': u'e9I_cZgTgIPd',
+ u'ext': u'flv',
+ u'title': u'Blackberry\'s big, bold Z30',
+ u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+ u'duration': 247,
+ },
+ u'params': {
+ # rtmp download
+ u'skip_download': True,
+ },
+ }
+
+ def _get_info(self, video_id):
+ smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
+ 'format=smil&mbr=true'.format(video_id))
+ meta = self._download_xml(smil_url, video_id)
+ info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
+ info_json = self._download_webpage(info_url, video_id)
+ info = json.loads(info_json)
+
+ head = meta.find(_x('smil:head'))
+ body = meta.find(_x('smil:body'))
+ base_url = head.find(_x('smil:meta')).attrib['base']
+ switch = body.find(_x('smil:switch'))
+ formats = []
+ for f in switch.findall(_x('smil:video')):
+ attr = f.attrib
+ formats.append({
+ 'url': base_url,
+ 'play_path': 'mp4:' + attr['src'],
+ 'ext': 'flv',
+ 'width': int(attr['width']),
+ 'height': int(attr['height']),
+ 'vbr': int(attr['system-bitrate']),
+ })
+ formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
+
+ return {
+ 'id': video_id,
+ 'title': info['title'],
+ 'formats': formats,
+ 'description': info['description'],
+ 'thumbnail': info['defaultThumbnailUrl'],
+ 'duration': info['duration']//1000,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ return self._get_info(video_id)
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py
index 516e18914..474610eec 100644
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor
class UnistraIE(InfoExtractor):
- _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
+ _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
_TEST = {
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
index 3a99a29c6..3cf8c853d 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -9,7 +9,7 @@ from ..utils import (
)
class VeeHDIE(InfoExtractor):
- _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://veehd.com/video/4686958',
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 4378b1780..4823992ef 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
"""
- _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
+ _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
@@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody to Die For",
- u"duration": 230,
+ u"duration": 230.12,
u"width": 1920,
u"height": 1080,
}
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 6b93afa50..87812d6af 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -6,7 +6,7 @@ from ..utils import ExtractorError
class ViceIE(InfoExtractor):
- _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'
+ _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
_TEST = {
u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
index 75335dfb8..9328ef4a2 100644
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -2,13 +2,10 @@ import json
import re
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
-)
class ViddlerIE(InfoExtractor):
- _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://www.viddler.com/v/43903784",
u'file': u'43903784.mp4',
diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py
index 912802d9a..f75169041 100644
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dl/extractor/videofyme.py
@@ -7,7 +7,7 @@ from ..utils import (
)
class VideofyMeIE(InfoExtractor):
- _VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)'
+ _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
IE_NAME = u'videofy.me'
_TEST = {
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index f27763ae2..fb2bd225a 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+ _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
@@ -196,6 +196,16 @@ class VimeoIE(InfoExtractor):
if mobj is not None:
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
+ try:
+ view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, u'view count'))
+ like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, u'like count'))
+ comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, u'comment count'))
+ except RegexNotFoundError:
+ # This info is only available in vimeo.com/{id} urls
+ view_count = None
+ like_count = None
+ comment_count = None
+
# Vimeo specific: extract request signature and timestamp
sig = config['request']['signature']
timestamp = config['request']['timestamp']
@@ -242,6 +252,9 @@ class VimeoIE(InfoExtractor):
'description': video_description,
'formats': formats,
'webpage_url': url,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
}
@@ -251,11 +264,17 @@ class VimeoChannelIE(InfoExtractor):
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
+ def _page_url(self, base_url, pagenum):
+ return '%s/videos/page:%d/' % (base_url, pagenum)
+
+ def _extract_list_title(self, webpage):
+ return self._html_search_regex(self._TITLE_RE, webpage, u'list title')
+
def _extract_videos(self, list_id, base_url):
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage(
- '%s/videos/page:%d/' % (base_url, pagenum),list_id,
+ self._page_url(base_url, pagenum) ,list_id,
u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
@@ -263,11 +282,9 @@ class VimeoChannelIE(InfoExtractor):
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
- list_title = self._html_search_regex(self._TITLE_RE, webpage,
- u'list title')
return {'_type': 'playlist',
'id': list_id,
- 'title': list_title,
+ 'title': self._extract_list_title(webpage),
'entries': entries,
}
@@ -284,7 +301,7 @@ class VimeoUserIE(VimeoChannelIE):
@classmethod
def suitable(cls, url):
- if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url):
+ if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
return False
return super(VimeoUserIE, cls).suitable(url)
@@ -292,3 +309,30 @@ class VimeoUserIE(VimeoChannelIE):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+
+
+class VimeoAlbumIE(VimeoChannelIE):
+ IE_NAME = u'vimeo:album'
+ _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'
+ _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/page:%d/' % (base_url, pagenum)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id = mobj.group('id')
+ return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+
+
+class VimeoGroupsIE(VimeoAlbumIE):
+ IE_NAME = u'vimeo:group'
+ _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+
+ def _extract_list_title(self, webpage):
+ return self._og_search_title(webpage)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py
index 29c25f0e3..4fab6c6e8 100644
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -11,7 +11,7 @@ from ..utils import (
class WatIE(InfoExtractor):
- _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
+ _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
IE_NAME = 'wat.tv'
_TEST = {
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index b9c3b13f9..82a626e0e 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -11,7 +11,8 @@ class WimpIE(InfoExtractor):
u'file': u'deerfence.flv',
u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
u'info_dict': {
- u"title": u"Watch Till End: Herd of deer jump over a fence."
+ u"title": u"Watch Till End: Herd of deer jump over a fence.",
+ u"description": u"These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
}
}
@@ -19,18 +20,14 @@ class WimpIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title')
- thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')
googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
googleString = base64.b64decode(googleString).decode('ascii')
- final_url = self._search_regex('","(.*?)"', googleString,'final video url')
- ext = final_url.rpartition(u'.')[2]
-
- return [{
- 'id': video_id,
- 'url': final_url,
- 'ext': ext,
- 'title': title,
- 'thumbnail': thumbnail_url,
- }]
+ final_url = self._search_regex('","(.*?)"', googleString, u'final video url')
+ return {
+ 'id': video_id,
+ 'url': final_url,
+ 'title': self._og_search_title(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
new file mode 100644
index 000000000..e1748c261
--- /dev/null
+++ b/youtube_dl/extractor/wistia.py
@@ -0,0 +1,55 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class WistiaIE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
+
+ _TEST = {
+ u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
+ u"file": u"sh7fpupwlt.mov",
+ u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
+ u"info_dict": {
+ u"title": u"cfh_resourceful_zdkh_final_1"
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ data_json = self._html_search_regex(
+ r'Wistia\.iframeInit\((.*?), {}\);', webpage, u'video data')
+
+ data = json.loads(data_json)
+
+ formats = []
+ thumbnails = []
+ for atype, a in data['assets'].items():
+ if atype == 'still':
+ thumbnails.append({
+ 'url': a['url'],
+ 'resolution': '%dx%d' % (a['width'], a['height']),
+ })
+ continue
+ if atype == 'preview':
+ continue
+ formats.append({
+ 'format_id': atype,
+ 'url': a['url'],
+ 'width': a['width'],
+ 'height': a['height'],
+ 'filesize': a['size'],
+ 'ext': a['ext'],
+ })
+ formats.sort(key=lambda a: a['filesize'])
+
+ return {
+ 'id': video_id,
+ 'title': data['name'],
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 279f75e7a..ef9997ee4 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor):
{
u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
u'file': u'2221348.flv',
- u'md5': u'970a94178ca4118c5aa3aaea21211b81',
+ u'md5': u'e767b9475de189320f691f49c679c4c7',
u'info_dict': {
u"upload_date": u"20130914",
u"uploader_id": u"jojo747400",
@@ -46,7 +46,7 @@ class XHamsterIE(InfoExtractor):
return mobj.group('server')+'/key='+mobj.group('file')
def is_hd(webpage):
- return webpage.find('<div class=\'icon iconHD\'>') != -1
+ return webpage.find('<div class=\'icon iconHD\'') != -1
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index e457c4707..5c9c361b9 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -47,7 +47,7 @@ class YahooIE(InfoExtractor):
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
- return self._get_info(info['id'], video_id)
+ return self._get_info(long_id, video_id)
def _get_info(self, long_id, video_id):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index 1fcc518ac..e971b5b4b 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -7,7 +7,7 @@ from ..utils import (
class YouJizzIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
+ _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
_TEST = {
u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
u'file': u'2189178.flv',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 66f5af000..874429b78 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -7,7 +7,6 @@ import itertools
import json
import os.path
import re
-import socket
import string
import struct
import traceback
@@ -17,9 +16,7 @@ from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_chr,
- compat_http_client,
compat_parse_qs,
- compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
@@ -45,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
- def report_lang(self):
- """Report attempt to set language."""
- self.to_screen(u'Setting language')
-
def _set_language(self):
- request = compat_urllib_request.Request(self._LANG_URL)
- try:
- self.report_lang()
- compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
- return False
- return True
+ return bool(self._download_webpage(
+ self._LANG_URL, None,
+ note=u'Setting language', errnote='unable to set language',
+ fatal=False))
def _login(self):
(username, password) = self._get_login_info()
@@ -67,12 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
- request = compat_urllib_request.Request(self._LOGIN_URL)
- try:
- login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
- return False
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ note=u'Downloading login page',
+ errnote=u'unable to fetch login page', fatal=False)
+ if login_page is False:
+ return False
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
login_page, u'Login GALX parameter')
@@ -102,29 +91,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
- request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
- try:
- self.report_login()
- login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
- if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
- self._downloader.report_warning(u'unable to log in: bad username or password')
- return False
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+
+ req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
+ login_results = self._download_webpage(
+ req, None,
+ note=u'Logging in', errnote=u'unable to log in', fatal=False)
+ if login_results is False:
+ return False
+ if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
+ self._downloader.report_warning(u'unable to log in: bad username or password')
return False
return True
def _confirm_age(self):
age_form = {
- 'next_url': '/',
- 'action_confirm': 'Confirm',
- }
- request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
- try:
- self.report_age_confirmation()
- compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+ 'next_url': '/',
+ 'action_confirm': 'Confirm',
+ }
+ req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
+
+ self._download_webpage(
+ req, None,
+ note=u'Confirming age', errnote=u'Unable to confirm age')
return True
def _real_initialize(self):
@@ -336,7 +324,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader": u"Philipp Hagemeister",
u"uploader_id": u"phihag",
u"upload_date": u"20121002",
- u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+ u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
}
},
{
@@ -388,10 +376,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
super(YoutubeIE, self).__init__(*args, **kwargs)
self._player_cache = {}
- def report_video_webpage_download(self, video_id):
- """Report attempt to download video webpage."""
- self.to_screen(u'%s: Downloading video webpage' % video_id)
-
def report_video_info_webpage_download(self, video_id):
"""Report attempt to download video info webpage."""
self.to_screen(u'%s: Downloading video info webpage' % video_id)
@@ -1258,15 +1242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = self._extract_id(url)
# Get video webpage
- self.report_video_webpage_download(video_id)
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
- request = compat_urllib_request.Request(url)
- try:
- video_webpage_bytes = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
-
- video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
+ video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1366,6 +1343,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
+ video_description = re.sub(r'''(?x)
+ <a\s+
+ (?:[a-zA-Z-]+="[^"]+"\s+)*?
+ title="([^"]+)"\s+
+ (?:[a-zA-Z-]+="[^"]+"\s+)*?
+ class="yt-uix-redirect-link"\s*>
+ [^<]+
+ </a>
+ ''', r'\1', video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1374,6 +1360,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
video_description = u''
+ def _extract_count(klass):
+ count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
+ if count is not None:
+ return int(count.replace(',', ''))
+ return None
+ like_count = _extract_count(u'likes-count')
+ dislike_count = _extract_count(u'dislikes-count')
+
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
@@ -1506,6 +1500,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
})
return results
@@ -1520,10 +1516,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
\? (?:.*?&)*? (?:p|a|list)=
| p/
)
- ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
+ ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
.*
|
- ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
+ ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
@@ -1545,7 +1541,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _extract_mix(self, playlist_id):
# The mixes are generated from a a single video
# the id of the playlist is just 'RD' + video_id
- url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
+ url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
get_element_by_attribute('class', 'title ', webpage))
@@ -1573,9 +1569,12 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
+ if playlist_id.startswith('RD'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
+ if playlist_id.startswith('TL'):
+ raise ExtractorError(u'For downloading YouTube.com top lists, use '
+ u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
# Extract the video ids from the playlist pages
ids = []
@@ -1598,6 +1597,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, playlist_title)
+class YoutubeTopListIE(YoutubePlaylistIE):
+ IE_NAME = u'youtube:toplist'
+ IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+ u' (Example: "yttoplist:music:Top Tracks")')
+ _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ channel = mobj.group('chann')
+ title = mobj.group('title')
+ query = compat_urllib_parse.urlencode({'title': title})
+ playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+ channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+ link = self._html_search_regex(playlist_re, channel_page, u'list')
+ url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+
+ video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+ ids = []
+ # sometimes the webpage doesn't contain the videos
+ # retry until we get them
+ for i in itertools.count(0):
+ msg = u'Downloading Youtube top list'
+ if i > 0:
+ msg += ', retry #%d' % i
+ webpage = self._download_webpage(url, title, msg)
+ ids = orderedSet(re.findall(video_re, webpage))
+ if ids:
+ break
+ url_results = self._ids_to_results(ids)
+ return self.playlist_result(url_results, playlist_title=title)
+
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = u'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
@@ -1623,10 +1654,11 @@ class YoutubeChannelIE(InfoExtractor):
video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
- if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
- autogenerated = True
- else:
- autogenerated = False
+ autogenerated = re.search(r'''(?x)
+ class="[^"]*?(?:
+ channel-header-autogenerated-label|
+ yt-channel-title-autogenerated
+ )[^"]*"''', channel_page) is not None
if autogenerated:
# The videos are contained in a single page
@@ -1728,10 +1760,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
IE_NAME = u'youtube:search'
_SEARCH_KEY = 'ytsearch'
- def report_download_page(self, query, pagenum):
- """Report attempt to download search page with given number."""
- self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
-
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
@@ -1740,16 +1768,15 @@ class YoutubeSearchIE(SearchInfoExtractor):
limit = n
while (50 * pagenum) < limit:
- self.report_download_page(query, pagenum+1)
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
- request = compat_urllib_request.Request(result_url)
- try:
- data = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
- api_response = json.loads(data)['data']
-
- if not 'items' in api_response:
+ data_json = self._download_webpage(
+ result_url, video_id=u'query "%s"' % query,
+ note=u'Downloading page %s' % (pagenum + 1),
+ errnote=u'Unable to download API page')
+ data = json.loads(data_json)
+ api_response = data['data']
+
+ if 'items' not in api_response:
raise ExtractorError(u'[youtube] No video results')
new_ids = list(video['id'] for video in api_response['items'])
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c486ef8ec..5ba06d965 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -17,7 +17,6 @@ import ssl
import socket
import sys
import traceback
-import xml.etree.ElementTree
import zlib
try:
@@ -548,7 +547,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
def connect(self):
sock = socket.create_connection((self.host, self.port), self.timeout)
- if self._tunnel_host:
+ if getattr(self, '_tunnel_host', False):
self.sock = sock
self._tunnel()
try:
@@ -562,11 +561,14 @@ def make_HTTPS_handler(opts_no_check_certificate):
return HTTPSHandlerV3()
else:
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
- context.set_default_verify_paths()
-
context.verify_mode = (ssl.CERT_NONE
if opts_no_check_certificate
else ssl.CERT_REQUIRED)
+ context.set_default_verify_paths()
+ try:
+ context.load_default_certs()
+ except AttributeError:
+ pass # Python < 3.4
return compat_urllib_request.HTTPSHandler(context=context)
class ExtractorError(Exception):
@@ -1021,3 +1023,7 @@ def format_bytes(bytes):
suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return u'%.2f%s' % (converted, suffix)
+
+def str_to_int(int_str):
+ int_str = re.sub(r'[,\.]', u'', int_str)
+ return int(int_str)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index f9a339c02..f7f658f49 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.12.03'
+__version__ = '2013.12.09.1'