about summary refs log tree commit diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 40
-rw-r--r-- youtube_dl/__init__.py 93
-rw-r--r-- youtube_dl/cache.py 3
-rw-r--r-- youtube_dl/extractor/__init__.py 18
-rw-r--r-- youtube_dl/extractor/arte.py 35
-rw-r--r-- youtube_dl/extractor/audiomack.py 69
-rw-r--r-- youtube_dl/extractor/bild.py 39
-rw-r--r-- youtube_dl/extractor/br.py 2
-rw-r--r-- youtube_dl/extractor/brightcove.py 9
-rw-r--r-- youtube_dl/extractor/cinemassacre.py 17
-rw-r--r-- youtube_dl/extractor/cliphunter.py 5
-rw-r--r-- youtube_dl/extractor/cnn.py 2
-rw-r--r-- youtube_dl/extractor/common.py 20
-rw-r--r-- youtube_dl/extractor/condenast.py 2
-rw-r--r-- youtube_dl/extractor/crunchyroll.py 49
-rw-r--r-- youtube_dl/extractor/faz.py 39
-rw-r--r-- youtube_dl/extractor/francetv.py 5
-rw-r--r-- youtube_dl/extractor/funnyordie.py 2
-rw-r--r-- youtube_dl/extractor/generic.py 109
-rw-r--r-- youtube_dl/extractor/glide.py 40
-rw-r--r-- youtube_dl/extractor/googleplus.py 75
-rw-r--r-- youtube_dl/extractor/gorillavid.py 4
-rw-r--r-- youtube_dl/extractor/hark.py 48
-rw-r--r-- youtube_dl/extractor/heise.py 48
-rw-r--r-- youtube_dl/extractor/howstuffworks.py 10
-rw-r--r-- youtube_dl/extractor/huffpost.py 3
-rw-r--r-- youtube_dl/extractor/justintv.py 155
-rw-r--r-- youtube_dl/extractor/kickstarter.py 23
-rw-r--r-- youtube_dl/extractor/kontrtube.py 2
-rw-r--r-- youtube_dl/extractor/livestream.py 3
-rw-r--r-- youtube_dl/extractor/lrt.py 2
-rw-r--r-- youtube_dl/extractor/mitele.py 13
-rw-r--r-- youtube_dl/extractor/mixcloud.py 22
-rw-r--r-- youtube_dl/extractor/motherless.py 56
-rw-r--r-- youtube_dl/extractor/nbc.py 7
-rw-r--r-- youtube_dl/extractor/ndr.py 14
-rw-r--r-- youtube_dl/extractor/nhl.py 35
-rw-r--r-- youtube_dl/extractor/niconico.py 17
-rw-r--r-- youtube_dl/extractor/pbs.py 20
-rw-r--r-- youtube_dl/extractor/pornhub.py 13
-rw-r--r-- youtube_dl/extractor/promptfile.py 6
-rw-r--r-- youtube_dl/extractor/rtlnow.py 2
-rw-r--r-- youtube_dl/extractor/ruhd.py 15
-rw-r--r-- youtube_dl/extractor/sexykarma.py 117
-rw-r--r-- youtube_dl/extractor/soundcloud.py 16
-rw-r--r-- youtube_dl/extractor/sportbox.py 5
-rw-r--r-- youtube_dl/extractor/srmediathek.py 43
-rw-r--r-- youtube_dl/extractor/syfy.py 1
-rw-r--r-- youtube_dl/extractor/ted.py 23
-rw-r--r-- youtube_dl/extractor/telecinco.py 19
-rw-r--r-- youtube_dl/extractor/theonion.py 70
-rw-r--r-- youtube_dl/extractor/theplatform.py 34
-rw-r--r-- youtube_dl/extractor/tumblr.py 35
-rw-r--r-- youtube_dl/extractor/twitch.py 187
-rw-r--r-- youtube_dl/extractor/ustream.py 2
-rw-r--r-- youtube_dl/extractor/vgtv.py 6
-rw-r--r-- youtube_dl/extractor/viddler.py 108
-rw-r--r-- youtube_dl/extractor/vidzi.py 33
-rw-r--r-- youtube_dl/extractor/vimeo.py 20
-rw-r--r-- youtube_dl/extractor/vine.py 2
-rw-r--r-- youtube_dl/extractor/vrt.py 95
-rw-r--r-- youtube_dl/extractor/xtube.py 2
-rw-r--r-- youtube_dl/extractor/ynet.py 2
-rw-r--r-- youtube_dl/extractor/youtube.py 14
-rw-r--r-- youtube_dl/options.py 747
-rw-r--r-- youtube_dl/postprocessor/__init__.py 12
-rw-r--r-- youtube_dl/postprocessor/ffmpeg.py 78
-rw-r--r-- youtube_dl/utils.py 100
-rw-r--r-- youtube_dl/version.py 2
69 files changed, 1977 insertions, 987 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dec0e20e7..28dcc0195 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -24,6 +24,7 @@ if os.name == 'nt':
from .utils import (
compat_cookiejar,
+ compat_expanduser,
compat_http_client,
compat_str,
compat_urllib_error,
@@ -61,7 +62,7 @@ from .utils import (
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
-from .postprocessor import FFmpegMergerPP
+from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
from .version import __version__
@@ -107,6 +108,8 @@ class YoutubeDL(object):
forcefilename: Force printing final filename.
forceduration: Force printing duration.
forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
@@ -165,6 +168,8 @@ class YoutubeDL(object):
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -447,7 +452,7 @@ class YoutubeDL(object):
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- tmpl = os.path.expanduser(outtmpl)
+ tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
return filename
except ValueError as err:
@@ -568,8 +573,12 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video')
- if self.params.get('extract_flat', False):
- if result_type in ('url', 'url_transparent'):
+ if result_type in ('url', 'url_transparent'):
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+ extract_flat is True):
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(ie_result))
return ie_result
if result_type == 'video':
@@ -897,6 +906,8 @@ class YoutubeDL(object):
if self.params.get('forcejson', False):
info_dict['_filename'] = filename
self.to_stdout(json.dumps(info_dict))
+ if self.params.get('dump_single_json', False):
+ info_dict['_filename'] = filename
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -1015,7 +1026,7 @@ class YoutubeDL(object):
downloaded = []
success = True
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
- if not merger._get_executable():
+ if not merger._executable:
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
@@ -1064,12 +1075,15 @@ class YoutubeDL(object):
for url in url_list:
try:
#It also downloads the videos
- self.extract_info(url)
+ res = self.extract_info(url)
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
self.to_screen('[info] Maximum number of downloaded files reached.')
raise
+ else:
+ if self.params.get('dump_single_json', False):
+ self.to_stdout(json.dumps(res))
return self._download_retcode
@@ -1297,8 +1311,18 @@ class YoutubeDL(object):
sys.exc_clear()
except:
pass
- self._write_string('[debug] Python version %s - %s' %
- (platform.python_version(), platform_name()) + '\n')
+ self._write_string('[debug] Python version %s - %s\n' % (
+ platform.python_version(), platform_name()))
+
+ exe_versions = FFmpegPostProcessor.get_versions()
+ exe_str = ', '.join(
+ '%s %s' % (exe, v)
+ for exe, v in sorted(exe_versions.items())
+ if v
+ )
+ if not exe_str:
+ exe_str = 'none'
+ self._write_string('[debug] exe versions: %s\n' % exe_str)
proxy_map = {}
for handler in self._opener.handlers:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 7f2b4dfcc..7dc971884 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -1,86 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-__authors__ = (
- 'Ricardo Garcia Gonzalez',
- 'Danny Colligan',
- 'Benjamin Johnson',
- 'Vasyl\' Vavrychuk',
- 'Witold Baryluk',
- 'Paweł Paprota',
- 'Gergely Imreh',
- 'Rogério Brito',
- 'Philipp Hagemeister',
- 'Sören Schulze',
- 'Kevin Ngo',
- 'Ori Avtalion',
- 'shizeeg',
- 'Filippo Valsorda',
- 'Christian Albrecht',
- 'Dave Vasilevsky',
- 'Jaime Marquínez Ferrándiz',
- 'Jeff Crouse',
- 'Osama Khalid',
- 'Michael Walter',
- 'M. Yasoob Ullah Khalid',
- 'Julien Fraichard',
- 'Johny Mo Swag',
- 'Axel Noack',
- 'Albert Kim',
- 'Pierre Rudloff',
- 'Huarong Huo',
- 'Ismael Mejía',
- 'Steffan \'Ruirize\' James',
- 'Andras Elso',
- 'Jelle van der Waa',
- 'Marcin Cieślak',
- 'Anton Larionov',
- 'Takuya Tsuchida',
- 'Sergey M.',
- 'Michael Orlitzky',
- 'Chris Gahan',
- 'Saimadhav Heblikar',
- 'Mike Col',
- 'Oleg Prutz',
- 'pulpe',
- 'Andreas Schmitz',
- 'Michael Kaiser',
- 'Niklas Laxström',
- 'David Triendl',
- 'Anthony Weems',
- 'David Wagner',
- 'Juan C. Olivares',
- 'Mattias Harrysson',
- 'phaer',
- 'Sainyam Kapoor',
- 'Nicolas Évrard',
- 'Jason Normore',
- 'Hoje Lee',
- 'Adam Thalhammer',
- 'Georg Jähnig',
- 'Ralf Haring',
- 'Koki Takahashi',
- 'Ariset Llerena',
- 'Adam Malcontenti-Wilson',
- 'Tobias Bell',
- 'Naglis Jonaitis',
- 'Charles Chen',
- 'Hassaan Ali',
- 'Dobrosław Żybort',
- 'David Fabijan',
- 'Sebastian Haas',
- 'Alexander Kirk',
- 'Erik Johnson',
- 'Keith Beckman',
- 'Ole Ernst',
- 'Aaron McDaniel (mcd1992)',
- 'Magnus Kolstad',
- 'Hari Padmanaban',
- 'Carlos Ramos',
- '5moufl',
- 'lenaten',
-)
-
__license__ = 'Public Domain'
import codecs
@@ -94,6 +14,7 @@ from .options import (
parseOpts,
)
from .utils import (
+ compat_expanduser,
compat_getpass,
compat_print,
DateRange,
@@ -255,8 +176,6 @@ def _real_main(argv=None):
date = DateRange.day(opts.date)
else:
date = DateRange(opts.dateafter, opts.datebefore)
- if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
- parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
# Do not download videos when there are audio-only formats
if opts.extractaudio and not opts.keepvideo and opts.format is None:
@@ -284,8 +203,8 @@ def _real_main(argv=None):
u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
u' template'.format(outtmpl))
- any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
- download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+ any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
ydl_opts = {
'usenetrc': opts.usenetrc,
@@ -304,8 +223,9 @@ def _real_main(argv=None):
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
- 'simulate': opts.simulate,
- 'skip_download': (opts.skip_download or opts.simulate or any_printing),
+ 'dump_single_json': opts.dump_single_json,
+ 'simulate': opts.simulate or any_printing,
+ 'skip_download': opts.skip_download,
'format': opts.format,
'format_limit': opts.format_limit,
'listformats': opts.listformats,
@@ -369,6 +289,7 @@ def _real_main(argv=None):
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
'encoding': opts.encoding,
'exec_cmd': opts.exec_cmd,
+ 'extract_flat': opts.extract_flat,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index 79ff09f78..ac5925d32 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -9,6 +9,7 @@ import shutil
import traceback
from .utils import (
+ compat_expanduser,
write_json_file,
)
@@ -22,7 +23,7 @@ class Cache(object):
if res is None:
cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'youtube-dl')
- return os.path.expanduser(res)
+ return compat_expanduser(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index dd770fdf1..17ab49283 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -20,12 +20,14 @@ from .arte import (
ArteTVDDCIE,
ArteTVEmbedIE,
)
+from .audiomack import AudiomackIE
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
+from .bild import BildIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
@@ -60,7 +62,10 @@ from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE
-from .crunchyroll import CrunchyrollIE
+from .crunchyroll import (
+ CrunchyrollIE,
+ CrunchyrollShowPlaylistIE
+)
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
@@ -134,6 +139,7 @@ from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
+from .glide import GlideIE
from .globo import GloboIE
from .godtube import GodTubeIE
from .golem import GolemIE
@@ -173,7 +179,6 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
-from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
from .kankan import KankanIE
from .keezmovies import KeezMoviesIE
@@ -316,6 +321,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
+from .sexykarma import SexyKarmaIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
@@ -349,6 +355,7 @@ from .spike import SpikeIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
+from .srmediathek import SRMediathekIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
@@ -367,10 +374,12 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
+from .telecinco import TelecincoIE
from .telemb import TeleMBIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
from .tf1 import TF1IE
+from .theonion import TheOnionIE
from .theplatform import ThePlatformIE
from .thesixtyone import TheSixtyOneIE
from .thisav import ThisAVIE
@@ -394,6 +403,7 @@ from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
from .tvplay import TVPlayIE
+from .twitch import TwitchIE
from .ubu import UbuIE
from .udemy import (
UdemyIE,
@@ -419,6 +429,7 @@ from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vidme import VidmeIE
+from .vidzi import VidziIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
@@ -438,6 +449,7 @@ from .viki import VikiIE
from .vk import VKIE
from .vodlocker import VodlockerIE
from .vporn import VpornIE
+from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vulture import VultureIE
@@ -487,10 +499,8 @@ from .youtube import (
YoutubeUserIE,
YoutubeWatchLaterIE,
)
-
from .zdf import ZDFIE
-
_ALL_CLASSES = [
klass
for name, klass in globals().items()
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index c3d02f85e..b9a9440c0 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -10,8 +10,8 @@ from ..utils import (
unified_strdate,
determine_ext,
get_element_by_id,
- compat_str,
get_element_by_attribute,
+ int_or_none,
)
# There are different sources of video in arte.tv, the extraction process
@@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):
if not upload_date_str:
upload_date_str = player_info.get('VDA', '').split(' ')[0]
+ title = player_info['VTI'].strip()
+ subtitle = player_info.get('VSU', '').strip()
+ if subtitle:
+ title += ' - %s' % subtitle
+
info_dict = {
'id': player_info['VID'],
- 'title': player_info['VTI'],
+ 'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
- all_formats = player_info['VSR'].values()
+ all_formats = []
+ for format_id, format_dict in player_info['VSR'].items():
+ fmt = dict(format_dict)
+ fmt['format_id'] = format_id
+ all_formats.append(fmt)
# Some formats use the m3u8 protocol
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
def _match_lang(f):
@@ -149,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
- quality = ''
- height = format_info.get('height')
- if height is not None:
- quality = compat_str(height)
- bitrate = format_info.get('bitrate')
- if bitrate is not None:
- quality += '-%d' % bitrate
- if format_info.get('versionCode') is not None:
- format_id = '%s-%s' % (quality, format_info['versionCode'])
- else:
- format_id = quality
info = {
- 'format_id': format_id,
- 'format_note': format_info.get('versionLibelle'),
- 'width': format_info.get('width'),
- 'height': height,
+ 'format_id': format_info['format_id'],
+ 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
+ 'width': int_or_none(format_info.get('width')),
+ 'height': int_or_none(format_info.get('height')),
+ 'tbr': int_or_none(format_info.get('bitrate')),
}
if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer']
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
new file mode 100644
index 000000000..6232d2cd0
--- /dev/null
+++ b/youtube_dl/extractor/audiomack.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .soundcloud import SoundcloudIE
+from ..utils import ExtractorError
+
+import time
+
+
+class AudiomackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack'
+ _TESTS = [
+ #hosted on audiomack
+ {
+ 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
+ 'info_dict':
+ {
+ 'id' : 'roosh-williams/extraordinary',
+ 'ext': 'mp3',
+ 'title': 'Roosh Williams - Extraordinary'
+ }
+ },
+ #hosted on soundcloud via audiomack
+ {
+ 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
+ 'file': '172419696.mp3',
+ 'info_dict':
+ {
+ 'ext': 'mp3',
+ 'title': 'Young Thug ft Lil Wayne - Take Kare',
+ "upload_date": "20141016",
+ "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
+ "uploader": "Young Thug World"
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ api_response = self._download_json(
+ "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
+ video_id, time.time()),
+ video_id)
+
+ if "url" not in api_response:
+ raise ExtractorError("Unable to deduce api url of song")
+ realurl = api_response["url"]
+
+ #Audiomack wraps a lot of soundcloud tracks in their branded wrapper
+ # - if so, pass the work off to the soundcloud extractor
+ if SoundcloudIE.suitable(realurl):
+ return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
+
+ webpage = self._download_webpage(url, video_id)
+ artist = self._html_search_regex(
+ r'<span class="artist">(.*?)</span>', webpage, "artist")
+ songtitle = self._html_search_regex(
+ r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
+ webpage, "title")
+ title = artist + " - " + songtitle
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': realurl,
+ }
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
new file mode 100644
index 000000000..0269d1174
--- /dev/null
+++ b/youtube_dl/extractor/bild.py
@@ -0,0 +1,39 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class BildIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+ IE_DESC = 'Bild.de'
+ _TEST = {
+ 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+ 'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+ 'info_dict': {
+ 'id': '38184146',
+ 'ext': 'mp4',
+ 'title': 'BILD hat sie getestet',
+ 'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
+ 'duration': 196,
+ 'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
+ doc = self._download_xml(xml_url, video_id)
+
+ duration = int_or_none(doc.attrib.get('duration'), scale=1000)
+
+ return {
+ 'id': video_id,
+ 'title': doc.attrib['ueberschrift'],
+ 'description': doc.attrib.get('text'),
+ 'url': doc.attrib['src'],
+ 'thumbnail': doc.attrib.get('img'),
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index 2e277c8c3..45ba51732 100644
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 294670386..ad22cbafd 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -87,6 +87,15 @@ class BrightcoveIE(InfoExtractor):
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
},
},
+ {
+ # playlist test
+ # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
+ 'info_dict': {
+ 'title': 'Sealife',
+ },
+ 'playlist_mincount': 7,
+ },
]
@classmethod
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index 496271be4..d064a28f9 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
- mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+ mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
@@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
+ video_thumbnail = self._og_search_thumbnail(webpage)
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
- video_thumbnail = self._search_regex(
- r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
- sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
- videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
+ vidurl = self._search_regex(
+ r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+ vidid = self._search_regex(
+ r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
+ videoserver = self._html_search_regex(
+ r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
+
+ videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
formats = []
- baseurl = sd_url[:sd_url.rfind('/')+1]
+ baseurl = vidurl[:vidurl.rfind('/')+1]
for video in videolist.findall('.//video'):
src = video.get('src')
if not src:
diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py
index d4227e6eb..2edab90a3 100644
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -4,7 +4,6 @@ import json
import re
from .common import InfoExtractor
-from ..utils import int_or_none
_translation_table = {
@@ -39,9 +38,7 @@ class CliphunterIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._search_regex(
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index dae40c136..78877b1cf 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 450c7dfd6..e1bd6bb49 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -89,6 +89,10 @@ class InfoExtractor(object):
format, irrespective of the file format.
-1 for default (order by other properties),
-2 or smaller for less than default.
+ * source_preference Order number for this video source
+ (quality takes higher priority)
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
* http_referer HTTP Referer header value to set.
* http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
@@ -238,7 +242,6 @@ class InfoExtractor(object):
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns a tuple (page content as string, URL handle) """
-
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
@@ -247,6 +250,10 @@ class InfoExtractor(object):
if urlh is False:
assert not fatal
return False
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+ return (content, urlh)
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -281,6 +288,12 @@ class InfoExtractor(object):
raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen('Saving request to ' + filename)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if os.name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = '\\\\?\\' + absfilepath
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@@ -299,7 +312,7 @@ class InfoExtractor(object):
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
- return (content, urlh)
+ return content
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the data of the page as a string """
@@ -607,12 +620,13 @@ class InfoExtractor(object):
audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+ f.get('source_preference') if f.get('source_preference') is not None else -1,
f.get('format_id'),
)
formats.sort(key=_formats_key)
def http_scheme(self):
- """ Either "https:" or "https:", depending on the user's preferences """
+ """ Either "http:" or "https:", depending on the user's preferences """
return (
'http:'
if self._downloader.params.get('prefer_insecure', False)
diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py
index ffbe4903b..7a7e79360 100644
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -34,6 +34,8 @@ class CondeNastIE(InfoExtractor):
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+ EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
+
_TEST = {
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
'md5': '1921f713ed48aabd715691f774c451f7',
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index f99888ecc..05b21e872 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -24,6 +24,7 @@ from ..aes import (
aes_cbc_decrypt,
inc,
)
+from .common import InfoExtractor
class CrunchyrollIE(SubtitlesInfoExtractor):
@@ -39,6 +40,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
'upload_date': '20131013',
+ 'url': 're:(?!.*&amp)',
},
'params': {
# rtmp
@@ -237,12 +239,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
- streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
- video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
- video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
+ streamdata = self._download_xml(
+ streamdata_req, video_id,
+ note='Downloading media info for %s' % video_format)
+ video_url = streamdata.find('.//host').text
+ video_play_path = streamdata.find('.//file').text
formats.append({
'url': video_url,
- 'play_path': video_play_path,
+ 'play_path': video_play_path,
'ext': 'flv',
'format': video_format,
'format_id': video_format,
@@ -285,3 +289,40 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'subtitles': subtitles,
'formats': formats,
}
+
+
+class CrunchyrollShowPlaylistIE(InfoExtractor):
+ IE_NAME = "crunchyroll:playlist"
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
+
+ _TESTS = [{
+ 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'info_dict': {
+ 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
+ },
+ 'playlist_count': 13,
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, show_id)
+ title = self._html_search_regex(
+ r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
+ webpage, 'title')
+ episode_paths = re.findall(
+ r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
+ webpage)
+ entries = [
+ self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
+ for ep in episode_paths
+ ]
+ entries.reverse()
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': title,
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index c6ab6952e..3c39ca451 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -1,49 +1,48 @@
# encoding: utf-8
-import re
+from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
-)
class FazIE(InfoExtractor):
- IE_NAME = u'faz.net'
+ IE_NAME = 'faz.net'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
_TEST = {
- u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
- u'file': u'12610585.mp4',
- u'info_dict': {
- u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
- u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
+ 'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
+ 'info_dict': {
+ 'id': '12610585',
+ 'ext': 'mp4',
+ 'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
+ 'description': 'md5:1453fbf9a0d041d985a47306192ea253',
},
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- self.to_screen(video_id)
+ video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
- config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
- u'config xml url')
- config = self._download_xml(config_xml_url, video_id,
- u'Downloading config xml')
+ config_xml_url = self._search_regex(
+ r'writeFLV\(\'(.+?)\',', webpage, 'config xml url')
+ config = self._download_xml(
+ config_xml_url, video_id, 'Downloading config xml')
encodings = config.find('ENCODINGS')
formats = []
- for code in ['LOW', 'HIGH', 'HQ']:
+ for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
encoding = encodings.find(code)
if encoding is None:
continue
encoding_url = encoding.find('FILENAME').text
formats.append({
'url': encoding_url,
- 'ext': determine_ext(encoding_url),
'format_id': code.lower(),
+ 'quality': pref,
})
+ self._sort_formats(formats)
- descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
+ descr = self._html_search_regex(
+ r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 0b3374d97..35d7d15e1 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
f4m_format['preference'] = 1
formats.extend(f4m_formats)
elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id))
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
@@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
formats.append({
'url': video_url,
'format_id': format_id,
- 'preference': 2,
+ 'preference': -1,
})
self._sort_formats(formats)
@@ -93,7 +93,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
- 'md5': '9cecf35f99c4079c199e9817882a9a1c',
'info_dict': {
'id': '84981923',
'ext': 'flv',
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index d966e8403..ec6d96ada 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
+ links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
if not links:
raise ExtractorError('No media links available for %s' % video_id)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index dfc2ef4e7..35a7664b2 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -28,6 +28,7 @@ from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
+from .condenast import CondeNastIE
class GenericIE(InfoExtractor):
@@ -324,7 +325,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'age_limit': 18,
'uploader': 'www.handjobhub.com',
- 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
+ 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
}
},
# RSS feed
@@ -379,6 +380,32 @@ class GenericIE(InfoExtractor):
'uploader': 'education-portal.com',
},
},
+ {
+ 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+ 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+ 'info_dict': {
+ 'id': 'uxjb0lwrcz',
+ 'ext': 'mp4',
+ 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+ 'duration': 1715.0,
+ 'uploader': 'thoughtworks.wistia.com',
+ },
+ },
+ # Direct download with broken HEAD
+ {
+ 'url': 'http://ai-radio.org:8000/radio.opus',
+ 'info_dict': {
+ 'id': 'radio',
+ 'ext': 'opus',
+ 'title': 'radio',
+ },
+ 'params': {
+ 'skip_download': True, # infinite live stream
+ },
+ 'expected_warnings': [
+ r'501.*Not Implemented'
+ ],
+ }
]
def report_following_redirect(self, new_url):
@@ -475,7 +502,8 @@ class GenericIE(InfoExtractor):
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else:
- assert ':' in default_search
+ if ':' not in default_search:
+ default_search += ':'
return self.url_result(default_search + url)
url, smuggled_data = unsmuggle_url(url)
@@ -490,14 +518,14 @@ class GenericIE(InfoExtractor):
self.to_screen('%s: Requesting header' % video_id)
head_req = HEADRequest(url)
- response = self._request_webpage(
+ head_response = self._request_webpage(
head_req, video_id,
note=False, errnote='Could not send HEAD request to %s' % url,
fatal=False)
- if response is not False:
+ if head_response is not False:
# Check for redirect
- new_url = response.geturl()
+ new_url = head_response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
@@ -505,34 +533,35 @@ class GenericIE(InfoExtractor):
new_url, {'force_videoid': force_videoid})
return self.url_result(new_url)
- # Check for direct link to a video
- content_type = response.headers.get('Content-Type', '')
- m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
- if m:
- upload_date = response.headers.get('Last-Modified')
- if upload_date:
- upload_date = unified_strdate(upload_date)
- return {
- 'id': video_id,
- 'title': os.path.splitext(url_basename(url))[0],
- 'formats': [{
- 'format_id': m.group('format_id'),
- 'url': url,
- 'vcodec': 'none' if m.group('type') == 'audio' else None
- }],
- 'upload_date': upload_date,
- }
+ full_response = None
+ if head_response is False:
+ full_response = self._request_webpage(url, video_id)
+ head_response = full_response
+
+ # Check for direct link to a video
+ content_type = head_response.headers.get('Content-Type', '')
+ m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
+ if m:
+ upload_date = unified_strdate(
+ head_response.headers.get('Last-Modified'))
+ return {
+ 'id': video_id,
+ 'title': os.path.splitext(url_basename(url))[0],
+ 'formats': [{
+ 'format_id': m.group('format_id'),
+ 'url': url,
+ 'vcodec': 'none' if m.group('type') == 'audio' else None
+ }],
+ 'upload_date': upload_date,
+ }
if not self._downloader.params.get('test', False) and not is_intentional:
self._downloader.report_warning('Falling back on generic information extractor.')
- try:
+ if full_response:
+ webpage = self._webpage_read_content(full_response, url, video_id)
+ else:
webpage = self._download_webpage(url, video_id)
- except ValueError:
- # since this is the last-resort InfoExtractor, if
- # this error is thrown, it'll be thrown here
- raise ExtractorError('Failed to download URL: %s' % url)
-
self.report_extraction(video_id)
# Is it an RSS feed?
@@ -608,13 +637,13 @@ class GenericIE(InfoExtractor):
if mobj:
player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
- return self.url_result(surl, 'Vimeo')
+ return self.url_result(surl)
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
- r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+ r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
- return self.url_result(mobj.group(1), 'Vimeo')
+ return self.url_result(mobj.group(1))
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
@@ -622,7 +651,8 @@ class GenericIE(InfoExtractor):
<iframe[^>]+?src=|
data-video-url=|
<embed[^>]+?src=|
- embedSWF\(?:\s*
+ embedSWF\(?:\s*|
+ new\s+SWFObject\(
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
@@ -651,17 +681,20 @@ class GenericIE(InfoExtractor):
# Look for embedded Wistia player
match = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+ r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
+ embed_url = self._proto_relative_url(
+ unescapeHTML(match.group('url')))
return {
'_type': 'url_transparent',
- 'url': unescapeHTML(match.group('url')),
+ 'url': embed_url,
'ie_key': 'Wistia',
'uploader': video_uploader,
'title': video_title,
'id': video_id,
}
- match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+
+ match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
'_type': 'url_transparent',
@@ -847,6 +880,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'MLB')
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ webpage)
+ if mobj is not None:
+ return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
+
def check_video(vurl):
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py
new file mode 100644
index 000000000..9561ed5fb
--- /dev/null
+++ b/youtube_dl/extractor/glide.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GlideIE(InfoExtractor):
+ IE_DESC = 'Glide mobile video messages (glide.me)'
+ _VALID_URL = r'https?://share\.glide\.me/(?P<id>[A-Za-z0-9\-=_+]+)'
+ _TEST = {
+ 'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==',
+ 'md5': '4466372687352851af2d131cfaa8a4c7',
+ 'info_dict': {
+ 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
+ 'ext': 'mp4',
+ 'title': 'Damon Timm\'s Glide message',
+ 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(
+ r'<title>(.*?)</title>', webpage, 'title')
+ video_url = self.http_scheme() + self._search_regex(
+ r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL')
+ thumbnail_url = self._search_regex(
+ r'<img id="video-thumbnail" src="(.*?)"',
+ webpage, 'thumbnail url', fatal=False)
+ thumbnail = (
+ thumbnail_url if thumbnail_url is None
+ else self.http_scheme() + thumbnail_url)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index 07d994b44..fcefe54cd 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -1,13 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-import datetime
import re
+import codecs
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
+from ..utils import unified_strdate
class GooglePlusIE(InfoExtractor):
@@ -19,74 +17,57 @@ class GooglePlusIE(InfoExtractor):
'info_dict': {
'id': 'ZButuJc6CtH',
'ext': 'flv',
+ 'title': '嘆きの天使 降臨',
'upload_date': '20120613',
'uploader': '井上ヨシマサ',
- 'title': '嘆きの天使 降臨',
}
}
def _real_extract(self, url):
- # Extract id from URL
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
# Step 1, Retrieve post webpage to extract further information
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
- self.report_extraction(video_id)
-
- # Extract update date
- upload_date = self._html_search_regex(
+ title = self._og_search_description(webpage).splitlines()[0]
+ upload_date = unified_strdate(self._html_search_regex(
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
- webpage, 'upload date', fatal=False, flags=re.VERBOSE)
- if upload_date:
- # Convert timestring to a format suitable for filename
- upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
- upload_date = upload_date.strftime('%Y%m%d')
-
- # Extract uploader
- uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
- webpage, 'uploader', fatal=False)
-
- # Extract title
- # Get the first line for title
- video_title = self._og_search_description(webpage).splitlines()[0]
+ webpage, 'upload date', fatal=False, flags=re.VERBOSE))
+ uploader = self._html_search_regex(
+ r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False)
# Step 2, Simulate clicking the image box to launch video
DOMAIN = 'https://plus.google.com/'
- video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
+ video_page = self._search_regex(
+ r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
webpage, 'video page URL')
if not video_page.startswith(DOMAIN):
video_page = DOMAIN + video_page
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
- # Extract video links all sizes
- pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
- mobj = re.findall(pattern, webpage)
- if len(mobj) == 0:
- raise ExtractorError('Unable to extract video links')
-
- # Sort in resolution
- links = sorted(mobj)
+ def unicode_escape(s):
+ decoder = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\u[0-9a-fA-F]{4,}',
+ lambda m: decoder(m.group(0))[0],
+ s)
- # Choose the lowest of the sort, i.e. highest resolution
- video_url = links[-1]
- # Only get the url. The resolution part in the tuple has no use anymore
- video_url = video_url[-1]
- # Treat escaped \u0026 style hex
- try:
- video_url = video_url.decode("unicode_escape")
- except AttributeError: # Python 3
- video_url = bytes(video_url, 'ascii').decode('unicode-escape')
+ # Extract video links all sizes
+ formats = [{
+ 'url': unicode_escape(video_url),
+ 'ext': 'flv',
+ 'width': int(width),
+ 'height': int(height),
+ } for width, height, video_url in re.findall(
+ r'\d+,(\d+),(\d+),"(https?://redirector\.googlevideo\.com.*?)"', webpage)]
+ self._sort_formats(formats)
return {
'id': video_id,
- 'url': video_url,
+ 'title': title,
'uploader': uploader,
'upload_date': upload_date,
- 'title': video_title,
- 'ext': 'flv',
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py
index 45cca1d24..e21e57510 100644
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -46,9 +46,9 @@ class GorillaVidIE(InfoExtractor):
'info_dict': {
'id': '3rso4kdn6f9m',
'ext': 'mp4',
- 'title': 'Micro Pig piglets ready on 16th July 2009',
+ 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
'thumbnail': 're:http://.*\.jpg',
- },
+ }
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py
index 5bdd08afa..b6cc15b6f 100644
--- a/youtube_dl/extractor/hark.py
+++ b/youtube_dl/extractor/hark.py
@@ -1,37 +1,33 @@
# -*- coding: utf-8 -*-
-
-import re
-import json
+from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import determine_ext
+
class HarkIE(InfoExtractor):
- _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
+ _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+'
_TEST = {
- u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
- u'file': u'mmbzyhkgny.mp3',
- u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
- u'info_dict': {
- u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
- u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
- u'duration': 11,
+ 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
+ 'md5': '6783a58491b47b92c7c1af5a77d4cbee',
+ 'info_dict': {
+ 'id': 'mmbzyhkgny',
+ 'ext': 'mp3',
+ 'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
+ 'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
+ 'duration': 11,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
- json_url = "http://www.hark.com/clips/%s.json" %(video_id)
- info_json = self._download_webpage(json_url, video_id)
- info = json.loads(info_json)
- final_url = info['url']
+ video_id = self._match_id(url)
+ data = self._download_json(
+ 'http://www.hark.com/clips/%s.json' % video_id, video_id)
- return {'id': video_id,
- 'url' : final_url,
- 'title': info['name'],
- 'ext': determine_ext(final_url),
- 'description': info['description'],
- 'thumbnail': info['image_original'],
- 'duration': info['duration'],
- }
+ return {
+ 'id': video_id,
+ 'url': data['url'],
+ 'title': data['name'],
+ 'description': data.get('description'),
+ 'thumbnail': data.get('image_original'),
+ 'duration': data.get('duration'),
+ }
diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py
index f97b1e085..d41c0413f 100644
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
get_meta_content,
+ int_or_none,
parse_iso8601,
)
@@ -28,20 +29,26 @@ class HeiseIE(InfoExtractor):
'timestamp': 1411812600,
'upload_date': '20140927',
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
+ 'thumbnail': 're:https?://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
- json_url = self._search_regex(
- r'json_url:\s*"([^"]+)"', webpage, 'json URL')
- config = self._download_json(json_url, video_id)
+
+ container_id = self._search_regex(
+ r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
+ webpage, 'container ID')
+ sequenz_id = self._search_regex(
+ r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
+ webpage, 'sequenz ID')
+ data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
+ doc = self._download_xml(data_url, video_id)
info = {
'id': video_id,
- 'thumbnail': config.get('poster'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
'timestamp': parse_iso8601(get_meta_content('date', webpage)),
'description': self._og_search_description(webpage),
}
@@ -49,32 +56,19 @@ class HeiseIE(InfoExtractor):
title = get_meta_content('fulltitle', webpage)
if title:
info['title'] = title
- elif config.get('title'):
- info['title'] = config['title']
else:
info['title'] = self._og_search_title(webpage)
formats = []
- for t, rs in config['formats'].items():
- if not rs or not hasattr(rs, 'items'):
- self._downloader.report_warning(
- 'formats: {0}: no resolutions'.format(t))
- continue
-
- for height_str, obj in rs.items():
- format_id = '{0}_{1}'.format(t, height_str)
-
- if not obj or not obj.get('url'):
- self._downloader.report_warning(
- 'formats: {0}: no url'.format(format_id))
- continue
-
- formats.append({
- 'url': obj['url'],
- 'format_id': format_id,
- 'height': self._int(height_str, 'height'),
- })
-
+ for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
+ label = source_node.attrib['label']
+ height = int_or_none(self._search_regex(
+ r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
+ formats.append({
+ 'url': source_node.attrib['file'],
+ 'format_note': label,
+ 'height': height,
+ })
self._sort_formats(formats)
info['formats'] = formats
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index 68684b997..fccc23884 100644
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -28,13 +28,13 @@ class HowStuffWorksIE(InfoExtractor):
}
},
{
- 'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
+ 'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
'info_dict': {
- 'id': '553470',
- 'display_id': 'deadliest-catch-jakes-farewell-pots',
+ 'id': '453464',
+ 'display_id': 'survival-zone-food-and-water-in-the-savanna',
'ext': 'mp4',
- 'title': 'Deadliest Catch: Jake\'s Farewell Pots',
- 'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
+ 'title': 'Survival Zone: Food and Water In the Savanna',
+ 'description': 'md5:7e1c89f6411434970c15fa094170c371',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py
index 94e7cf790..4ccf6b9b8 100644
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@@ -33,8 +33,7 @@ class HuffPostIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
data = self._download_json(api_url, video_id)['data']
diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py
deleted file mode 100644
index 27017e89f..000000000
--- a/youtube_dl/extractor/justintv.py
+++ /dev/null
@@ -1,155 +0,0 @@
-from __future__ import unicode_literals
-
-import itertools
-import json
-import os
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- compat_str,
- ExtractorError,
- formatSeconds,
-)
-
-
-class JustinTVIE(InfoExtractor):
- """Information extractor for justin.tv and twitch.tv"""
- # TODO: One broadcast may be split into multiple videos. The key
- # 'broadcast_id' is the same for all parts, and 'broadcast_part'
- # starts at 1 and increases. Can we treat all parts as one video?
-
- _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
- (?:
- (?P<channelid>[^/]+)|
- (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
- (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
- )
- /?(?:\#.*)?$
- """
- _JUSTIN_PAGE_LIMIT = 100
- IE_NAME = 'justin.tv'
- IE_DESC = 'justin.tv and twitch.tv'
- _TEST = {
- 'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
- 'md5': 'ecaa8a790c22a40770901460af191c9a',
- 'info_dict': {
- 'id': '296128360',
- 'ext': 'flv',
- 'upload_date': '20110927',
- 'uploader_id': 25114803,
- 'uploader': 'thegamedevhub',
- 'title': 'Beginner Series - Scripting With Python Pt.1'
- }
- }
-
- # Return count of items, list of *valid* items
- def _parse_page(self, url, video_id, counter):
- info_json = self._download_webpage(
- url, video_id,
- 'Downloading video info JSON on page %d' % counter,
- 'Unable to download video info JSON %d' % counter)
-
- response = json.loads(info_json)
- if type(response) != list:
- error_text = response.get('error', 'unknown error')
- raise ExtractorError('Justin.tv API: %s' % error_text)
- info = []
- for clip in response:
- video_url = clip['video_file_url']
- if video_url:
- video_extension = os.path.splitext(video_url)[1][1:]
- video_date = re.sub('-', '', clip['start_time'][:10])
- video_uploader_id = clip.get('user_id', clip.get('channel_id'))
- video_id = clip['id']
- video_title = clip.get('title', video_id)
- info.append({
- 'id': compat_str(video_id),
- 'url': video_url,
- 'title': video_title,
- 'uploader': clip.get('channel_name', video_uploader_id),
- 'uploader_id': video_uploader_id,
- 'upload_date': video_date,
- 'ext': video_extension,
- })
- return (len(response), info)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- api_base = 'http://api.justin.tv'
- paged = False
- if mobj.group('channelid'):
- paged = True
- video_id = mobj.group('channelid')
- api = api_base + '/channel/archives/%s.json' % video_id
- elif mobj.group('chapterid'):
- chapter_id = mobj.group('chapterid')
-
- webpage = self._download_webpage(url, chapter_id)
- m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
- if not m:
- raise ExtractorError('Cannot find archive of a chapter')
- archive_id = m.group(1)
-
- api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
- doc = self._download_xml(
- api, chapter_id,
- note='Downloading chapter information',
- errnote='Chapter information download failed')
- for a in doc.findall('.//archive'):
- if archive_id == a.find('./id').text:
- break
- else:
- raise ExtractorError('Could not find chapter in chapter information')
-
- video_url = a.find('./video_file_url').text
- video_ext = video_url.rpartition('.')[2] or 'flv'
-
- chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
- chapter_info = self._download_json(
- chapter_api_url, 'c' + chapter_id,
- note='Downloading chapter metadata',
- errnote='Download of chapter metadata failed')
-
- bracket_start = int(doc.find('.//bracket_start').text)
- bracket_end = int(doc.find('.//bracket_end').text)
-
- # TODO determine start (and probably fix up file)
- # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
- #video_url += '?start=' + TODO:start_timestamp
- # bracket_start is 13290, but we want 51670615
- self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
- 'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
- info = {
- 'id': 'c' + chapter_id,
- 'url': video_url,
- 'ext': video_ext,
- 'title': chapter_info['title'],
- 'thumbnail': chapter_info['preview'],
- 'description': chapter_info['description'],
- 'uploader': chapter_info['channel']['display_name'],
- 'uploader_id': chapter_info['channel']['name'],
- }
- return info
- else:
- video_id = mobj.group('videoid')
- api = api_base + '/broadcast/by_archive/%s.json' % video_id
-
- entries = []
- offset = 0
- limit = self._JUSTIN_PAGE_LIMIT
- for counter in itertools.count(1):
- page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
- page_count, page_info = self._parse_page(
- page_url, video_id, counter)
- entries.extend(page_info)
- if not paged or page_count != limit:
- break
- offset += limit
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
index 56a76380c..827091e60 100644
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@@ -1,8 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -21,22 +19,17 @@ class KickStarterIE(InfoExtractor):
}, {
'note': 'Embedded video (not using the native kickstarter video service)',
'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178',
- 'playlist': [
- {
- 'info_dict': {
- 'id': '78704821',
- 'ext': 'mp4',
- 'uploader_id': 'pebble',
- 'uploader': 'Pebble Technology',
- 'title': 'Pebble iOS Notifications',
- }
- }
- ],
+ 'info_dict': {
+ 'id': '78704821',
+ 'ext': 'mp4',
+ 'uploader_id': 'pebble',
+ 'uploader': 'Pebble Technology',
+ 'title': 'Pebble iOS Notifications',
+ }
}]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py
index 5341ac773..8a73ecfa0 100644
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@@ -34,7 +34,7 @@ class KontrTubeIE(InfoExtractor):
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
title = self._html_search_regex(
- r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
+ r'<title>(.+?)</title>', webpage, 'video title')
description = self._html_search_meta('description', webpage, 'video description')
mobj = re.search(
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 516147417..363a12ad0 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -190,7 +190,8 @@ class LivestreamOriginalIE(InfoExtractor):
'id': video_id,
'title': item.find('title').text,
'url': 'rtmp://extondemand.livestream.com/ondemand',
- 'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
+ 'play_path': 'trans/dv15/mogulus-{0}'.format(path),
+ 'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
'ext': 'flv',
'thumbnail': thumbnail_url,
}
diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py
index fca0bfef0..db5df4078 100644
--- a/youtube_dl/extractor/lrt.py
+++ b/youtube_dl/extractor/lrt.py
@@ -22,7 +22,7 @@ class LRTIE(InfoExtractor):
'id': '54391',
'ext': 'mp4',
'title': 'Septynios Kauno dienos',
- 'description': 'Kauno miesto ir apskrities naujienos',
+ 'description': 'md5:24d84534c7dc76581e59f5689462411a',
'duration': 1783,
},
'params': {
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 979f3d692..6691521e5 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -6,6 +6,7 @@ import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
+ compat_urlparse,
get_element_by_attribute,
parse_duration,
strip_jsonp,
@@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):
).replace('\'', '"')
embed_data = json.loads(embed_data_json)
- info_url = embed_data['flashvars']['host']
+ domain = embed_data['mediaUrl']
+ if not domain.startswith('http'):
+ # only happens in telecinco.es videos
+ domain = 'http://' + domain
+ info_url = compat_urlparse.urljoin(
+ domain,
+ compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+ )
info_el = self._download_xml(info_url, episode).find('./video/info')
video_link = info_el.find('videoUrl/link').text
token_query = compat_urllib_parse.urlencode({'id': video_link})
token_info = self._download_json(
- 'http://token.mitele.es/?' + token_query, episode,
+ embed_data['flashvars']['ov_tk'] + '?' + token_query,
+ episode,
transform_source=strip_jsonp
)
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 520f27fca..bb8937c4d 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):
},
}
- def check_urls(self, url_list):
- """Returns 1st active url from list"""
- for url in url_list:
+ def _get_url(self, track_id, template_url):
+ server_count = 30  # template_url is filled with 0 .. server_count - 1
+ for i in range(server_count):
+ url = template_url % i
try:
# We only want to know if the request succeed
# don't download the whole file
- self._request_webpage(HEADRequest(url), None, False)
+ self._request_webpage(
+ HEADRequest(url), track_id,
+ 'Checking URL %d/%d ...' % (i + 1, server_count))  # i + 1 runs 1 .. server_count
return url
except ExtractorError:
- url = None
+ pass
return None
- def _get_url(self, template_url):
- return self.check_urls(template_url % i for i in range(30))
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group(1)
@@ -61,16 +61,16 @@ class MixcloudIE(InfoExtractor):
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
- final_song_url = self._get_url(template_url)
+ final_song_url = self._get_url(track_id, template_url)
if final_song_url is None:
self.to_screen('Trying with m4a extension')
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
- final_song_url = self._get_url(template_url)
+ final_song_url = self._get_url(track_id, template_url)
if final_song_url is None:
raise ExtractorError('Unable to extract track url')
PREFIX = (
- r'<div class="cloudcast-play-button-container"'
+ r'<div class="cloudcast-play-button-container[^"]*?"'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 6229b2173..3621ff99e 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -5,20 +5,20 @@ import re
from .common import InfoExtractor
from ..utils import (
- int_or_none,
+ str_to_int,
unified_strdate,
)
class MotherlessIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
+ _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
_TESTS = [
{
'url': 'http://motherless.com/AC3FFE1',
- 'md5': '5527fef81d2e529215dad3c2d744a7d9',
+ 'md5': '310f62e325a9fafe64f68c0bccb6e75f',
'info_dict': {
'id': 'AC3FFE1',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
@@ -40,33 +40,51 @@ class MotherlessIE(InfoExtractor):
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
+ },
+ {
+ 'url': 'http://motherless.com/g/cosplay/633979F',
+ 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+ 'info_dict': {
+ 'id': '633979F',
+ 'ext': 'mp4',
+ 'title': 'Turtlette',
+ 'categories': ['superheroine heroine superher'],
+ 'upload_date': '20140827',
+ 'uploader_id': 'shade0230',
+ 'thumbnail': 're:http://.*\.jpg',
+ 'age_limit': 18,
+ }
}
]
- def _real_extract(self,url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
-
- video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
+ title = self._html_search_regex(
+ r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
+ video_url = self._html_search_regex(
+ r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
age_limit = self._rta_search(webpage)
-
- view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')
+ view_count = str_to_int(self._html_search_regex(
+ r'<strong>Views</strong>\s+([^<]+)<',
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._html_search_regex(
+ r'<strong>Favorited</strong>\s+([^<]+)<',
+ webpage, 'like count', fatal=False))
- upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
+ upload_date = self._html_search_regex(
+ r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
if 'Ago' in upload_date:
days = int(re.search(r'([0-9]+)', upload_date).group(1))
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
else:
upload_date = unified_strdate(upload_date)
- like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')
-
comment_count = webpage.count('class="media-comment-contents"')
- uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')
+ uploader_id = self._html_search_regex(
+ r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
+ webpage, 'uploader_id')
categories = self._html_search_meta('keywords', webpage)
if categories:
@@ -79,8 +97,8 @@ class MotherlessIE(InfoExtractor):
'uploader_id': uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),
'categories': categories,
- 'view_count': int_or_none(view_count.replace(',', '')),
- 'like_count': int_or_none(like_count.replace(',', '')),
+ 'view_count': view_count,
+ 'like_count': like_count,
'comment_count': comment_count,
'age_limit': age_limit,
'url': video_url,
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index e75ab7c39..7b5449031 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -26,8 +26,7 @@ class NBCIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
if theplatform_url.startswith('//'):
@@ -57,7 +56,7 @@ class NBCNewsIE(InfoExtractor):
'md5': 'b2421750c9f260783721d898f4c42063',
'info_dict': {
'id': 'I1wpAI_zmhsQ',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
},
@@ -97,6 +96,8 @@ class NBCNewsIE(InfoExtractor):
]
for base_url in base_urls:
+ if not base_url:
+ continue
playlist_url = base_url + '?form=MPXNBCNewsAPI'
all_videos = self._download_json(playlist_url, title)['videos']
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index 94d5ba982..add4b3e5d 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -18,16 +18,16 @@ class NDRIE(InfoExtractor):
_TESTS = [
{
- 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
- 'md5': '4a4eeafd17c3058b65f0c8f091355855',
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
+ 'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
'note': 'Video file',
'info_dict': {
- 'id': '325',
+ 'id': '25866',
'ext': 'mp4',
- 'title': 'Blaue Bohnen aus Blocken',
- 'description': 'md5:190d71ba2ccddc805ed01547718963bc',
- 'duration': 1715,
- },
+ 'title': 'Kartoffeltage in der Lewitz',
+ 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
+ 'duration': 166,
+ }
},
{
'url': 'http://www.ndr.de/info/audio51535.html',
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
index 072d9cf8e..82af6e330 100644
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..utils import (
compat_urlparse,
compat_urllib_parse,
- determine_ext,
unified_strdate,
)
@@ -22,21 +21,23 @@ class NHLBaseInfoExtractor(InfoExtractor):
self.report_extraction(video_id)
initial_video_url = info['publishPoint']
- data = compat_urllib_parse.urlencode({
- 'type': 'fvod',
- 'path': initial_video_url.replace('.mp4', '_sd.mp4'),
- })
- path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
- path_doc = self._download_xml(
- path_url, video_id, 'Downloading final video url')
- video_url = path_doc.find('path').text
+ if info['formats'] == '1':
+ data = compat_urllib_parse.urlencode({
+ 'type': 'fvod',
+ 'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+ })
+ path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
+ path_doc = self._download_xml(
+ path_url, video_id, 'Downloading final video url')
+ video_url = path_doc.find('path').text
+ else:
+ video_url = initial_video_url
join = compat_urlparse.urljoin
return {
'id': video_id,
'title': info['name'],
'url': video_url,
- 'ext': determine_ext(video_url),
'description': info['description'],
'duration': int(info['duration']),
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
@@ -46,10 +47,11 @@ class NHLBaseInfoExtractor(InfoExtractor):
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com'
- _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'
_TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+ 'md5': 'db704a4ea09e8d3988c85e36cc892d09',
'info_dict': {
'id': '453614',
'ext': 'mp4',
@@ -59,6 +61,17 @@ class NHLIE(NHLBaseInfoExtractor):
'upload_date': '20131006',
},
}, {
+ 'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
+ 'md5': 'd22e82bc592f52d37d24b03531ee9696',
+ 'info_dict': {
+ 'id': '2014020024-628-h',
+ 'ext': 'mp4',
+ 'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
+ 'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
+ 'duration': 0,
+ 'upload_date': '20141011',
+ },
+ }, {
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
'only_matching': True,
}]
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index c0c139b5d..7b85589b7 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -39,18 +39,17 @@ class NiconicoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
- # Determine whether the downloader uses authentication to download video
- _AUTHENTICATE = False
+ # Determine whether the downloader used authentication to download video
+ _AUTHENTICATED = False
def _real_initialize(self):
- if self._downloader.params.get('username', None) is not None:
- self._AUTHENTICATE = True
-
- if self._AUTHENTICATE:
- self._login()
+ self._login()
def _login(self):
(username, password) = self._get_login_info()
+ # No authentication to be performed
+ if not username:
+ return True
# Log in
login_form_strs = {
@@ -68,6 +67,8 @@ class NiconicoIE(InfoExtractor):
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username or password')
return False
+ # Successful login
+ self._AUTHENTICATED = True
return True
def _real_extract(self, url):
@@ -82,7 +83,7 @@ class NiconicoIE(InfoExtractor):
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note='Downloading video info page')
- if self._AUTHENTICATE:
+ if self._AUTHENTICATED:
# Get flv info
flv_info_webpage = self._download_webpage(
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 8f140d626..6118ed5c2 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140122',
}
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+ 'info_dict': {
+ 'id': 'united-states-of-secrets',
+ },
+ 'playlist_count': 2,
}
-
]
def _extract_webpage(self, url):
@@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
webpage, 'upload date', default=None))
+ # tabbed frontline videos
+ tabbed_videos = re.findall(
+ r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
+ if tabbed_videos:
+ return tabbed_videos, presumptive_id, upload_date
+
MEDIA_ID_REGEXES = [
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
r'class="coveplayerid">([^<]+)<', # coveplayer
@@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id, upload_date = self._extract_webpage(url)
+ if isinstance(video_id, list):
+ entries = [self.url_result(
+ 'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+ for vid_id in video_id]
+ return self.playlist_result(entries, display_id)
+
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 4118ee956..618e8f5dd 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -16,13 +16,14 @@ from ..aes import (
class PornHubIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
+ _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
_TEST = {
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
- 'file': '648719015.mp4',
'md5': '882f488fa1f0026f023f33576004a2ed',
'info_dict': {
- "uploader": "BABES-COM",
+ 'id': '648719015',
+ 'ext': 'mp4',
+ "uploader": "Babes",
"title": "Seductive Indian beauty strips down and fingers her pink pussy",
"age_limit": 18
}
@@ -35,9 +36,7 @@ class PornHubIE(InfoExtractor):
return count
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('videoid')
- url = 'http://www.' + mobj.group('url')
+ video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
@@ -45,7 +44,7 @@ class PornHubIE(InfoExtractor):
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
- r'(?s)From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
+ r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py
index 463e85501..7fcde086c 100644
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -14,7 +14,6 @@ from ..utils import (
class PromptFileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
- _FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
_TEST = {
'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
'md5': 'd1451b6302da7215485837aaea882c4c',
@@ -27,11 +26,10 @@ class PromptFileIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
+ if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index a45884b25..1a41cbe40 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -81,7 +81,7 @@ class RTLnowIE(InfoExtractor):
'id': '99205',
'ext': 'flv',
'title': 'Medicopter 117 - Angst!',
- 'description': 'md5:895b1df01639b5f61a04fc305a5cb94d',
+ 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
'upload_date': '20080928',
'duration': 2691,
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py
index 55b58e5e6..0e470e73f 100644
--- a/youtube_dl/extractor/ruhd.py
+++ b/youtube_dl/extractor/ruhd.py
@@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -21,19 +19,20 @@ class RUHDIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<param name="src" value="([^"]+)"', webpage, 'video url')
title = self._html_search_regex(
- r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title')
+ r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>',
+ webpage, 'title')
description = self._html_search_regex(
- r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False)
+ r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
+ webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
- r'<param name="previewImage" value="([^"]+)"', webpage, 'thumbnail', fatal=False)
+ r'<param name="previewImage" value="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
if thumbnail:
thumbnail = 'http://www.ruhd.ru' + thumbnail
diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py
new file mode 100644
index 000000000..c833fc8ee
--- /dev/null
+++ b/youtube_dl/extractor/sexykarma.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ parse_duration,
+ int_or_none,
+)
+
+
+class SexyKarmaIE(InfoExtractor):
+ IE_DESC = 'Sexy Karma and Watch Indian Porn'
+ _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
+ 'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
+ 'info_dict': {
+ 'id': 'yHI70cOyIHt',
+ 'display_id': 'taking-a-quick-pee',
+ 'ext': 'mp4',
+ 'title': 'Taking a quick pee.',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'wildginger7',
+ 'upload_date': '20141007',
+ 'duration': 22,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
+ }, {
+ 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
+ 'md5': 'dd216c68d29b49b12842b9babe762a5d',
+ 'info_dict': {
+ 'id': '8Id6EZPbuHf',
+ 'display_id': 'pot-pixie-tribute',
+ 'ext': 'mp4',
+ 'title': 'pot_pixie tribute',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'banffite',
+ 'upload_date': '20141013',
+ 'duration': 16,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
+ }, {
+ 'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
+ 'md5': '9afb80675550406ed9a63ac2819ef69d',
+ 'info_dict': {
+ 'id': 'dW2mtctxJfs',
+ 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
+ 'ext': 'mp4',
+ 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'Don',
+ 'upload_date': '20140213',
+ 'duration': 83,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._html_search_regex(
+ r"url: escape\('([^']+)'\)", webpage, 'url')
+
+ title = self._html_search_regex(
+ r'<h2 class="he2"><span>(.*?)</span>',
+ webpage, 'title')
+ thumbnail = self._html_search_regex(
+ r'<span id="container"><img\s+src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ uploader = self._html_search_regex(
+ r'class="aupa">\s*(.*?)</a>',
+ webpage, 'uploader')
+ upload_date = unified_strdate(self._html_search_regex(
+ r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
+
+ duration = parse_duration(self._search_regex(
+ r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._search_regex(
+ r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+ webpage, 'comment count', fatal=False))
+
+ categories = re.findall(
+ r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
+ webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ }
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 4719ba45c..54256e1a2 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -40,14 +40,15 @@ class SoundcloudIE(InfoExtractor):
_TESTS = [
{
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- 'file': '62986583.mp3',
'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
'info_dict': {
- "upload_date": "20121011",
- "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
- "uploader": "E.T. ExTerrestrial Music",
- "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1",
- "duration": 143,
+ 'id': '62986583',
+ 'ext': 'mp3',
+ 'upload_date': '20121011',
+ 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
+ 'uploader': 'E.T. ExTerrestrial Music',
+ 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
+ 'duration': 143,
}
},
# not streamable song
@@ -103,7 +104,7 @@ class SoundcloudIE(InfoExtractor):
'id': '128590877',
'ext': 'mp3',
'title': 'Bus Brakes',
- 'description': 'md5:0170be75dd395c96025d210d261c784e',
+ 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples',
'upload_date': '20140109',
'duration': 17,
@@ -140,6 +141,7 @@ class SoundcloudIE(InfoExtractor):
'description': info['description'],
'thumbnail': thumbnail,
'duration': int_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
}
formats = []
if info.get('downloadable', False):
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index 19cc976e3..becdf658f 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
- int_or_none,
)
@@ -26,7 +25,6 @@ class SportBoxIE(InfoExtractor):
'timestamp': 1411896237,
'upload_date': '20140928',
'duration': 4846,
- 'view_count': int,
},
'params': {
# m3u8 download
@@ -65,8 +63,6 @@ class SportBoxIE(InfoExtractor):
r'<span itemprop="uploadDate">([^<]+)</span>', webpage, 'timestamp', fatal=False))
duration = parse_duration(self._html_search_regex(
r'<meta itemprop="duration" content="PT([^"]+)">', webpage, 'duration', fatal=False))
- view_count = int_or_none(self._html_search_regex(
- r'<span>Просмотров: (\d+)</span>', player, 'view count', fatal=False))
return {
'id': video_id,
@@ -76,6 +72,5 @@ class SportBoxIE(InfoExtractor):
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
- 'view_count': view_count,
'formats': formats,
}
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
new file mode 100644
index 000000000..d92d14d65
--- /dev/null
+++ b/youtube_dl/extractor/srmediathek.py
@@ -0,0 +1,43 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class SRMediathekIE(InfoExtractor):
+ IE_DESC = 'Saarländischer Rundfunk'
+ _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
+ 'info_dict': {
+ 'id': '28455',
+ 'ext': 'mp4',
+ 'title': 'sportarena (26.10.2014)',
+ 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ urls = json.loads(js_to_json(self._search_regex(
+ r'var mediaURLs\s*=\s*(.*?);\n', webpage, 'video URLs')))
+ formats = [{'url': media_url} for media_url in urls]  # distinct name: do not rebind the url argument
+ self._sort_formats(formats)
+
+ title = json.loads(js_to_json(self._search_regex(
+ r'var mediaTitles\s*=\s*(.*?);\n', webpage, 'title')))[0]  # first entry of the mediaTitles array
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py
index f76b6e2b2..5ca079f88 100644
--- a/youtube_dl/extractor/syfy.py
+++ b/youtube_dl/extractor/syfy.py
@@ -10,7 +10,6 @@ class SyfyIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
- 'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
'info_dict': {
'id': 'NmqMrGnXvmO1',
'ext': 'flv',
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index d5e28efad..cd4af96fd 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -65,6 +65,22 @@ class TEDIE(SubtitlesInfoExtractor):
'title': 'Who are the hackers?',
},
'playlist_mincount': 6,
+ }, {
+ # contains a youtube video
+ 'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '_ZG8HBuDjgc',
+ 'ext': 'mp4',
+ 'title': 'Douglas Adams: Parrots the Universe and Everything',
+ 'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
+ 'uploader': 'University of California Television (UCTV)',
+ 'uploader_id': 'UCtelevision',
+ 'upload_date': '20080522',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
_NATIVE_FORMATS = {
@@ -114,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):
talk_info = self._extract_info(webpage)['talks'][0]
+ if talk_info.get('external') is not None:
+ self.to_screen('Found video from %s' % talk_info['external']['service'])
+ return {
+ '_type': 'url',
+ 'url': talk_info['external']['uri'],
+ }
+
formats = [{
'url': format_url,
'format_id': format_id,
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
new file mode 100644
index 000000000..db9788c18
--- /dev/null
+++ b/youtube_dl/extractor/telecinco.py
@@ -0,0 +1,19 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .mitele import MiTeleIE
+
+
+class TelecincoIE(MiTeleIE):
+ IE_NAME = 'telecinco.es'
+ _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+
+ _TEST = {
+ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+ 'info_dict': {
+ 'id': 'MDSVID20141015_0058',
+ 'ext': 'mp4',
+ 'title': 'Con Martín Berasategui, hacer un bacalao al ...',
+ 'duration': 662,
+ },
+ }
diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py
new file mode 100644
index 000000000..b65d8e03f
--- /dev/null
+++ b/youtube_dl/extractor/theonion.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TheOnionIE(InfoExtractor):
+ _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+ _TEST = {
+ 'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
+ 'md5': '19eaa9a39cf9b9804d982e654dc791ee',
+ 'info_dict': {
+ 'id': '2133',
+ 'ext': 'mp4',
+ 'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
+ 'description': 'md5:cc12448686b5600baae9261d3e180910',
+ 'thumbnail': 're:^https?://.*\.jpg\?\d+$',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ article_id = mobj.group('article_id')
+
+ webpage = self._download_webpage(url, article_id)
+
+ video_id = self._search_regex(
+ r'"videoId":\s(\d+),', webpage, 'video ID')
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
+ if not sources:
+ raise ExtractorError(
+ 'No sources found for video %s' % video_id, expected=True)
+
+ formats = []
+ for src, type_ in sources:
+ if type_ == 'video/mp4':
+ formats.append({
+ 'format_id': 'mp4_sd',
+ 'preference': 1,
+ 'url': src,
+ })
+ elif type_ == 'video/webm':
+ formats.append({
+ 'format_id': 'webm_sd',
+ 'preference': 0,
+ 'url': src,
+ })
+ elif type_ == 'application/x-mpegURL':
+ formats.extend(
+ self._extract_m3u8_formats(src, video_id, preference=-1))
+ else:
+ self.report_warning(
+ 'Encountered unexpected format: %s' % type_)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 0be793b1c..a04925633 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -6,6 +6,7 @@ import json
from .common import InfoExtractor
from ..utils import (
compat_str,
+ determine_ext,
ExtractorError,
xpath_with_ns,
)
@@ -34,10 +35,21 @@ class ThePlatformIE(InfoExtractor):
'skip_download': True,
},
}
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ if mobj.group('config'):
+ config_url = url+ '&form=json'
+ config_url = config_url.replace('swf/', 'config/')
+ config_url = config_url.replace('onsite/', 'onsite/config/')
+ config = self._download_json(config_url, video_id, 'Downloading config')
+ smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
+ else:
+ smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
+ 'format=smil&mbr=true'.format(video_id))
- def _get_info(self, video_id, smil_url):
- meta = self._download_xml(smil_url, video_id)
+ meta = self._download_xml(smil_url, video_id)
try:
error_msg = next(
n.attrib['abstract']
@@ -89,10 +101,14 @@ class ThePlatformIE(InfoExtractor):
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int(attr['system-bitrate']) // 1000
+ ext = determine_ext(attr['src'])
+ if ext == 'once':
+ ext = 'mp4'
formats.append({
'format_id': compat_str(vbr),
'url': attr['src'],
'vbr': vbr,
+ 'ext': ext,
})
self._sort_formats(formats)
@@ -104,17 +120,3 @@ class ThePlatformIE(InfoExtractor):
'thumbnail': info['defaultThumbnailUrl'],
'duration': info['duration']//1000,
}
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- if mobj.group('config'):
- config_url = url+ '&form=json'
- config_url = config_url.replace('swf/', 'config/')
- config_url = config_url.replace('onsite/', 'onsite/config/')
- config = self._download_json(config_url, video_id, 'Downloading config')
- smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
- else:
- smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
- 'format=smil&mbr=true'.format(video_id))
- return self._get_info(video_id, smil_url)
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 306fe8974..40c53ff17 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -4,9 +4,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
class TumblrIE(InfoExtractor):
@@ -18,7 +15,7 @@ class TumblrIE(InfoExtractor):
'id': '54196191430',
'ext': 'mp4',
'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
- 'description': 'md5:dfac39636969fe6bf1caa2d50405f069',
+ 'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
'thumbnail': 're:http://.*\.jpg',
}
}, {
@@ -27,7 +24,7 @@ class TumblrIE(InfoExtractor):
'info_dict': {
'id': '90208453769',
'ext': 'mp4',
- 'title': '5SOS STRUM ;)',
+ 'title': '5SOS STRUM ;]',
'description': 'md5:dba62ac8639482759c8eb10ce474586a',
'thumbnail': 're:http://.*\.jpg',
}
@@ -41,18 +38,12 @@ class TumblrIE(InfoExtractor):
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage = self._download_webpage(url, video_id)
- re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
- video = re.search(re_video, webpage)
- if video is None:
- raise ExtractorError('Unable to extract video')
- video_url = video.group('video_url')
- ext = video.group('ext')
-
- video_thumbnail = self._search_regex(
- r'posters.*?\[\\x22(.*?)\\x22',
- webpage, 'thumbnail', fatal=False) # We pick the first poster
- if video_thumbnail:
- video_thumbnail = video_thumbnail.replace('\\\\/', '/')
+ iframe_url = self._search_regex(
+ r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
+ webpage, 'iframe url')
+ iframe = self._download_webpage(iframe_url, video_id)
+ video_url = self._search_regex(r'<source src="([^"]+)"',
+ iframe, 'video url')
# The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos
@@ -62,9 +53,9 @@ class TumblrIE(InfoExtractor):
return {
'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'description': self._html_search_meta('description', webpage),
- 'thumbnail': video_thumbnail,
- 'ext': ext,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
}
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
new file mode 100644
index 000000000..36aa1ad6e
--- /dev/null
+++ b/youtube_dl/extractor/twitch.py
@@ -0,0 +1,187 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class TwitchIE(InfoExtractor):
+ # TODO: One broadcast may be split into multiple videos. The key
+ # 'broadcast_id' is the same for all parts, and 'broadcast_part'
+ # starts at 1 and increases. Can we treat all parts as one video?
+ _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
+ (?:
+ (?P<channelid>[^/]+)|
+ (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
+ (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
+ )
+ /?(?:\#.*)?$
+ """
+ _PAGE_LIMIT = 100
+ _API_BASE = 'https://api.twitch.tv'
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/riotgames/b/577357806',
+ 'info_dict': {
+ 'id': 'a577357806',
+ 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+ 'info_dict': {
+ 'id': 'c5285812',
+ 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://www.twitch.tv/vanillatv',
+ 'info_dict': {
+ 'id': 'vanillatv',
+ 'title': 'VanillaTV',
+ },
+ 'playlist_mincount': 412,
+ }]
+
+ def _handle_error(self, response):
+ if not isinstance(response, dict):
+ return
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
+ expected=True)
+
+ def _download_json(self, url, video_id, note='Downloading JSON metadata'):
+ response = super(TwitchIE, self)._download_json(url, video_id, note)
+ self._handle_error(response)
+ return response
+
+ def _extract_media(self, item, item_id):
+ ITEMS = {
+ 'a': 'video',
+ 'c': 'chapter',
+ }
+ info = self._extract_info(self._download_json(
+ '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+ 'Downloading %s info JSON' % ITEMS[item]))
+ response = self._download_json(
+ '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+ 'Downloading %s playlist JSON' % ITEMS[item])
+ entries = []
+ chunks = response['chunks']
+ qualities = list(chunks.keys())
+ for num, fragment in enumerate(zip(*chunks.values()), start=1):
+ formats = []
+ for fmt_num, fragment_fmt in enumerate(fragment):
+ format_id = qualities[fmt_num]
+ fmt = {
+ 'url': fragment_fmt['url'],
+ 'format_id': format_id,
+ 'quality': 1 if format_id == 'live' else 0,
+ }
+ m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+ if m:
+ fmt['height'] = int(m.group('height'))
+ formats.append(fmt)
+ self._sort_formats(formats)
+ entry = dict(info)
+ entry['id'] = '%s_%d' % (entry['id'], num)
+ entry['title'] = '%s part %d' % (entry['title'], num)
+ entry['formats'] = formats
+ entries.append(entry)
+ return self.playlist_result(entries, info['id'], info['title'])
+
+ def _extract_info(self, info):
+ return {
+ 'id': info['_id'],
+ 'title': info['title'],
+ 'description': info['description'],
+ 'duration': info['length'],
+ 'thumbnail': info['preview'],
+ 'uploader': info['channel']['display_name'],
+ 'uploader_id': info['channel']['name'],
+ 'timestamp': parse_iso8601(info['recorded_at']),
+ 'view_count': info['views'],
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj.group('chapterid'):
+ return self._extract_media('c', mobj.group('chapterid'))
+
+ """
+ webpage = self._download_webpage(url, chapter_id)
+ m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
+ if not m:
+ raise ExtractorError('Cannot find archive of a chapter')
+ archive_id = m.group(1)
+
+ api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
+ doc = self._download_xml(
+ api, chapter_id,
+ note='Downloading chapter information',
+ errnote='Chapter information download failed')
+ for a in doc.findall('.//archive'):
+ if archive_id == a.find('./id').text:
+ break
+ else:
+ raise ExtractorError('Could not find chapter in chapter information')
+
+ video_url = a.find('./video_file_url').text
+ video_ext = video_url.rpartition('.')[2] or 'flv'
+
+ chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
+ chapter_info = self._download_json(
+ chapter_api_url, 'c' + chapter_id,
+ note='Downloading chapter metadata',
+ errnote='Download of chapter metadata failed')
+
+ bracket_start = int(doc.find('.//bracket_start').text)
+ bracket_end = int(doc.find('.//bracket_end').text)
+
+ # TODO determine start (and probably fix up file)
+ # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
+ #video_url += '?start=' + TODO:start_timestamp
+ # bracket_start is 13290, but we want 51670615
+ self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
+ 'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
+
+ info = {
+ 'id': 'c' + chapter_id,
+ 'url': video_url,
+ 'ext': video_ext,
+ 'title': chapter_info['title'],
+ 'thumbnail': chapter_info['preview'],
+ 'description': chapter_info['description'],
+ 'uploader': chapter_info['channel']['display_name'],
+ 'uploader_id': chapter_info['channel']['name'],
+ }
+ return info
+ """
+ elif mobj.group('videoid'):
+ return self._extract_media('a', mobj.group('videoid'))
+ elif mobj.group('channelid'):
+ channel_id = mobj.group('channelid')
+ info = self._download_json(
+ '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+ channel_id, 'Downloading channel info JSON')
+ channel_name = info.get('display_name') or info.get('name')
+ entries = []
+ offset = 0
+ limit = self._PAGE_LIMIT
+ for counter in itertools.count(1):
+ response = self._download_json(
+ '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
+ % (self._API_BASE, channel_id, offset, limit),
+ channel_id, 'Downloading channel videos JSON page %d' % counter)
+ videos = response['videos']
+ if not videos:
+ break
+ entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
+ offset += limit
+ return self.playlist_result(entries, channel_id, channel_name)
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 994b60a76..cee1ea8f6 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -72,7 +72,7 @@ class UstreamChannelIE(InfoExtractor):
'info_dict': {
'id': '10874166',
},
- 'playlist_mincount': 54,
+ 'playlist_mincount': 17,
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 964470070..d3fa70e0e 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor):
'info_dict': {
'id': '84196',
'ext': 'mp4',
- 'title': 'Hevnen er søt episode 10: Abu',
+ 'title': 'Hevnen er søt episode 1:10 - Abu',
'description': 'md5:e25e4badb5f544b04341e14abdc72234',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 648.000,
@@ -67,9 +67,7 @@ class VGTVIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
data = self._download_json(
'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
video_id, 'Downloading media JSON')
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
index 9328ef4a2..0faa729c6 100644
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -1,55 +1,85 @@
-import json
-import re
+from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
class ViddlerIE(InfoExtractor):
- _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TEST = {
- u"url": u"http://www.viddler.com/v/43903784",
- u'file': u'43903784.mp4',
- u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
- u'info_dict': {
- u"title": u"Video Made Easy",
- u"uploader": u"viddler",
- u"duration": 100.89,
+ "url": "http://www.viddler.com/v/43903784",
+ 'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
+ 'info_dict': {
+ 'id': '43903784',
+ 'ext': 'mp4',
+ "title": "Video Made Easy",
+ 'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
+ "uploader": "viddler",
+ 'timestamp': 1335371429,
+ 'upload_date': '20120425',
+ "duration": 100.89,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- embed_url = mobj.group('domain') + u'/embed/' + video_id
- webpage = self._download_webpage(embed_url, video_id)
-
- video_sources_code = self._search_regex(
- r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
- video_sources = json.loads(video_sources_code.replace("'", '"'))
-
- formats = [{
- 'url': video_url,
- 'format': format_id,
- } for video_url, format_id in video_sources.items()]
-
- title = self._html_search_regex(
- r"title\s*:\s*'([^']*)'", webpage, u'title')
- uploader = self._html_search_regex(
- r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
- duration_s = self._html_search_regex(
- r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
- duration = float(duration_s) if duration_s else None
- thumbnail = self._html_search_regex(
- r"thumbnail\s*:\s*'([^']*)'",
- webpage, u'thumbnail', fatal=False)
+ video_id = self._match_id(url)
+
+ json_url = (
+ 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
+ video_id)
+ data = self._download_json(json_url, video_id)['video']
+
+ formats = []
+ for filed in data['files']:
+ if filed.get('status', 'ready') != 'ready':
+ continue
+ f = {
+ 'format_id': filed['profile_id'],
+ 'format_note': filed['profile_name'],
+ 'url': self._proto_relative_url(filed['url']),
+ 'width': int_or_none(filed.get('width')),
+ 'height': int_or_none(filed.get('height')),
+ 'filesize': int_or_none(filed.get('size')),
+ 'ext': filed.get('ext'),
+ 'source_preference': -1,
+ }
+ formats.append(f)
+
+ if filed.get('cdn_url'):
+ f = f.copy()
+ f['url'] = self._proto_relative_url(filed['cdn_url'])
+ f['format_id'] = filed['profile_id'] + '-cdn'
+ f['source_preference'] = 1
+ formats.append(f)
+
+ if filed.get('html5_video_source'):
+ f = f.copy()
+ f['url'] = self._proto_relative_url(
+ filed['html5_video_source'])
+ f['format_id'] = filed['profile_id'] + '-html5'
+ f['source_preference'] = 0
+ formats.append(f)
+ self._sort_formats(formats)
+
+ categories = [
+ t.get('text') for t in data.get('tags', []) if 'text' in t]
return {
'_type': 'video',
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': duration,
+ 'title': data['title'],
'formats': formats,
+ 'description': data.get('description'),
+ 'timestamp': int_or_none(data.get('upload_time')),
+ 'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
+ 'uploader': data.get('author'),
+ 'duration': float_or_none(data.get('length')),
+ 'view_count': int_or_none(data.get('view_count')),
+ 'categories': categories,
}
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
new file mode 100644
index 000000000..669979e13
--- /dev/null
+++ b/youtube_dl/extractor/vidzi.py
@@ -0,0 +1,33 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class VidziIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
+ _TEST = {
+ 'url': 'http://vidzi.tv/cghql9yq6emu.html',
+ 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
+ 'info_dict': {
+ 'id': 'cghql9yq6emu',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ video_url = self._html_search_regex(
+ r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url')
+ title = self._html_search_regex(
+ r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ }
+ \ No newline at end of file
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index d2c36b58a..d9cad0ea5 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -8,13 +8,11 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
- clean_html,
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
ExtractorError,
- get_element_by_attribute,
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
@@ -56,7 +54,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'''(?x)
- (?P<proto>(?:https?:)?//)?
+ https?://
(?:(?:www|(?P<player>player))\.)?
vimeo(?P<pro>pro)?\.com/
(?!channels/[^/?#]+/?(?:$|[?#])|album/)
@@ -157,6 +155,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'duration': 62,
}
},
+ {
+ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+ 'url': 'https://player.vimeo.com/video/98044508',
+ 'note': 'The js code contains assignments to the same variable as the config',
+ 'info_dict': {
+ 'id': '98044508',
+ 'ext': 'mp4',
+ 'title': 'Pier Solar OUYA Official Trailer',
+ 'uploader': 'Tulio Gonçalves',
+ 'uploader_id': 'user28849593',
+ },
+ },
]
def _verify_video_password(self, url, video_id, webpage):
@@ -244,7 +254,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
# We try to find out to which variable is assigned the config dic
m_variable_name = re.search('(\w)\.video\.id', webpage)
if m_variable_name is not None:
- config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
+ config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config = self._search_regex(config_re, webpage, 'info section',
@@ -502,7 +512,7 @@ class VimeoReviewIE(InfoExtractor):
'info_dict': {
'id': '91613211',
'ext': 'mp4',
- 'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
+ 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
'uploader': 'DevWeek Events',
'duration': 2773,
'thumbnail': 're:^https?://.*\.jpg$',
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index e7754158d..42995226e 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -70,7 +70,7 @@ class VineUserIE(InfoExtractor):
'info_dict': {
'id': 'Visa',
},
- 'playlist_mincount': 47,
+ 'playlist_mincount': 46,
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py
new file mode 100644
index 000000000..57ef8dc30
--- /dev/null
+++ b/youtube_dl/extractor/vrt.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class VRTIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
+ _TESTS = [
+ # deredactie.be
+ {
+ 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
+ 'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
+ 'info_dict': {
+ 'id': '2129880',
+ 'ext': 'flv',
+ 'title': 'Het journaal L - 25/10/14',
+ 'description': None,
+ 'timestamp': 1414271750.949,
+ 'upload_date': '20141025',
+ 'duration': 929,
+ }
+ },
+ # sporza.be
+ {
+ 'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
+ 'md5': '11f53088da9bf8e7cfc42456697953ff',
+ 'info_dict': {
+ 'id': '2124639',
+ 'ext': 'flv',
+ 'title': 'Bekijk Extra Time van 20 oktober',
+ 'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
+ 'timestamp': 1413835980.560,
+ 'upload_date': '20141020',
+ 'duration': 3238,
+ }
+ },
+ # cobra.be
+ {
+ 'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
+ 'md5': '78a2b060a5083c4f055449a72477409d',
+ 'info_dict': {
+ 'id': '2126050',
+ 'ext': 'flv',
+ 'title': 'Bret Easton Ellis in Café Corsari',
+ 'description': 'md5:f699986e823f32fd6036c1855a724ee9',
+ 'timestamp': 1413967500.494,
+ 'upload_date': '20141022',
+ 'duration': 661,
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._search_regex(
+ r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
+
+ formats = []
+ mobj = re.search(
+ r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
+ webpage)
+ if mobj:
+ formats.extend(self._extract_m3u8_formats(
+ '%s/%s' % (mobj.group('server'), mobj.group('path')),
+ video_id, 'mp4'))
+ mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
+ if mobj:
+ formats.extend(self._extract_f4m_formats(
+ '%s/manifest.f4m' % mobj.group('src'), video_id))
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(webpage)
+ timestamp = float_or_none(self._search_regex(
+ r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
+ duration = float_or_none(self._search_regex(
+ r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ } \ No newline at end of file
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index 273d93d9e..c3bb9b2cf 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -20,7 +20,7 @@ class XTubeIE(InfoExtractor):
'id': 'kVTUy_G222_',
'ext': 'mp4',
'title': 'strange erotica',
- 'description': 'surreal gay themed erotica...almost an ET kind of thing',
+ 'description': 'http://www.xtube.com an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'age_limit': 18,
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py
index 944d7da38..9cd7989cc 100644
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dl/extractor/ynet.py
@@ -13,7 +13,6 @@ class YnetIE(InfoExtractor):
_TESTS = [
{
'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
- 'md5': '4b29cb57c3dddd57642b3f051f535b07',
'info_dict': {
'id': 'L-11659-99244',
'ext': 'flv',
@@ -22,7 +21,6 @@ class YnetIE(InfoExtractor):
}
}, {
'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
- 'md5': '8194c2ea221e9a639cac96b6b0753dc5',
'info_dict': {
'id': 'L-8859-84418',
'ext': 'flv',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9041cfa87..40fe4662a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -185,14 +185,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._download_webpage(
req, None,
- note='Confirming age', errnote='Unable to confirm age')
- return True
+ note='Confirming age', errnote='Unable to confirm age',
+ fatal=False)
def _real_initialize(self):
if self._downloader is None:
return
- if not self._set_language():
- return
+ if self._get_login_info()[0] is not None:
+ if not self._set_language():
+ return
if not self._login():
return
self._confirm_age()
@@ -286,6 +287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
@@ -938,7 +940,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest
- if (self._downloader.params.get('youtube_include_dash_manifest', False)):
+ if self._downloader.params.get('youtube_include_dash_manifest', True):
try:
# The DASH manifest used needs to be the one from the original video_webpage.
# The one found in get_video_info seems to be using different signatures.
@@ -1055,7 +1057,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'note': 'issue #673',
'url': 'PLBB231211A4F62143',
'info_dict': {
- 'title': 'Team Fortress 2 (Class-based LP)',
+ 'title': '[OLD]Team Fortress 2 (Class-based LP)',
},
'playlist_mincount': 26,
}, {
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f651337ad..98e20d549 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -6,6 +6,8 @@ import shlex
import sys
from .utils import (
+ compat_expanduser,
+ compat_getenv,
get_term_width,
write_string,
)
@@ -27,19 +29,19 @@ def parseOpts(overrideArguments=None):
return res
def _readUserConf():
- xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
+ xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
- userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')
userConf = _readOptions(userConfFile, None)
if userConf is None:
- appdata_dir = os.environ.get('appdata')
+ appdata_dir = compat_getenv('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
@@ -51,11 +53,11 @@ def parseOpts(overrideArguments=None):
if userConf is None:
userConf = _readOptions(
- os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+ os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
- os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+ os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
@@ -75,7 +77,8 @@ def parseOpts(overrideArguments=None):
if len(opts) > 1:
opts.insert(1, ', ')
- if option.takes_value(): opts.append(' %s' % option.metavar)
+ if option.takes_value():
+ opts.append(' %s' % option.metavar)
return "".join(opts)
@@ -87,68 +90,69 @@ def parseOpts(overrideArguments=None):
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
try:
i = opts.index(private_opt)
- opts[i+1] = 'PRIVATE'
+ opts[i + 1] = 'PRIVATE'
except ValueError:
pass
return opts
- max_width = 80
- max_help_position = 80
-
# No need to wrap help messages if we're on a wide console
columns = get_term_width()
- if columns: max_width = columns
+ max_width = columns if columns else 80
+ max_help_position = 80
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
fmt.format_option_strings = _format_option_string
kw = {
- 'version' : __version__,
- 'formatter' : fmt,
- 'usage' : '%prog [options] url [url...]',
- 'conflict_handler' : 'resolve',
+ 'version': __version__,
+ 'formatter': fmt,
+ 'usage': '%prog [options] url [url...]',
+ 'conflict_handler': 'resolve',
}
parser = optparse.OptionParser(**kw)
- # option groups
- general = optparse.OptionGroup(parser, 'General Options')
- selection = optparse.OptionGroup(parser, 'Video Selection')
- authentication = optparse.OptionGroup(parser, 'Authentication Options')
- video_format = optparse.OptionGroup(parser, 'Video Format Options')
- subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
- downloader = optparse.OptionGroup(parser, 'Download Options')
- postproc = optparse.OptionGroup(parser, 'Post-processing Options')
- filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
- workarounds = optparse.OptionGroup(parser, 'Workarounds')
- verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
-
- general.add_option('-h', '--help',
- action='help', help='print this help text and exit')
- general.add_option('-v', '--version',
- action='version', help='print program version and exit')
- general.add_option('-U', '--update',
- action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
- general.add_option('-i', '--ignore-errors',
- action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
- general.add_option('--abort-on-error',
- action='store_false', dest='ignoreerrors',
- help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
- general.add_option('--dump-user-agent',
- action='store_true', dest='dump_user_agent',
- help='display the current browser identification', default=False)
- general.add_option('--list-extractors',
- action='store_true', dest='list_extractors',
- help='List all supported extractors and the URLs they would handle', default=False)
- general.add_option('--extractor-descriptions',
- action='store_true', dest='list_extractor_descriptions',
- help='Output descriptions of all supported extractors', default=False)
+ general = optparse.OptionGroup(parser, 'General Options')
+ general.add_option(
+ '-h', '--help',
+ action='help',
+ help='print this help text and exit')
+ general.add_option(
+ '-v', '--version',
+ action='version',
+ help='print program version and exit')
+ general.add_option(
+ '-U', '--update',
+ action='store_true', dest='update_self',
+ help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+ general.add_option(
+ '-i', '--ignore-errors',
+ action='store_true', dest='ignoreerrors', default=False,
+ help='continue on download errors, for example to skip unavailable videos in a playlist')
+ general.add_option(
+ '--abort-on-error',
+ action='store_false', dest='ignoreerrors',
+ help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+ general.add_option(
+ '--dump-user-agent',
+ action='store_true', dest='dump_user_agent', default=False,
+ help='display the current browser identification')
general.add_option(
- '--proxy', dest='proxy', default=None, metavar='URL',
+ '--list-extractors',
+ action='store_true', dest='list_extractors', default=False,
+ help='List all supported extractors and the URLs they would handle')
+ general.add_option(
+ '--extractor-descriptions',
+ action='store_true', dest='list_extractor_descriptions', default=False,
+ help='Output descriptions of all supported extractors')
+ general.add_option(
+ '--proxy', dest='proxy',
+ default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option(
- '--socket-timeout', dest='socket_timeout',
- type=float, default=None, help=u'Time to wait before giving up, in seconds')
+ '--socket-timeout',
+ dest='socket_timeout', type=float, default=None,
+ help='Time to wait before giving up, in seconds')
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
@@ -157,7 +161,13 @@ def parseOpts(overrideArguments=None):
'--ignore-config',
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+ general.add_option(
+ '--flat-playlist',
+ action='store_const', dest='extract_flat', const='in_playlist',
+ default=False,
+ help='Do not extract the videos of a playlist, only list them.')
+ selection = optparse.OptionGroup(parser, 'Video Selection')
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -166,245 +176,375 @@ def parseOpts(overrideArguments=None):
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
help='playlist video to end at (default is last)')
- selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
- selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
- selection.add_option('--max-downloads', metavar='NUMBER',
- dest='max_downloads', type=int, default=None,
- help='Abort after downloading NUMBER files')
- selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
- selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
- selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option(
- '--datebefore', metavar='DATE', dest='datebefore', default=None,
+ '--match-title',
+ dest='matchtitle', metavar='REGEX',
+ help='download only matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--reject-title',
+ dest='rejecttitle', metavar='REGEX',
+ help='skip download for matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--max-downloads',
+ dest='max_downloads', metavar='NUMBER', type=int, default=None,
+ help='Abort after downloading NUMBER files')
+ selection.add_option(
+ '--min-filesize',
+ metavar='SIZE', dest='min_filesize', default=None,
+ help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--max-filesize',
+ metavar='SIZE', dest='max_filesize', default=None,
+ help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--date',
+ metavar='DATE', dest='date', default=None,
+ help='download only videos uploaded in this date')
+ selection.add_option(
+ '--datebefore',
+ metavar='DATE', dest='datebefore', default=None,
help='download only videos uploaded on or before this date (i.e. inclusive)')
selection.add_option(
- '--dateafter', metavar='DATE', dest='dateafter', default=None,
+ '--dateafter',
+ metavar='DATE', dest='dateafter', default=None,
help='download only videos uploaded on or after this date (i.e. inclusive)')
selection.add_option(
- '--min-views', metavar='COUNT', dest='min_views',
- default=None, type=int,
- help="Do not download any videos with less than COUNT views",)
+ '--min-views',
+ metavar='COUNT', dest='min_views', default=None, type=int,
+ help='Do not download any videos with less than COUNT views',)
selection.add_option(
- '--max-views', metavar='COUNT', dest='max_views',
- default=None, type=int,
- help="Do not download any videos with more than COUNT views",)
- selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
- selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
- help='download only videos suitable for the given age',
- default=None, type=int)
- selection.add_option('--download-archive', metavar='FILE',
- dest='download_archive',
- help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+ '--max-views',
+ metavar='COUNT', dest='max_views', default=None, type=int,
+ help='Do not download any videos with more than COUNT views')
selection.add_option(
- '--include-ads', dest='include_ads',
- action='store_true',
- help='Download advertisements as well (experimental)')
+ '--no-playlist',
+ action='store_true', dest='noplaylist', default=False,
+ help='download only the currently playing video')
+ selection.add_option(
+ '--age-limit',
+ metavar='YEARS', dest='age_limit', default=None, type=int,
+ help='download only videos suitable for the given age')
+ selection.add_option(
+ '--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
selection.add_option(
- '--youtube-include-dash-manifest', action='store_true',
- dest='youtube_include_dash_manifest', default=False,
- help='Try to download the DASH manifest on YouTube videos (experimental)')
-
- authentication.add_option('-u', '--username',
- dest='username', metavar='USERNAME', help='account username')
- authentication.add_option('-p', '--password',
- dest='password', metavar='PASSWORD', help='account password')
- authentication.add_option('-2', '--twofactor',
- dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
- authentication.add_option('-n', '--netrc',
- action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
- authentication.add_option('--video-password',
- dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)')
-
-
- video_format.add_option('-f', '--format',
- action='store', dest='format', metavar='FORMAT', default=None,
- help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio')
- video_format.add_option('--all-formats',
- action='store_const', dest='format', help='download all available video formats', const='all')
- video_format.add_option('--prefer-free-formats',
- action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
- video_format.add_option('--max-quality',
- action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
- video_format.add_option('-F', '--list-formats',
- action='store_true', dest='listformats', help='list all available formats')
-
- subtitles.add_option('--write-sub', '--write-srt',
- action='store_true', dest='writesubtitles',
- help='write subtitle file', default=False)
- subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
- action='store_true', dest='writeautomaticsub',
- help='write automatic subtitle file (youtube only)', default=False)
- subtitles.add_option('--all-subs',
- action='store_true', dest='allsubtitles',
- help='downloads all the available subtitles of the video', default=False)
- subtitles.add_option('--list-subs',
- action='store_true', dest='listsubtitles',
- help='lists all available subtitles for the video', default=False)
- subtitles.add_option('--sub-format',
- action='store', dest='subtitlesformat', metavar='FORMAT',
- help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
- subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
- action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
- default=[], callback=_comma_separated_values_options_callback,
- help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
-
- downloader.add_option('-r', '--rate-limit',
- dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
- downloader.add_option('-R', '--retries',
- dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
- downloader.add_option('--buffer-size',
- dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
- downloader.add_option('--no-resize-buffer',
- action='store_true', dest='noresizebuffer',
- help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
- downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+ '--include-ads',
+ dest='include_ads', action='store_true',
+ help='Download advertisements as well (experimental)')
+ authentication = optparse.OptionGroup(parser, 'Authentication Options')
+ authentication.add_option(
+ '-u', '--username',
+ dest='username', metavar='USERNAME',
+ help='login with this account ID')
+ authentication.add_option(
+ '-p', '--password',
+ dest='password', metavar='PASSWORD',
+ help='account password')
+ authentication.add_option(
+ '-2', '--twofactor',
+ dest='twofactor', metavar='TWOFACTOR',
+ help='two-factor auth code')
+ authentication.add_option(
+ '-n', '--netrc',
+ action='store_true', dest='usenetrc', default=False,
+ help='use .netrc authentication data')
+ authentication.add_option(
+ '--video-password',
+ dest='videopassword', metavar='PASSWORD',
+ help='video password (vimeo, smotri)')
+
+ video_format = optparse.OptionGroup(parser, 'Video Format Options')
+ video_format.add_option(
+ '-f', '--format',
+ action='store', dest='format', metavar='FORMAT', default=None,
+ help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio')
+ video_format.add_option(
+ '--all-formats',
+ action='store_const', dest='format', const='all',
+ help='download all available video formats')
+ video_format.add_option(
+ '--prefer-free-formats',
+ action='store_true', dest='prefer_free_formats', default=False,
+ help='prefer free video formats unless a specific one is requested')
+ video_format.add_option(
+ '--max-quality',
+ action='store', dest='format_limit', metavar='FORMAT',
+ help='highest quality format to download')
+ video_format.add_option(
+ '-F', '--list-formats',
+ action='store_true', dest='listformats',
+ help='list all available formats')
+ video_format.add_option(
+ '--youtube-include-dash-manifest',
+ action='store_true', dest='youtube_include_dash_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--youtube-skip-dash-manifest',
+ action='store_false', dest='youtube_include_dash_manifest',
+ help='Do not download the DASH manifest on YouTube videos')
+
+ subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
+ subtitles.add_option(
+ '--write-sub', '--write-srt',
+ action='store_true', dest='writesubtitles', default=False,
+ help='write subtitle file')
+ subtitles.add_option(
+ '--write-auto-sub', '--write-automatic-sub',
+ action='store_true', dest='writeautomaticsub', default=False,
+ help='write automatic subtitle file (youtube only)')
+ subtitles.add_option(
+ '--all-subs',
+ action='store_true', dest='allsubtitles', default=False,
+ help='downloads all the available subtitles of the video')
+ subtitles.add_option(
+ '--list-subs',
+ action='store_true', dest='listsubtitles', default=False,
+ help='lists all available subtitles for the video')
+ subtitles.add_option(
+ '--sub-format',
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
+ help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+ subtitles.add_option(
+ '--sub-lang', '--sub-langs', '--srt-lang',
+ action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+ default=[], callback=_comma_separated_values_options_callback,
+ help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+
+ downloader = optparse.OptionGroup(parser, 'Download Options')
+ downloader.add_option(
+ '-r', '--rate-limit',
+ dest='ratelimit', metavar='LIMIT',
+ help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ downloader.add_option(
+ '-R', '--retries',
+ dest='retries', metavar='RETRIES', default=10,
+ help='number of retries (default is %default)')
+ downloader.add_option(
+ '--buffer-size',
+ dest='buffersize', metavar='SIZE', default='1024',
+ help='size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ downloader.add_option(
+ '--no-resize-buffer',
+ action='store_true', dest='noresizebuffer', default=False,
+ help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ downloader.add_option(
+ '--test',
+ action='store_true', dest='test', default=False,
+ help=optparse.SUPPRESS_HELP)
+
+ workarounds = optparse.OptionGroup(parser, 'Workarounds')
workarounds.add_option(
- '--encoding', dest='encoding', metavar='ENCODING',
+ '--encoding',
+ dest='encoding', metavar='ENCODING',
help='Force the specified encoding (experimental)')
workarounds.add_option(
- '--no-check-certificate', action='store_true',
- dest='no_check_certificate', default=False,
+ '--no-check-certificate',
+ action='store_true', dest='no_check_certificate', default=False,
help='Suppress HTTPS certificate validation.')
workarounds.add_option(
- '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+ '--prefer-insecure',
+ '--prefer-unsecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
workarounds.add_option(
- '--user-agent', metavar='UA',
- dest='user_agent', help='specify a custom user agent')
+ '--user-agent',
+ metavar='UA', dest='user_agent',
+ help='specify a custom user agent')
workarounds.add_option(
- '--referer', metavar='REF',
- dest='referer', default=None,
+ '--referer',
+ metavar='URL', dest='referer', default=None,
help='specify a custom referer, use if the video access is restricted to one domain',
)
workarounds.add_option(
- '--add-header', metavar='FIELD:VALUE',
- dest='headers', action='append',
+ '--add-header',
+ metavar='FIELD:VALUE', dest='headers', action='append',
help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
)
workarounds.add_option(
- '--bidi-workaround', dest='bidi_workaround', action='store_true',
- help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+ '--bidi-workaround',
+ dest='bidi_workaround', action='store_true',
+ help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
- verbosity.add_option('-q', '--quiet',
- action='store_true', dest='quiet', help='activates quiet mode', default=False)
+ verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+ verbosity.add_option(
+ '-q', '--quiet',
+ action='store_true', dest='quiet', default=False,
+ help='activates quiet mode')
verbosity.add_option(
'--no-warnings',
dest='no_warnings', action='store_true', default=False,
help='Ignore warnings')
- verbosity.add_option('-s', '--simulate',
- action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
- verbosity.add_option('--skip-download',
- action='store_true', dest='skip_download', help='do not download the video', default=False)
- verbosity.add_option('-g', '--get-url',
- action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
- verbosity.add_option('-e', '--get-title',
- action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
- verbosity.add_option('--get-id',
- action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
- verbosity.add_option('--get-thumbnail',
- action='store_true', dest='getthumbnail',
- help='simulate, quiet but print thumbnail URL', default=False)
- verbosity.add_option('--get-description',
- action='store_true', dest='getdescription',
- help='simulate, quiet but print video description', default=False)
- verbosity.add_option('--get-duration',
- action='store_true', dest='getduration',
- help='simulate, quiet but print video length', default=False)
- verbosity.add_option('--get-filename',
- action='store_true', dest='getfilename',
- help='simulate, quiet but print output filename', default=False)
- verbosity.add_option('--get-format',
- action='store_true', dest='getformat',
- help='simulate, quiet but print output format', default=False)
- verbosity.add_option('-j', '--dump-json',
- action='store_true', dest='dumpjson',
- help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
- verbosity.add_option('--newline',
- action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
- verbosity.add_option('--no-progress',
- action='store_true', dest='noprogress', help='do not print progress bar', default=False)
- verbosity.add_option('--console-title',
- action='store_true', dest='consoletitle',
- help='display progress in console titlebar', default=False)
- verbosity.add_option('-v', '--verbose',
- action='store_true', dest='verbose', help='print various debugging information', default=False)
- verbosity.add_option('--dump-intermediate-pages',
- action='store_true', dest='dump_intermediate_pages', default=False,
- help='print downloaded pages to debug problems (very verbose)')
- verbosity.add_option('--write-pages',
- action='store_true', dest='write_pages', default=False,
- help='Write downloaded intermediary pages to files in the current directory to debug problems')
- verbosity.add_option('--youtube-print-sig-code',
- action='store_true', dest='youtube_print_sig_code', default=False,
- help=optparse.SUPPRESS_HELP)
- verbosity.add_option('--print-traffic',
- dest='debug_printtraffic', action='store_true', default=False,
- help='Display sent and read HTTP traffic')
-
-
- filesystem.add_option('-a', '--batch-file',
- dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
- filesystem.add_option('--id',
- action='store_true', dest='useid', help='use only video ID in file name', default=False)
- filesystem.add_option('-A', '--auto-number',
- action='store_true', dest='autonumber',
- help='number downloaded files starting from 00000', default=False)
- filesystem.add_option('-o', '--output',
- dest='outtmpl', metavar='TEMPLATE',
- help=('output filename template. Use %(title)s to get the title, '
- '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
- '%(autonumber)s to get an automatically incremented number, '
- '%(ext)s for the filename extension, '
- '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
- '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
- '%(upload_date)s for the upload date (YYYYMMDD), '
- '%(extractor)s for the provider (youtube, metacafe, etc), '
- '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
- '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
- '%(height)s and %(width)s for the width and height of the video format. '
- '%(resolution)s for a textual description of the resolution of the video format. '
- 'Use - to output to stdout. Can also be used to download to a different directory, '
- 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
- filesystem.add_option('--autonumber-size',
- dest='autonumber_size', metavar='NUMBER',
- help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
- filesystem.add_option('--restrict-filenames',
- action='store_true', dest='restrictfilenames',
- help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
- filesystem.add_option('-t', '--title',
- action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
- filesystem.add_option('-l', '--literal',
- action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
- filesystem.add_option('-w', '--no-overwrites',
- action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
- filesystem.add_option('-c', '--continue',
- action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
- filesystem.add_option('--no-continue',
- action='store_false', dest='continue_dl',
- help='do not resume partially downloaded files (restart from beginning)')
- filesystem.add_option('--no-part',
- action='store_true', dest='nopart', help='do not use .part files', default=False)
- filesystem.add_option('--no-mtime',
- action='store_false', dest='updatetime',
- help='do not use the Last-modified header to set the file modification time', default=True)
- filesystem.add_option('--write-description',
- action='store_true', dest='writedescription',
- help='write video description to a .description file', default=False)
- filesystem.add_option('--write-info-json',
- action='store_true', dest='writeinfojson',
- help='write video metadata to a .info.json file', default=False)
- filesystem.add_option('--write-annotations',
- action='store_true', dest='writeannotations',
- help='write video annotations to a .annotation file', default=False)
- filesystem.add_option('--write-thumbnail',
- action='store_true', dest='writethumbnail',
- help='write thumbnail image to disk', default=False)
- filesystem.add_option('--load-info',
- dest='load_info_filename', metavar='FILE',
- help='json file containing the video information (created with the "--write-json" option)')
- filesystem.add_option('--cookies',
- dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
+ verbosity.add_option(
+ '-s', '--simulate',
+ action='store_true', dest='simulate', default=False,
+ help='do not download the video and do not write anything to disk',)
+ verbosity.add_option(
+ '--skip-download',
+ action='store_true', dest='skip_download', default=False,
+ help='do not download the video',)
+ verbosity.add_option(
+ '-g', '--get-url',
+ action='store_true', dest='geturl', default=False,
+ help='simulate, quiet but print URL')
+ verbosity.add_option(
+ '-e', '--get-title',
+ action='store_true', dest='gettitle', default=False,
+ help='simulate, quiet but print title')
+ verbosity.add_option(
+ '--get-id',
+ action='store_true', dest='getid', default=False,
+ help='simulate, quiet but print id')
+ verbosity.add_option(
+ '--get-thumbnail',
+ action='store_true', dest='getthumbnail', default=False,
+ help='simulate, quiet but print thumbnail URL')
+ verbosity.add_option(
+ '--get-description',
+ action='store_true', dest='getdescription', default=False,
+ help='simulate, quiet but print video description')
+ verbosity.add_option(
+ '--get-duration',
+ action='store_true', dest='getduration', default=False,
+ help='simulate, quiet but print video length')
+ verbosity.add_option(
+ '--get-filename',
+ action='store_true', dest='getfilename', default=False,
+ help='simulate, quiet but print output filename')
+ verbosity.add_option(
+ '--get-format',
+ action='store_true', dest='getformat', default=False,
+ help='simulate, quiet but print output format')
+ verbosity.add_option(
+ '-j', '--dump-json',
+ action='store_true', dest='dumpjson', default=False,
+ help='simulate, quiet but print JSON information. See --output for a description of available keys.')
+ verbosity.add_option(
+ '-J', '--dump-single-json',
+ action='store_true', dest='dump_single_json', default=False,
+ help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ verbosity.add_option(
+ '--newline',
+ action='store_true', dest='progress_with_newline', default=False,
+ help='output progress bar as new lines')
+ verbosity.add_option(
+ '--no-progress',
+ action='store_true', dest='noprogress', default=False,
+ help='do not print progress bar')
+ verbosity.add_option(
+ '--console-title',
+ action='store_true', dest='consoletitle', default=False,
+ help='display progress in console titlebar')
+ verbosity.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='print various debugging information')
+ verbosity.add_option(
+ '--dump-intermediate-pages',
+ action='store_true', dest='dump_intermediate_pages', default=False,
+ help='print downloaded pages to debug problems (very verbose)')
+ verbosity.add_option(
+ '--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded intermediary pages to files in the current directory to debug problems')
+ verbosity.add_option(
+ '--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--print-traffic',
+ dest='debug_printtraffic', action='store_true', default=False,
+ help='Display sent and read HTTP traffic')
+
+ filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+ filesystem.add_option(
+ '-a', '--batch-file',
+ dest='batchfile', metavar='FILE',
+ help='file containing URLs to download (\'-\' for stdin)')
+ filesystem.add_option(
+ '--id', default=False,
+ action='store_true', dest='useid', help='use only video ID in file name')
+ filesystem.add_option(
+ '-A', '--auto-number',
+ action='store_true', dest='autonumber', default=False,
+ help='number downloaded files starting from 00000')
+ filesystem.add_option(
+ '-o', '--output',
+ dest='outtmpl', metavar='TEMPLATE',
+ help=('output filename template. Use %(title)s to get the title, '
+ '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
+ '%(autonumber)s to get an automatically incremented number, '
+ '%(ext)s for the filename extension, '
+ '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
+ '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+ '%(upload_date)s for the upload date (YYYYMMDD), '
+ '%(extractor)s for the provider (youtube, metacafe, etc), '
+ '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
+ '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
+ '%(height)s and %(width)s for the width and height of the video format. '
+ '%(resolution)s for a textual description of the resolution of the video format. '
+ 'Use - to output to stdout. Can also be used to download to a different directory, '
+ 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
+ filesystem.add_option(
+ '--autonumber-size',
+ dest='autonumber_size', metavar='NUMBER',
+ help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+ filesystem.add_option(
+ '--restrict-filenames',
+ action='store_true', dest='restrictfilenames', default=False,
+ help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
+ filesystem.add_option(
+ '-t', '--title',
+ action='store_true', dest='usetitle', default=False,
+ help='[deprecated] use title in file name (default)')
+ filesystem.add_option(
+ '-l', '--literal', default=False,
+ action='store_true', dest='usetitle',
+ help='[deprecated] alias of --title')
+ filesystem.add_option(
+ '-w', '--no-overwrites',
+ action='store_true', dest='nooverwrites', default=False,
+ help='do not overwrite files')
+ filesystem.add_option(
+ '-c', '--continue',
+ action='store_true', dest='continue_dl', default=True,
+ help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
+ filesystem.add_option(
+ '--no-continue',
+ action='store_false', dest='continue_dl',
+ help='do not resume partially downloaded files (restart from beginning)')
+ filesystem.add_option(
+ '--no-part',
+ action='store_true', dest='nopart', default=False,
+ help='do not use .part files - write directly into output file')
+ filesystem.add_option(
+ '--no-mtime',
+ action='store_false', dest='updatetime', default=True,
+ help='do not use the Last-modified header to set the file modification time')
+ filesystem.add_option(
+ '--write-description',
+ action='store_true', dest='writedescription', default=False,
+ help='write video description to a .description file')
+ filesystem.add_option(
+ '--write-info-json',
+ action='store_true', dest='writeinfojson', default=False,
+ help='write video metadata to a .info.json file')
+ filesystem.add_option(
+ '--write-annotations',
+ action='store_true', dest='writeannotations', default=False,
+ help='write video annotations to a .annotation file')
+ filesystem.add_option(
+ '--write-thumbnail',
+ action='store_true', dest='writethumbnail', default=False,
+ help='write thumbnail image to disk')
+ filesystem.add_option(
+ '--load-info',
+ dest='load_info_filename', metavar='FILE',
+ help='json file containing the video information (created with the "--write-json" option)')
+ filesystem.add_option(
+ '--cookies',
+ dest='cookiefile', metavar='FILE',
+ help='file to read cookies from and dump cookie jar in')
filesystem.add_option(
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -412,36 +552,61 @@ def parseOpts(overrideArguments=None):
'--no-cache-dir', action='store_const', const=False, dest='cachedir',
help='Disable filesystem caching')
filesystem.add_option(
- '--rm-cache-dir', action='store_true', dest='rm_cachedir',
+ '--rm-cache-dir',
+ action='store_true', dest='rm_cachedir',
help='Delete all filesystem cache files')
-
- postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
- help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
- postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
- postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
- help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
- postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
- help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
- postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
- help='keeps the video file on disk after the post-processing; the video is erased by default')
- postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
- help='do not overwrite post-processed files; the post-processed files are overwritten by default')
- postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
- help='embed subtitles in the video (only for mp4 videos)')
- postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
- help='embed thumbnail in the audio as cover art')
- postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
- help='write metadata to the video file')
- postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
- help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
- postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg',
+ postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+ postproc.add_option(
+ '-x', '--extract-audio',
+ action='store_true', dest='extractaudio', default=False,
+ help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+ postproc.add_option(
+ '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+ help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
+ postproc.add_option(
+ '--audio-quality', metavar='QUALITY',
+ dest='audioquality', default='5',
+ help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ postproc.add_option(
+ '--recode-video',
+ metavar='FORMAT', dest='recodevideo', default=None,
+ help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
+ postproc.add_option(
+ '-k', '--keep-video',
+ action='store_true', dest='keepvideo', default=False,
+ help='keeps the video file on disk after the post-processing; the video is erased by default')
+ postproc.add_option(
+ '--no-post-overwrites',
+ action='store_true', dest='nopostoverwrites', default=False,
+ help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+ postproc.add_option(
+ '--embed-subs',
+ action='store_true', dest='embedsubtitles', default=False,
+ help='embed subtitles in the video (only for mp4 videos)')
+ postproc.add_option(
+ '--embed-thumbnail',
+ action='store_true', dest='embedthumbnail', default=False,
+ help='embed thumbnail in the audio as cover art')
+ postproc.add_option(
+ '--add-metadata',
+ action='store_true', dest='addmetadata', default=False,
+ help='write metadata to the video file')
+ postproc.add_option(
+ '--xattrs',
+ action='store_true', dest='xattrs', default=False,
+ help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ postproc.add_option(
+ '--prefer-avconv',
+ action='store_false', dest='prefer_ffmpeg',
help='Prefer avconv over ffmpeg for running the postprocessors (default)')
- postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
+ postproc.add_option(
+ '--prefer-ffmpeg',
+ action='store_true', dest='prefer_ffmpeg',
help='Prefer ffmpeg over avconv for running the postprocessors')
postproc.add_option(
- '--exec', metavar='CMD', dest='exec_cmd',
+ '--exec',
+ metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
parser.add_option_group(general)
@@ -458,7 +623,7 @@ def parseOpts(overrideArguments=None):
if overrideArguments is not None:
opts, args = parser.parse_args(overrideArguments)
if opts.verbose:
- write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+ write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
commandLineConf = sys.argv[1:]
if '--ignore-config' in commandLineConf:
@@ -474,8 +639,8 @@ def parseOpts(overrideArguments=None):
opts, args = parser.parse_args(argv)
if opts.verbose:
- write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
- write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
- write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+ write_string('[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+ write_string('[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+ write_string('[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
index 15aa0daa9..6ac67cbae 100644
--- a/youtube_dl/postprocessor/__init__.py
+++ b/youtube_dl/postprocessor/__init__.py
@@ -1,24 +1,26 @@
from .atomicparsley import AtomicParsleyPP
from .ffmpeg import (
+ FFmpegPostProcessor,
FFmpegAudioFixPP,
+ FFmpegEmbedSubtitlePP,
+ FFmpegExtractAudioPP,
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertor,
- FFmpegExtractAudioPP,
- FFmpegEmbedSubtitlePP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
__all__ = [
'AtomicParsleyPP',
+ 'ExecAfterDownloadPP',
'FFmpegAudioFixPP',
+ 'FFmpegEmbedSubtitlePP',
+ 'FFmpegExtractAudioPP',
'FFmpegMergerPP',
'FFmpegMetadataPP',
+ 'FFmpegPostProcessor',
'FFmpegVideoConvertor',
- 'FFmpegExtractAudioPP',
- 'FFmpegEmbedSubtitlePP',
'XAttrMetadataPP',
- 'ExecAfterDownloadPP',
]
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8c5f7c43b..083c79592 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -1,4 +1,5 @@
import os
+import re
import subprocess
import sys
import time
@@ -7,10 +8,10 @@ import time
from .common import AudioConversionError, PostProcessor
from ..utils import (
- check_executable,
compat_subprocess_get_DEVNULL,
encodeArgument,
encodeFilename,
+ is_outdated_version,
PostProcessingError,
prepend_extension,
shell_quote,
@@ -18,6 +19,23 @@ from ..utils import (
)
+def get_version(executable):
+    """Return the version that `executable -version` reports, False if it cannot
+    be started, or u'present' if its first output line names no version."""
+    try:
+        out, _ = subprocess.Popen(
+            [executable, '-version'],
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+    except OSError:
+        return False
+    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
+    # '-' must lead the class: written as '_-a' it is a range (_, `, a), not a hyphen.
+    m = re.search(r'version\s+([-0-9._a-zA-Z]+)', firstline)
+    if not m:
+        return u'present'
+    return m.group(1)
+
+
class FFmpegPostProcessorError(PostProcessingError):
pass
@@ -25,31 +43,58 @@ class FFmpegPostProcessorError(PostProcessingError):
class FFmpegPostProcessor(PostProcessor):
def __init__(self, downloader=None, deletetempfiles=False):
PostProcessor.__init__(self, downloader)
- self._exes = self.detect_executables()
+ self._versions = self.get_versions()
self._deletetempfiles = deletetempfiles
+    def check_version(self):
+        """Raise if neither ffmpeg nor avconv was found; warn when it is outdated."""
+        exe, REQUIRED_VERSION = self._executable, '1.0'
+        if not exe:
+            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+        if not is_outdated_version(self._versions[exe], REQUIRED_VERSION):
+            return
+        warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+            exe, exe, REQUIRED_VERSION)
+        if self._downloader:
+            self._downloader.report_warning(warning)
+
@staticmethod
- def detect_executables():
+ def get_versions():
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- return dict((program, check_executable(program, ['-version'])) for program in programs)
+ return dict((program, get_version(program)) for program in programs)
+
+    @property
+    def _executable(self):
+        """Name of the preferred converter that was detected (ffmpeg/avconv), or None."""
+        # __init__ allows downloader=None, so do not dereference it unconditionally.
+        params = self._downloader.params if self._downloader else {}
+        prefs = ('ffmpeg', 'avconv') if params.get('prefer_ffmpeg', False) else ('avconv', 'ffmpeg')
+        for p in prefs:
+            if self._versions[p]:
+                return p
+        return None
- def _get_executable(self):
+ @property
+ def _probe_executable(self):
if self._downloader.params.get('prefer_ffmpeg', False):
- return self._exes['ffmpeg'] or self._exes['avconv']
+ prefs = ('ffprobe', 'avprobe')
else:
- return self._exes['avconv'] or self._exes['ffmpeg']
+ prefs = ('avprobe', 'ffprobe')
+ for p in prefs:
+ if self._versions[p]:
+ return p
+ return None
def _uses_avconv(self):
- return self._get_executable() == self._exes['avconv']
+ return self._executable == 'avconv'
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
- if not self._get_executable():
- raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+ self.check_version()
files_cmd = []
for path in input_paths:
files_cmd.extend(['-i', encodeFilename(path, True)])
- cmd = ([self._get_executable(), '-y'] + files_cmd
+ cmd = ([self._executable, '-y'] + files_cmd
+ [encodeArgument(o) for o in opts] +
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
@@ -85,11 +130,12 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
- if not self._exes['ffprobe'] and not self._exes['avprobe']:
+
+ if not self._probe_executable:
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
try:
cmd = [
- self._exes['avprobe'] or self._exes['ffprobe'],
+ self._probe_executable,
'-show_streams',
encodeFilename(self._ffmpeg_filename_argument(path), True)]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
@@ -182,14 +228,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
else:
- self._downloader.to_screen(u'[' + self._get_executable() + '] Destination: ' + new_path)
+ self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
except:
etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError):
msg = u'audio conversion failed: ' + e.msg
else:
- msg = u'error running ' + self._get_executable()
+ msg = u'error running ' + self._executable
raise PostProcessingError(msg)
# Try to update the date time for extracted audio file.
@@ -487,7 +533,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
class FFmpegMergerPP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
- args = ['-c', 'copy']
+ args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d7ae5a90a..6c0c39ca5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -203,6 +203,82 @@ def compat_ord(c):
if type(c) is int: return c
else: return ord(c)
+
+if sys.version_info >= (3, 0):
+    compat_getenv = os.getenv
+    compat_expanduser = os.path.expanduser
+else:
+    # Environment variables should be decoded with filesystem encoding.
+    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
+
+    def compat_getenv(key, default=None):
+        env = os.getenv(key, default)
+        if env:
+            env = env.decode(get_filesystem_encoding())
+        return env
+
+    # HACK: The default implementations of os.path.expanduser from cpython do not decode
+    # environment variables with filesystem encoding. We will work around this by
+    # providing adjusted implementations.
+    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+    # for different platforms with correct environment variables decoding.
+
+    if os.name == 'posix':
+        def compat_expanduser(path):
+            """Expand ~ and ~user constructions. If user or $HOME is unknown,
+            do nothing."""
+            if not path.startswith('~'):
+                return path
+            i = path.find('/', 1)
+            if i < 0:
+                i = len(path)
+            if i == 1:
+                if 'HOME' not in os.environ:
+                    import pwd
+                    userhome = pwd.getpwuid(os.getuid()).pw_dir
+                else:
+                    userhome = compat_getenv('HOME')
+            else:
+                import pwd
+                try:
+                    pwent = pwd.getpwnam(path[1:i])
+                except KeyError:
+                    return path
+                userhome = pwent.pw_dir
+            userhome = userhome.rstrip('/')
+            return (userhome + path[i:]) or '/'
+    elif os.name == 'nt' or os.name == 'ce':
+        def compat_expanduser(path):
+            """Expand ~ and ~user constructs.
+
+            If user or $HOME is unknown, do nothing."""
+            if path[:1] != '~':
+                return path
+            i, n = 1, len(path)
+            while i < n and path[i] not in '/\\':
+                i = i + 1
+
+            if 'HOME' in os.environ:
+                userhome = compat_getenv('HOME')
+            elif 'USERPROFILE' in os.environ:
+                userhome = compat_getenv('USERPROFILE')
+            elif 'HOMEPATH' not in os.environ:
+                return path
+            else:
+                # compat_getenv never raises KeyError; it returns its default when
+                # HOMEDRIVE is unset. Ask for '' explicitly so that None is not
+                # handed to os.path.join() below.
+                drive = compat_getenv('HOMEDRIVE', '')
+                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+            if i != 1:  # ~user
+                userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+            return userhome + path[i:]
+    else:
+        compat_expanduser = os.path.expanduser
+
+
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
@@ -1207,11 +1283,14 @@ class locked_file(object):
return self.f.read(*args)
+def get_filesystem_encoding():
+    fs_encoding = sys.getfilesystemencoding()  # may be None; fall back to UTF-8 then
+    return 'utf-8' if fs_encoding is None else fs_encoding
+
+
def shell_quote(args):
quoted_args = []
- encoding = sys.getfilesystemencoding()
- if encoding is None:
- encoding = 'utf-8'
+ encoding = get_filesystem_encoding()
for a in args:
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
@@ -1261,7 +1340,7 @@ def format_bytes(bytes):
def get_term_width():
- columns = os.environ.get('COLUMNS', None)
+ columns = compat_getenv('COLUMNS', None)
if columns:
return int(columns)
@@ -1644,3 +1723,16 @@ def limit_length(s, length):
if len(s) > length:
return s[:length - len(ELLIPSES)] + ELLIPSES
return s
+
+
+def version_tuple(v):
+    return list(map(int, v.split('.')))  # a list despite the name; ValueError on a non-numeric part
+
+
+def is_outdated_version(version, limit, assume_new=True):
+    if version:  # a falsy (missing) or unparsable version yields `not assume_new`
+        try:
+            return version_tuple(version) < version_tuple(limit)
+        except ValueError:
+            pass
+    return not assume_new
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 4f0d486b9..8e5a642a8 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.10.05.2'
+__version__ = '2014.10.26.2'