about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 2
-rw-r--r-- test/test_all_urls.py 28
-rwxr-xr-x youtube_dl/YoutubeDL.py 11
-rw-r--r-- youtube_dl/__init__.py 4
-rw-r--r-- youtube_dl/extractor/__init__.py 8
-rw-r--r-- youtube_dl/extractor/arte.py 35
-rw-r--r-- youtube_dl/extractor/bild.py 39
-rw-r--r-- youtube_dl/extractor/cinemassacre.py 17
-rw-r--r-- youtube_dl/extractor/cnn.py 2
-rw-r--r-- youtube_dl/extractor/common.py 6
-rw-r--r-- youtube_dl/extractor/crunchyroll.py 11
-rw-r--r-- youtube_dl/extractor/francetv.py 4
-rw-r--r-- youtube_dl/extractor/funnyordie.py 2
-rw-r--r-- youtube_dl/extractor/generic.py 29
-rw-r--r-- youtube_dl/extractor/justintv.py 155
-rw-r--r-- youtube_dl/extractor/mitele.py 13
-rw-r--r-- youtube_dl/extractor/mixcloud.py 20
-rw-r--r-- youtube_dl/extractor/pbs.py 20
-rw-r--r-- youtube_dl/extractor/sexykarma.py 117
-rw-r--r-- youtube_dl/extractor/ted.py 23
-rw-r--r-- youtube_dl/extractor/telecinco.py 19
-rw-r--r-- youtube_dl/extractor/twitch.py 187
-rw-r--r-- youtube_dl/extractor/vidzi.py 33
-rw-r--r-- youtube_dl/extractor/vimeo.py 14
-rw-r--r-- youtube_dl/extractor/youtube.py 5
-rw-r--r-- youtube_dl/options.py 5
-rw-r--r-- youtube_dl/version.py 2
27 files changed, 577 insertions, 234 deletions
diff --git a/README.md b/README.md
index 90ba928c3..e772fc22a 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.
configuration in ~/.config/youtube-dl.conf
(%APPDATA%/youtube-dl/config.txt on
Windows)
+ --flat-playlist Do not extract the videos of a playlist,
+ only list them.
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 84b05da39..965e5d8a5 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -14,7 +14,7 @@ from test.helper import gettestcases
from youtube_dl.extractor import (
FacebookIE,
gen_extractors,
- JustinTVIE,
+ TwitchIE,
YoutubeIE,
)
@@ -72,21 +72,17 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
- def test_justin_tv_channelid_matching(self):
- self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
- self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
- self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
- self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
- self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
- self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
- self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
- self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
-
- def test_justintv_videoid_matching(self):
- self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
-
- def test_justin_tv_chapterid_matching(self):
- self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
+ def test_twitch_channelid_matching(self):
+ self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
+ self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
+ self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
+ self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
+
+ def test_twitch_videoid_matching(self):
+ self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
+
+ def test_twitch_chapterid_matching(self):
+ self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dec0e20e7..623f9d6fe 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -165,6 +165,8 @@ class YoutubeDL(object):
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -568,8 +570,13 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video')
- if self.params.get('extract_flat', False):
- if result_type in ('url', 'url_transparent'):
+ if result_type in ('url', 'url_transparent'):
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+ extract_flat is True):
+ self.add_extra_info(ie_result, extra_info)
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(ie_result))
return ie_result
if result_type == 'video':
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 7f2b4dfcc..a1079e596 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -79,6 +79,7 @@ __authors__ = (
'Carlos Ramos',
'5moufl',
'lenaten',
+ 'Dennis Scheiba',
)
__license__ = 'Public Domain'
@@ -255,8 +256,6 @@ def _real_main(argv=None):
date = DateRange.day(opts.date)
else:
date = DateRange(opts.dateafter, opts.datebefore)
- if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
- parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
# Do not download videos when there are audio-only formats
if opts.extractaudio and not opts.keepvideo and opts.format is None:
@@ -369,6 +368,7 @@ def _real_main(argv=None):
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
'encoding': opts.encoding,
'exec_cmd': opts.exec_cmd,
+ 'extract_flat': opts.extract_flat,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index c0b26c5b3..ab4af2079 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -26,6 +26,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
+from .bild import BildIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
@@ -174,7 +175,6 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
-from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
from .kankan import KankanIE
from .keezmovies import KeezMoviesIE
@@ -317,6 +317,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
+from .sexykarma import SexyKarmaIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
@@ -368,6 +369,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
+from .telecinco import TelecincoIE
from .telemb import TeleMBIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
@@ -396,6 +398,7 @@ from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
from .tvplay import TVPlayIE
+from .twitch import TwitchIE
from .ubu import UbuIE
from .udemy import (
UdemyIE,
@@ -421,6 +424,7 @@ from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vidme import VidmeIE
+from .vidzi import VidziIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
@@ -489,10 +493,8 @@ from .youtube import (
YoutubeUserIE,
YoutubeWatchLaterIE,
)
-
from .zdf import ZDFIE
-
_ALL_CLASSES = [
klass
for name, klass in globals().items()
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index c3d02f85e..b9a9440c0 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -10,8 +10,8 @@ from ..utils import (
unified_strdate,
determine_ext,
get_element_by_id,
- compat_str,
get_element_by_attribute,
+ int_or_none,
)
# There are different sources of video in arte.tv, the extraction process
@@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):
if not upload_date_str:
upload_date_str = player_info.get('VDA', '').split(' ')[0]
+ title = player_info['VTI'].strip()
+ subtitle = player_info.get('VSU', '').strip()
+ if subtitle:
+ title += ' - %s' % subtitle
+
info_dict = {
'id': player_info['VID'],
- 'title': player_info['VTI'],
+ 'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
- all_formats = player_info['VSR'].values()
+ all_formats = []
+ for format_id, format_dict in player_info['VSR'].items():
+ fmt = dict(format_dict)
+ fmt['format_id'] = format_id
+ all_formats.append(fmt)
# Some formats use the m3u8 protocol
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
def _match_lang(f):
@@ -149,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
- quality = ''
- height = format_info.get('height')
- if height is not None:
- quality = compat_str(height)
- bitrate = format_info.get('bitrate')
- if bitrate is not None:
- quality += '-%d' % bitrate
- if format_info.get('versionCode') is not None:
- format_id = '%s-%s' % (quality, format_info['versionCode'])
- else:
- format_id = quality
info = {
- 'format_id': format_id,
- 'format_note': format_info.get('versionLibelle'),
- 'width': format_info.get('width'),
- 'height': height,
+ 'format_id': format_info['format_id'],
+ 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
+ 'width': int_or_none(format_info.get('width')),
+ 'height': int_or_none(format_info.get('height')),
+ 'tbr': int_or_none(format_info.get('bitrate')),
}
if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer']
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
new file mode 100644
index 000000000..0269d1174
--- /dev/null
+++ b/youtube_dl/extractor/bild.py
@@ -0,0 +1,39 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class BildIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+ IE_DESC = 'Bild.de'
+ _TEST = {
+ 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+ 'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+ 'info_dict': {
+ 'id': '38184146',
+ 'ext': 'mp4',
+ 'title': 'BILD hat sie getestet',
+ 'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
+ 'duration': 196,
+ 'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
+ doc = self._download_xml(xml_url, video_id)
+
+ duration = int_or_none(doc.attrib.get('duration'), scale=1000)
+
+ return {
+ 'id': video_id,
+ 'title': doc.attrib['ueberschrift'],
+ 'description': doc.attrib.get('text'),
+ 'url': doc.attrib['src'],
+ 'thumbnail': doc.attrib.get('img'),
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index 496271be4..d064a28f9 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
- mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+ mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
@@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
+ video_thumbnail = self._og_search_thumbnail(webpage)
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
- video_thumbnail = self._search_regex(
- r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
- sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
- videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
+ vidurl = self._search_regex(
+ r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+ vidid = self._search_regex(
+ r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
+ videoserver = self._html_search_regex(
+ r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
+
+ videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
formats = []
- baseurl = sd_url[:sd_url.rfind('/')+1]
+ baseurl = vidurl[:vidurl.rfind('/')+1]
for video in videolist.findall('.//video'):
src = video.get('src')
if not src:
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index dae40c136..78877b1cf 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 450c7dfd6..e8366f7f9 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -281,6 +281,12 @@ class InfoExtractor(object):
raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen('Saving request to ' + filename)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if os.name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = '\\\\?\\' + absfilepath
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index f99888ecc..e3057d900 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -39,6 +39,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
'upload_date': '20131013',
+ 'url': 're:(?!.*&amp)',
},
'params': {
# rtmp
@@ -237,12 +238,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
- streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
- video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
- video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
+ streamdata = self._download_xml(
+ streamdata_req, video_id,
+ note='Downloading media info for %s' % video_format)
+ video_url = streamdata.find('.//host').text
+ video_play_path = streamdata.find('.//file').text
formats.append({
'url': video_url,
- 'play_path': video_play_path,
+ 'play_path': video_play_path,
'ext': 'flv',
'format': video_format,
'format_id': video_format,
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 0b3374d97..566e20d76 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
f4m_format['preference'] = 1
formats.extend(f4m_formats)
elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id))
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
@@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
formats.append({
'url': video_url,
'format_id': format_id,
- 'preference': 2,
+ 'preference': -1,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index d966e8403..ec6d96ada 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
+ links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
if not links:
raise ExtractorError('No media links available for %s' % video_id)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 122763a23..9b6498894 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -380,6 +380,17 @@ class GenericIE(InfoExtractor):
'uploader': 'education-portal.com',
},
},
+ {
+ 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+ 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+ 'info_dict': {
+ 'id': 'uxjb0lwrcz',
+ 'ext': 'mp4',
+ 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+ 'duration': 1715.0,
+ 'uploader': 'thoughtworks.wistia.com',
+ },
+ },
]
def report_following_redirect(self, new_url):
@@ -476,7 +487,8 @@ class GenericIE(InfoExtractor):
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else:
- assert ':' in default_search
+ if ':' not in default_search:
+ default_search += ':'
return self.url_result(default_search + url)
url, smuggled_data = unsmuggle_url(url)
@@ -609,13 +621,13 @@ class GenericIE(InfoExtractor):
if mobj:
player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
- return self.url_result(surl, 'Vimeo')
+ return self.url_result(surl)
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
- r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+ r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
- return self.url_result(mobj.group(1), 'Vimeo')
+ return self.url_result(mobj.group(1))
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
@@ -652,17 +664,20 @@ class GenericIE(InfoExtractor):
# Look for embedded Wistia player
match = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+ r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
+ embed_url = self._proto_relative_url(
+ unescapeHTML(match.group('url')))
return {
'_type': 'url_transparent',
- 'url': unescapeHTML(match.group('url')),
+ 'url': embed_url,
'ie_key': 'Wistia',
'uploader': video_uploader,
'title': video_title,
'id': video_id,
}
- match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+
+ match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
'_type': 'url_transparent',
diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py
deleted file mode 100644
index 27017e89f..000000000
--- a/youtube_dl/extractor/justintv.py
+++ /dev/null
@@ -1,155 +0,0 @@
-from __future__ import unicode_literals
-
-import itertools
-import json
-import os
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- compat_str,
- ExtractorError,
- formatSeconds,
-)
-
-
-class JustinTVIE(InfoExtractor):
- """Information extractor for justin.tv and twitch.tv"""
- # TODO: One broadcast may be split into multiple videos. The key
- # 'broadcast_id' is the same for all parts, and 'broadcast_part'
- # starts at 1 and increases. Can we treat all parts as one video?
-
- _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
- (?:
- (?P<channelid>[^/]+)|
- (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
- (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
- )
- /?(?:\#.*)?$
- """
- _JUSTIN_PAGE_LIMIT = 100
- IE_NAME = 'justin.tv'
- IE_DESC = 'justin.tv and twitch.tv'
- _TEST = {
- 'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
- 'md5': 'ecaa8a790c22a40770901460af191c9a',
- 'info_dict': {
- 'id': '296128360',
- 'ext': 'flv',
- 'upload_date': '20110927',
- 'uploader_id': 25114803,
- 'uploader': 'thegamedevhub',
- 'title': 'Beginner Series - Scripting With Python Pt.1'
- }
- }
-
- # Return count of items, list of *valid* items
- def _parse_page(self, url, video_id, counter):
- info_json = self._download_webpage(
- url, video_id,
- 'Downloading video info JSON on page %d' % counter,
- 'Unable to download video info JSON %d' % counter)
-
- response = json.loads(info_json)
- if type(response) != list:
- error_text = response.get('error', 'unknown error')
- raise ExtractorError('Justin.tv API: %s' % error_text)
- info = []
- for clip in response:
- video_url = clip['video_file_url']
- if video_url:
- video_extension = os.path.splitext(video_url)[1][1:]
- video_date = re.sub('-', '', clip['start_time'][:10])
- video_uploader_id = clip.get('user_id', clip.get('channel_id'))
- video_id = clip['id']
- video_title = clip.get('title', video_id)
- info.append({
- 'id': compat_str(video_id),
- 'url': video_url,
- 'title': video_title,
- 'uploader': clip.get('channel_name', video_uploader_id),
- 'uploader_id': video_uploader_id,
- 'upload_date': video_date,
- 'ext': video_extension,
- })
- return (len(response), info)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- api_base = 'http://api.justin.tv'
- paged = False
- if mobj.group('channelid'):
- paged = True
- video_id = mobj.group('channelid')
- api = api_base + '/channel/archives/%s.json' % video_id
- elif mobj.group('chapterid'):
- chapter_id = mobj.group('chapterid')
-
- webpage = self._download_webpage(url, chapter_id)
- m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
- if not m:
- raise ExtractorError('Cannot find archive of a chapter')
- archive_id = m.group(1)
-
- api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
- doc = self._download_xml(
- api, chapter_id,
- note='Downloading chapter information',
- errnote='Chapter information download failed')
- for a in doc.findall('.//archive'):
- if archive_id == a.find('./id').text:
- break
- else:
- raise ExtractorError('Could not find chapter in chapter information')
-
- video_url = a.find('./video_file_url').text
- video_ext = video_url.rpartition('.')[2] or 'flv'
-
- chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
- chapter_info = self._download_json(
- chapter_api_url, 'c' + chapter_id,
- note='Downloading chapter metadata',
- errnote='Download of chapter metadata failed')
-
- bracket_start = int(doc.find('.//bracket_start').text)
- bracket_end = int(doc.find('.//bracket_end').text)
-
- # TODO determine start (and probably fix up file)
- # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
- #video_url += '?start=' + TODO:start_timestamp
- # bracket_start is 13290, but we want 51670615
- self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
- 'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
- info = {
- 'id': 'c' + chapter_id,
- 'url': video_url,
- 'ext': video_ext,
- 'title': chapter_info['title'],
- 'thumbnail': chapter_info['preview'],
- 'description': chapter_info['description'],
- 'uploader': chapter_info['channel']['display_name'],
- 'uploader_id': chapter_info['channel']['name'],
- }
- return info
- else:
- video_id = mobj.group('videoid')
- api = api_base + '/broadcast/by_archive/%s.json' % video_id
-
- entries = []
- offset = 0
- limit = self._JUSTIN_PAGE_LIMIT
- for counter in itertools.count(1):
- page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
- page_count, page_info = self._parse_page(
- page_url, video_id, counter)
- entries.extend(page_info)
- if not paged or page_count != limit:
- break
- offset += limit
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 979f3d692..6691521e5 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -6,6 +6,7 @@ import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
+ compat_urlparse,
get_element_by_attribute,
parse_duration,
strip_jsonp,
@@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):
).replace('\'', '"')
embed_data = json.loads(embed_data_json)
- info_url = embed_data['flashvars']['host']
+ domain = embed_data['mediaUrl']
+ if not domain.startswith('http'):
+ # only happens in telecinco.es videos
+ domain = 'http://' + domain
+ info_url = compat_urlparse.urljoin(
+ domain,
+ compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+ )
info_el = self._download_xml(info_url, episode).find('./video/info')
video_link = info_el.find('videoUrl/link').text
token_query = compat_urllib_parse.urlencode({'id': video_link})
token_info = self._download_json(
- 'http://token.mitele.es/?' + token_query, episode,
+ embed_data['flashvars']['ov_tk'] + '?' + token_query,
+ episode,
transform_source=strip_jsonp
)
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index a4564d3de..bb8937c4d 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):
},
}
- def check_urls(self, url_list):
- """Returns 1st active url from list"""
- for url in url_list:
+ def _get_url(self, track_id, template_url):
+ server_count = 30
+ for i in range(server_count):
+ url = template_url % i
try:
# We only want to know if the request succeed
# don't download the whole file
- self._request_webpage(HEADRequest(url), None, False)
+ self._request_webpage(
+ HEADRequest(url), track_id,
+ 'Checking URL %d/%d ...' % (i + 1, server_count + 1))
return url
except ExtractorError:
- url = None
+ pass
return None
- def _get_url(self, template_url):
- return self.check_urls(template_url % i for i in range(30))
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group(1)
@@ -61,11 +61,11 @@ class MixcloudIE(InfoExtractor):
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
- final_song_url = self._get_url(template_url)
+ final_song_url = self._get_url(track_id, template_url)
if final_song_url is None:
self.to_screen('Trying with m4a extension')
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
- final_song_url = self._get_url(template_url)
+ final_song_url = self._get_url(track_id, template_url)
if final_song_url is None:
raise ExtractorError('Unable to extract track url')
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 8f140d626..6118ed5c2 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140122',
}
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+ 'info_dict': {
+ 'id': 'united-states-of-secrets',
+ },
+ 'playlist_count': 2,
}
-
]
def _extract_webpage(self, url):
@@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
webpage, 'upload date', default=None))
+ # tabbed frontline videos
+ tabbed_videos = re.findall(
+ r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
+ if tabbed_videos:
+ return tabbed_videos, presumptive_id, upload_date
+
MEDIA_ID_REGEXES = [
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
r'class="coveplayerid">([^<]+)<', # coveplayer
@@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id, upload_date = self._extract_webpage(url)
+ if isinstance(video_id, list):
+ entries = [self.url_result(
+ 'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+ for vid_id in video_id]
+ return self.playlist_result(entries, display_id)
+
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py
new file mode 100644
index 000000000..c833fc8ee
--- /dev/null
+++ b/youtube_dl/extractor/sexykarma.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ parse_duration,
+ int_or_none,
+)
+
+
+class SexyKarmaIE(InfoExtractor):
+ IE_DESC = 'Sexy Karma and Watch Indian Porn'
+ _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
+ 'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
+ 'info_dict': {
+ 'id': 'yHI70cOyIHt',
+ 'display_id': 'taking-a-quick-pee',
+ 'ext': 'mp4',
+ 'title': 'Taking a quick pee.',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'wildginger7',
+ 'upload_date': '20141007',
+ 'duration': 22,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
+ }, {
+ 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
+ 'md5': 'dd216c68d29b49b12842b9babe762a5d',
+ 'info_dict': {
+ 'id': '8Id6EZPbuHf',
+ 'display_id': 'pot-pixie-tribute',
+ 'ext': 'mp4',
+ 'title': 'pot_pixie tribute',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'banffite',
+ 'upload_date': '20141013',
+ 'duration': 16,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
+ }, {
+ 'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
+ 'md5': '9afb80675550406ed9a63ac2819ef69d',
+ 'info_dict': {
+ 'id': 'dW2mtctxJfs',
+ 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
+ 'ext': 'mp4',
+ 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'Don',
+ 'upload_date': '20140213',
+ 'duration': 83,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._html_search_regex(
+ r"url: escape\('([^']+)'\)", webpage, 'url')
+
+ title = self._html_search_regex(
+ r'<h2 class="he2"><span>(.*?)</span>',
+ webpage, 'title')
+ thumbnail = self._html_search_regex(
+ r'<span id="container"><img\s+src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ uploader = self._html_search_regex(
+ r'class="aupa">\s*(.*?)</a>',
+ webpage, 'uploader')
+ upload_date = unified_strdate(self._html_search_regex(
+ r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
+
+ duration = parse_duration(self._search_regex(
+ r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._search_regex(
+ r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+ webpage, 'comment count', fatal=False))
+
+ categories = re.findall(
+ r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
+ webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ }
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index d5e28efad..cd4af96fd 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -65,6 +65,22 @@ class TEDIE(SubtitlesInfoExtractor):
'title': 'Who are the hackers?',
},
'playlist_mincount': 6,
+ }, {
+ # contains a youtube video
+ 'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '_ZG8HBuDjgc',
+ 'ext': 'mp4',
+ 'title': 'Douglas Adams: Parrots the Universe and Everything',
+ 'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
+ 'uploader': 'University of California Television (UCTV)',
+ 'uploader_id': 'UCtelevision',
+ 'upload_date': '20080522',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
_NATIVE_FORMATS = {
@@ -114,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):
talk_info = self._extract_info(webpage)['talks'][0]
+ if talk_info.get('external') is not None:
+ self.to_screen('Found video from %s' % talk_info['external']['service'])
+ return {
+ '_type': 'url',
+ 'url': talk_info['external']['uri'],
+ }
+
formats = [{
'url': format_url,
'format_id': format_id,
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
new file mode 100644
index 000000000..db9788c18
--- /dev/null
+++ b/youtube_dl/extractor/telecinco.py
@@ -0,0 +1,19 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .mitele import MiTeleIE
+
+
+class TelecincoIE(MiTeleIE):
+ """Extractor for telecinco.es pages.
+
+ The class defines no methods of its own: everything except the name, the
+ URL pattern and the test case below is inherited from MiTeleIE.
+ """
+ IE_NAME = 'telecinco.es'
+ # Matches www.telecinco.es URLs with three path segments followed by a part
+ # ending in ".html"; that last part (without the extension) is captured as
+ # the ``episode`` group.
+ # NOTE(review): presumably MiTeleIE's extraction reads ``episode`` -- confirm.
+ _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+
+ # Single test case: the expected info_dict for one episode page.
+ _TEST = {
+ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+ 'info_dict': {
+ 'id': 'MDSVID20141015_0058',
+ 'ext': 'mp4',
+ 'title': 'Con Martín Berasategui, hacer un bacalao al ...',
+ 'duration': 662,
+ },
+ }
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
new file mode 100644
index 000000000..36aa1ad6e
--- /dev/null
+++ b/youtube_dl/extractor/twitch.py
@@ -0,0 +1,187 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class TwitchIE(InfoExtractor):
+    """Extractor for twitch.tv broadcasts, chapters and whole channels.
+
+    A broadcast (``/b/<id>``) or chapter (``/c/<id>``) URL yields a playlist
+    with one entry per chunk listed by the API. A bare channel URL yields a
+    playlist of URL references to the channel's videos.
+    """
+    # TODO: One broadcast may be split into multiple videos. The key
+    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
+    # starts at 1 and increases. Can we treat all parts as one video?
+    _VALID_URL = r"""(?x)^(?:https?://)?(?:www\.)?twitch\.tv/
+        (?:
+            (?P<channelid>[^/]+)|
+            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
+            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
+        )
+        /?(?:\#.*)?$
+        """
+    # Number of videos requested per page of a channel listing; also the
+    # amount by which the offset advances between pages.
+    _PAGE_LIMIT = 100
+    _API_BASE = 'https://api.twitch.tv'
+    _TESTS = [{
+        'url': 'http://www.twitch.tv/riotgames/b/577357806',
+        'info_dict': {
+            'id': 'a577357806',
+            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+        },
+        'playlist_mincount': 12,
+    }, {
+        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+        'info_dict': {
+            'id': 'c5285812',
+            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'http://www.twitch.tv/vanillatv',
+        'info_dict': {
+            'id': 'vanillatv',
+            'title': 'VanillaTV',
+        },
+        'playlist_mincount': 412,
+    }]
+
+    def _handle_error(self, response):
+        """Raise an expected ExtractorError if *response* reports an error.
+
+        Non-dict responses and dicts without a truthy ``error`` value are
+        ignored.
+        """
+        if not isinstance(response, dict):
+            return
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
+                expected=True)
+
+    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
+        """Download JSON through the base class and check it for API errors.
+
+        Returns the decoded response; raises ExtractorError (via
+        ``_handle_error``) when the response carries an ``error`` entry.
+        """
+        response = super(TwitchIE, self)._download_json(url, video_id, note)
+        self._handle_error(response)
+        return response
+
+    def _extract_media(self, item, item_id):
+        """Build a playlist for one broadcast or chapter.
+
+        Args:
+            item: ``'a'`` for a broadcast (video) or ``'c'`` for a chapter;
+                it is prefixed to *item_id* to form the API identifier.
+            item_id: numeric part of the identifier taken from the URL.
+
+        Returns:
+            A playlist result with one entry per fragment. Every entry copies
+            the common metadata and gets its own id, title and formats.
+        """
+        ITEMS = {
+            'a': 'video',
+            'c': 'chapter',
+        }
+        item_name = ITEMS[item]
+        api_id = '%s%s' % (item, item_id)
+        info = self._extract_info(self._download_json(
+            '%s/kraken/videos/%s' % (self._API_BASE, api_id), item_id,
+            'Downloading %s info JSON' % item_name))
+        response = self._download_json(
+            '%s/api/videos/%s' % (self._API_BASE, api_id), item_id,
+            'Downloading %s playlist JSON' % item_name)
+        # 'chunks' maps a quality name to the list of fragments available in
+        # that quality. Transposing with zip() gives, per fragment, one
+        # variant for each quality (in the same order as the keys).
+        chunks = response['chunks']
+        qualities = list(chunks.keys())
+        entries = []
+        for num, fragment in enumerate(zip(*chunks.values()), start=1):
+            formats = []
+            for format_id, fragment_fmt in zip(qualities, fragment):
+                fmt = {
+                    'url': fragment_fmt['url'],
+                    'format_id': format_id,
+                    # NOTE(review): presumably ranks the 'live' quality
+                    # above the others when sorting -- confirm.
+                    'quality': 1 if format_id == 'live' else 0,
+                }
+                # Quality names such as '720p' carry the frame height.
+                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+                if m:
+                    fmt['height'] = int(m.group('height'))
+                formats.append(fmt)
+            self._sort_formats(formats)
+            entry = dict(info)
+            entry['id'] = '%s_%d' % (entry['id'], num)
+            entry['title'] = '%s part %d' % (entry['title'], num)
+            entry['formats'] = formats
+            entries.append(entry)
+        return self.playlist_result(entries, info['id'], info['title'])
+
+    def _extract_info(self, info):
+        """Map a video object from the API onto an info dict.
+
+        All keys read from *info* are required; a missing one raises
+        KeyError.
+        """
+        return {
+            'id': info['_id'],
+            'title': info['title'],
+            'description': info['description'],
+            'duration': info['length'],
+            'thumbnail': info['preview'],
+            'uploader': info['channel']['display_name'],
+            'uploader_id': info['channel']['name'],
+            'timestamp': parse_iso8601(info['recorded_at']),
+            'view_count': info['views'],
+        }
+
+    def _real_extract(self, url):
+        """Dispatch on the kind of URL: chapter, broadcast or channel.
+
+        Chapters and broadcasts are delegated to ``_extract_media``. For a
+        channel, the video listing is fetched page by page until an empty
+        page is returned, and a playlist of URL references is built.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        if mobj.group('chapterid'):
+            return self._extract_media('c', mobj.group('chapterid'))
+        elif mobj.group('videoid'):
+            return self._extract_media('a', mobj.group('videoid'))
+        elif mobj.group('channelid'):
+            channel_id = mobj.group('channelid')
+            info = self._download_json(
+                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+                channel_id, 'Downloading channel info JSON')
+            channel_name = info.get('display_name') or info.get('name')
+            entries = []
+            offset = 0
+            limit = self._PAGE_LIMIT
+            for counter in itertools.count(1):
+                response = self._download_json(
+                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
+                    % (self._API_BASE, channel_id, offset, limit),
+                    channel_id, 'Downloading channel videos JSON page %d' % counter)
+                videos = response['videos']
+                # An empty page marks the end of the listing.
+                if not videos:
+                    break
+                entries.extend(
+                    self.url_result(video['url'], 'Twitch') for video in videos)
+                offset += limit
+            return self.playlist_result(entries, channel_id, channel_name)
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
new file mode 100644
index 000000000..669979e13
--- /dev/null
+++ b/youtube_dl/extractor/vidzi.py
@@ -0,0 +1,33 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class VidziIE(InfoExtractor):
+    """Information extractor for videos hosted on vidzi.tv."""
+
+    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
+    _TEST = {
+        'url': 'http://vidzi.tv/cghql9yq6emu.html',
+        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
+        'info_dict': {
+            'id': 'cghql9yq6emu',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Scrape the media URL and the title from the video page.
+
+        Returns an info dict with the keys ``id``, ``title`` and ``url``.
+        """
+        vid = self._match_id(url)
+        page = self._download_webpage(url, vid)
+
+        def find(pattern, name):
+            # Search the downloaded page for *pattern*.
+            return self._html_search_regex(pattern, page, name)
+
+        # The media URL is looked up before the title.
+        media_url = find(r'{\s*file\s*:\s*"([^"]+)"\s*}', 'video url')
+        video_title = find(r'(?s)<h2 class="video-title">(.*?)</h2>', 'title')
+
+        return {
+            'id': vid,
+            'title': video_title,
+            'url': media_url,
+        }
+ \ No newline at end of file
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index e6a86f18e..07959d3fe 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -157,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'duration': 62,
}
},
+ {
+ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+ 'url': 'https://player.vimeo.com/video/98044508',
+ 'note': 'The js code contains assignments to the same variable as the config',
+ 'info_dict': {
+ 'id': '98044508',
+ 'ext': 'mp4',
+ 'title': 'Pier Solar OUYA Official Trailer',
+ 'uploader': 'Tulio Gonçalves',
+ 'uploader_id': 'user28849593',
+ },
+ },
]
def _verify_video_password(self, url, video_id, webpage):
@@ -244,7 +256,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
# We try to find out to which variable is assigned the config dic
m_variable_name = re.search('(\w)\.video\.id', webpage)
if m_variable_name is not None:
- config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
+ config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config = self._search_regex(config_re, webpage, 'info section',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index cfae2de89..4ab56e0ac 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -191,8 +191,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _real_initialize(self):
if self._downloader is None:
return
- if not self._set_language():
- return
+ if self._get_login_info()[0] is not None:
+ if not self._set_language():
+ return
if not self._login():
return
self._confirm_age()
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 649361bde..2b1cd7438 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -159,6 +159,11 @@ def parseOpts(overrideArguments=None):
'--ignore-config',
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+ general.add_option(
+ '--flat-playlist',
+ action='store_const', dest='extract_flat', const='in_playlist',
+ default=False,
+ help='Do not extract the videos of a playlist, only list them.')
selection = optparse.OptionGroup(parser, 'Video Selection')
selection.add_option(
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 416e2fd85..59cb3b1a1 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.10.13'
+__version__ = '2014.10.24'