aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xyoutube_dl/YoutubeDL.py5
-rw-r--r--youtube_dl/compat.py4
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/arte.py114
-rw-r--r--youtube_dl/extractor/bliptv.py14
-rw-r--r--youtube_dl/extractor/brightcove.py4
-rw-r--r--youtube_dl/extractor/comedycentral.py5
-rw-r--r--youtube_dl/extractor/common.py44
-rw-r--r--youtube_dl/extractor/folketinget.py75
-rw-r--r--youtube_dl/extractor/generic.py2
-rw-r--r--youtube_dl/extractor/goldenmoustache.py2
-rw-r--r--youtube_dl/extractor/mtv.py3
-rw-r--r--youtube_dl/extractor/rtlnl.py8
-rw-r--r--youtube_dl/extractor/stanfordoc.py88
-rw-r--r--youtube_dl/extractor/sztvhu.py24
-rw-r--r--youtube_dl/extractor/vh1.py5
-rw-r--r--youtube_dl/jsinterp.py2
-rw-r--r--youtube_dl/utils.py27
-rw-r--r--youtube_dl/version.py2
19 files changed, 270 insertions, 159 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 94c50903c..fde026fbf 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -624,7 +624,7 @@ class YoutubeDL(object):
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
- elif result_type == 'playlist':
+ elif result_type == 'playlist' or result_type == 'multi_video':
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen('[download] Downloading playlist: %s' % playlist)
@@ -679,6 +679,9 @@ class YoutubeDL(object):
ie_result['entries'] = playlist_results
return ie_result
elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.' % ie_result.get('extractor'))
def _fixup(r):
self.add_extra_info(r,
{
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 64a975489..9d33a8ec5 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -302,8 +302,10 @@ else:
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
+ op = optparse.OptionParser()
+ og = optparse.OptionGroup(op, 'foo')
try:
- optparse.OptionGroup('foo').add_option('-t')
+ og.add_option('-t')
except TypeError:
real_add_option = optparse.OptionGroup.add_option
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 7275d247a..7497a97f5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -115,6 +115,7 @@ from .fktv import (
FKTVPosteckeIE,
)
from .flickr import FlickrIE
+from .folketinget import FolketingetIE
from .fourtube import FourTubeIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index b9a9440c0..3a57ce527 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -5,13 +5,12 @@ import re
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
find_xpath_attr,
unified_strdate,
- determine_ext,
get_element_by_id,
get_element_by_attribute,
int_or_none,
+ qualities,
)
# There are different sources of video in arte.tv, the extraction process
@@ -102,79 +101,54 @@ class ArteTVPlus7IE(InfoExtractor):
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
+ qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
- all_formats = []
+ formats = []
for format_id, format_dict in player_info['VSR'].items():
- fmt = dict(format_dict)
- fmt['format_id'] = format_id
- all_formats.append(fmt)
- # Some formats use the m3u8 protocol
- all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
- def _match_lang(f):
- if f.get('versionCode') is None:
- return True
- # Return true if that format is in the language of the url
- if lang == 'fr':
- l = 'F'
- elif lang == 'de':
- l = 'A'
- else:
- l = lang
- regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
- return any(re.match(r, f['versionCode']) for r in regexes)
- # Some formats may not be in the same language as the url
- # TODO: Might want not to drop videos that does not match requested language
- # but to process those formats with lower precedence
- formats = filter(_match_lang, all_formats)
- formats = list(formats) # in python3 filter returns an iterator
- if not formats:
- # Some videos are only available in the 'Originalversion'
- # they aren't tagged as being in French or German
- # Sometimes there are neither videos of requested lang code
- # nor original version videos available
- # For such cases we just take all_formats as is
- formats = all_formats
- if not formats:
- raise ExtractorError('The formats list is empty')
-
- if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
- def sort_key(f):
- return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
- else:
- def sort_key(f):
- versionCode = f.get('versionCode')
- if versionCode is None:
- versionCode = ''
- return (
- # Sort first by quality
- int(f.get('height', -1)),
- int(f.get('bitrate', -1)),
- # The original version with subtitles has lower relevance
- re.match(r'VO-ST(F|A)', versionCode) is None,
- # The version with sourds/mal subtitles has also lower relevance
- re.match(r'VO?(F|A)-STM\1', versionCode) is None,
- # Prefer http downloads over m3u8
- 0 if f['url'].endswith('m3u8') else 1,
- )
- formats = sorted(formats, key=sort_key)
- def _format(format_info):
- info = {
- 'format_id': format_info['format_id'],
- 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
- 'width': int_or_none(format_info.get('width')),
- 'height': int_or_none(format_info.get('height')),
- 'tbr': int_or_none(format_info.get('bitrate')),
+ f = dict(format_dict)
+ versionCode = f.get('versionCode')
+
+ langcode = {
+ 'fr': 'F',
+ 'de': 'A',
+ }.get(lang, lang)
+ lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
+ lang_pref = (
+ None if versionCode is None else (
+ 10 if any(re.match(r, versionCode) for r in lang_rexs)
+ else -10))
+ source_pref = 0
+ if versionCode is not None:
+ # The original version with subtitles has lower relevance
+ if re.match(r'VO-ST(F|A)', versionCode):
+ source_pref -= 10
+ # The version with sourds/mal subtitles has also lower relevance
+ elif re.match(r'VO?(F|A)-STM\1', versionCode):
+ source_pref -= 9
+ format = {
+ 'format_id': format_id,
+ 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
+ 'language_preference': lang_pref,
+ 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ 'quality': qfunc(f['quality']),
+ 'source_preference': source_pref,
}
- if format_info['mediaType'] == 'rtmp':
- info['url'] = format_info['streamer']
- info['play_path'] = 'mp4:' + format_info['url']
- info['ext'] = 'flv'
+
+ if f.get('mediaType') == 'rtmp':
+ format['url'] = f['streamer']
+ format['play_path'] = 'mp4:' + f['url']
+ format['ext'] = 'flv'
else:
- info['url'] = format_info['url']
- info['ext'] = determine_ext(info['url'])
- return info
- info_dict['formats'] = [_format(f) for f in formats]
+ format['url'] = f['url']
+
+ formats.append(format)
+
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
return info_dict
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 2370c24b0..f2b02643d 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -166,9 +166,17 @@ class BlipTVIE(SubtitlesInfoExtractor):
class BlipTVUserIE(InfoExtractor):
- _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
+ _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
+ _TEST = {
+ 'url': 'http://blip.tv/actone',
+ 'info_dict': {
+ 'id': 'actone',
+ 'title': 'Act One: The Series',
+ },
+ 'playlist_count': 5,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -179,6 +187,7 @@ class BlipTVUserIE(InfoExtractor):
page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
+ title = self._og_search_title(page)
# Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query
@@ -215,4 +224,5 @@ class BlipTVUserIE(InfoExtractor):
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
- return [self.playlist_result(url_entries, playlist_title=username)]
+ return self.playlist_result(
+ url_entries, playlist_title=title, playlist_id=username)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index a6920685e..2db7f9fef 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -111,6 +111,8 @@ class BrightcoveIE(InfoExtractor):
lambda m: m.group(1) + '/>', object_str)
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--')
+ # remove namespace to simplify extraction
+ object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
object_str = fix_xml_ampersands(object_str)
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
@@ -219,7 +221,7 @@ class BrightcoveIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex(
- r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
+ r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
'error message', default=None)
if error_msg is not None:
raise ExtractorError(
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 49b978b4e..2e3ef3fda 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
from .mtv import MTVServicesInfoExtractor
from ..utils import (
compat_str,
@@ -110,9 +109,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url, re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
+ mobj = re.match(self._VALID_URL, url)
if mobj.group('shortname'):
if mobj.group('shortname') in ('tds', 'thedailyshow'):
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index b77f0e519..93a5a3d57 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -43,7 +43,11 @@ class InfoExtractor(object):
information possibly downloading the video to the file system, among
other possible outcomes.
- The dictionaries must include the following fields:
+ The _type field determines the type of the result.
+ By far the most common value (and the default if _type is missing) is
+ "video", which indicates a single video.
+
+ For a video, the dictionaries must include the following fields:
id: Video identifier.
title: Video title, unescaped.
@@ -87,6 +91,11 @@ class InfoExtractor(object):
by this field, regardless of all other values.
-1 for default (order by other properties),
-2 or smaller for less than default.
+ * language_preference Is this in the correct requested
+ language?
+ 10 if it's what the URL is about,
+ -1 for default (don't know),
+ -10 otherwise, other values reserved for now.
* quality Order number of the video quality of this
format, irrespective of the file format.
-1 for default (order by other properties),
@@ -146,6 +155,38 @@ class InfoExtractor(object):
Unless mentioned otherwise, None is equivalent to absence of information.
+
+ _type "playlist" indicates multiple videos.
+ There must be a key "entries", which is a list or a PagedList object, each
+ element of which is a valid dictionary under this specification.
+
+ Additionally, playlists can have "title" and "id" attributes with the same
+ semantics as videos (see above).
+
+
+ _type "multi_video" indicates that there are multiple videos that
+ form a single show, for example multiple acts of an opera or TV episode.
+ It must have an entries key like a playlist and contain all the keys
+ required for a video at the same time.
+
+
+ _type "url" indicates that the video must be extracted from another
+ location, possibly by a different extractor. Its only required key is:
+ "url" - the next URL to extract.
+
+ Additionally, it may have properties believed to be identical to the
+ resolved entity, for example "title" if the title of the referred video is
+ known ahead of time.
+
+
+ _type "url_transparent" entities have the same specification as "url", but
+ indicate that the given additional information is more precise than the one
+ associated with the resolved URL.
+ This is useful when a site employs a video service that hosts the video and
+ its technical metadata, but that video service does not embed a useful
+ title, description etc.
+
+
Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
@@ -615,6 +656,7 @@ class InfoExtractor(object):
return (
preference,
+ f.get('language_preference') if f.get('language_preference') is not None else -1,
f.get('quality') if f.get('quality') is not None else -1,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
new file mode 100644
index 000000000..68e2db943
--- /dev/null
+++ b/youtube_dl/extractor/folketinget.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):
+ IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+ _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
+ _TEST = {
+ 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+ 'info_dict': {
+ 'id': '1165642',
+ 'ext': 'mp4',
+ 'title': 'Åbent samråd i Erhvervsudvalget',
+ 'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+ 'view_count': int,
+ 'width': 768,
+ 'height': 432,
+ 'tbr': 928000,
+ 'timestamp': 1416493800,
+ 'upload_date': '20141120',
+ 'duration': 3960,
+ },
+ 'params': {
+ 'skip_download': 'rtmpdump required',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
+ webpage, 'description', fatal=False)
+
+ player_params = compat_parse_qs(self._search_regex(
+ r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
+ webpage, 'player params'))
+ xml_url = player_params['xml'][0]
+ doc = self._download_xml(xml_url, video_id)
+
+ timestamp = parse_iso8601(xpath_text(doc, './/date'))
+ duration = parse_duration(xpath_text(doc, './/duration'))
+ width = int_or_none(xpath_text(doc, './/width'))
+ height = int_or_none(xpath_text(doc, './/height'))
+ view_count = int_or_none(xpath_text(doc, './/views'))
+
+ formats = [{
+ 'format_id': n.attrib['bitrate'],
+ 'url': xpath_text(n, './url', fatal=True),
+ 'tbr': int_or_none(n.attrib['bitrate']),
+ } for n in doc.findall('.//streams/stream')]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'width': width,
+ 'height': height,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index af769ab61..c7a824c29 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -979,7 +979,7 @@ class GenericIE(InfoExtractor):
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
# HTML5 video
- found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src="([^"]+)"', webpage)
+ found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
diff --git a/youtube_dl/extractor/goldenmoustache.py b/youtube_dl/extractor/goldenmoustache.py
index 7e13b131b..10001d4d9 100644
--- a/youtube_dl/extractor/goldenmoustache.py
+++ b/youtube_dl/extractor/goldenmoustache.py
@@ -1,9 +1,7 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
- parse_duration,
int_or_none,
)
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 474bdff7d..5f0f476b6 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -145,7 +145,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
idoc = self._download_xml(
feed_url + '?' + data, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
- return [self._get_video_info(item) for item in idoc.findall('.//item')]
+ return self.playlist_result(
+ [self._get_video_info(item) for item in idoc.findall('.//item')])
def _real_extract(self, url):
title = url_basename(url)
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index 5daef2fc5..4a188e5d4 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
uuid = mobj.group('uuid')
- # Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
info = self._download_json(
- 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
+ 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
uuid)
material = info['material'][0]
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
- videopath = material['videopath']
+ # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
+ videopath = material['videopath'].replace('.f4m', '.m3u8')
m3u8_url = 'http://manifest.us.rtl.nl' + videopath
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
- video_urlpart = videopath.split('/adaptive/')[1][:-4]
+ video_urlpart = videopath.split('/flash/')[1][:-4]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py
index 44c52c718..5feb4ff83 100644
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -9,24 +11,23 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor):
- IE_NAME = u'stanfordoc'
- IE_DESC = u'Stanford Open ClassRoom'
- _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+ IE_NAME = 'stanfordoc'
+ IE_DESC = 'Stanford Open ClassRoom'
+ _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
_TEST = {
- u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
- u'file': u'PracticalUnix_intro-environment.mp4',
- u'md5': u'544a9468546059d4e80d76265b0443b8',
- u'info_dict': {
- u"title": u"Intro Environment"
+ 'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+ 'md5': '544a9468546059d4e80d76265b0443b8',
+ 'info_dict': {
+ 'id': 'PracticalUnix_intro-environment',
+ 'ext': 'mp4',
+ 'title': 'Intro Environment',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
- if mobj.group('course') and mobj.group('video'): # A specific video
+ if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
video = mobj.group('video')
info = {
@@ -35,7 +36,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
- self.report_extraction(info['id'])
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
xmlUrl = baseUrl + video + '.xml'
mdoc = self._download_xml(xmlUrl, info['id'])
@@ -43,63 +43,49 @@ class StanfordOpenClassroomIE(InfoExtractor):
info['title'] = mdoc.findall('./title')[0].text
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
except IndexError:
- raise ExtractorError(u'Invalid metadata XML file')
- info['ext'] = info['url'].rpartition('.')[2]
- return [info]
- elif mobj.group('course'): # A course page
+ raise ExtractorError('Invalid metadata XML file')
+ return info
+ elif mobj.group('course'): # A course page
course = mobj.group('course')
info = {
'id': course,
- 'type': 'playlist',
+ '_type': 'playlist',
'uploader': None,
'upload_date': None,
}
- coursepage = self._download_webpage(url, info['id'],
- note='Downloading course info page',
- errnote='Unable to download course info page')
+ coursepage = self._download_webpage(
+ url, info['id'],
+ note='Downloading course info page',
+ errnote='Unable to download course info page')
- info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
+ info['title'] = self._html_search_regex(
+ r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
- info['description'] = self._html_search_regex('<description>([^<]+)</description>',
- coursepage, u'description', fatal=False)
+ info['description'] = self._html_search_regex(
+ r'(?s)<description>([^<]+)</description>',
+ coursepage, 'description', fatal=False)
links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
- info['list'] = [
- {
- 'type': 'reference',
- 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
- }
- for vpage in links]
- results = []
- for entry in info['list']:
- assert entry['type'] == 'reference'
- results += self.extract(entry['url'])
- return results
- else: # Root page
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+ ) for l in links]
+ return info
+ else: # Root page
info = {
'id': 'Stanford OpenClassroom',
- 'type': 'playlist',
+ '_type': 'playlist',
'uploader': None,
'upload_date': None,
}
+ info['title'] = info['id']
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
rootpage = self._download_webpage(rootURL, info['id'],
- errnote=u'Unable to download course info page')
-
- info['title'] = info['id']
+ errnote='Unable to download course info page')
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
- info['list'] = [
- {
- 'type': 'reference',
- 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
- }
- for cpage in links]
-
- results = []
- for entry in info['list']:
- assert entry['type'] == 'reference'
- results += self.extract(entry['url'])
- return results
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+ ) for l in links]
+ return info
diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py
index c9359fafb..aa5964acb 100644
--- a/youtube_dl/extractor/sztvhu.py
+++ b/youtube_dl/extractor/sztvhu.py
@@ -1,27 +1,24 @@
# -*- coding: utf-8 -*-
-
-import re
+from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import determine_ext
class SztvHuIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+ _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
_TEST = {
- u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
- u'file': u'20130909.mp4',
- u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
- u'info_dict': {
- u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
- u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
+ 'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
+ 'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
+ 'info_dict': {
+ 'id': '20130909',
+ 'ext': 'mp4',
+ 'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
+ 'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
},
- u'skip': u'Service temporarily disabled as of 2013-11-20'
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_file = self._search_regex(
r'file: "...:(.*?)",', webpage, 'video file')
@@ -39,7 +36,6 @@ class SztvHuIE(InfoExtractor):
'id': video_id,
'url': video_url,
'title': title,
- 'ext': determine_ext(video_url),
'description': description,
'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py
index 2f77e3898..6be3774b7 100644
--- a/youtube_dl/extractor/vh1.py
+++ b/youtube_dl/extractor/vh1.py
@@ -121,4 +121,7 @@ class VH1IE(MTVIE):
idoc = self._download_xml(
doc_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
- return [self._get_video_info(item) for item in idoc.findall('.//item')]
+ return self.playlist_result(
+ [self._get_video_info(item) for item in idoc.findall('.//item')],
+ playlist_id=video_id,
+ )
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c40cd376d..b4617fbad 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -61,7 +61,7 @@ class JSInterpreter(object):
pass
m = re.match(
- r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+ r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
expr)
if m:
variable = m.group('var')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 94b496dd0..5be7cf992 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -71,10 +71,10 @@ def preferredencoding():
def write_json_file(obj, fn):
- """ Encode obj as JSON and write it to fn, atomically """
+ """ Encode obj as JSON and write it to fn, atomically if possible """
fn = encodeFilename(fn)
- if sys.version_info < (3, 0):
+ if sys.version_info < (3, 0) and sys.platform != 'win32':
encoding = get_filesystem_encoding()
# os.path.basename returns a bytes object, but NamedTemporaryFile
# will fail if the filename contains non ascii characters unless we
@@ -108,6 +108,13 @@ def write_json_file(obj, fn):
try:
with tf:
json.dump(obj, tf)
+ if sys.platform == 'win32':
+ # Need to remove existing file on Windows, else os.rename raises
+ # WindowsError or FileExistsError.
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
os.rename(tf.name, fn)
except:
try:
@@ -413,6 +420,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
pass # Python < 3.4
return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
@@ -427,7 +435,13 @@ class ExtractorError(Exception):
if cause:
msg += ' (caused by %r)' % cause
if not expected:
- msg = msg + '; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
+ if ytdl_is_updateable():
+ update_cmd = 'type youtube-dl -U to update'
+ else:
+ update_cmd = 'see https://yt-dl.org/update on how to update'
+ msg += '; please report this issue on https://yt-dl.org/bug .'
+ msg += ' Make sure you are using the latest version; %s.' % update_cmd
+ msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
super(ExtractorError, self).__init__(msg)
self.traceback = tb
@@ -1412,3 +1426,10 @@ def is_outdated_version(version, limit, assume_new=True):
return version_tuple(version) < version_tuple(limit)
except ValueError:
return not assume_new
+
+
+def ytdl_is_updateable():
+ """ Returns if youtube-dl can be updated with -U """
+ from zipimport import zipimporter
+
+ return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 34bf665ad..a283afbe3 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.11.16'
+__version__ = '2014.11.21.1'