diff options
-rw-r--r-- | README.md | 13 | ||||
-rw-r--r-- | test/test_playlists.py | 6 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 10 | ||||
-rw-r--r-- | youtube_dl/downloader/f4m.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/ard.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/blinkx.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/bloomberg.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/br.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 19 | ||||
-rw-r--r-- | youtube_dl/extractor/francetv.py | 36 | ||||
-rw-r--r-- | youtube_dl/extractor/gamestar.py | 74 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/godtube.py | 58 | ||||
-rw-r--r-- | youtube_dl/extractor/streamcloud.py | 30 | ||||
-rw-r--r-- | youtube_dl/extractor/swrmediathek.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/vevo.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/vidme.py | 68 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
19 files changed, 327 insertions, 52 deletions
@@ -46,15 +46,15 @@ which means you can modify it, redistribute it or use it however you like. an empty string (--proxy "") for direct connection --socket-timeout None Time to wait before giving up, in seconds - --bidi-workaround Work around terminals that lack - bidirectional text support. Requires bidiv - or fribidi executable in PATH --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let - youtube-dl guess. The default value "error" - just throws an error. + youtube-dl guess ("auto_warning" to emit a + warning when guessing). "error" just throws + an error. The default value "fixup_error" + repairs broken URLs, but emits an error if + this is not possible instead of searching. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user @@ -213,6 +213,9 @@ which means you can modify it, redistribute it or use it however you like. --add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH ## Video Format Options: -f, --format FORMAT video format code, specify the order of diff --git a/test/test_playlists.py b/test/test_playlists.py index c221c47b9..4f188345b 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -193,10 +193,10 @@ class TestPlaylists(unittest.TestCase): def test_bandcamp_album(self): dl = FakeYDL() ie = BandcampAlbumIE(dl) - result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') + result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave') self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Nightmare Night EP') - assertGreaterEqual(self, len(result['entries']), 4) + self.assertEqual(result['title'], 'Hierophany of the Open Grave') + assertGreaterEqual(self, len(result['entries']), 9) def test_smotri_community(self): dl = FakeYDL() diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6ff0be00f..429630ce5 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -253,12 +253,9 @@ def parseOpts(overrideArguments=None): '--socket-timeout', dest='socket_timeout', type=float, default=None, help=u'Time to wait before giving up, in seconds') general.add_option( - '--bidi-workaround', dest='bidi_workaround', action='store_true', - help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') - general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') general.add_option( '--ignore-config', action='store_true', @@ -386,6 +383,9 @@ def parseOpts(overrideArguments=None): dest='headers', action='append', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', ) + workarounds.add_option( + '--bidi-workaround', dest='bidi_workaround', action='store_true', + help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) @@ -709,7 +709,7 @@ def _real_main(argv=None): date = DateRange.day(opts.date) else: date = DateRange(opts.dateafter, opts.datebefore) - if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search: + if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search: parser.error(u'--default-search invalid; did you forget a colon (:) at the end?') # Do not download videos when there are audio-only formats diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index e6be6ae6c..71353f607 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -220,6 +220,7 @@ class F4mFD(FileDownloader): def real_download(self, filename, info_dict): man_url = info_dict['url'] + requested_bitrate = info_dict.get('tbr') self.to_screen('[download] Downloading f4m manifest') manifest = self.ydl.urlopen(man_url).read() self.report_destination(filename) @@ -233,8 +234,14 @@ class F4mFD(FileDownloader): doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] - formats = sorted(formats, key=lambda f: f[0]) - rate, media = formats[-1] + if requested_bitrate is None: + # get the best format + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + else: + rate, media = list(filter( + lambda f: int(f[0]) == requested_bitrate, formats))[0] + base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) metadata = base64.b64decode(media.find(_add_ns('metadata')).text) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2bf8bc5e8..f6c0ee795 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,9 +112,11 @@ from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE from .gameone import GameOneIE from .gamespot import GameSpotIE +from .gamestar import GameStarIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE +from .godtube import GodTubeIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE @@ -346,6 +348,7 @@ from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE from .videott import VideoTtIE from .videoweed import VideoWeedIE +from .vidme import VidmeIE from .vimeo import ( VimeoIE, VimeoChannelIE, diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 30a85c8c1..957bdefcb 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,6 +8,8 @@ from ..utils import ( determine_ext, ExtractorError, qualities, + compat_urllib_parse_urlparse, + compat_urllib_parse, ) @@ -44,6 +46,9 @@ class ARDIE(InfoExtractor): else: video_id = m.group('video_id') + urlp = compat_urllib_parse_urlparse(url) + url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl() + webpage = self._download_webpage(url, video_id) title = self._html_search_regex( diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 7d558e262..3e461e715 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor): 'height': int(m['h']), }) elif m['type'] == 'original': - duration = m['d'] + duration = float(m['d']) elif m['type'] == 'youtube': yt_id = m['link'] self.to_screen('Youtube video detected: %s' % yt_id) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 25fb79e14..c51a97ce4 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor): _TEST = { 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', - 'md5': '7bf08858ff7c203c870e8a6190e221e5', + # The md5 checksum changes 'info_dict': { 'id': 'qurhIVlJSB6hzkVi229d8g', 'ext': 'flv', @@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor): return { 'id': name.split('-')[-1], 'title': title, - 'url': f4m_url, - 'ext': 'flv', + 'formats': self._extract_f4m_formats(f4m_url, name), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index f7f2f713a..86f0c2861 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + parse_duration, ) @@ -22,8 +23,9 @@ class BRIE(InfoExtractor): 'info_dict': { 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', 'ext': 'mp4', - 'title': 'Am 1. und 2. August in Oberammergau', - 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021', + 'title': 'Wenn das Traditions-Theater wackelt', + 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt', + 'duration': 34, } }, { @@ -34,6 +36,7 @@ class BRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Über den Pass', 'description': 'Die Eroberung der Alpen: Über den Pass', + 'duration': 2588, } }, { @@ -44,6 +47,7 @@ class BRIE(InfoExtractor): 'ext': 'aac', 'title': '"Keine neuen Schulden im nächsten Jahr"', 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"', + 'duration': 64, } }, { @@ -54,6 +58,7 @@ class BRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Umweltbewusster Häuslebauer', 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer', + 'duration': 116, } }, { @@ -64,6 +69,7 @@ class BRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Folge 1 - Metaphysik', 'description': 'Kant für Anfänger: Folge 1 - Metaphysik', + 'duration': 893, 'uploader': 'Eva Maria Steimle', 'upload_date': '20140117', } @@ -84,6 +90,7 @@ class BRIE(InfoExtractor): media = { 'id': xml_media.get('externalId'), 'title': xml_media.find('title').text, + 'duration': parse_duration(xml_media.find('duration').text), 'formats': self._extract_formats(xml_media.find('assets')), 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')), 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()), diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 52c00186e..342bfb8b3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..utils import ( clean_html, compiled_regex_type, ExtractorError, + int_or_none, RegexNotFoundError, sanitize_filename, unescapeHTML, @@ -590,6 +591,24 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) + def _extract_f4m_formats(self, manifest_url, video_id): + manifest = self._download_xml( + manifest_url, video_id, 'Downloading f4m manifest', + 'Unable to download f4m manifest') + + formats = [] + for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'): + formats.append({ + 'url': manifest_url, + 'ext': 'flv', + 'tbr': int_or_none(media_el.attrib.get('bitrate')), + 'width': int_or_none(media_el.attrib.get('width')), + 'height': int_or_none(media_el.attrib.get('height')), + }) + self._sort_formats(formats) + + return formats + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 1fbe6d175..1b0e8e5d5 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor): + video_id, video_id, 'Downloading XML config') manifest_url = info.find('videos/video/url').text - video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') - video_url = video_url.replace('/z/', '/i/') + manifest_url = manifest_url.replace('/z/', '/i/') + + if manifest_url.startswith('rtmp'): + formats = [{'url': manifest_url, 'ext': 'flv'}] + else: + formats = [] + available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats') + for index, format_descr in enumerate(available_formats.split(',')): + format_info = { + 'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index), + 'ext': 'mp4', + } + m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr) + if m_resolution is not None: + format_info.update({ + 'width': int(m_resolution.group('width')), + 'height': int(m_resolution.group('height')), + }) + formats.append(format_info) + thumbnail_path = info.find('image').text - return {'id': video_id, - 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4', - 'url': video_url, - 'title': info.find('titre').text, - 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), - 'description': info.find('synopsis').text, - } + return { + 'id': video_id, + 'title': info.find('titre').text, + 'formats': formats, + 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), + 'description': info.find('synopsis').text, + } class PluzzIE(FranceTVBaseInfoExtractor): diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py new file mode 100644 index 000000000..50f8fc7e7 --- /dev/null +++ b/youtube_dl/extractor/gamestar.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + str_to_int, + unified_strdate, +) + + +class GameStarIE(InfoExtractor): + _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' + _TEST = { + 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', + 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', + 'info_dict': { + 'id': '76110', + 'ext': 'mp4', + 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', + 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.', + 'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg', + 'upload_date': '20140728', + 'duration': 17 + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + og_title = self._og_search_title(webpage) + title = og_title.replace(' - Video bei GameStar.de', '').strip() + + url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id + + description = self._og_search_description(webpage).strip() + + thumbnail = self._proto_relative_url( + self._og_search_thumbnail(webpage), scheme='http:') + + upload_date = unified_strdate(self._html_search_regex( + r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ', + webpage, 'upload_date', fatal=False)) + + duration = parse_duration(self._html_search_regex( + r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration', + fatal=False)) + + view_count = str_to_int(self._html_search_regex( + r' Zuschauer: ([0-9\.]+) ', webpage, + 'view_count', fatal=False)) + + comment_count = int_or_none(self._html_search_regex( + r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count', + fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'url': url, + 'ext': 'mp4', + 'thumbnail': thumbnail, + 'description': description, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9db27f9aa..bcb076594 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -383,13 +383,13 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'error' + default_search = 'fixup_error' - if default_search in ('auto', 'auto_warning'): + if default_search in ('auto', 'auto_warning', 'fixup_error'): if '/' in url: self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) - else: + elif default_search != 'fixup_error': if default_search == 'auto_warning': if re.match(r'^(?:url|URL)$', url): raise ExtractorError( @@ -399,7 +399,8 @@ class GenericIE(InfoExtractor): self._downloader.report_warning( 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) - elif default_search == 'error': + + if default_search in ('error', 'fixup_error'): raise ExtractorError( ('%r is not a valid URL. ' 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube' diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py new file mode 100644 index 000000000..73bd6d890 --- /dev/null +++ b/youtube_dl/extractor/godtube.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, +) + + +class GodTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)' + _TESTS = [ + { + 'url': 'https://www.godtube.com/watch/?v=0C0CNNNU', + 'md5': '77108c1e4ab58f48031101a1a2119789', + 'info_dict': { + 'id': '0C0CNNNU', + 'ext': 'mp4', + 'title': 'Woman at the well.', + 'duration': 159, + 'timestamp': 1205712000, + 'uploader': 'beverlybmusic', + 'upload_date': '20080317', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + config = self._download_xml( + 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(), + video_id, 'Downloading player config XML') + + video_url = config.find('.//file').text + uploader = config.find('.//author').text + timestamp = parse_iso8601(config.find('.//date').text) + duration = parse_duration(config.find('.//duration').text) + thumbnail = config.find('.//image').text + + media = self._download_xml( + 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML') + + title = media.find('.//title').text + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + } diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 9faf3a5e3..172def221 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -1,4 +1,6 @@ # coding: utf-8 +from __future__ import unicode_literals + import re import time @@ -10,18 +12,18 @@ from ..utils import ( class StreamcloudIE(InfoExtractor): - IE_NAME = u'streamcloud.eu' + IE_NAME = 'streamcloud.eu' _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html' _TEST = { - u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', - u'file': u'skp9j99s4bpz.mp4', - u'md5': u'6bea4c7fa5daaacc2a946b7146286686', - u'info_dict': { - u'title': u'youtube-dl test video \'/\\ ä ↭', - u'duration': 9, + 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', + 'md5': '6bea4c7fa5daaacc2a946b7146286686', + 'info_dict': { + 'id': 'skp9j99s4bpz', + 'ext': 'mp4', + 'title': 'youtube-dl test video \'/\\ ä ↭', }, - u'skip': u'Only available from the EU' + 'skip': 'Only available from the EU' } def _real_extract(self, url): @@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor): req = compat_urllib_request.Request(url, post, headers) webpage = self._download_webpage( - req, video_id, note=u'Downloading video page ...') + req, video_id, note='Downloading video page ...') title = self._html_search_regex( - r'<h1[^>]*>([^<]+)<', webpage, u'title') + r'<h1[^>]*>([^<]+)<', webpage, 'title') video_url = self._search_regex( - r'file:\s*"([^"]+)"', webpage, u'video URL') - duration_str = self._search_regex( - r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False) - duration = None if duration_str is None else int(duration_str) + r'file:\s*"([^"]+)"', webpage, 'video URL') thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False) + r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) return { 'id': video_id, 'title': title, 'url': video_url, - 'duration': duration, 'thumbnail': thumbnail, } diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py index 6c688c520..5d9d70367 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -8,7 +8,7 @@ from ..utils import parse_duration class SWRMediathekIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ 'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6', @@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor): 'uploader': 'SWR 2', 'uploader_id': '284670', } + }, { + 'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6', + 'md5': '881531487d0633080a8cc88d31ef896f', + 'info_dict': { + 'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6', + 'ext': 'mp4', + 'title': 'Familienspaß am Bodensee', + 'description': 'md5:0b591225a32cfde7be1629ed49fe4315', + 'thumbnail': 're:http://.*\.jpg', + 'duration': 1784, + 'upload_date': '20140727', + 'uploader': 'SWR Fernsehen BW', + 'uploader_id': '281130', + } }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index eada13ce9..d2ffd1b6b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -177,6 +177,7 @@ class VevoIE(InfoExtractor): self._downloader.report_warning( 'Cannot download SMIL information, falling back to JSON ..') + self._sort_formats(formats) timestamp_ms = int(self._search_regex( r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py new file mode 100644 index 000000000..5c89824c1 --- /dev/null +++ b/youtube_dl/extractor/vidme.py @@ -0,0 +1,68 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + str_to_int, +) + + +class VidmeIE(InfoExtractor): + _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)' + _TEST = { + 'url': 'https://vid.me/QNB', + 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82', + 'info_dict': { + 'id': 'QNB', + 'ext': 'mp4', + 'title': 'Fishing for piranha - the easy way', + 'description': 'source: https://www.facebook.com/photo.php?v=312276045600871', + 'duration': 119.92, + 'timestamp': 1406313244, + 'upload_date': '20140725', + 'thumbnail': 're:^https?://.*\.jpg', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage, default='') + thumbnail = self._og_search_thumbnail(webpage) + timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False)) + width = int_or_none(self._og_search_property('video:width', webpage, fatal=False)) + height = int_or_none(self._og_search_property('video:height', webpage, fatal=False)) + duration = float_or_none(self._html_search_regex( + r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)) + view_count = str_to_int(self._html_search_regex( + r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False)) + like_count = str_to_int(self._html_search_regex( + r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">', + webpage, 'like count', fatal=False)) + comment_count = str_to_int(self._html_search_regex( + r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">', + webpage, 'comment count', fatal=False)) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'width': width, + 'height': height, + 'duration': duration, + 'view_count': view_count, + 'like_count': like_count, + 'comment_count': comment_count, + } diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e77494595..6e7d56cf7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.25.1' +__version__ = '2014.07.30' |