59 files changed, 1454 insertions, 302 deletions
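The most user-visible change in this diff is to --default-search: an unqualified argument (one that is not a URL) now raises an error by default instead of silently falling back to a YouTube search, as the first README hunk and the parseOpts/generic.py hunks below document. A minimal sketch of the new and old behaviour through the Python API (a sketch only; the option keys are taken from the diff, and the exact error text may differ):

    import youtube_dl

    opts = {'skip_download': True, 'quiet': True}

    # New default ('error'): a bare search term is rejected rather than guessed.
    try:
        youtube_dl.YoutubeDL(opts).extract_info('large apple')
    except youtube_dl.utils.DownloadError:
        pass  # "'large apple' is not a valid URL ..."

    # Opt back in to guessing, or pick an explicit search prefix.
    youtube_dl.YoutubeDL(dict(opts, default_search='auto')).extract_info('large apple')
    youtube_dl.YoutubeDL(dict(opts, default_search='gvsearch2:')).extract_info('large apple')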
@@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.
     --default-search PREFIX         Use this prefix for unqualified URLs. For
                                     example "gvsearch2:" downloads two videos
                                     from google videos for youtube-dl "large
-                                    apple". By default (with value "auto")
-                                    youtube-dl guesses.
+                                    apple". Use the value "auto" to let
+                                    youtube-dl guess. The default value "error"
+                                    just throws an error.
     --ignore-config                 Do not read configuration files. When given
                                     in the global configuration file /etc
                                     /youtube-dl.conf: do not read the user
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 4b56137ce..2bc81f020 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_show_matching(self):
         self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
 
-    def test_youtube_truncated(self):
-        self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
-
     def test_youtube_search_matching(self):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 465b07b9e..3a88cf270 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -28,8 +28,9 @@ from youtube_dl.extractor import (
     SoundcloudSetIE,
     SoundcloudUserIE,
     SoundcloudPlaylistIE,
-    TeacherTubeClassroomIE,
+    TeacherTubeUserIE,
     LivestreamIE,
+    LivestreamOriginalIE,
     NHLVideocenterIE,
     BambuserChannelIE,
     BandcampAlbumIE,
@@ -40,6 +41,7 @@ from youtube_dl.extractor import (
     KhanAcademyIE,
     EveryonesMixtapeIE,
     RutubeChannelIE,
+    RutubePersonIE,
     GoogleSearchIE,
     GenericIE,
     TEDIE,
@@ -114,10 +116,10 @@ class TestPlaylists(unittest.TestCase):
     def test_ustream_channel(self):
         dl = FakeYDL()
         ie = UstreamChannelIE(dl)
-        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
+        result = ie.extract('http://www.ustream.tv/channel/channeljapan')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '5124905')
-        self.assertTrue(len(result['entries']) >= 6)
+        self.assertEqual(result['id'], '10874166')
+        self.assertTrue(len(result['entries']) >= 54)
 
     def test_soundcloud_set(self):
         dl = FakeYDL()
@@ -135,6 +137,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], '9615865')
         self.assertTrue(len(result['entries']) >= 12)
 
+    def test_soundcloud_likes(self):
+        dl = FakeYDL()
+        ie = SoundcloudUserIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band/likes')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '9615865')
+        self.assertTrue(len(result['entries']) >= 1)
+
     def test_soundcloud_playlist(self):
         dl = FakeYDL()
         ie = SoundcloudPlaylistIE(dl)
@@ -154,6 +164,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)
 
+    def test_livestreamoriginal_folder(self):
+        dl = FakeYDL()
+        ie = LivestreamOriginalIE(dl)
+        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertTrue(len(result['entries']) >= 28)
+
     def test_nhl_videocenter(self):
         dl = FakeYDL()
         ie = NHLVideocenterIE(dl)
@@ -256,10 +274,18 @@ class TestPlaylists(unittest.TestCase):
     def test_rutube_channel(self):
         dl = FakeYDL()
         ie = RutubeChannelIE(dl)
-        result = ie.extract('http://rutube.ru/tags/video/1409')
+        result = ie.extract('http://rutube.ru/tags/video/1800/')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '1800')
+        self.assertTrue(len(result['entries']) >= 68)
+
+    def test_rutube_person(self):
+        dl = FakeYDL()
+        ie = RutubePersonIE(dl)
+        result = ie.extract('http://rutube.ru/video/person/313878/')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '1409')
-        self.assertTrue(len(result['entries']) >= 34)
+        self.assertEqual(result['id'], '313878')
+        self.assertTrue(len(result['entries']) >= 37)
 
     def test_multiple_brightcove_videos(self):
         # https://github.com/rg3/youtube-dl/issues/2283
@@ -361,13 +387,13 @@ class TestPlaylists(unittest.TestCase):
             result['title'], 'Brace Yourself - Today\'s Weirdest News')
         self.assertTrue(len(result['entries']) >= 10)
 
-    def test_TeacherTubeClassroom(self):
+    def test_TeacherTubeUser(self):
         dl = FakeYDL()
-        ie = TeacherTubeClassroomIE(dl)
-        result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
+        ie = TeacherTubeUserIE(dl)
+        result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'rbhagwati2')
-        self.assertTrue(len(result['entries']) >= 20)
+        self.assertTrue(len(result['entries']) >= 179)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 8417c55a6..8d46fe108 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -33,6 +33,12 @@ _TESTS = [
         90,
         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+        u'js',
+        u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+        u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+    ),
 ]
 
 
@@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):
             os.mkdir(self.TESTDATA_DIR)
 
 
-def make_tfunc(url, stype, sig_length, expected_sig):
+def make_tfunc(url, stype, sig_input, expected_sig):
     basename = url.rpartition('/')[2]
     m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
     assert m, '%r should follow URL format' % basename
@@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_input, expected_sig):
             with open(fn, 'rb') as testf:
                 swfcode = testf.read()
             func = ie._parse_sig_swf(swfcode)
-        src_sig = compat_str(string.printable[:sig_length])
+        src_sig = (
+            compat_str(string.printable[:sig_input])
+            if isinstance(sig_input, int) else sig_input)
         got_sig = func(src_sig)
         self.assertEqual(got_sig, expected_sig)
 
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dc0ba986a..3dff723b8 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -993,6 +993,8 @@ class YoutubeDL(object):
             fd = get_suitable_downloader(info)(self, self.params)
             for ph in self._progress_hooks:
                 fd.add_progress_hook(ph)
+            if self.params.get('verbose'):
+                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
             return fd.download(name, info)
         if info_dict.get('requested_formats') is not None:
             downloaded = []
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1e01432d2..31ed63fcc 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -59,6 +59,7 @@ __authors__ = (
     'Adam Thalhammer',
     'Georg Jähnig',
     'Ralf Haring',
+    'Koki Takahashi',
 )
 
 __license__ = 'Public Domain'
@@ -269,7
+270,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') general.add_option( '--ignore-config', action='store_true', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4b7900b4f..44e1708ed 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,6 +3,7 @@ from .addanime import AddAnimeIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE +from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE @@ -63,6 +64,7 @@ from .dailymotion import ( from .daum import DaumIE from .dotsub import DotsubIE from .dreisat import DreiSatIE +from .drtv import DRTVIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .divxstage import DivxStageIE @@ -109,6 +111,8 @@ from .gdcvault import GDCVaultIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .gorillavid import GorillaVidIE +from .goshgay import GoshgayIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE @@ -146,7 +150,11 @@ from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE -from .livestream import LivestreamIE, LivestreamOriginalIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) from .lynda import ( LyndaIE, LyndaCourseIE @@ -164,11 +172,13 @@ from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE from .morningstar import MorningstarIE +from .motherless import MotherlessIE from .motorsport import MotorsportIE from .moviezine import MoviezineIE from .movshare import MovShareIE from .mtv import ( MTVIE, + MTVServicesEmbeddedIE, MTVIggyIE, ) from .musicplayon import MusicPlayOnIE @@ -195,6 +205,7 @@ from .normalboots import NormalbootsIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE +from .npo import NPOIE from .nrk import ( NRKIE, NRKTVIE, @@ -216,6 +227,7 @@ from .pornotube import PornotubeIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .radiofrance import RadioFranceIE +from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE @@ -252,6 +264,7 @@ from .soundcloud import ( SoundcloudUserIE, SoundcloudPlaylistIE ) +from .soundgasm import SoundgasmIE from .southparkstudios import ( SouthParkStudiosIE, SouthparkDeIE, @@ -271,7 +284,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .teachertube import ( TeacherTubeIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE @@ -331,6 +344,7 @@ from .vine import ( ) from .viki import VikiIE from .vk import VKIE +from .vodlocker import VodlockerIE from .vube import VubeIE from .vuclip import VuClipIE from 
.vulture import VultureIE @@ -345,6 +359,7 @@ from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE +from .wrzuta import WrzutaIE from .xbef import XBefIE from .xhamster import XHamsterIE from .xnxx import XNXXIE diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py new file mode 100644 index 000000000..34f0cd49b --- /dev/null +++ b/youtube_dl/extractor/allocine.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + qualities, + determine_ext, +) + + +class AllocineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', + 'md5': '0c9fcf59a841f65635fa300ac43d8269', + 'info_dict': { + 'id': '19546517', + 'ext': 'mp4', + 'title': 'Astérix - Le Domaine des Dieux Teaser VF', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', + 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', + 'info_dict': { + 'id': '19540403', + 'ext': 'mp4', + 'title': 'Planes 2 Bande-annonce VF', + 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'md5': '101250fb127ef9ca3d73186ff22a47ce', + 'info_dict': { + 'id': '19544709', + 'ext': 'mp4', + 'title': 'Dragons 2 - Bande annonce finale VF', + 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + typ = mobj.group('typ') + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + if typ == 'film': + video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') + else: + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') + + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + + xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) + + video = xml.find('.//AcVisionVideo').attrib + quality = qualities(['ld', 'md', 'hd']) + + formats = [] + for k, v in video.items(): + if re.match(r'.+_path', k): + format_id = k.split('_')[0] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': v, + 'ext': determine_ext(v), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['videoTitle'], + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + } diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 2b019daa9..31f0d417c 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,22 +1,24 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class AnitubeIE(InfoExtractor): - IE_NAME = u'anitube.se' + IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' _TEST = { - u'url': u'http://www.anitube.se/video/36621', - u'md5': 
u'59d0eeae28ea0bc8c05e7af429998d43', - u'file': u'36621.mp4', - u'info_dict': { - u'id': u'36621', - u'ext': u'mp4', - u'title': u'Recorder to Randoseru 01', + 'url': 'http://www.anitube.se/video/36621', + 'md5': '59d0eeae28ea0bc8c05e7af429998d43', + 'info_dict': { + 'id': '36621', + 'ext': 'mp4', + 'title': 'Recorder to Randoseru 01', + 'duration': 180.19, }, - u'skip': u'Blocked in the US', + 'skip': 'Blocked in the US', } def _real_extract(self, url): @@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', - webpage, u'key') + key = self._html_search_regex( + r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key') - config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, - key) + config_xml = self._download_xml( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) video_title = config_xml.find('title').text + thumbnail = config_xml.find('image').text + duration = float(config_xml.find('duration').text) formats = [] video_url = config_xml.find('file') @@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor): return { 'id': video_id, 'title': video_title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats } diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index c6d22c029..b36a4d46a 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -56,7 +56,18 @@ class ARDIE(InfoExtractor): raise ExtractorError('This video is only available after 20:00') formats = [] + for s in streams: + if type(s['_stream']) == list: + for index, url in enumerate(s['_stream'][::-1]): + quality = s['_quality'] + index + formats.append({ + 'quality': quality, + 'url': url, + 'format_id': '%s-%s' % (determine_ext(url), quality) + }) + continue + format = { 'quality': s['_quality'], 'url': s['_stream'], diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b528a9ec5..9591bad8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor): formats = [{ 'forma_id': q.attrib['quality'], - 'url': q.text, + # The playpath starts at 'mp4:', if we don't manually + # split the url, rtmpdump will incorrectly parse them + 'url': q.text.split('mp4:', 1)[0], + 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], 'ext': 'flv', 'quality': 2 if q.attrib['quality'] == 'hd' else 1, } for q in config.findall('./urls/url')] @@ -111,7 +114,7 @@ class ArteTVPlus7IE(InfoExtractor): if not formats: # Some videos are only available in the 'Originalversion' # they aren't tagged as being in French or German - if all(f['versionCode'] == 'VO' for f in all_formats): + if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): formats = all_formats else: raise ExtractorError(u'The formats list is empty') @@ -189,9 +192,10 @@ class ArteTVFutureIE(ArteTVPlus7IE): _TEST = { 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', 'info_dict': { - 'id': '050940-003', + 'id': '5201', 'ext': 'mp4', 'title': 'Les champignons au secours de la planète', + 'upload_date': '20131101', }, } diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 45067b944..0d5889f5d 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -13,7 +13,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): - _VALID_URL = 
r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/' + _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' _TEST = { 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -56,7 +56,7 @@ class BiliBiliIE(InfoExtractor): 'thumbnailUrl', video_code, 'thumbnail', fatal=False) player_params = compat_parse_qs(self._html_search_regex( - r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"', + r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"', webpage, 'player params')) if 'cid' in player_params: diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index d4da08991..acfc4ad73 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -15,7 +15,7 @@ from ..utils import ( class BlipTVIE(SubtitlesInfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))' + _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))' _TESTS = [ { diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index b5b56ff00..993360714 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -17,15 +17,13 @@ class BRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html', - 'md5': 'c4f83cf0f023ba5875aba0bf46860df2', + 'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html', + 'md5': '93556dd2bcb2948d9259f8670c516d59', 'info_dict': { - 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532', + 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', 'ext': 'mp4', - 'title': 'Feiern und Verzichten', - 'description': 'Anselm Grün: Feiern und Verzichten', - 'uploader': 'BR/Birgit Baier', - 'upload_date': '20140301', + 'title': 'Am 1. und 2. 
August in Oberammergau', + 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021', } }, { diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 3c02c297a..419951b62 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -15,6 +15,7 @@ from ..utils import ( compat_urllib_request, compat_parse_qs, + determine_ext, ExtractorError, unsmuggle_url, unescapeHTML, @@ -29,10 +30,11 @@ class BrightcoveIE(InfoExtractor): { # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', - 'file': '2371591881001.mp4', 'md5': '5423e113865d26e40624dce2e4b45d95', 'note': 'Test Brightcove downloads and detection in GenericIE', 'info_dict': { + 'id': '2371591881001', + 'ext': 'mp4', 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', 'uploader': '8TV', 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', @@ -41,8 +43,9 @@ class BrightcoveIE(InfoExtractor): { # From http://medianetwork.oracle.com/video/player/1785452137001 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', - 'file': '1785452137001.flv', 'info_dict': { + 'id': '1785452137001', + 'ext': 'flv', 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', 'uploader': 'Oracle', @@ -70,7 +73,20 @@ class BrightcoveIE(InfoExtractor): 'description': 'md5:363109c02998fee92ec02211bd8000df', 'uploader': 'National Ballet of Canada', }, - } + }, + { + # test flv videos served by akamaihd.net + # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', + # The md5 checksum changes on each download + 'info_dict': { + 'id': '2996102916001', + 'ext': 'flv', + 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'uploader': 'Red Bull TV', + 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + }, + }, ] @classmethod @@ -187,7 +203,7 @@ class BrightcoveIE(InfoExtractor): webpage = self._download_webpage(req, video_id) self.report_extraction(video_id) - info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') + info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') info = json.loads(info)['data'] video_info = info['programmedContent']['videoPlayer']['mediaDTO'] video_info['_youtubedl_adServerURL'] = info.get('adServerURL') @@ -219,12 +235,26 @@ class BrightcoveIE(InfoExtractor): renditions = video_info.get('renditions') if renditions: - renditions = sorted(renditions, key=lambda r: r['size']) - info['formats'] = [{ - 'url': rend['defaultURL'], - 'height': rend.get('frameHeight'), - 'width': rend.get('frameWidth'), - } for rend in renditions] + formats = [] + for rend in renditions: + url = rend['defaultURL'] + if 
rend['remote']: + # This type of renditions are served through akamaihd.net, + # but they don't use f4m manifests + url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' + ext = 'flv' + else: + ext = determine_ext(url) + size = rend.get('size') + formats.append({ + 'url': url, + 'ext': ext, + 'height': rend.get('frameHeight'), + 'width': rend.get('frameWidth'), + 'filesize': size if size != 0 else None, + }) + self._sort_formats(formats) + info['formats'] = formats elif video_info.get('FLVFullLengthURL') is not None: info.update({ 'url': video_info['FLVFullLengthURL'], diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index ba4d73ab8..8af0abade 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor): raise ExtractorError('Invalid redirected URL: ' + url) if mobj.group('episode') == '': raise ExtractorError('Redirected URL is still not specific: ' + url) - epTitle = mobj.group('episode').rpartition('/')[-1] + epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1] mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) if len(mMovieParams) == 0: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 49e75405e..f1ed30704 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -459,6 +460,9 @@ class InfoExtractor(object): if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) + def _og_search_url(self, html, **kargs): + return self._og_search_property('url', html, **kargs) + def _html_search_meta(self, name, html, display_name=None, fatal=False): if display_name is None: display_name = name @@ -572,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -615,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 55216201f..5d0bfe454 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): return { 'id': video_id, 'formats': formats, - 'uploader': info['owner_screenname'], + 'uploader': info['owner.screenname'], 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), 'subtitles': video_subtitles, diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 2ae6ecc12..554df6735 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -7,9 +7,9 @@ from .common import InfoExtractor class DiscoveryIE(InfoExtractor): - _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' 
+ _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' _TEST = { - 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', + 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'md5': 'e12614f9ee303a6ccef415cb0793eba2', 'info_dict': { 'id': '614784', diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py new file mode 100644 index 000000000..cdccfd376 --- /dev/null +++ b/youtube_dl/extractor/drtv.py @@ -0,0 +1,91 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from .common import ExtractorError +from ..utils import parse_iso8601 + + +class DRTVIE(SubtitlesInfoExtractor): + _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)' + + _TEST = { + 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', + 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'info_dict': { + 'id': 'partiets-mand-7-8', + 'ext': 'mp4', + 'title': 'Partiets mand (7:8)', + 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', + 'timestamp': 1403047940, + 'upload_date': '20140617', + 'duration': 1299.040, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + programcard = self._download_json( + 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON') + + data = programcard['Data'][0] + + title = data['Title'] + description = data['Description'] + timestamp = parse_iso8601(data['CreatedTime'][:-5]) + + thumbnail = None + duration = None + + restricted_to_denmark = False + + formats = [] + subtitles = {} + + for asset in data['Assets']: + if asset['Kind'] == 'Image': + thumbnail = asset['Uri'] + elif asset['Kind'] == 'VideoResource': + duration = asset['DurationInMilliseconds'] / 1000.0 + restricted_to_denmark = asset['RestrictedToDenmark'] + for link in asset['Links']: + target = link['Target'] + uri = link['Uri'] + formats.append({ + 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, + 'format_id': target, + 'ext': link['FileFormat'], + 'preference': -1 if target == 'HDS' else -2, + }) + subtitles_list = asset.get('SubtitlesList') + if isinstance(subtitles_list, list): + LANGS = { + 'Danish': 'dk', + } + for subs in subtitles_list: + lang = subs['Language'] + subtitles[LANGS.get(lang, lang)] = subs['Uri'] + + if not formats and restricted_to_denmark: + raise ExtractorError( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + + self._sort_formats(formats) + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + 'subtitles': self.extract_subtitles(video_id, subtitles), + } diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py index eccd8dde9..0993af1c9 100644 --- a/youtube_dl/extractor/firstpost.py +++ b/youtube_dl/extractor/firstpost.py @@ -15,6 +15,7 @@ class FirstpostIE(InfoExtractor): 'id': '1025403', 'ext': 'mp4', 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', + 'description': 'md5:feef3041cb09724e0bdc02843348f5f4', } } @@ -22,13 +23,16 @@ class FirstpostIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = 
mobj.group('id') + page = self._download_webpage(url, video_id) + title = self._html_search_meta('twitter:title', page, 'title') + description = self._html_search_meta('twitter:description', page, 'title') + data = self._download_xml( 'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id, 'Downloading video XML') item = data.find('./playlist/item') thumbnail = item.find('./image').text - title = item.find('./title').text formats = [ { @@ -42,6 +46,7 @@ class FirstpostIE(InfoExtractor): return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'formats': formats, } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3105b47ab..f97b59845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -278,6 +278,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # MTVSercices embed + { + 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', + 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'info_dict': { + 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'ext': 'mp4', + 'title': 'Review', + 'description': 'Mario\'s life in the fast lane has never looked so good.', + }, + }, ] def report_download_webpage(self, video_id): @@ -372,7 +383,7 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto_warning' + default_search = 'error' if default_search in ('auto', 'auto_warning'): if '/' in url: @@ -386,8 +397,13 @@ class GenericIE(InfoExtractor): expected=True) else: self._downloader.report_warning( - 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) + 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) + elif default_search == 'error': + raise ExtractorError( + ('%r is not a valid URL. 
' + 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + ) % (url, url), expected=True) else: assert ':' in default_search return self.url_result(default_search + url) @@ -609,6 +625,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'VK') + # Look for embedded ivi player + mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Ivi') + # Look for embedded Huffington Post player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) @@ -676,6 +697,14 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url, ie='Vulture') + # Look for embedded mtvservices player + mobj = re.search( + r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"', + webpage) + if mobj is not None: + url = unescapeHTML(mobj.group('url')) + return self.url_result(url, ie='MTVServicesEmbedded') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index cc29a7e5d..07d994b44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor): # Extract title # Get the first line for title - video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]', - webpage, 'title', default='NA') + video_title = self._og_search_description(webpage).splitlines()[0] # Step 2, Simulate clicking the image box to launch video DOMAIN = 'https://plus.google.com/' diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py new file mode 100644 index 000000000..50ef54cce --- /dev/null +++ b/youtube_dl/extractor/gorillavid.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + compat_urllib_parse, + compat_urllib_request, +) + + +class GorillaVidIE(InfoExtractor): + IE_DESC = 'GorillaVid.in and daclips.in' + _VALID_URL = r'''(?x) + https?://(?:www\.)? + (?:daclips\.in|gorillavid\.in)/ + (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? + ''' + + _TESTS = [{ + 'url': 'http://gorillavid.in/06y9juieqpmi', + 'md5': '5ae4a3580620380619678ee4875893ba', + 'info_dict': { + 'id': '06y9juieqpmi', + 'ext': 'flv', + 'title': 'Rebecca Black My Moment Official Music Video Reaction', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', + 'md5': 'c9e293ca74d46cad638e199c3f3fe604', + 'info_dict': { + 'id': 'z08zf8le23c6', + 'ext': 'mp4', + 'title': 'Say something nice', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://daclips.in/3rso4kdn6f9m', + 'info_dict': { + 'id': '3rso4kdn6f9m', + 'ext': 'mp4', + 'title': 'Micro Pig piglets ready on 16th July 2009', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)<input\s+ + type="hidden"\s+ + name="([^"]+)"\s+ + (?:id="[^"]+"\s+)? 
+ value="([^"]*)" + ''', webpage)) + + if fields['op'] == 'download1': + post = compat_urllib_parse.urlencode(fields) + + req = compat_urllib_request.Request(url, post) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + + webpage = self._download_webpage(req, video_id, 'Downloading video page') + + title = self._search_regex(r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>', webpage, 'title') + thumbnail = self._search_regex(r'image:\'(http[^\']+)\',', webpage, 'thumbnail') + url = self._search_regex(r'file: \'(http[^\']+)\',', webpage, 'file url') + + formats = [{ + 'format_id': 'sd', + 'url': url, + 'ext': determine_ext(url), + 'quality': 1, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py new file mode 100644 index 000000000..7bca21ad0 --- /dev/null +++ b/youtube_dl/extractor/goshgay.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + str_to_int, + ExtractorError, +) +import json + + +class GoshgayIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)' + _TEST = { + 'url': 'http://www.goshgay.com/video4116282', + 'md5': '268b9f3c3229105c57859e166dd72b03', + 'info_dict': { + 'id': '4116282', + 'ext': 'flv', + 'title': 'md5:089833a4790b5e103285a07337f245bf', + 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title') + + player_config = self._search_regex( + r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') + player_vars = json.loads(player_config.replace("'", '"')) + width = str_to_int(player_vars.get('width')) + height = str_to_int(player_vars.get('height')) + config_uri = player_vars.get('config') + + if config_uri is None: + raise ExtractorError('Missing config URI') + node = self._download_xml(config_uri, video_id, 'Downloading player config XML', + errnote='Unable to download XML') + if node is None: + raise ExtractorError('Missing config XML') + if node.tag != 'config': + raise ExtractorError('Missing config attribute') + fns = node.findall('file') + imgs = node.findall('image') + if len(fns) != 1: + raise ExtractorError('Missing media URI') + video_url = fns[0].text + if len(imgs) < 1: + thumbnail = None + else: + thumbnail = imgs[0].text + + url_comp = compat_urlparse.urlparse(url) + ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'width': width, + 'height': height, + 'thumbnail': thumbnail, + 'http_referer': ref, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 528be1524..4027deb70 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -14,7 +14,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)' _TESTS = [ # Single movie diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py 
index 7a431a274..8d9491f23 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -24,7 +24,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом', 'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.', - 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg', + 'thumbnail': 're:http://.*\.jpg', 'upload_date': '20140130', } } diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 1dcd1fb2d..2c100d424 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -6,31 +8,35 @@ from ..utils import ( compat_urllib_parse_urlparse, compat_urlparse, xpath_with_ns, + compat_str, + orderedSet, ) class LivestreamIE(InfoExtractor): - IE_NAME = u'livestream' + IE_NAME = 'livestream' _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' _TEST = { - u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', - u'file': u'4719370.mp4', - u'md5': u'0d2186e3187d185a04b3cdd02b828836', - u'info_dict': { - u'title': u'Live from Webster Hall NYC', - u'upload_date': u'20121012', + 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', + 'md5': '53274c76ba7754fb0e8d072716f2292b', + 'info_dict': { + 'id': '4719370', + 'ext': 'mp4', + 'title': 'Live from Webster Hall NYC', + 'upload_date': '20121012', } } def _extract_video_info(self, video_data): video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url') - return {'id': video_data['id'], - 'url': video_url, - 'ext': 'mp4', - 'title': video_data['caption'], - 'thumbnail': video_data['thumbnail_url'], - 'upload_date': video_data['updated_at'].replace('-','')[:8], - } + return { + 'id': compat_str(video_data['id']), + 'url': video_url, + 'ext': 'mp4', + 'title': video_data['caption'], + 'thumbnail': video_data['thumbnail_url'], + 'upload_date': video_data['updated_at'].replace('-', '')[:8], + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -40,43 +46,43 @@ class LivestreamIE(InfoExtractor): if video_id is None: # This is an event page: - config_json = self._search_regex(r'window.config = ({.*?});', - webpage, u'window config') + config_json = self._search_regex( + r'window.config = ({.*?});', webpage, 'window config') info = json.loads(config_json)['event'] videos = [self._extract_video_info(video_data['data']) - for video_data in info['feed']['data'] if video_data['type'] == u'video'] + for video_data in info['feed']['data'] if video_data['type'] == 'video'] return self.playlist_result(videos, info['id'], info['full_name']) else: - og_video = self._og_search_video_url(webpage, name=u'player url') + og_video = self._og_search_video_url(webpage, 'player url') query_str = compat_urllib_parse_urlparse(og_video).query query = compat_urlparse.parse_qs(query_str) api_url = query['play_url'][0].replace('.smil', '') - info = json.loads(self._download_webpage(api_url, video_id, - u'Downloading video info')) + info = json.loads(self._download_webpage( + api_url, video_id, 'Downloading video info')) return self._extract_video_info(info) # The original version of Livestream uses a different system class LivestreamOriginalIE(InfoExtractor): - IE_NAME = u'livestream:original' - _VALID_URL = 
r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' + IE_NAME = 'livestream:original' + _VALID_URL = r'''(?x)https?://www\.livestream\.com/ + (?P<user>[^/]+)/(?P<type>video|folder) + (?:\?.*?Id=|/)(?P<id>.*?)(&|$) + ''' _TEST = { - u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - u'info_dict': { - u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - u'ext': u'flv', - u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', + 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', + 'info_dict': { + 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', + 'ext': 'flv', + 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', }, - u'params': { + 'params': { # rtmp - u'skip_download': True, + 'skip_download': True, }, } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - user = mobj.group('user') + def _extract_video(self, user, video_id): api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) info = self._download_xml(api_url, video_id) @@ -84,7 +90,7 @@ class LivestreamOriginalIE(InfoExtractor): ns = {'media': 'http://search.yahoo.com/mrss'} thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] # Remove the extension and number from the path (like 1.jpg) - path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path') + path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path') return { 'id': video_id, @@ -94,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor): 'ext': 'flv', 'thumbnail': thumbnail_url, } + + def _extract_folder(self, url, folder_id): + webpage = self._download_webpage(url, folder_id) + urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage)) + + return { + '_type': 'playlist', + 'id': folder_id, + 'entries': [{ + '_type': 'url', + 'url': video_url, + } for video_url in urls], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + id = mobj.group('id') + user = mobj.group('user') + url_type = mobj.group('type') + if url_type == 'folder': + return self._extract_folder(url, id) + else: + return self._extract_video(user, id) + + +# The server doesn't support HEAD request, the generic extractor can't detect +# the redirection +class LivestreamShortenerIE(InfoExtractor): + IE_NAME = 'livestream:shortener' + IE_DESC = False # Do not list + _VALID_URL = r'https?://livestre\.am/(?P<id>.+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + id = mobj.group('id') + webpage = self._download_webpage(url, id) + + return { + '_type': 'url', + 'url': self._og_search_url(webpage), + } diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py new file mode 100644 index 000000000..6229b2173 --- /dev/null +++ b/youtube_dl/extractor/motherless.py @@ -0,0 +1,87 @@ +from __future__ import unicode_literals + +import datetime +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate, +) + + +class MotherlessIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)' + _TESTS = [ + { + 'url': 'http://motherless.com/AC3FFE1', + 'md5': '5527fef81d2e529215dad3c2d744a7d9', + 'info_dict': { + 'id': 'AC3FFE1', + 'ext': 'flv', + 'title': 'Fucked in the ass 
while playing PS3', + 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], + 'upload_date': '20100913', + 'uploader_id': 'famouslyfuckedup', + 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, + } + }, + { + 'url': 'http://motherless.com/532291B', + 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131', + 'info_dict': { + 'id': '532291B', + 'ext': 'mp4', + 'title': 'Amazing girl playing the omegle game, PERFECT!', + 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'], + 'upload_date': '20140622', + 'uploader_id': 'Sulivana7x', + 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, + } + } + ] + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') + + video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url') + age_limit = self._rta_search(webpage) + + view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count') + + upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date') + if 'Ago' in upload_date: + days = int(re.search(r'([0-9]+)', upload_date).group(1)) + upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') + else: + upload_date = unified_strdate(upload_date) + + like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count') + + comment_count = webpage.count('class="media-comment-contents"') + uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id') + + categories = self._html_search_meta('keywords', webpage) + if categories: + categories = [cat.strip() for cat in categories.split(',')] + + return { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'thumbnail': self._og_search_thumbnail(webpage), + 'categories': categories, + 'view_count': int_or_none(view_count.replace(',', '')), + 'like_count': int_or_none(like_count.replace(',', '')), + 'comment_count': comment_count, + 'age_limit': age_limit, + 'url': video_url, + } diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._search_regex( - r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') + r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') data = json.loads(data_json) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e5ca41b40..af9490ccc 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -22,6 +22,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -35,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor): base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' return base + m.group('finalid') + def _get_feed_url(self, uri): + return self._FEED_URL + def _get_thumbnail_url(self, uri, itemdoc): search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) thumb_node = 
itemdoc.find(search_path) @@ -136,10 +140,10 @@ class MTVServicesInfoExtractor(InfoExtractor): def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) + feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) - idoc = self._download_xml( - self._FEED_URL + '?' + data, video_id, + feed_url + '?' + data, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return [self._get_video_info(item) for item in idoc.findall('.//item')] @@ -160,6 +164,37 @@ class MTVServicesInfoExtractor(InfoExtractor): return self._get_videos_info(mgid) +class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): + IE_NAME = 'mtvservices:embedded' + _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)' + + _TEST = { + # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/ + 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906', + 'md5': 'cb349b21a7897164cede95bd7bf3fbb9', + 'info_dict': { + 'id': '1043906', + 'ext': 'mp4', + 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds', + 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.', + }, + } + + def _get_feed_url(self, uri): + video_id = self._id_from_uri(uri) + site_id = uri.replace(video_id, '') + config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id) + config_doc = self._download_xml(config_url, video_id) + feed_node = config_doc.find('.//feed') + feed_url = feed_node.text.strip().split('?')[0] + return feed_url + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + mgid = mobj.group('mgid') + return self._get_videos_info(mgid) + + class MTVIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 2fd5b8f04..551bd4d7a 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -4,18 +4,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class NewstubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)' _TEST = { - 'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs', + 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', 'info_dict': { - 'id': 'd156a237-a6e9-4111-a682-039995f721f1', + 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'ext': 'flv', - 'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»', - 'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77', - 'duration': 20.04, + 'title': 'Телеканал CNN переместил город Славянск в Крым', + 'description': 'md5:419a8c9f03442bc0b0a794d689360335', + 'duration': 31.05, }, 'params': { # rtmp download @@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor): def ns(s): return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} + error_message = player.find(ns('./ErrorMessage')) + if error_message is not None: + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True) + session_id = player.find(ns('./SessionId')).text media_info = player.find(ns('./Medias/MediaInfo')) title = media_info.find(ns('./Name')).text diff 
--git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 517a72561..c0c139b5d 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -8,10 +8,9 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - compat_str, - - ExtractorError, unified_strdate, + parse_duration, + int_or_none, ) @@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor): 'uploader_id': '2698420', 'upload_date': '20131123', 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', + 'duration': 33, }, 'params': { 'username': 'ydl.niconico@gmail.com', @@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' + _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' + # Determine whether the downloader uses authentication to download video + _AUTHENTICATE = False def _real_initialize(self): - self._login() + if self._downloader.params.get('username', None) is not None: + self._AUTHENTICATE = True + + if self._AUTHENTICATE: + self._login() def _login(self): (username, password) = self._get_login_info() - if username is None: - # Login is required - raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) # Log in login_form_strs = { @@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, - video_id, 'Downloading flv info') + if self._AUTHENTICATE: + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + video_id, 'Downloading flv info') + else: + # Get external player info + ext_player_info = self._download_webpage( + 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) + thumb_play_key = self._search_regex( + r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') + + # Get flv info + flv_info_data = compat_urllib_parse.urlencode({ + 'k': thumb_play_key, + 'v': video_id + }) + flv_info_request = compat_urllib_request.Request( + 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, + {'Content-Type': 'application/x-www-form-urlencoded'}) + flv_info_webpage = self._download_webpage( + flv_info_request, video_id, + note='Downloading flv info', errnote='Unable to download flv info') + video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information - video_title = video_info.find('.//title').text - video_extension = video_info.find('.//movie_type').text - video_format = video_extension.upper() - video_thumbnail = video_info.find('.//thumbnail_url').text - video_description = video_info.find('.//description').text - video_uploader_id = video_info.find('.//user_id').text - video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) - video_view_count = video_info.find('.//view_counter').text - video_webpage_url = video_info.find('.//watch_url').text - - # uploader - video_uploader = video_uploader_id - url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id - try: - user_info = self._download_xml( - url, video_id, note='Downloading user information') - video_uploader = user_info.find('.//nickname').text - except ExtractorError as err: - 
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err)) + title = video_info.find('.//title').text + extension = video_info.find('.//movie_type').text + video_format = extension.upper() + thumbnail = video_info.find('.//thumbnail_url').text + description = video_info.find('.//description').text + upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) + view_count = int_or_none(video_info.find('.//view_counter').text) + comment_count = int_or_none(video_info.find('.//comment_num').text) + duration = parse_duration(video_info.find('.//length').text) + webpage_url = video_info.find('.//watch_url').text + + if video_info.find('.//ch_id') is not None: + uploader_id = video_info.find('.//ch_id').text + uploader = video_info.find('.//ch_name').text + elif video_info.find('.//user_id') is not None: + uploader_id = video_info.find('.//user_id').text + uploader = video_info.find('.//user_nickname').text + else: + uploader_id = uploader = None return { 'id': video_id, 'url': video_real_url, - 'title': video_title, - 'ext': video_extension, + 'title': title, + 'ext': extension, 'format': video_format, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'uploader': video_uploader, - 'upload_date': video_upload_date, - 'uploader_id': video_uploader_id, - 'view_count': video_view_count, - 'webpage_url': video_webpage_url, + 'thumbnail': thumbnail, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'duration': duration, + 'webpage_url': webpage_url, } diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index c2e7b67c7..33daa0dec 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor): webpage = self._download_webpage(url, display_id) post_view = json.loads(self._html_search_regex( - r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) + r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) youtube_id = post_view['videoExternalId'] title = post_view['title'] diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index d451cd1bf..da203538d 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -35,7 +35,7 @@ class NocoIE(InfoExtractor): video_id = mobj.group('id') medias = self._download_json( - 'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') + 'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') formats = [] @@ -43,7 +43,7 @@ class NocoIE(InfoExtractor): format_id = fmt['quality_key'] file = self._download_json( - 'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), + 'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), video_id, 'Downloading %s video JSON' % format_id) file_url = file['file'] @@ -71,7 +71,7 @@ class NocoIE(InfoExtractor): self._sort_formats(formats) show = self._download_json( - 'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] + 'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] upload_date = unified_strdate(show['indexed']) uploader = show['partner_name'] diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py new file mode 
100644
index 000000000..fbcbe1f40
--- /dev/null
+++ b/youtube_dl/extractor/npo.py
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+)
+
+
+class NPOIE(InfoExtractor):
+    IE_NAME = 'npo.nl'
+    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+        'info_dict': {
+            'id': 'VPWON_1220719',
+            'ext': 'mp4',
+            'title': 'Nieuwsuur',
+            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+            'upload_date': '20140622',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        metadata = self._download_json(
+            'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
+            video_id,
+            # We have to remove the javascript callback
+            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
+        )
+        token_page = self._download_webpage(
+            'http://ida.omroep.nl/npoplayer/i.js',
+            video_id,
+            note='Downloading token'
+        )
+        token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
+        streams_info = self._download_json(
+            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
+            video_id
+        )
+
+        stream_info = self._download_json(
+            streams_info['streams'][0] + '&type=json',
+            video_id,
+            'Downloading stream info'
+        )
+
+        return {
+            'id': video_id,
+            'title': metadata['titel'],
+            'ext': 'mp4',
+            'url': stream_info['url'],
+            'description': metadata['info'],
+            'thumbnail': metadata['images'][-1]['url'],
+            'upload_date': unified_strdate(metadata['gidsdatum']),
+        }
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 1f066cf05..96f0ae1eb 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -72,14 +72,14 @@ class NRKIE(InfoExtractor):
 class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
     _TESTS = [
         {
-            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014',
+            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
             'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
             'info_dict': {
-                'id': 'muhh48000314',
+                'id': 'MUHH48000314',
                 'ext': 'flv',
                 'title': '20 spørsmål',
                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
@@ -141,4 +141,4 @@ class NRKTVIE(InfoExtractor):
             'upload_date': upload_date,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index e4c4ad714..da64a1a7b 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -158,19 +158,19 @@ class ProSiebenSat1IE(InfoExtractor): _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', - r'clipId=(\d+)', + r'clip[iI]d=(\d+)', ] _TITLE_REGEXES = [ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', r'<header class="clearfix">\s*<h3>(.+?)</h3>', r'<!-- start video -->\s*<h1>(.+?)</h1>', - r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>', + r'<h1 class="att-name">\s*(.+?)</h1>', ] _DESCRIPTION_REGEXES = [ r'<p itemprop="description">\s*(.+?)</p>', r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', - r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">', + r'<p class="att-description">\s*(.+?)\s*</p>', ] _UPLOAD_DATE_REGEXES = [ r'<meta property="og:published_time" content="(.+?)">', diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py new file mode 100644 index 000000000..ba3dd707f --- /dev/null +++ b/youtube_dl/extractor/rai.py @@ -0,0 +1,122 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, + compat_urllib_parse, +) + + +class RaiIE(SubtitlesInfoExtractor): + _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' + _TESTS = [ + { + 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': 'c064c0b2d09c278fb293116ef5d0a32d', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report del 07/04/2014', + 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', + 'upload_date': '20140407', + 'duration': 6160, + } + }, + { + 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', + 'md5': '8bb9c151924ce241b74dd52ef29ceafa', + 'info_dict': { + 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', + 'ext': 'mp4', + 'title': 'TG PRIMO TEMPO', + 'description': '', + 'upload_date': '20140612', + 'duration': 1758, + }, + 'skip': 'Error 404', + }, + { + 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', + 'md5': '35cf7c229f22eeef43e48b5cf923bef0', + 'info_dict': { + 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', + 'ext': 'mp4', + 'title': 'State of the Net, Antonella La Carpia: regole virali', + 'description': 'md5:b0ba04a324126903e3da7763272ae63c', + 'upload_date': '20140613', + }, + 'skip': 'Error 404', + }, + { + 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', + 'md5': '35694f062977fe6619943f08ed935730', + 'info_dict': { + 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', + 'ext': 'mp4', + 'title': 'Alluvione in Sardegna e dissesto idrogeologico', + 'description': 'Edizione delle ore 20:30 ', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON') + + title = media.get('name') + description = 
media.get('desc')
+        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+        duration = parse_duration(media.get('length'))
+        uploader = media.get('author')
+        upload_date = unified_strdate(media.get('date'))
+
+        formats = []
+
+        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
+            media_url = media.get(format_id)
+            if not media_url:
+                continue
+            formats.append({
+                'url': media_url,
+                'format_id': format_id,
+                'ext': 'mp4',
+            })
+
+        if self._downloader.params.get('listsubtitles', False):
+            page = self._download_webpage(url, video_id)
+            self._list_available_subtitles(video_id, page)
+            return
+
+        subtitles = {}
+        if self._have_to_download_any_subtitles:
+            page = self._download_webpage(url, video_id)
+            subtitles = self.extract_subtitles(video_id, page)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def _get_available_subtitles(self, video_id, webpage):
+        subtitles = {}
+        m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
+        if m:
+            captions = m.group('captions')
+            STL_EXT = '.stl'
+            SRT_EXT = '.srt'
+            if captions.endswith(STL_EXT):
+                captions = captions[:-len(STL_EXT)] + SRT_EXT
+            subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+        return subtitles
\ No newline at end of file diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7aa100fb2..14ec9452d 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' + _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$' IE_NAME = 'soundcloud:user' # it's in tests/test_playlists.py @@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') + resource = mobj.group('rsrc') + if resource is None: + resource = 'tracks' + elif resource == 'likes': + resource = 'favorites' url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user = self._download_json( resolv_url, uploader, 'Downloading user info') - base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader + base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource) entries = [] for i in itertools.count(): data = compat_urllib_parse.urlencode({ 'offset': i * 50, + 'limit': 50, 'client_id': self._CLIENT_ID, }) new_entries = self._download_json( base_url + data, uploader, 'Downloading track page %s' % (i + 1)) - entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) - if len(new_entries) < 50: + if len(new_entries) == 0: + self.to_screen('%s: End page received' % uploader) break + entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py new file mode 100644 index 000000000..a4f8ce6c3 --- /dev/null +++ b/youtube_dl/extractor/soundgasm.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SoundgasmIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' + _TEST = { + 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'ytdl_Piano-sample', + 'description': 'Royalty Free Sample Music' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('title') + audio_title = mobj.group('user') + '_' + mobj.group('title') + webpage = self._download_webpage(url, display_id) + audio_url = self._html_search_regex( + r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') + audio_id = re.split('\/|\.', audio_url)[-2] + description = self._html_search_regex( + r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', + fatal=False) + + return { + 'id': audio_id, + 'display_id': display_id, + 'url': audio_url, + 'title': audio_title, + 'description': description + } diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 9156d7faf..340a38440 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,3 +1,4 @@ +# encoding: utf-8 from __future__ import unicode_literals import re @@ -9,18 +10,33 @@ class SpiegelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _TESTS = [{ 'url': 
'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', - 'file': '1259285.mp4', 'md5': '2c2754212136f35fb4b19767d242f66e', 'info_dict': { + 'id': '1259285', + 'ext': 'mp4', 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', + 'description': 'md5:8029d8310232196eb235d27575a8b9f4', + 'duration': 49, }, - }, - { + }, { 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', - 'file': '1309159.mp4', 'md5': 'f2cdf638d7aa47654e251e1aee360af1', 'info_dict': { + 'id': '1309159', + 'ext': 'mp4', 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', + 'description': 'md5:c2322b65e58f385a820c10fa03b2d088', + 'duration': 983, + }, + }, { + 'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html', + 'md5': '54f58ba0e752e3c07bc2a26222dd0acf', + 'info_dict': { + 'id': '1502367', + 'ext': 'mp4', + 'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern', + 'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91', + 'duration': 42, }, }] @@ -30,18 +46,20 @@ class SpiegelIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_title = self._html_search_regex( + title = self._html_search_regex( r'<div class="module-title">(.*?)</div>', webpage, 'title') + description = self._html_search_meta('description', webpage, 'description') + + base_url = self._search_regex( + r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL') - xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml' - idoc = self._download_xml( - xml_url, video_id, - note='Downloading XML', errnote='Failed to download XML') + xml_url = base_url + video_id + '.xml' + idoc = self._download_xml(xml_url, video_id) formats = [ { 'format_id': n.tag.rpartition('type')[2], - 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text, + 'url': base_url + n.find('./filename').text, 'width': int(n.find('./width').text), 'height': int(n.find('./height').text), 'abr': int(n.find('./audiobitrate').text), @@ -59,7 +77,8 @@ class SpiegelIE(InfoExtractor): return { 'id': video_id, - 'title': video_title, + 'title': title, + 'description': description, 'duration': duration, 'formats': formats, } diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index 1d8d57224..af689e2c2 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -53,7 +53,7 @@ class SteamIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20140329', 'title': 'FRONTIERS - Final Greenlight Trailer', - 'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! 
Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205", + 'description': 'md5:6df4fe8dd494ae811869672b0767e025', 'uploader': 'AAD Productions', 'uploader_id': 'AtomicAgeDogGames', } diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 36331529e..25b9864ad 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor): 'thumbnail': 're:^http:.*\.jpg$', }, }, { - 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', - 'md5': '8aaa8bf3ae1ca2652309718c03019128', + 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html', + 'md5': '66652566900963a3f962333579eeffcf', 'info_dict': { - 'id': '196', + 'id': '5964', 'ext': 'mp4', - 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', - 'description': 'md5:f22e4af75821d174fa6c977349682691', + 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland', + 'description': 'md5:07bfc78c48eec3145ed4805299a1900a', 'thumbnail': 're:http://.*\.jpg', }, }] diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 4d9666c6b..2c2113b14 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor): IE_NAME = 'teachertube' IE_DESC = 'teachertube.com videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', @@ -22,8 +22,8 @@ class TeacherTubeIE(InfoExtractor): 'info_dict': { 'id': '339997', 'ext': 'mp4', - 'title': 'Measures of dispersion from a frequency table_x264', - 'description': 'md5:a3e9853487185e9fcd7181a07164650b', + 'title': 'Measures of dispersion from a frequency table', + 'description': 'Measures of dispersion from a frequency table', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -33,7 +33,7 @@ class TeacherTubeIE(InfoExtractor): 'id': '340064', 'ext': 'mp4', 'title': 'How to Make Paper Dolls _ Paper Art Projects', - 'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b', + 'description': 'Learn how to make paper dolls in this simple', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -43,7 +43,16 @@ class TeacherTubeIE(InfoExtractor): 'id': '8805', 'ext': 'mp3', 'title': 'PER ASPERA AD ASTRA', - 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA', + 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', + }, + }, { + 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', + 'md5': '9c79fbb2dd7154823996fc28d4a26998', + 'info_dict': { + 'id': '297790', + 'ext': 'mp4', + 'title': 'Intro Video - Schleicher', + 'description': 'Intro Video - Why to flip, how flipping will', }, }] @@ -53,9 +62,20 @@ class TeacherTubeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('title', webpage, 'title') + TITLE_SUFFIX = ' - TeacherTube' + if title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)].strip() + + description = self._html_search_meta('description', webpage, 'description') + if description: + description = description.strip() + quality = qualities(['mp3', 'flv', 'mp4']) - _, media_urls = 
zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage)) + media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage) + media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage)) + media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage)) formats = [ { @@ -68,28 +88,37 @@ class TeacherTubeIE(InfoExtractor): return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'title': title, + 'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'), 'formats': formats, - 'description': self._og_search_description(webpage), + 'description': description, } -class TeacherTubeClassroomIE(InfoExtractor): - IE_NAME = 'teachertube:classroom' - IE_DESC = 'teachertube.com online classrooms' +class TeacherTubeUserIE(InfoExtractor): + IE_NAME = 'teachertube:user:collection' + IE_DESC = 'teachertube.com user and collection videos' + + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)' + _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') - rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, - user_id, 'Downloading classroom RSS') + urls = [] + webpage = self._download_webpage(url, user_id) + urls.extend(re.findall(self._MEDIA_RE, webpage)) + + pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] + for p in pages: + more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) + webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) + urls.extend(re.findall(self._MEDIA_RE, webpage)) entries = [] - for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): - entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) + for url in urls: + entries.append(self.url_result(url, 'TeacherTube')) return self.playlist_result(entries, user_id) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d260c91c2..bce32a873 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor): ''' _TESTS = [{ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', - 'md5': '4ea1dada91e4174b53dac2bb8ace429d', + 'md5': 'fc94ac279feebbce69f21c0c6ee82810', 'info_dict': { 'id': '102', 'ext': 'mp4', diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 34008afc6..0f389bd93 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -1,10 +1,13 @@ +# -*- coding:utf-8 -*- +from __future__ import unicode_literals + from .common import InfoExtractor import re class ToypicsIE(InfoExtractor): IE_DESC = 'Toypics user profile' - _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' + _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' _TEST = { 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', 'md5': '16e806ad6d6f58079d210fe30985e08b', @@ -61,7 +64,7 @@ class ToypicsUserIE(InfoExtractor): note='Downloading page %d/%d' % (n, page_count)) urls.extend( re.findall( - r'<p class="video-entry-title">\n\s*<a 
href="(http://videos.toypics.net/view/[^"]+)">', + r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">', lpage)) return { diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 544369068..2882c1809 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re @@ -10,14 +11,27 @@ from ..utils import ( class TumblrIE(InfoExtractor): _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)' - _TEST = { + _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', - 'file': '54196191430.mp4', 'md5': '479bb068e5b16462f5176a6828829767', 'info_dict': { - "title": "tatiana maslany news" + 'id': '54196191430', + 'ext': 'mp4', + 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', + 'description': 'md5:dfac39636969fe6bf1caa2d50405f069', + 'thumbnail': 're:http://.*\.jpg', } - } + }, { + 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all', + 'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359', + 'info_dict': { + 'id': '90208453769', + 'ext': 'mp4', + 'title': '5SOS STRUM ;)', + 'description': 'md5:dba62ac8639482759c8eb10ce474586a', + 'thumbnail': 're:http://.*\.jpg', + } + }] def _real_extract(self, url): m_url = re.match(self._VALID_URL, url) @@ -48,6 +62,7 @@ class TumblrIE(InfoExtractor): return [{'id': video_id, 'url': video_url, 'title': video_title, + 'description': self._html_search_meta('description', webpage), 'thumbnail': video_thumbnail, 'ext': ext }] diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index fb132aef6..a7953a7e7 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -49,6 +49,7 @@ class VeohIE(InfoExtractor): 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', 'uploader': 'newsy-videos', }, + 'skip': 'This video has been deleted.', }, ] diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index b5034b02f..a647807d0 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -4,7 +4,10 @@ import re import base64 from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + unified_strdate, + int_or_none, +) class VideoTtIE(InfoExtractor): @@ -50,9 +53,9 @@ class VideoTtIE(InfoExtractor): 'thumbnail': settings['config']['thumbnail'], 'upload_date': unified_strdate(video['added']), 'uploader': video['owner'], - 'view_count': int(video['view_count']), - 'comment_count': int(video['comment_count']), - 'like_count': int(video['liked']), - 'dislike_count': int(video['disliked']), + 'view_count': int_or_none(video['view_count']), + 'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']), + 'like_count': int_or_none(video['liked']), + 'dislike_count': int_or_none(video['disliked']), 'formats': formats, }
\ No newline at end of file diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index fb082f364..918bd1098 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ @@ -27,7 +27,7 @@ class VKIE(InfoExtractor): 'id': '162222515', 'ext': 'flv', 'title': 'ProtivoGunz - Хуёвая песня', - 'uploader': 'Noize MC', + 'uploader': 're:Noize MC.*', 'duration': 195, }, }, @@ -62,11 +62,47 @@ class VKIE(InfoExtractor): 'id': '164049491', 'ext': 'mp4', 'uploader': 'Триллеры', - 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', + 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 'duration': 8352, }, 'skip': 'Requires vk account credentials', }, + { + 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', + 'md5': 'd82c22e449f036282d1d3f7f4d276869', + 'info_dict': { + 'id': '166094326', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': 'Запах женщины (1992)', + 'duration': 9392, + }, + 'skip': 'Requires vk account credentials', + }, + { + 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', + 'md5': '4d7a5ef8cf114dfa09577e57b2993202', + 'info_dict': { + 'id': '168067957', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': ' ', + 'duration': 7291, + }, + 'skip': 'Requires vk account credentials', + }, + { + 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', + 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', + 'note': 'ivi.ru embed', + 'info_dict': { + 'id': '60690', + 'ext': 'mp4', + 'title': 'Книга Илая', + 'duration': 6771, + }, + 'skip': 'Only works from Russia', + }, ] def _login(self): @@ -110,6 +146,16 @@ class VKIE(InfoExtractor): if m_yt is not None: self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) + if m_opts: + m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) + if m_opts_url: + opts_url = m_opts_url.group(1) + if opts_url.startswith('//'): + opts_url = 'http:' + opts_url + return self.url_result(opts_url) + data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') data = json.loads(data_json) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..dfc570930 --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +from .common import InfoExtractor +from ..utils import ( + determine_ext, + compat_urllib_parse, + compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' 
+ + _TESTS = [{ + 'url': 'http://vodlocker.com/e8wvyzz4sl42', + 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', + 'info_dict': { + 'id': 'e8wvyzz4sl42', + 'ext': 'mp4', + 'title': 'Germany vs Brazil', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)<input\s+ + type="hidden"\s+ + name="([^"]+)"\s+ + (?:id="[^"]+"\s+)? + value="([^"]*)" + ''', webpage)) + + if fields['op'] == 'download1': + self._sleep(3, video_id) # they do detect when requests happen too fast! + post = compat_urllib_parse.urlencode(fields) + req = compat_urllib_request.Request(url, post) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + webpage = self._download_webpage( + req, video_id, 'Downloading video page') + + title = self._search_regex( + r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title') + thumbnail = self._search_regex( + r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') + url = self._search_regex( + r'file:\s*"(http[^\"]+)",', webpage, 'file url') + + formats = [{ + 'format_id': 'sd', + 'url': url, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index feeb44b45..f741ba540 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re @@ -54,14 +55,14 @@ class WDRIE(InfoExtractor): }, }, { - 'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', - 'md5': 'cfff440d4ee64114083ac44676df5d15', + 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', + 'md5': '24e83813e832badb0a8d7d1ef9ef0691', 'info_dict': { - 'id': 'mdb-363068', + 'id': 'mdb-463528', 'ext': 'mp3', - 'title': 'Grenzenlos lecker - Baklava', + 'title': 'Süpersong: Soul Bossa Nova', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140311', + 'upload_date': '20140630', }, }, ] @@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor): 'info_dict': { 'title': '4283021', 'id': '421735', + 'ext': 'mp4', 'age_limit': 0, }, - '_skip': 'Will be depublicized shortly' + 'skip': 'Problems with loading data.' 
} def _real_extract(self, url): @@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, + 'ext': determine_ext(url), 'user_agent': 'mobile', } diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index bc31c2e64..e6bfa9e14 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -5,14 +7,16 @@ from .common import InfoExtractor class WistiaIE(InfoExtractor): - _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' + _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' _TEST = { - u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt", - u"file": u"sh7fpupwlt.mov", - u"md5": u"cafeb56ec0c53c18c97405eecb3133df", - u"info_dict": { - u"title": u"cfh_resourceful_zdkh_final_1" + 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', + 'md5': 'cafeb56ec0c53c18c97405eecb3133df', + 'info_dict': { + 'id': 'sh7fpupwlt', + 'ext': 'mov', + 'title': 'Being Resourceful', + 'duration': 117, }, } @@ -22,7 +26,7 @@ class WistiaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._html_search_regex( - r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data') + r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data') data = json.loads(data_json) @@ -54,4 +58,5 @@ class WistiaIE(InfoExtractor): 'title': data['name'], 'formats': formats, 'thumbnails': thumbnails, + 'duration': data.get('duration'), } diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py new file mode 100644 index 000000000..34dd6d952 --- /dev/null +++ b/youtube_dl/extractor/wrzuta.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + qualities, +) + + +class WrzutaIE(InfoExtractor): + IE_NAME = 'wrzuta.pl' + + _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)' + + _TESTS = [{ + 'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game', + 'md5': '9e67e05bed7c03b82488d87233a9efe7', + 'info_dict': { + 'id': 'aq4hIZWrkBu', + 'ext': 'mp4', + 'title': 'Nike Football: The Last Game', + 'duration': 307, + 'uploader_id': 'laboratoriumdextera', + 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', + }, + }, { + 'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad', + 'md5': '1e546a18e1c22ac6e9adce17b8961ff5', + 'info_dict': { + 'id': '9oXJqdcndqv', + 'ext': 'ogg', + 'title': 'David Guetta & Showtek ft. 
Vassy - Bad', + 'duration': 270, + 'uploader_id': 'w729', + 'description': 'md5:4628f01c666bbaaecefa83476cfa794a', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + typ = mobj.group('typ') + uploader = mobj.group('uploader') + + webpage = self._download_webpage(url, video_id) + + quality = qualities(['SD', 'MQ', 'HQ', 'HD']) + + audio_table = {'flv': 'mp3', 'webm': 'ogg'} + + embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id) + + formats = [] + for media in embedpage['url']: + if typ == 'audio': + ext = audio_table[media['type'].split('@')[0]] + else: + ext = media['type'].split('@')[0] + + formats.append({ + 'format_id': '%s_%s' % (ext, media['quality'].lower()), + 'url': media['url'], + 'ext': ext, + 'quality': quality(media['quality']), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'duration': int_or_none(embedpage['duration']), + 'uploader_id': uploader, + 'description': self._og_search_description(webpage), + 'age_limit': embedpage.get('minimalAge', 0), + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7c50881c4..6123e1256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -224,6 +224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, @@ -440,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'signature=([a-zA-Z]+)', jscode, + r'signature=([$a-zA-Z]+)', jscode, u'Initial JS player signature function name') jsi = JSInterpreter(jscode) @@ -864,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if player_url is not None: - if player_url.startswith(u'//'): - player_url = u'https:' + player_url - try: - player_id = (player_url, len(s)) - if player_id not in self._player_cache: - func = self._extract_signature_function( - video_id, player_url, len(s) - ) - self._player_cache[player_id] = func - func = self._player_cache[player_id] - if self._downloader.params.get('youtube_print_sig_code'): - self._print_sig_code(func, len(s)) - return func(s) - except Exception: - tb = traceback.format_exc() - self._downloader.report_warning( - u'Automatic signature extraction failed: ' + tb) - - self._downloader.report_warning( - u'Warning: Falling back to static signature algorithm') - - return self._static_decrypt_signature( - s, video_id, player_url, age_gate) - - def _static_decrypt_signature(self, s, video_id, player_url, age_gate): - if age_gate: - # The videos with age protection use another player, so the - # algorithms can be different. 
-            if len(s) == 86:
-                return s[2:63] + s[82] + s[64:82] + s[63]
-
-        if len(s) == 93:
-            return s[86:29:-1] + s[88] + s[28:5:-1]
-        elif len(s) == 92:
-            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
-        elif len(s) == 91:
-            return s[84:27:-1] + s[86] + s[26:5:-1]
-        elif len(s) == 90:
-            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
-        elif len(s) == 89:
-            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
-        elif len(s) == 88:
-            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
-        elif len(s) == 87:
-            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
-        elif len(s) == 86:
-            return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
-        elif len(s) == 85:
-            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
-        elif len(s) == 84:
-            return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
-        elif len(s) == 83:
-            return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
-        elif len(s) == 82:
-            return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
-        elif len(s) == 81:
-            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
-        elif len(s) == 80:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
-        elif len(s) == 79:
-            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+        if player_url is None:
+            raise ExtractorError(u'Cannot decrypt signature without player_url')
-        else:
-            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
+        if player_url.startswith(u'//'):
+            player_url = u'https:' + player_url
+        try:
+            player_id = (player_url, len(s))
+            if player_id not in self._player_cache:
+                func = self._extract_signature_function(
+                    video_id, player_url, len(s)
+                )
+                self._player_cache[player_id] = func
+            func = self._player_cache[player_id]
+            if self._downloader.params.get('youtube_print_sig_code'):
+                self._print_sig_code(func, len(s))
+            return func(s)
+        except Exception as e:
+            tb = traceback.format_exc()
+            raise ExtractorError(
+                u'Automatic signature extraction failed: ' + tb, cause=e)
     def _get_available_subtitles(self, video_id, webpage):
         try:
@@ -1386,13 +1342,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                        |  p/
                        )
                        (
-                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                             # Top tracks, they can also include dots
                             |(?:MC)[\w\.]*
                        )
                        .*
                     |
-                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
@@ -1697,14 +1653,14 @@ class YoutubeSearchURLIE(InfoExtractor):
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
-            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
+            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
         part_codes = re.findall(
             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
         entries = []
         for part_code in part_codes:
             part_title = self._html_search_regex(
-                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
+                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
             part_url_snippet = self._html_search_regex(
                 r'(?s)href="([^"]+)"', part_code, 'item URL')
             part_url = compat_urlparse.urljoin(
@@ -1824,10 +1780,21 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
-        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
+        (?:https?://)?[^/]+/watch\?(?:
+            feature=[a-z_]+|
+            annotation_id=annotation_[^&]+
+        )?$|
         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
     '''
+    _TESTS = [{
+        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.youtube.com/watch?',
+        'only_matching': True,
+    }]
+
     def _real_extract(self, url):
         raise ExtractorError(
             u'Did you forget to quote the URL? Remember that & is a meta '
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 449482d3c..3bbb07704 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -59,7 +59,7 @@ class JSInterpreter(object):
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
-                return u''.join(val)
+                return ''.join(val)
             if member == 'length':
                 return len(val)
             if member == 'reverse()':
@@ -99,7 +99,7 @@ class JSInterpreter(object):
     def extract_function(self, funcname):
         func_m = re.search(
-            (r'(?:function %s|%s\s*=\s*function)' % (
+            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
                 re.escape(funcname), re.escape(funcname))) +
             r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
             self.code)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b97e62ae9..09312e81a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -816,6 +816,9 @@ def unified_strdate(date_str):
         '%d %b %Y',
         '%B %d %Y',
         '%b %d %Y',
+        '%b %dst %Y %I:%M%p',
+        '%b %dnd %Y %I:%M%p',
+        '%b %dth %Y %I:%M%p',
         '%Y-%m-%d',
         '%d.%m.%Y',
         '%d/%m/%Y',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 0c9dd6895..d6b05892c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.06.09'
+__version__ = '2014.07.11'