diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/canalplus.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/gamespot.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/kickstarter.py | 37 | ||||
-rw-r--r-- | youtube_dl/extractor/ustream.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 2 |
6 files changed, 64 insertions, 22 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a7cddef73..06f9542d2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -52,6 +52,7 @@ from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .kankan import KankanIE +from .kickstarter import KickStarterIE from .keek import KeekIE from .liveleak import LiveLeakIE from .livestream import LivestreamIE diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 1f02519a0..1db9b24cf 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,3 +1,4 @@ +# encoding: utf-8 import re import xml.etree.ElementTree @@ -5,24 +6,29 @@ from .common import InfoExtractor from ..utils import unified_strdate class CanalplusIE(InfoExtractor): - _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)' + _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' IE_NAME = u'canalplus.fr' _TEST = { - u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861', - u'file': u'889861.flv', - u'md5': u'590a888158b5f0d6832f84001fbf3e99', + u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', + u'file': u'922470.flv', u'info_dict': { - u'title': u'Le Petit Journal 20/06/13 - La guerre des drone', - u'upload_date': u'20130620', + u'title': u'Zapping - 26/08/13', + u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', + u'upload_date': u'20130826', + }, + u'params': { + u'skip_download': True, }, - u'skip': u'Requires rtmpdump' } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + if video_id is None: + webpage = self._download_webpage(url, mobj.group('path')) + video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') info_url = self._VIDEO_INFO_TEMPLATE % video_id info_page = self._download_webpage(info_url,video_id, u'Downloading video info') @@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor): 'ext': 'flv', 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), 'thumbnail': media.find('IMAGES/GRAND').text, + 'description': infos.find('DESCRIPTION').text, + 'view_count': int(infos.find('NB_VUES').text), } diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 7585b7061..cd3bbe65f 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor): u"file": u"6410818.mp4", u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", u"info_dict": { - u"title": u"Arma III - Community Guide: SITREP I", + u"title": u"Arma 3 - Community Guide: SITREP I", u"upload_date": u"20130627", } } diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py new file mode 100644 index 000000000..50bc883ef --- /dev/null +++ b/youtube_dl/extractor/kickstarter.py @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor + + +class KickStarterIE(InfoExtractor): + _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*' + _TEST = { + u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", + u"file": u"1404461844.mp4", + u"md5": u"c81addca81327ffa66c642b5d8b08cab", + u"info_dict": { + u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + webpage_src = self._download_webpage(url, video_id) + + video_url = self._search_regex(r'data-video="(.*?)">', + webpage_src, u'video URL') + if 'mp4' in video_url: + ext = 'mp4' + else: + ext = 'flv' + video_title = self._html_search_regex(r"<title>(.*?)</title>", + webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() + + results = [{ + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'ext': ext, + }] + return results diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 16cdcc765..f69b27d44 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,9 +1,11 @@ -from HTMLParser import HTMLParser import json import re -from urlparse import urljoin from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + compat_html_parser, +) class UstreamIE(InfoExtractor): @@ -49,7 +51,7 @@ class UstreamIE(InfoExtractor): # More robust than regular expressions -class ChannelParser(HTMLParser): +class ChannelParser(compat_html_parser.HTMLParser): """ <meta name="ustream:channel_id" content="1234"> """ @@ -65,13 +67,13 @@ class ChannelParser(HTMLParser): if value.isdigit(): self.channel_id = value -class SocialstreamParser(HTMLParser): +class SocialstreamParser(compat_html_parser.HTMLParser): """ <li class="content123 video" data-content-id="123" data-length="1452" data-href="/recorded/123" data-og-url="/recorded/123"> """ def __init__(self): - HTMLParser.__init__(self) + compat_html_parser.HTMLParser.__init__(self) self.content_ids = [] def handle_starttag(self, tag, attrs): @@ -88,8 +90,6 @@ class UstreamChannelIE(InfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url) slug = m.group('slug') - # Slugs can be non-ascii, but youtube-dl can't handle non-ascii command lines, - # so if we got this far it's probably percent encoded and we needn't worry. p = ChannelParser() p.feed(self._download_webpage(url, slug)) @@ -100,16 +100,12 @@ class UstreamChannelIE(InfoExtractor): BASE = 'http://www.ustream.tv' next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id while next_url: - reply = json.loads(self._download_webpage(urljoin(BASE, next_url), channel_id)) + reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id)) p.feed(reply['data']) next_url = reply['nextUrl'] p.close() video_ids = p.content_ids - # From YoutubeChannelIE - - self._downloader.to_screen(u'[ustream] Channel %s: Found %i videos' % (channel_id, len(video_ids))) - urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids] url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls] - return [self.playlist_result(url_entries, channel_id)] + return self.playlist_result(url_entries, channel_id) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2e0d70eaf..f49665925 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -434,7 +434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: - return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] + return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 80: |