about summary refs log tree commit diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py | 1
-rw-r--r-- youtube_dl/extractor/canalplus.py | 22
-rw-r--r-- youtube_dl/extractor/gamespot.py | 2
-rw-r--r-- youtube_dl/extractor/kickstarter.py | 37
-rw-r--r-- youtube_dl/extractor/ustream.py | 22
-rw-r--r-- youtube_dl/extractor/youtube.py | 2
6 files changed, 64 insertions, 22 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a7cddef73..06f9542d2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -52,6 +52,7 @@ from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .kankan import KankanIE
+from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 1f02519a0..1db9b24cf 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
import re
import xml.etree.ElementTree
@@ -5,24 +6,29 @@ from .common import InfoExtractor
from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
+ _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr'
_TEST = {
- u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
- u'file': u'889861.flv',
- u'md5': u'590a888158b5f0d6832f84001fbf3e99',
+ u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
+ u'file': u'922470.flv',
u'info_dict': {
- u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
- u'upload_date': u'20130620',
+ u'title': u'Zapping - 26/08/13',
+ u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
+ u'upload_date': u'20130826',
+ },
+ u'params': {
+ u'skip_download': True,
},
- u'skip': u'Requires rtmpdump'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ if video_id is None:
+ webpage = self._download_webpage(url, mobj.group('path'))
+ video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id,
u'Downloading video info')
@@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor):
'ext': 'flv',
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
'thumbnail': media.find('IMAGES/GRAND').text,
+ 'description': infos.find('DESCRIPTION').text,
+ 'view_count': int(infos.find('NB_VUES').text),
}
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 7585b7061..cd3bbe65f 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor):
u"file": u"6410818.mp4",
u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
u"info_dict": {
- u"title": u"Arma III - Community Guide: SITREP I",
+ u"title": u"Arma 3 - Community Guide: SITREP I",
u"upload_date": u"20130627",
}
}
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
new file mode 100644
index 000000000..50bc883ef
--- /dev/null
+++ b/youtube_dl/extractor/kickstarter.py
@@ -0,0 +1,37 @@
+import re
+
+from .common import InfoExtractor
+
+
+class KickStarterIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+ _TEST = {
+ u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
+ u"file": u"1404461844.mp4",
+ u"md5": u"c81addca81327ffa66c642b5d8b08cab",
+ u"info_dict": {
+ u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+ },
+ }
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('id')
+ webpage_src = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(r'data-video="(.*?)">',
+ webpage_src, u'video URL')
+ if 'mp4' in video_url:
+ ext = 'mp4'
+ else:
+ ext = 'flv'
+ video_title = self._html_search_regex(r"<title>(.*?)</title>",
+ webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
+
+ results = [{
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'ext': ext,
+ }]
+ return results
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 16cdcc765..f69b27d44 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -1,9 +1,11 @@
-from HTMLParser import HTMLParser
import json
import re
-from urlparse import urljoin
from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+ compat_html_parser,
+)
class UstreamIE(InfoExtractor):
@@ -49,7 +51,7 @@ class UstreamIE(InfoExtractor):
# More robust than regular expressions
-class ChannelParser(HTMLParser):
+class ChannelParser(compat_html_parser.HTMLParser):
"""
<meta name="ustream:channel_id" content="1234">
"""
@@ -65,13 +67,13 @@ class ChannelParser(HTMLParser):
if value.isdigit():
self.channel_id = value
-class SocialstreamParser(HTMLParser):
+class SocialstreamParser(compat_html_parser.HTMLParser):
"""
<li class="content123 video" data-content-id="123" data-length="1452"
data-href="/recorded/123" data-og-url="/recorded/123">
"""
def __init__(self):
- HTMLParser.__init__(self)
+ compat_html_parser.HTMLParser.__init__(self)
self.content_ids = []
def handle_starttag(self, tag, attrs):
@@ -88,8 +90,6 @@ class UstreamChannelIE(InfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
slug = m.group('slug')
- # Slugs can be non-ascii, but youtube-dl can't handle non-ascii command lines,
- # so if we got this far it's probably percent encoded and we needn't worry.
p = ChannelParser()
p.feed(self._download_webpage(url, slug))
@@ -100,16 +100,12 @@ class UstreamChannelIE(InfoExtractor):
BASE = 'http://www.ustream.tv'
next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
while next_url:
- reply = json.loads(self._download_webpage(urljoin(BASE, next_url), channel_id))
+ reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
p.feed(reply['data'])
next_url = reply['nextUrl']
p.close()
video_ids = p.content_ids
- # From YoutubeChannelIE
-
- self._downloader.to_screen(u'[ustream] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
- return [self.playlist_result(url_entries, channel_id)]
+ return self.playlist_result(url_entries, channel_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2e0d70eaf..f49665925 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -434,7 +434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82:
- return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
+ return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
elif len(s) == 81:
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
elif len(s) == 80: