about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 10
-rw-r--r-- youtube_dl/extractor/common.py 19
-rw-r--r-- youtube_dl/extractor/fc2.py 60
-rw-r--r-- youtube_dl/extractor/mixcloud.py 52
-rw-r--r-- youtube_dl/extractor/nuvid.py 48
-rw-r--r-- youtube_dl/extractor/slutload.py 47
-rw-r--r-- youtube_dl/extractor/videott.py 58
-rw-r--r-- youtube_dl/extractor/vine.py 31
-rw-r--r-- youtube_dl/extractor/wdr.py 28
9 files changed, 335 insertions, 18 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 228f62b40..3b10fc819 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -79,6 +79,7 @@ from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
+from .fc2 import FC2IE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
@@ -195,6 +196,7 @@ from .nowvideo import NowVideoIE
from .nrk import NRKIE
from .ntv import NTVIE
from .nytimes import NYTimesIE
+from .nuvid import NuvidIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE
from .orf import ORFIE
@@ -231,6 +233,7 @@ from .scivee import SciVeeIE
from .servingsys import ServingSysIE
from .sina import SinaIE
from .slideshare import SlideshareIE
+from .slutload import SlutloadIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
@@ -296,6 +299,7 @@ from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
+from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vimeo import (
VimeoIE,
@@ -306,7 +310,10 @@ from .vimeo import (
VimeoReviewIE,
VimeoWatchLaterIE,
)
-from .vine import VineIE
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
@@ -315,6 +322,7 @@ from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wdr import (
WDRIE,
+ WDRMobileIE,
WDRMausIE,
)
from .weibo import WeiboIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 10b0cbe69..11b31db88 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -242,10 +242,11 @@ class InfoExtractor(object):
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
- if len(url) > 200:
- h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
- url = url[:200 - len(h)] + h
- raw_filename = ('%s_%s.dump' % (video_id, url))
+ basen = video_id + '_' + url
+ if len(basen) > 240:
+ h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:240 - len(h)] + h
+ raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen(u'Saving request to ' + filename)
with open(filename, 'wb') as outf:
@@ -555,6 +556,16 @@ class InfoExtractor(object):
if self._downloader.params.get('prefer_insecure', False)
else 'https:')
+    def _proto_relative_url(self, url, scheme=None):
+        """Return url with a scheme prepended if it is protocol-relative.
+
+        A url of None, or one that does not start with '//', is handed back
+        unchanged. When scheme is None, self.http_scheme() supplies it.
+        """
+        if url is None or not url.startswith('//'):
+            return url
+        prefix = self.http_scheme() if scheme is None else scheme
+        return prefix + url
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
new file mode 100644
index 000000000..ca8993241
--- /dev/null
+++ b/youtube_dl/extractor/fc2.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_request,
+ compat_urlparse,
+)
+
+
+class FC2IE(InfoExtractor):
+    """Extractor for single videos hosted on video.fc2.com."""
+
+    _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)'
+    IE_NAME = 'fc2'
+    _TEST = {
+        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
+        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
+        'info_dict': {
+            'id': '20121103kUan1KHs',
+            'ext': 'flv',
+            'title': 'Boxing again with Puff',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the FC2 content page at url.
+
+        The content page is downloaded first, then ginfo.php is queried with
+        a token derived from the video id. Its query-string style response
+        supplies the file path, the "mid" parameter and the title.
+
+        Raises ExtractorError when the response carries an err_code field.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        # must clear (presumably the info request is rejected when the
+        # page's session cookies are sent along -- TODO confirm)
+        self._downloader.cookiejar.clear_session_cookies()
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        refer = url.replace('/content/', '/a/content/')
+
+        # hashlib only accepts bytes; with unicode_literals in effect the
+        # concatenation is a text string and must be encoded first.
+        mimi = hashlib.md5(
+            (video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
+
+        info_url = (
+            "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
+            format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
+
+        info_webpage = self._download_webpage(
+            info_url, video_id, note='Downloading info page')
+        info = compat_urlparse.parse_qs(info_webpage)
+
+        if 'err_code' in info:
+            raise ExtractorError('Error code: %s' % info['err_code'][0])
+
+        video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
+
+        # Prefer the title from the info response; only consult the page's
+        # og:title (which raises when absent) if the response has none.
+        if info.get('title'):
+            title = info['title'][0]
+        else:
+            title = self._og_search_title(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'flv',
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index c4bd53fe7..5f64e7bd0 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -4,9 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
compat_urllib_parse,
ExtractorError,
+ int_or_none,
+ parse_iso8601,
)
@@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach',
'upload_date': '20111115',
+ 'timestamp': 1321359578,
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
},
}
@@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):
webpage = self._download_webpage(url, track_id)
- api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
- info = self._download_json(
- api_url, track_id, 'Downloading cloudcast info')
-
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
@@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
final_song_url = self._get_url(template_url)
if final_song_url is None:
- raise ExtractorError(u'Unable to extract track url')
+ raise ExtractorError('Unable to extract track url')
+
+ PREFIX = (
+ r'<div class="cloudcast-play-button-container"'
+ r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
+ title = self._html_search_regex(
+ PREFIX + r'm-title="([^"]+)"', webpage, 'title')
+ thumbnail = self._proto_relative_url(self._html_search_regex(
+ PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
+ fatal=False))
+ uploader = self._html_search_regex(
+ PREFIX + r'm-owner-name="([^"]+)"',
+ webpage, 'uploader', fatal=False)
+ uploader_id = self._search_regex(
+ r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
+ description = self._og_search_description(webpage)
+ like_count = int_or_none(self._search_regex(
+ r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+ webpage, 'like count', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+ webpage, 'play count', fatal=False))
+ timestamp = parse_iso8601(self._search_regex(
+ r'<time itemprop="dateCreated" datetime="([^"]+)">',
+ webpage, 'upload date'))
return {
'id': track_id,
- 'title': info['name'],
+ 'title': title,
'url': final_song_url,
- 'description': info.get('description'),
- 'thumbnail': info['pictures'].get('extra_large'),
- 'uploader': info['user']['name'],
- 'uploader_id': info['user']['username'],
- 'upload_date': unified_strdate(info['created_time']),
- 'view_count': info['play_count'],
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'like_count': like_count,
}
diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py
new file mode 100644
index 000000000..f0befa116
--- /dev/null
+++ b/youtube_dl/extractor/nuvid.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class NuvidIE(InfoExtractor):
+    """Extractor for nuvid.com videos, scraped from the mobile site."""
+
+    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://m.nuvid.com/video/1310741/',
+        'md5': 'eab207b7ac4fccfb4e23c86201f11277',
+        'info_dict': {
+            'id': '1310741',
+            'ext': 'mp4',
+            "title": "Horny babes show their awesome bodeis and",
+            "age_limit": 18,
+        }
+    }
+    # Host that the relative links found on the mobile page are joined to.
+    _BASE_URL = 'http://m.nuvid.com'
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at url.
+
+        The www URL is rewritten to its mobile counterpart before download.
+        Title and MP4 link are mandatory; the thumbnail is optional and is
+        None when the page has no matching link.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        murl = url.replace('://www.', '://m.')
+        webpage = self._download_webpage(murl, video_id)
+
+        title = self._html_search_regex(
+            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
+            webpage, 'title').strip()
+
+        url_end = self._html_search_regex(
+            r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"',
+            webpage, 'video_url')
+        video_url = self._BASE_URL + url_end
+
+        thumbnail_path = self._html_search_regex(
+            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
+            webpage, 'thumbnail URL', fatal=False)
+        # The captured value is a host-relative path, exactly like the MP4
+        # link, so it needs the same host prefix to be a usable URL.
+        thumbnail = (
+            self._BASE_URL + thumbnail_path if thumbnail_path else None)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
new file mode 100644
index 000000000..ecc0abfda
--- /dev/null
+++ b/youtube_dl/extractor/slutload.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+)
+
+
+class SlutloadIE(InfoExtractor):
+    """Extractor for slutload.com video pages."""
+
+    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
+    _TEST = {
+        'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
+        'md5': '0cf531ae8006b530bd9df947a6a0df77',
+        'info_dict': {
+            'id': 'TD73btpBqSxc',
+            'ext': 'mp4',
+            "title": "virginie baisee en cam",
+            "age_limit": 18,
+            'thumbnail': 're:https?://.*?\.jpg'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape title, media URL and preview image from the video page.
+
+        Title and media URL are required; a missing preview image leaves
+        the thumbnail lookup non-fatal.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        page = self._download_webpage(url, video_id)
+
+        info = {'id': video_id, 'age_limit': 18}
+        info['title'] = self._html_search_regex(
+            r'<h1><strong>([^<]+)</strong>', page, 'title').strip()
+        # Both remaining values are attributes of the vidPlayer <div>.
+        info['url'] = self._html_search_regex(
+            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
+            page, 'video URL')
+        info['thumbnail'] = self._html_search_regex(
+            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
+            page, 'thumbnail', fatal=False)
+        return info
diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py
new file mode 100644
index 000000000..b5034b02f
--- /dev/null
+++ b/youtube_dl/extractor/videott.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class VideoTtIE(InfoExtractor):
+    """Extractor for video.tt, driven by the player settings JSON."""
+
+    # The extractor name attribute is IE_NAME; the original spelling
+    # ID_NAME is kept only as an alias for anything that already read it.
+    IE_NAME = 'video.tt'
+    ID_NAME = IE_NAME
+    IE_DESC = 'video.tt - Your True Tube'
+    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
+
+    _TEST = {
+        'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
+        'md5': 'b13aa9e2f267effb5d1094443dff65ba',
+        'info_dict': {
+            'id': 'amd5YujV8',
+            'ext': 'flv',
+            'title': 'Motivational video Change your mind in just 2.50 mins',
+            'description': '',
+            'upload_date': '20130827',
+            'uploader': 'joseph313',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video referenced by url.
+
+        All metadata comes from the 'settings' object of the player
+        settings endpoint. Each entry of settings['res'] with a non-empty
+        'u' value becomes one format; 'u' holds the base64-encoded media
+        URL and 'l' is used as the format id.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        settings = self._download_json(
+            'http://www.video.tt/player_control/settings.php?v=%s' % video_id, video_id,
+            'Downloading video JSON')['settings']
+
+        video = settings['video_details']['video']
+
+        formats = [
+            {
+                'url': base64.b64decode(res['u']).decode('utf-8'),
+                'ext': 'flv',
+                'format_id': res['l'],
+            } for res in settings['res'] if res['u']
+        ]
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video['description'],
+            'thumbnail': settings['config']['thumbnail'],
+            'upload_date': unified_strdate(video['added']),
+            'uploader': video['owner'],
+            'view_count': int(video['view_count']),
+            'comment_count': int(video['comment_count']),
+            'like_count': int(video['liked']),
+            'dislike_count': int(video['disliked']),
+            'formats': formats,
+        }
\ No newline at end of file
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 5136ec466..076c87119 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
import re
import json
+import itertools
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -58,3 +59,33 @@ class VineIE(InfoExtractor):
'repost_count': data['reposts']['count'],
'formats': formats,
}
+
+
+class VineUserIE(InfoExtractor):
+    """Playlist extractor covering every post of one vine.co user."""
+
+    IE_NAME = 'vine:user'
+    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+    _VINE_BASE_URL = "https://vine.co/"
+
+    def _real_extract(self, url):
+        """Return a playlist of the user's posts, delegated to 'Vine'.
+
+        The vanity name from the URL is resolved to a user id through the
+        profile API, then timeline pages are fetched until a page reports
+        nextPage as None.
+        """
+        user = re.match(self._VALID_URL, url).group('user')
+
+        profile = self._download_json(
+            "%sapi/users/profiles/vanity/%s" % (self._VINE_BASE_URL, user),
+            user, note='Downloading user profile data')
+        user_id = profile['data']['userId']
+
+        records = []
+        for pagenum in itertools.count(1):
+            page = self._download_json(
+                "%sapi/timelines/users/%s?page=%s" % (
+                    self._VINE_BASE_URL, user_id, pagenum),
+                user, note='Downloading page %d' % pagenum)
+            page_data = page['data']
+            records += page_data['records']
+            if page_data['nextPage'] is None:
+                break
+
+        # Entries are only built once every page has been collected.
+        return self.playlist_result(
+            [self.url_result(rec['permalinkUrl'], 'Vine') for rec in records],
+            user)
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 63691aa67..feeb44b45 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -115,6 +115,34 @@ class WDRIE(InfoExtractor):
}
+class WDRMobileIE(InfoExtractor):
+    """Extractor for direct media URLs on mobile-ondemand.wdr.de."""
+
+    _VALID_URL = r'''(?x)
+        https?://mobile-ondemand\.wdr\.de/
+        .*?/fsk(?P<age_limit>[0-9]+)
+        /[0-9]+/[0-9]+/
+        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
+    IE_NAME = 'wdr:mobile'
+    _TEST = {
+        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
+        'info_dict': {
+            'title': '4283021',
+            'id': '421735',
+            'age_limit': 0,
+        },
+        '_skip': 'Will be depublicized shortly'
+    }
+
+    def _real_extract(self, url):
+        """Derive every field from the URL itself; nothing is downloaded.
+
+        The id, title and age limit are the named groups of _VALID_URL, and
+        the input URL is passed through as the media URL.
+        """
+        fields = re.match(self._VALID_URL, url).groupdict()
+        return {
+            'id': fields['id'],
+            'title': fields['title'],
+            'age_limit': int(fields['age_limit']),
+            'url': url,
+            'user_agent': 'mobile',
+        }
+
+
class WDRMausIE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
IE_DESC = 'Sendung mit der Maus'