diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/audiomack.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/bbccouk.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/foxgay.py | 48 | ||||
-rw-r--r-- | youtube_dl/extractor/myspace.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/myvidster.py | 29 | ||||
-rw-r--r-- | youtube_dl/extractor/udemy.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 13 |
9 files changed, 94 insertions, 15 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8b513ffd1..647352b59 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -121,6 +121,7 @@ from .fktv import ( from .flickr import FlickrIE from .folketinget import FolketingetIE from .fourtube import FourTubeIE +from .foxgay import FoxgayIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( @@ -245,6 +246,7 @@ from .muzu import MuzuTVIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvideo import MyVideoIE +from .myvidster import MyVidsterIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 9ce4982d9..622b20989 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -26,14 +26,13 @@ class AudiomackIE(InfoExtractor): { 'add_ie': ['Soundcloud'], 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', - 'info_dict': - { + 'info_dict': { 'id': '172419696', 'ext': 'mp3', 'description': 'md5:1fc3272ed7a635cce5be1568c2822997', 'title': 'Young Thug ft Lil Wayne - Take Kare', - 'uploader':'Young Thug World', - 'upload_date':'20141016', + 'uploader': 'Young Thug World', + 'upload_date': '20141016', } }, ] diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index beb6cfc8a..01c02d360 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import re import xml.etree.ElementTree from .subtitles import SubtitlesInfoExtractor diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e80a2dad0..5b5e33cea 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -819,7 +819,8 @@ class InfoExtractor(object): return res def _set_cookie(self, domain, name, value, expire_time=None): - cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None, + cookie = compat_cookiejar.Cookie( + 0, name, value, None, None, domain, None, None, '/', True, False, expire_time, '', None, None, None) self._downloader.cookiejar.set_cookie(cookie) diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py new file mode 100644 index 000000000..08b8ea362 --- /dev/null +++ b/youtube_dl/extractor/foxgay.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class FoxgayIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' + _TEST = { + 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', + 'md5': '80d72beab5d04e1655a56ad37afe6841', + 'info_dict': { + 'id': '2582', + 'ext': 'mp4', + 'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a', + 'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf', + 'age_limit': 18, + 'thumbnail': 're:https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<title>(?P<title>.*?)</title>', + webpage, 'title', fatal=False) + description = self._html_search_regex( + r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>', + webpage, 'description', fatal=False) + + # Find the URL for the iFrame which contains the actual video. + iframe = self._download_webpage( + self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'), + video_id) + video_url = self._html_search_regex( + r"v_path = '(?P<vid>http://.*?)'", iframe, 'url') + thumb_url = self._html_search_regex( + r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'description': description, + 'thumbnail': thumb_url, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index e62614670..83414a232 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -88,6 +88,7 @@ class MySpaceIE(InfoExtractor): self.report_warning( '%s: No downloadable song on this page' % video_id) return + def search_data(name): return self._search_regex( r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py new file mode 100644 index 000000000..a94ab8358 --- /dev/null +++ b/youtube_dl/extractor/myvidster.py @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MyVidsterIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' + + _TEST = { + 'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making', + 'md5': '95296d0231c1363222c3441af62dc4ca', + 'info_dict': { + 'id': '3685814', + 'title': 'md5:7d8427d6d02c4fbcef50fe269980c749', + 'upload_date': '20141027', + 'uploader_id': 'utkualp', + 'ext': 'mp4', + 'age_limit': 18, + }, + 'add_ie': ['XHamster'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return self.url_result(self._html_search_regex( + r'rel="videolink" href="(?P<real_url>.*)">', + webpage, 'real video url')) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 0e4d386a8..5271611ac 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -97,11 +97,8 @@ class UdemyIE(InfoExtractor): if 'returnUrl' not in response: raise ExtractorError('Unable to log in') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - lecture_id = mobj.group('id') + lecture_id = self._match_id(url) lecture = self._download_json( 'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1cba40387..f452a90d8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -44,9 +44,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _LOGIN_REQUIRED = False def _set_language(self): - self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en', + self._set_cookie( + '.youtube.com', 'PREF', 'f1=50000000&hl=en', # YouTube sets the expire time to about two months - expire_time=time.time() + 60*24*3600) + expire_time=time.time() + 2 * 30 * 24 * 3600) def _login(self): """ @@ -722,9 +723,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # We fallback to the get_video_info pages (used by the embed page) self.report_video_info_webpage_download(video_id) for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - video_info_webpage = self._download_webpage(video_info_url, + video_info_url = ( + '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (proto, video_id, el_type)) + video_info_webpage = self._download_webpage( + video_info_url, video_id, note=False, errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) |