diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/anysex.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/beeg.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/comedycentral.py | 36 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/drtuber.py | 50 | ||||
-rw-r--r-- | youtube_dl/extractor/eporner.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/hornbunny.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/npo.py | 35 | ||||
-rw-r--r-- | youtube_dl/extractor/sunporno.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 22 | ||||
-rw-r--r-- | youtube_dl/utils.py | 4 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
14 files changed, 167 insertions, 46 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index b15695053..bf616e3b6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -876,7 +876,7 @@ def _real_main(argv=None): ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)') else: if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): - ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir') + ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir) retcode = 141 else: ydl.to_screen( diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dede0cde5..6c7668fe2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -70,6 +70,7 @@ from .daum import DaumIE from .dfb import DFBIE from .dotsub import DotsubIE from .dreisat import DreiSatIE +from .drtuber import DrTuberIE from .drtv import DRTVIE from .dump import DumpIE from .defense import DefenseGouvFrIE diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py index adeacba01..bc64423a3 100644 --- a/youtube_dl/extractor/anysex.py +++ b/youtube_dl/extractor/anysex.py @@ -21,6 +21,7 @@ class AnySexIE(InfoExtractor): 'description': 'md5:de9e418178e2931c10b62966474e1383', 'categories': ['Erotic'], 'duration': 270, + 'age_limit': 18, } } @@ -43,7 +44,6 @@ class AnySexIE(InfoExtractor): duration = parse_duration(self._search_regex( r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._html_search_regex( r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False)) @@ -57,4 +57,5 @@ class AnySexIE(InfoExtractor): 'categories': categories, 'duration': duration, 'view_count': view_count, + 'age_limit': 18, } diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c2692cfdc..d7301fe18 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -17,6 +17,7 @@ class BeegIE(InfoExtractor): 'description': 'md5:6db3c6177972822aaba18652ff59c773', 'categories': list, # NSFW 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, } } @@ -41,7 +42,9 @@ class BeegIE(InfoExtractor): categories_str = self._html_search_regex( r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) - categories = categories_str.split(',') + categories = ( + None if categories_str is None + else categories_str.split(',')) return { 'id': video_id, @@ -50,4 +53,5 @@ class BeegIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'categories': categories, + 'age_limit': 18, } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index c81ce5a96..035046120 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor): (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> - (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) )| (?P<interview> extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?))) (?:[?#].*|$)''' - _TEST = { + _TESTS = [{ 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', 'info_dict': { @@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor): 'uploader': 'thedailyshow', 'title': 'thedailyshow kristen-stewart part 1', } - } + }, { + 'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', + 'only_matching': True, + }, { + 'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', + 'only_matching': True, + }, { + 'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', + 'only_matching': True, + }, { + 'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', + 'only_matching': True, + }, { + 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', + 'only_matching': True, + }] _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] @@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor): 'ext': self._video_extensions.get(format, 'mp4'), 'height': h, 'width': w, + + 'format_note': 'HTTP 400 at the moment (patches welcome!)', + 'preference': -100, }) formats.append({ 'format_id': 'rtmp-%s' % format, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8453321c5..929dd1e97 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -677,9 +677,12 @@ class InfoExtractor(object): } codecs = last_info.get('CODECS') if codecs: - video, audio = codecs.split(',') - f['vcodec'] = video.partition('.')[0] - f['acodec'] = audio.partition('.')[0] + # TODO: looks like video codec is not always necessarily goes first + va_codecs = codecs.split(',') + if va_codecs[0]: + f['vcodec'] = va_codecs[0].partition('.')[0] + if len(va_codecs) > 1 and va_codecs[1]: + f['acodec'] = va_codecs[1].partition('.')[0] resolution = last_info.get('RESOLUTION') if resolution: width_str, height_str = resolution.split('x') diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py new file mode 100644 index 000000000..9a13925b4 --- /dev/null +++ b/youtube_dl/extractor/drtuber.py @@ -0,0 +1,50 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class DrTuberIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)' + _TEST = { + 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', + 'md5': '93e680cf2536ad0dfb7e74d94a89facd', + 'info_dict': { + 'id': '1740434', + 'ext': 'mp4', + 'title': 'Hot Perky Blonde Naked Golf', + 'categories': list, # NSFW + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'<source src="([^"]+)"', webpage, 'video URL') + + title = self._html_search_regex( + r'<title>([^<]+)\s*-\s*Free', webpage, 'title') + + thumbnail = self._html_search_regex( + r'poster="([^"]+)"', + webpage, 'thumbnail', fatal=False) + + cats_str = self._html_search_regex( + r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) + categories = None if cats_str is None else cats_str.split(' ') + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'categories': categories, + 'age_limit': self._rta_search(webpage), + } diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 4c2c074cb..6926fcda3 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -21,6 +21,7 @@ class EpornerIE(InfoExtractor): 'title': 'Infamous Tiffany Teen Strip Tease Video', 'duration': 194, 'view_count': int, + 'age_limit': 18, } } @@ -35,9 +36,10 @@ class EpornerIE(InfoExtractor): r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id, webpage, 'redirect_code') redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code) - webpage2 = self._download_webpage(redirect_url, video_id) + player_code = self._download_webpage( + redirect_url, video_id, note='Downloading player config') video_url = self._html_search_regex( - r'file: "(.*?)",', webpage2, 'video_url') + r'file: "(.*?)",', player_code, 'video_url') duration = parse_duration(self._search_regex( r'class="mbtim">([0-9:]+)</div>', webpage, 'duration', @@ -52,4 +54,5 @@ class EpornerIE(InfoExtractor): 'title': title, 'duration': duration, 'view_count': view_count, + 'age_limit': self._rta_search(webpage), } diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py index a42fba0cb..7e7714438 100644 --- a/youtube_dl/extractor/hornbunny.py +++ b/youtube_dl/extractor/hornbunny.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + parse_duration, +) + class HornBunnyIE(InfoExtractor): _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html' @@ -15,7 +19,8 @@ class HornBunnyIE(InfoExtractor): 'id': '5227', 'ext': 'flv', 'title': 'panty slut jerk off instruction', - 'duration': 550 + 'duration': 550, + 'age_limit': 18, } } @@ -23,16 +28,22 @@ class HornBunnyIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'class="title">(.*?)</h2>', webpage, 'title') - redirect_url = self._html_search_regex(r'pg&settings=(.*?)\|0"\);', webpage, 'title') + webpage = self._download_webpage( + url, video_id, note='Downloading initial webpage') + title = self._html_search_regex( + r'class="title">(.*?)</h2>', webpage, 'title') + redirect_url = self._html_search_regex( + r'pg&settings=(.*?)\|0"\);', webpage, 'title') webpage2 = self._download_webpage(redirect_url, video_id) - video_url = self._html_search_regex(r'flvMask:(.*?);', webpage2, 'video_url') + video_url = self._html_search_regex( + r'flvMask:(.*?);', webpage2, 'video_url') - mobj = re.search(r'<strong>Runtime:</strong> (?P<minutes>\d+):(?P<seconds>\d+)</div>', webpage) - duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None - - view_count = self._html_search_regex(r'<strong>Views:</strong> (\d+)</div>', webpage, 'view count', fatal=False) + duration = parse_duration(self._search_regex( + r'<strong>Runtime:</strong>\s*([0-9:]+)</div>', + webpage, 'duration', fatal=False)) + view_count = int_or_none(self._search_regex( + r'<strong>Views:</strong>\s*(\d+)</div>', + webpage, 'view count', fatal=False)) return { 'id': video_id, @@ -40,5 +51,6 @@ class HornBunnyIE(InfoExtractor): 'title': title, 'ext': 'flv', 'duration': duration, - 'view_count': int_or_none(view_count), + 'view_count': view_count, + 'age_limit': 18, } diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 12e85a716..902d62944 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( unified_strdate, + qualities, ) @@ -17,7 +18,7 @@ class NPOIE(InfoExtractor): 'md5': '4b3f9c429157ec4775f2c9cb7b911016', 'info_dict': { 'id': 'VPWON_1220719', - 'ext': 'mp4', + 'ext': 'm4v', 'title': 'Nieuwsuur', 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', 'upload_date': '20140622', @@ -39,24 +40,32 @@ class NPOIE(InfoExtractor): video_id, note='Downloading token' ) - token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token') - streams_info = self._download_json( - 'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token), - video_id - ) + token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token') - stream_info = self._download_json( - streams_info['streams'][0] + '&type=json', - video_id, - 'Downloading stream info' - ) + formats = [] + quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std']) + for format_id in metadata['pubopties']: + streams_info = self._download_json( + 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token), + video_id, 'Downloading %s streams info' % format_id) + stream_info = self._download_json( + streams_info['streams'][0] + '&type=json', + video_id, 'Downloading %s stream info' % format_id) + if format_id == 'adaptive': + formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id)) + else: + formats.append({ + 'url': stream_info['url'], + 'format_id': format_id, + 'quality': quality(format_id), + }) + self._sort_formats(formats) return { 'id': video_id, 'title': metadata['titel'], - 'ext': 'mp4', - 'url': stream_info['url'], 'description': metadata['info'], 'thumbnail': metadata['images'][-1]['url'], 'upload_date': unified_strdate(metadata['gidsdatum']), + 'formats': formats, } diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index c7a46eb71..7de3c9dd5 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -23,6 +23,7 @@ class SunPornoIE(InfoExtractor): 'description': 'md5:a31241990e1bd3a64e72ae99afb325fb', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 302, + 'age_limit': 18, } } @@ -65,4 +66,5 @@ class SunPornoIE(InfoExtractor): 'view_count': view_count, 'comment_count': comment_count, 'formats': formats, + 'age_limit': 18, } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 78f3b7e7b..08a04737c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1055,21 +1055,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): self._login() def _ids_to_results(self, ids): - return [self.url_result(vid_id, 'Youtube', video_id=vid_id) - for vid_id in ids] + return [ + self.url_result(vid_id, 'Youtube', video_id=vid_id) + for vid_id in ids] def _extract_mix(self, playlist_id): # The mixes are generated from a a single video # the id of the playlist is just 'RD' + video_id url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) - webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') + webpage = self._download_webpage( + url, playlist_id, u'Downloading Youtube mix') search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) - title_span = (search_title('playlist-title') or - search_title('title long-title') or search_title('title')) + title_span = ( + search_title('playlist-title') or + search_title('title long-title') or + search_title('title')) title = clean_html(title_span) - video_re = r'''(?x)data-video-username=".*?".*? - href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) - ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) + ids = orderedSet(re.findall( + r'''(?xs)data-video-username=".*?".*? + href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), + webpage)) url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, title) @@ -1162,6 +1167,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): msg = u'Downloading Youtube mix' if i > 0: msg += ', retry #%d' % i + webpage = self._download_webpage(url, title, msg) ids = orderedSet(re.findall(video_re, webpage)) if ids: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8a36e619a..6fe057234 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1141,10 +1141,10 @@ else: import fcntl def _lock_file(f, exclusive): - fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) + fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) def _unlock_file(f): - fcntl.lockf(f, fcntl.LOCK_UN) + fcntl.flock(f, fcntl.LOCK_UN) class locked_file(object): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 59f6b4736..c9005afe1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.01.1' +__version__ = '2014.09.01.2' |