aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/test_all_urls.py26
-rw-r--r--youtube_dl/__init__.py2
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/anysex.py3
-rw-r--r--youtube_dl/extractor/beeg.py6
-rw-r--r--youtube_dl/extractor/comedycentral.py36
-rw-r--r--youtube_dl/extractor/common.py9
-rw-r--r--youtube_dl/extractor/drtuber.py50
-rw-r--r--youtube_dl/extractor/eporner.py7
-rw-r--r--youtube_dl/extractor/hornbunny.py34
-rw-r--r--youtube_dl/extractor/npo.py35
-rw-r--r--youtube_dl/extractor/sunporno.py2
-rw-r--r--youtube_dl/extractor/youtube.py22
-rw-r--r--youtube_dl/utils.py4
-rw-r--r--youtube_dl/version.py2
15 files changed, 167 insertions, 72 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 5f1092939..84b05da39 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -143,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
- def test_ComedyCentralShows(self):
- self.assertMatch(
- 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
- ['ComedyCentralShows'])
- self.assertMatch(
- 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
- ['ComedyCentralShows'])
-
def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701
self.assertMatch(
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index b15695053..bf616e3b6 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -876,7 +876,7 @@ def _real_main(argv=None):
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
else:
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
- ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
+ ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
retcode = 141
else:
ydl.to_screen(
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index dede0cde5..6c7668fe2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -70,6 +70,7 @@ from .daum import DaumIE
from .dfb import DFBIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
+from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dump import DumpIE
from .defense import DefenseGouvFrIE
diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py
index adeacba01..bc64423a3 100644
--- a/youtube_dl/extractor/anysex.py
+++ b/youtube_dl/extractor/anysex.py
@@ -21,6 +21,7 @@ class AnySexIE(InfoExtractor):
'description': 'md5:de9e418178e2931c10b62966474e1383',
'categories': ['Erotic'],
'duration': 270,
+ 'age_limit': 18,
}
}
@@ -43,7 +44,6 @@ class AnySexIE(InfoExtractor):
duration = parse_duration(self._search_regex(
r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
-
view_count = int_or_none(self._html_search_regex(
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
@@ -57,4 +57,5 @@ class AnySexIE(InfoExtractor):
'categories': categories,
'duration': duration,
'view_count': view_count,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index c2692cfdc..d7301fe18 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -17,6 +17,7 @@ class BeegIE(InfoExtractor):
'description': 'md5:6db3c6177972822aaba18652ff59c773',
'categories': list, # NSFW
'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
}
}
@@ -41,7 +42,9 @@ class BeegIE(InfoExtractor):
categories_str = self._html_search_regex(
r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
- categories = categories_str.split(',')
+ categories = (
+ None if categories_str is None
+ else categories_str.split(','))
return {
'id': video_id,
@@ -50,4 +53,5 @@ class BeegIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'categories': categories,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index c81ce5a96..035046120 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor):
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip>
- (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
+ (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)|
(?P<interview>
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
(?:[?#].*|$)'''
- _TEST = {
+ _TESTS = [{
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
'info_dict': {
@@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor):
'uploader': 'thedailyshow',
'title': 'thedailyshow kristen-stewart part 1',
}
- }
+ }, {
+ 'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
+ 'only_matching': True,
+ }]
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor):
'ext': self._video_extensions.get(format, 'mp4'),
'height': h,
'width': w,
+
+ 'format_note': 'HTTP 400 at the moment (patches welcome!)',
+ 'preference': -100,
})
formats.append({
'format_id': 'rtmp-%s' % format,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8453321c5..929dd1e97 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -677,9 +677,12 @@ class InfoExtractor(object):
}
codecs = last_info.get('CODECS')
if codecs:
- video, audio = codecs.split(',')
- f['vcodec'] = video.partition('.')[0]
- f['acodec'] = audio.partition('.')[0]
+ # TODO: looks like video codec is not always necessarily goes first
+ va_codecs = codecs.split(',')
+ if va_codecs[0]:
+ f['vcodec'] = va_codecs[0].partition('.')[0]
+ if len(va_codecs) > 1 and va_codecs[1]:
+ f['acodec'] = va_codecs[1].partition('.')[0]
resolution = last_info.get('RESOLUTION')
if resolution:
width_str, height_str = resolution.split('x')
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
new file mode 100644
index 000000000..9a13925b4
--- /dev/null
+++ b/youtube_dl/extractor/drtuber.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DrTuberIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)'
+ _TEST = {
+ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
+ 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
+ 'info_dict': {
+ 'id': '1740434',
+ 'ext': 'mp4',
+ 'title': 'Hot Perky Blonde Naked Golf',
+ 'categories': list, # NSFW
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._html_search_regex(
+ r'<source src="([^"]+)"', webpage, 'video URL')
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+
+ thumbnail = self._html_search_regex(
+ r'poster="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ cats_str = self._html_search_regex(
+ r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
+ categories = None if cats_str is None else cats_str.split(' ')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'age_limit': self._rta_search(webpage),
+ }
diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py
index 4c2c074cb..6926fcda3 100644
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@@ -21,6 +21,7 @@ class EpornerIE(InfoExtractor):
'title': 'Infamous Tiffany Teen Strip Tease Video',
'duration': 194,
'view_count': int,
+ 'age_limit': 18,
}
}
@@ -35,9 +36,10 @@ class EpornerIE(InfoExtractor):
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
webpage, 'redirect_code')
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
- webpage2 = self._download_webpage(redirect_url, video_id)
+ player_code = self._download_webpage(
+ redirect_url, video_id, note='Downloading player config')
video_url = self._html_search_regex(
- r'file: "(.*?)",', webpage2, 'video_url')
+ r'file: "(.*?)",', player_code, 'video_url')
duration = parse_duration(self._search_regex(
r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
@@ -52,4 +54,5 @@ class EpornerIE(InfoExtractor):
'title': title,
'duration': duration,
'view_count': view_count,
+ 'age_limit': self._rta_search(webpage),
}
diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py
index a42fba0cb..7e7714438 100644
--- a/youtube_dl/extractor/hornbunny.py
+++ b/youtube_dl/extractor/hornbunny.py
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
class HornBunnyIE(InfoExtractor):
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
@@ -15,7 +19,8 @@ class HornBunnyIE(InfoExtractor):
'id': '5227',
'ext': 'flv',
'title': 'panty slut jerk off instruction',
- 'duration': 550
+ 'duration': 550,
+ 'age_limit': 18,
}
}
@@ -23,16 +28,22 @@ class HornBunnyIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'class="title">(.*?)</h2>', webpage, 'title')
- redirect_url = self._html_search_regex(r'pg&settings=(.*?)\|0"\);', webpage, 'title')
+ webpage = self._download_webpage(
+ url, video_id, note='Downloading initial webpage')
+ title = self._html_search_regex(
+ r'class="title">(.*?)</h2>', webpage, 'title')
+ redirect_url = self._html_search_regex(
+ r'pg&settings=(.*?)\|0"\);', webpage, 'title')
webpage2 = self._download_webpage(redirect_url, video_id)
- video_url = self._html_search_regex(r'flvMask:(.*?);', webpage2, 'video_url')
+ video_url = self._html_search_regex(
+ r'flvMask:(.*?);', webpage2, 'video_url')
- mobj = re.search(r'<strong>Runtime:</strong> (?P<minutes>\d+):(?P<seconds>\d+)</div>', webpage)
- duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
-
- view_count = self._html_search_regex(r'<strong>Views:</strong> (\d+)</div>', webpage, 'view count', fatal=False)
+ duration = parse_duration(self._search_regex(
+ r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
+ webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'<strong>Views:</strong>\s*(\d+)</div>',
+ webpage, 'view count', fatal=False))
return {
'id': video_id,
@@ -40,5 +51,6 @@ class HornBunnyIE(InfoExtractor):
'title': title,
'ext': 'flv',
'duration': duration,
- 'view_count': int_or_none(view_count),
+ 'view_count': view_count,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 12e85a716..902d62944 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
unified_strdate,
+ qualities,
)
@@ -17,7 +18,7 @@ class NPOIE(InfoExtractor):
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
'info_dict': {
'id': 'VPWON_1220719',
- 'ext': 'mp4',
+ 'ext': 'm4v',
'title': 'Nieuwsuur',
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
'upload_date': '20140622',
@@ -39,24 +40,32 @@ class NPOIE(InfoExtractor):
video_id,
note='Downloading token'
)
- token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
- streams_info = self._download_json(
- 'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
- video_id
- )
+ token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
- stream_info = self._download_json(
- streams_info['streams'][0] + '&type=json',
- video_id,
- 'Downloading stream info'
- )
+ formats = []
+ quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
+ for format_id in metadata['pubopties']:
+ streams_info = self._download_json(
+ 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
+ video_id, 'Downloading %s streams info' % format_id)
+ stream_info = self._download_json(
+ streams_info['streams'][0] + '&type=json',
+ video_id, 'Downloading %s stream info' % format_id)
+ if format_id == 'adaptive':
+ formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
+ else:
+ formats.append({
+ 'url': stream_info['url'],
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+ self._sort_formats(formats)
return {
'id': video_id,
'title': metadata['titel'],
- 'ext': 'mp4',
- 'url': stream_info['url'],
'description': metadata['info'],
'thumbnail': metadata['images'][-1]['url'],
'upload_date': unified_strdate(metadata['gidsdatum']),
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index c7a46eb71..7de3c9dd5 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -23,6 +23,7 @@ class SunPornoIE(InfoExtractor):
'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 302,
+ 'age_limit': 18,
}
}
@@ -65,4 +66,5 @@ class SunPornoIE(InfoExtractor):
'view_count': view_count,
'comment_count': comment_count,
'formats': formats,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 78f3b7e7b..08a04737c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1055,21 +1055,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
self._login()
def _ids_to_results(self, ids):
- return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
- for vid_id in ids]
+ return [
+ self.url_result(vid_id, 'Youtube', video_id=vid_id)
+ for vid_id in ids]
def _extract_mix(self, playlist_id):
# The mixes are generated from a a single video
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
- webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+ webpage = self._download_webpage(
+ url, playlist_id, u'Downloading Youtube mix')
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
- title_span = (search_title('playlist-title') or
- search_title('title long-title') or search_title('title'))
+ title_span = (
+ search_title('playlist-title') or
+ search_title('title long-title') or
+ search_title('title'))
title = clean_html(title_span)
- video_re = r'''(?x)data-video-username=".*?".*?
- href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
- ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+ ids = orderedSet(re.findall(
+ r'''(?xs)data-video-username=".*?".*?
+ href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+ webpage))
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)
@@ -1162,6 +1167,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
msg = u'Downloading Youtube mix'
if i > 0:
msg += ', retry #%d' % i
+
webpage = self._download_webpage(url, title, msg)
ids = orderedSet(re.findall(video_re, webpage))
if ids:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 8a36e619a..6fe057234 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1141,10 +1141,10 @@ else:
import fcntl
def _lock_file(f, exclusive):
- fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+ fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
def _unlock_file(f):
- fcntl.lockf(f, fcntl.LOCK_UN)
+ fcntl.flock(f, fcntl.LOCK_UN)
class locked_file(object):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 59f6b4736..c9005afe1 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.09.01.1'
+__version__ = '2014.09.01.2'