diff options
| -rw-r--r-- | test/test_all_urls.py | 26 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/anysex.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/beeg.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/comedycentral.py | 36 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/drtuber.py | 50 | ||||
| -rw-r--r-- | youtube_dl/extractor/eporner.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/hornbunny.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/npo.py | 35 | ||||
| -rw-r--r-- | youtube_dl/extractor/sunporno.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 22 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 4 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
15 files changed, 167 insertions, 72 deletions
| diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 5f1092939..84b05da39 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -143,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase):          self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])          self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) -    def test_ComedyCentralShows(self): -        self.assertMatch( -            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', -            ['ComedyCentralShows']) -        self.assertMatch( -            'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', -            ['ComedyCentralShows']) -      def test_yahoo_https(self):          # https://github.com/rg3/youtube-dl/issues/2701          self.assertMatch( diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index b15695053..bf616e3b6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -876,7 +876,7 @@ def _real_main(argv=None):                  ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')              else:                  if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): -                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir') +                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)                      retcode = 141                  else:                      ydl.to_screen( diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dede0cde5..6c7668fe2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -70,6 +70,7 @@ from .daum import DaumIE  from .dfb import DFBIE  from .dotsub import DotsubIE  from .dreisat import DreiSatIE +from .drtuber import DrTuberIE  from .drtv import DRTVIE  from .dump import DumpIE  from .defense import DefenseGouvFrIE diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py index adeacba01..bc64423a3 100644 --- a/youtube_dl/extractor/anysex.py +++ b/youtube_dl/extractor/anysex.py @@ -21,6 +21,7 @@ class AnySexIE(InfoExtractor):              'description': 'md5:de9e418178e2931c10b62966474e1383',              'categories': ['Erotic'],              'duration': 270, +            'age_limit': 18,          }      } @@ -43,7 +44,6 @@ class AnySexIE(InfoExtractor):          duration = parse_duration(self._search_regex(              r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False)) -          view_count = int_or_none(self._html_search_regex(              r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False)) @@ -57,4 +57,5 @@ class AnySexIE(InfoExtractor):              'categories': categories,              'duration': duration,              'view_count': view_count, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c2692cfdc..d7301fe18 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -17,6 +17,7 @@ class BeegIE(InfoExtractor):              'description': 'md5:6db3c6177972822aaba18652ff59c773',              'categories': list,  # NSFW              'thumbnail': 're:https?://.*\.jpg$', +            'age_limit': 18,          }      } @@ -41,7 +42,9 @@ class BeegIE(InfoExtractor):          categories_str = self._html_search_regex(              r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) -        categories = categories_str.split(',') +        categories = ( +            None if categories_str is None +            else categories_str.split(','))          return {              'id': video_id, @@ -50,4 +53,5 @@ class BeegIE(InfoExtractor):              'description': description,              'thumbnail': thumbnail,              'categories': categories, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index c81ce5a96..035046120 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor):                            (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/                           ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|                            (?P<clip> -                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+)) +                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))                                |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))                            )|                            (?P<interview>                                extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))                       (?:[?#].*|$)''' -    _TEST = { +    _TESTS = [{          'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',          'md5': '4e2f5cb088a83cd8cdb7756132f9739d',          'info_dict': { @@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor):              'uploader': 'thedailyshow',              'title': 'thedailyshow kristen-stewart part 1',          } -    } +    }, { +        'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', +        'only_matching': True, +    }, { +        'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', +        'only_matching': True, +    }, { +        'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', +        'only_matching': True, +    }, { +        'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', +        'only_matching': True, +    }, { +        'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', +        'only_matching': True, +    }, { +        'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', +        'only_matching': True, +    }, { +        'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', +        'only_matching': True, +    }, { +        'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', +        'only_matching': True, +    }, { +        'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', +        'only_matching': True, +    }]      _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] @@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor):                      'ext': self._video_extensions.get(format, 'mp4'),                      'height': h,                      'width': w, + +                    'format_note': 'HTTP 400 at the moment (patches welcome!)', +                    'preference': -100,                  })                  formats.append({                      'format_id': 'rtmp-%s' % format, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8453321c5..929dd1e97 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -677,9 +677,12 @@ class InfoExtractor(object):                  }                  codecs = last_info.get('CODECS')                  if codecs: -                    video, audio = codecs.split(',') -                    f['vcodec'] = video.partition('.')[0] -                    f['acodec'] = audio.partition('.')[0] +                    # TODO: looks like video codec is not always necessarily goes first +                    va_codecs = codecs.split(',') +                    if va_codecs[0]: +                        f['vcodec'] = va_codecs[0].partition('.')[0] +                    if len(va_codecs) > 1 and va_codecs[1]: +                        f['acodec'] = va_codecs[1].partition('.')[0]                  resolution = last_info.get('RESOLUTION')                  if resolution:                      width_str, height_str = resolution.split('x') diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py new file mode 100644 index 000000000..9a13925b4 --- /dev/null +++ b/youtube_dl/extractor/drtuber.py @@ -0,0 +1,50 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class DrTuberIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)' +    _TEST = { +        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', +        'md5': '93e680cf2536ad0dfb7e74d94a89facd', +        'info_dict': { +            'id': '1740434', +            'ext': 'mp4', +            'title': 'Hot Perky Blonde Naked Golf', +            'categories': list,  # NSFW +            'thumbnail': 're:https?://.*\.jpg$', +            'age_limit': 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) + +        video_url = self._html_search_regex( +            r'<source src="([^"]+)"', webpage, 'video URL') + +        title = self._html_search_regex( +            r'<title>([^<]+)\s*-\s*Free', webpage, 'title') + +        thumbnail = self._html_search_regex( +            r'poster="([^"]+)"', +            webpage, 'thumbnail', fatal=False) + +        cats_str = self._html_search_regex( +            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) +        categories = None if cats_str is None else cats_str.split(' ') + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'thumbnail': thumbnail, +            'categories': categories, +            'age_limit': self._rta_search(webpage), +        } diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 4c2c074cb..6926fcda3 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -21,6 +21,7 @@ class EpornerIE(InfoExtractor):              'title': 'Infamous Tiffany Teen Strip Tease Video',              'duration': 194,              'view_count': int, +            'age_limit': 18,          }      } @@ -35,9 +36,10 @@ class EpornerIE(InfoExtractor):              r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,              webpage, 'redirect_code')          redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code) -        webpage2 = self._download_webpage(redirect_url, video_id) +        player_code = self._download_webpage( +            redirect_url, video_id, note='Downloading player config')          video_url = self._html_search_regex( -            r'file: "(.*?)",', webpage2, 'video_url') +            r'file: "(.*?)",', player_code, 'video_url')          duration = parse_duration(self._search_regex(              r'class="mbtim">([0-9:]+)</div>', webpage, 'duration', @@ -52,4 +54,5 @@ class EpornerIE(InfoExtractor):              'title': title,              'duration': duration,              'view_count': view_count, +            'age_limit': self._rta_search(webpage),          } diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py index a42fba0cb..7e7714438 100644 --- a/youtube_dl/extractor/hornbunny.py +++ b/youtube_dl/extractor/hornbunny.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( +    int_or_none, +    parse_duration, +) +  class HornBunnyIE(InfoExtractor):      _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html' @@ -15,7 +19,8 @@ class HornBunnyIE(InfoExtractor):              'id': '5227',              'ext': 'flv',              'title': 'panty slut jerk off instruction', -            'duration': 550 +            'duration': 550, +            'age_limit': 18,          }      } @@ -23,16 +28,22 @@ class HornBunnyIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        webpage = self._download_webpage(url, video_id) -        title = self._html_search_regex(r'class="title">(.*?)</h2>', webpage, 'title') -        redirect_url = self._html_search_regex(r'pg&settings=(.*?)\|0"\);', webpage, 'title') +        webpage = self._download_webpage( +            url, video_id, note='Downloading initial webpage') +        title = self._html_search_regex( +            r'class="title">(.*?)</h2>', webpage, 'title') +        redirect_url = self._html_search_regex( +            r'pg&settings=(.*?)\|0"\);', webpage, 'title')          webpage2 = self._download_webpage(redirect_url, video_id) -        video_url = self._html_search_regex(r'flvMask:(.*?);', webpage2, 'video_url') +        video_url = self._html_search_regex( +            r'flvMask:(.*?);', webpage2, 'video_url') -        mobj = re.search(r'<strong>Runtime:</strong> (?P<minutes>\d+):(?P<seconds>\d+)</div>', webpage) -        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None - -        view_count = self._html_search_regex(r'<strong>Views:</strong>  (\d+)</div>', webpage, 'view count', fatal=False) +        duration = parse_duration(self._search_regex( +            r'<strong>Runtime:</strong>\s*([0-9:]+)</div>', +            webpage, 'duration', fatal=False)) +        view_count = int_or_none(self._search_regex( +            r'<strong>Views:</strong>\s*(\d+)</div>', +            webpage, 'view count', fatal=False))          return {              'id': video_id, @@ -40,5 +51,6 @@ class HornBunnyIE(InfoExtractor):              'title': title,              'ext': 'flv',              'duration': duration, -            'view_count': int_or_none(view_count), +            'view_count': view_count, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 12e85a716..902d62944 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -5,6 +5,7 @@ import re  from .common import InfoExtractor  from ..utils import (      unified_strdate, +    qualities,  ) @@ -17,7 +18,7 @@ class NPOIE(InfoExtractor):          'md5': '4b3f9c429157ec4775f2c9cb7b911016',          'info_dict': {              'id': 'VPWON_1220719', -            'ext': 'mp4', +            'ext': 'm4v',              'title': 'Nieuwsuur',              'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',              'upload_date': '20140622', @@ -39,24 +40,32 @@ class NPOIE(InfoExtractor):              video_id,              note='Downloading token'          ) -        token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token') -        streams_info = self._download_json( -            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token), -            video_id -        ) +        token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token') -        stream_info = self._download_json( -            streams_info['streams'][0] + '&type=json', -            video_id, -            'Downloading stream info' -        ) +        formats = [] +        quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std']) +        for format_id in metadata['pubopties']: +            streams_info = self._download_json( +                'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token), +                video_id, 'Downloading %s streams info' % format_id) +            stream_info = self._download_json( +                streams_info['streams'][0] + '&type=json', +                video_id, 'Downloading %s stream info' % format_id) +            if format_id == 'adaptive': +                formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id)) +            else: +                formats.append({ +                    'url': stream_info['url'], +                    'format_id': format_id, +                    'quality': quality(format_id), +                }) +        self._sort_formats(formats)          return {              'id': video_id,              'title': metadata['titel'], -            'ext': 'mp4', -            'url': stream_info['url'],              'description': metadata['info'],              'thumbnail': metadata['images'][-1]['url'],              'upload_date': unified_strdate(metadata['gidsdatum']), +            'formats': formats,          } diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index c7a46eb71..7de3c9dd5 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -23,6 +23,7 @@ class SunPornoIE(InfoExtractor):              'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',              'thumbnail': 're:^https?://.*\.jpg$',              'duration': 302, +            'age_limit': 18,          }      } @@ -65,4 +66,5 @@ class SunPornoIE(InfoExtractor):              'view_count': view_count,              'comment_count': comment_count,              'formats': formats, +            'age_limit': 18,          } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 78f3b7e7b..08a04737c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1055,21 +1055,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          self._login()      def _ids_to_results(self, ids): -        return [self.url_result(vid_id, 'Youtube', video_id=vid_id) -                       for vid_id in ids] +        return [ +            self.url_result(vid_id, 'Youtube', video_id=vid_id) +            for vid_id in ids]      def _extract_mix(self, playlist_id):          # The mixes are generated from a a single video          # the id of the playlist is just 'RD' + video_id          url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) -        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') +        webpage = self._download_webpage( +            url, playlist_id, u'Downloading Youtube mix')          search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) -        title_span = (search_title('playlist-title') or -            search_title('title long-title') or search_title('title')) +        title_span = ( +            search_title('playlist-title') or +            search_title('title long-title') or +            search_title('title'))          title = clean_html(title_span) -        video_re = r'''(?x)data-video-username=".*?".*? -                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) -        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) +        ids = orderedSet(re.findall( +            r'''(?xs)data-video-username=".*?".*? +                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), +            webpage))          url_results = self._ids_to_results(ids)          return self.playlist_result(url_results, playlist_id, title) @@ -1162,6 +1167,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):              msg = u'Downloading Youtube mix'              if i > 0:                  msg += ', retry #%d' % i +              webpage = self._download_webpage(url, title, msg)              ids = orderedSet(re.findall(video_re, webpage))              if ids: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8a36e619a..6fe057234 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1141,10 +1141,10 @@ else:      import fcntl      def _lock_file(f, exclusive): -        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) +        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)      def _unlock_file(f): -        fcntl.lockf(f, fcntl.LOCK_UN) +        fcntl.flock(f, fcntl.LOCK_UN)  class locked_file(object): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 59f6b4736..c9005afe1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.01.1' +__version__ = '2014.09.01.2' | 
