diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/adultswim.py | 139 | ||||
-rw-r--r-- | youtube_dl/extractor/comedycentral.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/cracked.py | 61 | ||||
-rw-r--r-- | youtube_dl/extractor/dfb.py | 44 | ||||
-rw-r--r-- | youtube_dl/extractor/npo.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/redtube.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/rtbf.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 44 | ||||
-rw-r--r-- | youtube_dl/utils.py | 2 |
12 files changed, 261 insertions, 48 deletions
@@ -12,7 +12,7 @@ To install it right away for all UNIX users (Linux, OS X, etc.), type: If you do not have curl, you can alternatively use a recent wget: - sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl + sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+x /usr/local/bin/youtube-dl Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5e16a5491..6e2359b28 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -64,6 +64,7 @@ __authors__ = ( 'Adam Malcontenti-Wilson', 'Tobias Bell', 'Naglis Jonaitis', + 'Charles Chen', ) __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ca372496a..f78aa066f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,5 +1,6 @@ from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE +from .adultswim import AdultSwimIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE @@ -63,6 +64,7 @@ from .dailymotion import ( DailymotionUserIE, ) from .daum import DaumIE +from .dfb import DFBIE from .dotsub import DotsubIE from .dreisat import DreiSatIE from .drtv import DRTVIE diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py new file mode 100644 index 000000000..a00bfcb35 --- /dev/null +++ b/youtube_dl/extractor/adultswim.py @@ -0,0 +1,139 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class AdultSwimIE(InfoExtractor): + _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' + _TEST = { + 'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', + 'playlist': [ + { + 'md5': '4da359ec73b58df4575cd01a610ba5dc', + 'info_dict': { + 'id': '8a250ba1450996e901453d7f02ca02f5', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + }, + { + 'md5': 'ffbdf55af9331c509d95350bd0cc1819', + 'info_dict': { + 'id': '8a250ba1450996e901453d7f4bd102f6', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + }, + { + 'md5': 'b92409635540304280b4b6c36bd14a0a', + 'info_dict': { + 'id': '8a250ba1450996e901453d7fa73c02f7', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + }, + { + 'md5': 'e8818891d60e47b29cd89d7b0278156d', + 'info_dict': { + 'id': '8a250ba1450996e901453d7fc8ba02f8', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + } + ] + } + + _video_extensions = { + '3500': 'flv', + '640': 'mp4', + '150': 'mp4', + 'ipad': 'm3u8', + 'iphone': 'm3u8' + } + _video_dimensions = { + '3500': (1280, 720), + '640': (480, 270), + '150': (320, 180) + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_path = mobj.group('path') + + webpage = self._download_webpage(url, video_path) + episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id') + title = self._og_search_title(webpage) + + index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id + idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index') + + episode_el = idoc.find('.//episode') + show_title = episode_el.attrib.get('collectionTitle') + episode_title = episode_el.attrib.get('title') + thumbnail = episode_el.attrib.get('thumbnailUrl') + description = episode_el.find('./description').text.strip() + + entries = [] + segment_els = episode_el.findall('./segments/segment') + + for part_num, segment_el in enumerate(segment_els): + segment_id = segment_el.attrib.get('id') + segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1) + thumbnail = segment_el.attrib.get('thumbnailUrl') + duration = segment_el.attrib.get('duration') + + segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id + idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information') + + formats = [] + file_els = idoc.findall('.//files/file') + + for file_el in file_els: + bitrate = file_el.attrib.get('bitrate') + type = file_el.attrib.get('type') + width, height = self._video_dimensions.get(bitrate, (None, None)) + formats.append({ + 'format_id': '%s-%s' % (bitrate, type), + 'url': file_el.text, + 'ext': self._video_extensions.get(bitrate, 'mp4'), + # The bitrate may not be a number (for example: 'iphone') + 'tbr': int(bitrate) if bitrate.isdigit() else None, + 'height': height, + 'width': width + }) + + self._sort_formats(formats) + + entries.append({ + 'id': segment_id, + 'title': segment_title, + 'formats': formats, + 'uploader': show_title, + 'thumbnail': thumbnail, + 'duration': duration, + 'description': description + }) + + return { + '_type': 'playlist', + 'id': episode_id, + 'display_id': video_path, + 'entries': entries, + 'title': '%s %s' % (show_title, episode_title), + 'description': description, + 'thumbnail': thumbnail + } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 8af0abade..c81ce5a96 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -14,13 +14,13 @@ from ..utils import ( class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ - (video-clips|episodes|cc-studios|video-collections) + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ + (video-clips|episodes|cc-studios|video-collections|full-episodes) /(?P<title>.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' _TEST = { - 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', + 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', 'md5': 'c4f48e9eda1b16dd10add0744344b6d8', 'info_dict': { 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index 37c0f7ffb..74b880ffc 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -1,23 +1,26 @@ -# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + str_to_int, +) + class CrackedIE(InfoExtractor): - _VALID_URL = r'http?://.*?\.cracked\.com/video_+(?P<id>.*)_.*' + _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html' _TEST = { - 'url': 'http://www.cracked.com/video_18803_4-social-criticisms-hidden-in-sonic-hedgehog-games.html', - + 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html', + 'md5': '4b29a5eeec292cd5eca6388c7558db9e', 'info_dict': { - 'id': '18803', + 'id': '19006', 'ext': 'mp4', - 'title': "4 Social Criticisms Hidden in 'Sonic the Hedgehog' Games | Cracked.com", - 'height': 375, - 'width': 666, - - + 'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies', + 'description': 'md5:3b909e752661db86007d10e5ec2df769', + 'timestamp': 1405659600, + 'upload_date': '20140718', } } @@ -26,21 +29,37 @@ class CrackedIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._search_regex(r'<title>(.*?)</title>',webpage,'title') - video_url = self._search_regex(r'var CK_vidSrc = "+(.*)"',webpage,'url') - width = self._search_regex(r'width="(.*?)"',webpage,'width') - height = re.findall(r'height="(.*?)"',webpage)[1] + video_url = self._html_search_regex( + [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL') + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False) + if timestamp: + timestamp = parse_iso8601(timestamp[:-6]) - return { - 'url':video_url, - 'id': video_id, - 'ext':'mp4', - 'title':title, - 'height':int(height), - 'width':int(width) + view_count = str_to_int(self._html_search_regex( + r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False)) + comment_count = str_to_int(self._html_search_regex( + r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False)) + m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url) + if m: + width = int(m.group('width')) + height = int(m.group('height')) + else: + width = height = None + return { + 'id': video_id, + 'url':video_url, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'view_count': view_count, + 'comment_count': comment_count, + 'height': height, + 'width': width, }
\ No newline at end of file diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py new file mode 100644 index 000000000..cb8e06822 --- /dev/null +++ b/youtube_dl/extractor/dfb.py @@ -0,0 +1,44 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class DFBIE(InfoExtractor): + IE_NAME = 'tv.dfb.de' + _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)' + + _TEST = { + 'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/', + # The md5 is different each time + 'info_dict': { + 'id': '9070', + 'ext': 'flv', + 'title': 'Highlights des Empfangs in Berlin', + 'upload_date': '20140716', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + player_info = self._download_xml( + 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, + video_id) + video_info = player_info.find('video') + + f4m_info = self._download_xml(video_info.find('url').text, video_id) + token_el = f4m_info.find('token') + manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' + + return { + 'id': video_id, + 'title': video_info.find('title').text, + 'url': manifest_url, + 'ext': 'flv', + 'thumbnail': self._og_search_thumbnail(webpage), + 'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]), + } diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index fbcbe1f40..12e85a716 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -32,7 +32,7 @@ class NPOIE(InfoExtractor): 'http://e.omroep.nl/metadata/aflevering/%s' % video_id, video_id, # We have to remove the javascript callback - transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j) + transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j) ) token_page = self._download_webpage( 'http://ida.omroep.nl/npoplayer/i.js', diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 4295cf93a..d1e12dd8d 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -35,9 +35,7 @@ class RedTubeIE(InfoExtractor): r'<h1 class="videoTitle[^"]*">(.+?)</h1>', webpage, u'title') - video_thumbnail = self._html_search_regex( - r'playerInnerHTML.+?<img\s+src="(.+?)"', - webpage, u'thumbnail', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) # No self-labeling, but they describe themselves as # "Home of Videos Porno" diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index 205f8a167..dce64e151 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -30,7 +30,7 @@ class RTBFIE(InfoExtractor): page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id) data = json.loads(self._html_search_regex( - r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data'] + r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data'] video_url = data.get('downloadUrl') or data.get('url') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6123e1256..5449df8e0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -347,8 +347,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'RTMP download detected') def _extract_signature_function(self, video_id, player_url, slen): - id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$', - player_url) + id_m = re.match( + r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$', + player_url) player_type = id_m.group('ext') player_id = id_m.group('id') @@ -1220,31 +1221,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url += '&signature=' + url_data['sig'][0] elif 's' in url_data: encrypted_sig = url_data['s'][0] + + if not age_gate: + jsplayer_url_json = self._search_regex( + r'"assets":.+?"js":\s*("[^"]+")', + video_webpage, u'JS player URL') + player_url = json.loads(jsplayer_url_json) + if player_url is None: + player_url_json = self._search_regex( + r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', + video_webpage, u'age gate player URL') + player_url = json.loads(player_url_json) + if self._downloader.params.get('verbose'): - if age_gate: - if player_url is None: - player_version = 'unknown' - else: + if player_url is None: + player_version = 'unknown' + player_desc = 'unknown' + else: + if player_url.endswith('swf'): player_version = self._search_regex( r'-(.+)\.swf$', player_url, u'flash player', fatal=False) - player_desc = 'flash player %s' % player_version - else: - player_version = self._search_regex( - r'html5player-(.+?)\.js', video_webpage, - 'html5 player', fatal=False) - player_desc = u'html5 player %s' % player_version + player_desc = 'flash player %s' % player_version + else: + player_version = self._search_regex( + r'html5player-(.+?)\.js', video_webpage, + 'html5 player', fatal=False) + player_desc = u'html5 player %s' % player_version parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.')) self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) - if not age_gate: - jsplayer_url_json = self._search_regex( - r'"assets":.+?"js":\s*("[^"]+")', - video_webpage, u'JS player URL') - player_url = json.loads(jsplayer_url_json) - signature = self._decrypt_signature( encrypted_sig, video_id, player_url, age_gate) url += '&signature=' + signature diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 64a9618ca..919603c62 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1194,6 +1194,8 @@ def format_bytes(bytes): def str_to_int(int_str): + if int_str is None: + return None int_str = re.sub(r'[,\.]', u'', int_str) return int(int_str) |