diff options
-rw-r--r-- | docs/supportedsites.md | 1 | ||||
-rw-r--r-- | test/test_utils.py | 4 | ||||
-rwxr-xr-x | youtube_dl/YoutubeDL.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/dplay.py | 51 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/neteasemusic.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/pbs.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/rtve.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/udn.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 25 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 2 | ||||
-rw-r--r-- | youtube_dl/utils.py | 8 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
14 files changed, 90 insertions, 30 deletions
diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2e5283747..f56a6986b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -129,6 +129,7 @@ - **Discovery** - **Dotsub** - **DouyuTV**: 斗鱼 + - **DPlay** - **dramafever** - **dramafever:series** - **DRBonanza** diff --git a/test/test_utils.py b/test/test_utils.py index 01829f71e..ea1ff0547 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -210,8 +210,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('%20;'), '%20;') self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('/'), '/') - self.assertEqual( - unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('�'), '�') def test_daterange(self): _20century = DateRange("19000101", "20000101") diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 422f3ffca..2e824117a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -833,6 +833,7 @@ class YoutubeDL(object): extra_info=extra) playlist_results.append(entry_result) ie_result['entries'] = playlist_results + self.to_screen('[download] Finished downloading playlist: %s' % playlist) return ie_result elif result_type == 'compat_list': self.report_warning( diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 59c82f65d..26e5745d6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -132,6 +132,7 @@ from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE +from .dplay import DPlayIE from .dramafever import ( DramaFeverIE, DramaFeverSeriesIE, diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py new file mode 100644 index 000000000..6cda56a7f --- /dev/null +++ b/youtube_dl/extractor/dplay.py @@ -0,0 +1,51 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import time + +from .common import InfoExtractor +from ..utils import int_or_none + + +class DPlayIE(InfoExtractor): + _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)' + + _TEST = { + 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', + 'info_dict': { + 'id': '3172', + 'ext': 'mp4', + 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', + 'title': 'Svensken lär sig njuta av livet', + 'duration': 2650, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-video-id="(\d+)"', webpage, 'video id') + + info = self._download_json( + 'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id, + video_id)['data'][0] + + self._set_cookie( + 'secure.dplay.se', 'dsc-geo', + '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000)) + # TODO: consider adding support for 'stream_type=hds', it seems to + # require setting some cookies + manifest_url = self._download_json( + 'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id, + video_id, 'Getting manifest url for hls stream')['hls'] + formats = self._extract_m3u8_formats( + manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': info['title'], + 'formats': formats, + 'duration': int_or_none(info.get('video_metadata_length'), scale=1000), + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51516a38a..b483eba65 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1739,7 +1739,7 @@ class GenericIE(InfoExtractor): # Look for UDN embeds mobj = re.search( - r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) + r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) if mobj is not None: return self.url_result( compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index a8e0a64ed..bb3362069 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor): if not details: continue formats.append({ - 'url': 'http://m1.music.126.net/%s/%s.%s' % + 'url': 'http://m5.music.126.net/%s/%s.%s' % (cls._encrypt(details['dfsId']), details['dfsId'], details['extension']), 'ext': details.get('extension'), diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8fb9b1849..b787e2a73 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -22,7 +22,7 @@ class PBSIE(InfoExtractor): # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player - video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ + (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ ) ''' @@ -170,6 +170,10 @@ class PBSIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, + { + 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', + 'only_matching': True, } ] _ERRORS = { @@ -259,7 +263,7 @@ class PBSIE(InfoExtractor): return self.playlist_result(entries, display_id) info = self._download_json( - 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, + 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, display_id) formats = [] diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 5b97d33ca..0fe6356db 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,7 +6,7 @@ import re import time from .common import InfoExtractor -from ..compat import compat_urllib_request, compat_urlparse +from ..compat import compat_urllib_request from ..utils import ( ExtractorError, float_or_none, @@ -107,15 +107,9 @@ class RTVEALaCartaIE(InfoExtractor): png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): - auth_url = video_url.replace( + video_url = video_url.replace( 'resources/', 'auth/resources/' ).replace('.net.rtve', '.multimedia.cdn.rtve') - video_path = self._download_webpage( - auth_url, video_id, 'Getting video url') - # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get - # the right Content-Length header and the mp4 format - video_url = compat_urlparse.urljoin( - 'http://mvod1.akcdn.rtve.es/', video_path) subtitles = None if info.get('sbtFile') is not None: diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 2151f8338..ee35b7227 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -12,7 +12,8 @@ from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): IE_DESC = '聯合影音' - _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' + _PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' + _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', 'md5': 'de06b4c90b042c128395a88f0384817e', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b72341a2b..057c72f39 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -189,6 +189,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'note': 'Video not completely processed, "failed" seed status', 'only_matching': True, }, + { + 'url': 'https://vimeo.com/groups/travelhd/videos/22439234', + 'only_matching': True, + }, ] @staticmethod @@ -486,8 +490,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): password_request, list_id, 'Verifying the password', 'Wrong password') - def _extract_videos(self, list_id, base_url): - video_ids = [] + def _title_and_entries(self, list_id, base_url): for pagenum in itertools.count(1): page_url = self._page_url(base_url, pagenum) webpage = self._download_webpage( @@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): if pagenum == 1: webpage = self._login_list_password(page_url, list_id, webpage) + yield self._extract_list_title(webpage) + + for video_id in re.findall(r'id="clip_(\d+?)"', webpage): + yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') - video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break - entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') - for video_id in video_ids] - return {'_type': 'playlist', - 'id': list_id, - 'title': self._extract_list_title(webpage), - 'entries': entries, - } + def _extract_videos(self, list_id, base_url): + title_and_entries = self._title_and_entries(list_id, base_url) + list_title = next(title_and_entries) + return self.playlist_result(title_and_entries, list_id, list_title) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE): class VimeoGroupsIE(VimeoAlbumIE): IE_NAME = 'vimeo:group' - _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)' + _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)' _TESTS = [{ 'url': 'https://vimeo.com/groups/rolexawards', 'info_dict': { diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 687e0b4db..364ca102a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1615,7 +1615,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract self.report_warning('Youtube gives an alert message: ' + match) playlist_title = self._html_search_regex( - r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', + r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>', page, 'title') return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d39f313a4..d00b14b86 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -396,10 +396,14 @@ def _htmlentity_transform(entity): numstr = '0%s' % numstr else: base = 10 - return compat_chr(int(numstr, base)) + # See https://github.com/rg3/youtube-dl/issues/7518 + try: + return compat_chr(int(numstr, base)) + except ValueError: + pass # Unknown entity in name, return its literal representation - return ('&%s;' % entity) + return '&%s;' % entity def unescapeHTML(s): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6f601cbb1..b3b1b90ba 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.11.15' +__version__ = '2015.11.19' |