diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/dplay.py | 51 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/neteasemusic.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/pbs.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/rtve.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/udn.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 25 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 2 |
9 files changed, 79 insertions, 25 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 59c82f65d..26e5745d6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -132,6 +132,7 @@ from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE +from .dplay import DPlayIE from .dramafever import ( DramaFeverIE, DramaFeverSeriesIE, diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py new file mode 100644 index 000000000..6cda56a7f --- /dev/null +++ b/youtube_dl/extractor/dplay.py @@ -0,0 +1,51 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import time + +from .common import InfoExtractor +from ..utils import int_or_none + + +class DPlayIE(InfoExtractor): + _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)' + + _TEST = { + 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', + 'info_dict': { + 'id': '3172', + 'ext': 'mp4', + 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', + 'title': 'Svensken lär sig njuta av livet', + 'duration': 2650, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-video-id="(\d+)"', webpage, 'video id') + + info = self._download_json( + 'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id, + video_id)['data'][0] + + self._set_cookie( + 'secure.dplay.se', 'dsc-geo', + '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000)) + # TODO: consider adding support for 'stream_type=hds', it seems to + # require setting some cookies + manifest_url = self._download_json( + 'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id, + video_id, 'Getting manifest url for hls stream')['hls'] + formats = self._extract_m3u8_formats( + manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': info['title'], + 'formats': formats, + 'duration': int_or_none(info.get('video_metadata_length'), scale=1000), + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51516a38a..b483eba65 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1739,7 +1739,7 @@ class GenericIE(InfoExtractor): # Look for UDN embeds mobj = re.search( - r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) + r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) if mobj is not None: return self.url_result( compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index a8e0a64ed..bb3362069 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor): if not details: continue formats.append({ - 'url': 'http://m1.music.126.net/%s/%s.%s' % + 'url': 'http://m5.music.126.net/%s/%s.%s' % (cls._encrypt(details['dfsId']), details['dfsId'], details['extension']), 'ext': details.get('extension'), diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8fb9b1849..b787e2a73 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -22,7 +22,7 @@ class PBSIE(InfoExtractor): # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player - video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ + (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ ) ''' @@ -170,6 +170,10 @@ class PBSIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, + { + 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', + 'only_matching': True, } ] _ERRORS = { @@ -259,7 +263,7 @@ class PBSIE(InfoExtractor): return self.playlist_result(entries, display_id) info = self._download_json( - 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, + 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, display_id) formats = [] diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 5b97d33ca..0fe6356db 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,7 +6,7 @@ import re import time from .common import InfoExtractor -from ..compat import compat_urllib_request, compat_urlparse +from ..compat import compat_urllib_request from ..utils import ( ExtractorError, float_or_none, @@ -107,15 +107,9 @@ class RTVEALaCartaIE(InfoExtractor): png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): - auth_url = video_url.replace( + video_url = video_url.replace( 'resources/', 'auth/resources/' ).replace('.net.rtve', '.multimedia.cdn.rtve') - video_path = self._download_webpage( - auth_url, video_id, 'Getting video url') - # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get - # the right Content-Length header and the mp4 format - video_url = compat_urlparse.urljoin( - 'http://mvod1.akcdn.rtve.es/', video_path) subtitles = None if info.get('sbtFile') is not None: diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 2151f8338..ee35b7227 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -12,7 +12,8 @@ from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): IE_DESC = '聯合影音' - _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' + _PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' + _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', 'md5': 'de06b4c90b042c128395a88f0384817e', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b72341a2b..057c72f39 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -189,6 +189,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'note': 'Video not completely processed, "failed" seed status', 'only_matching': True, }, + { + 'url': 'https://vimeo.com/groups/travelhd/videos/22439234', + 'only_matching': True, + }, ] @staticmethod @@ -486,8 +490,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): password_request, list_id, 'Verifying the password', 'Wrong password') - def _extract_videos(self, list_id, base_url): - video_ids = [] + def _title_and_entries(self, list_id, base_url): for pagenum in itertools.count(1): page_url = self._page_url(base_url, pagenum) webpage = self._download_webpage( @@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): if pagenum == 1: webpage = self._login_list_password(page_url, list_id, webpage) + yield self._extract_list_title(webpage) + + for video_id in re.findall(r'id="clip_(\d+?)"', webpage): + yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') - video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break - entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') - for video_id in video_ids] - return {'_type': 'playlist', - 'id': list_id, - 'title': self._extract_list_title(webpage), - 'entries': entries, - } + def _extract_videos(self, list_id, base_url): + title_and_entries = self._title_and_entries(list_id, base_url) + list_title = next(title_and_entries) + return self.playlist_result(title_and_entries, list_id, list_title) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE): class VimeoGroupsIE(VimeoAlbumIE): IE_NAME = 'vimeo:group' - _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)' + _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)' _TESTS = [{ 'url': 'https://vimeo.com/groups/rolexawards', 'info_dict': { diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 687e0b4db..364ca102a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1615,7 +1615,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract self.report_warning('Youtube gives an alert message: ' + match) playlist_title = self._html_search_regex( - r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', + r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>', page, 'title') return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) |