aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/dplay.py51
-rw-r--r--youtube_dl/extractor/generic.py2
-rw-r--r--youtube_dl/extractor/neteasemusic.py2
-rw-r--r--youtube_dl/extractor/pbs.py8
-rw-r--r--youtube_dl/extractor/rtve.py10
-rw-r--r--youtube_dl/extractor/udn.py3
-rw-r--r--youtube_dl/extractor/vimeo.py25
-rw-r--r--youtube_dl/extractor/youtube.py2
9 files changed, 79 insertions, 25 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 59c82f65d..26e5745d6 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -132,6 +132,7 @@ from .dfb import DFBIE
from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
+from .dplay import DPlayIE
from .dramafever import (
DramaFeverIE,
DramaFeverSeriesIE,
diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py
new file mode 100644
index 000000000..6cda56a7f
--- /dev/null
+++ b/youtube_dl/extractor/dplay.py
@@ -0,0 +1,51 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import time
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DPlayIE(InfoExtractor):
+ _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
+
+ _TEST = {
+ 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
+ 'info_dict': {
+ 'id': '3172',
+ 'ext': 'mp4',
+ 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
+ 'title': 'Svensken lär sig njuta av livet',
+ 'duration': 2650,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'data-video-id="(\d+)"', webpage, 'video id')
+
+ info = self._download_json(
+ 'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
+ video_id)['data'][0]
+
+ self._set_cookie(
+ 'secure.dplay.se', 'dsc-geo',
+ '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
+ # TODO: consider adding support for 'stream_type=hds', it seems to
+ # require setting some cookies
+ manifest_url = self._download_json(
+ 'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
+ video_id, 'Getting manifest url for hls stream')['hls']
+ formats = self._extract_m3u8_formats(
+ manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': info['title'],
+ 'formats': formats,
+ 'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 51516a38a..b483eba65 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1739,7 +1739,7 @@ class GenericIE(InfoExtractor):
# Look for UDN embeds
mobj = re.search(
- r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+ r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py
index a8e0a64ed..bb3362069 100644
--- a/youtube_dl/extractor/neteasemusic.py
+++ b/youtube_dl/extractor/neteasemusic.py
@@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
if not details:
continue
formats.append({
- 'url': 'http://m1.music.126.net/%s/%s.%s' %
+ 'url': 'http://m5.music.126.net/%s/%s.%s' %
(cls._encrypt(details['dfsId']), details['dfsId'],
details['extension']),
'ext': details.get('extension'),
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 8fb9b1849..b787e2a73 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -22,7 +22,7 @@ class PBSIE(InfoExtractor):
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
- video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
+ (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
@@ -170,6 +170,10 @@ class PBSIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
+ },
+ {
+ 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
+ 'only_matching': True,
}
]
_ERRORS = {
@@ -259,7 +263,7 @@ class PBSIE(InfoExtractor):
return self.playlist_result(entries, display_id)
info = self._download_json(
- 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+ 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
display_id)
formats = []
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index 5b97d33ca..0fe6356db 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -6,7 +6,7 @@ import re
import time
from .common import InfoExtractor
-from ..compat import compat_urllib_request, compat_urlparse
+from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
float_or_none,
@@ -107,15 +107,9 @@ class RTVEALaCartaIE(InfoExtractor):
png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
- auth_url = video_url.replace(
+ video_url = video_url.replace(
'resources/', 'auth/resources/'
).replace('.net.rtve', '.multimedia.cdn.rtve')
- video_path = self._download_webpage(
- auth_url, video_id, 'Getting video url')
- # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
- # the right Content-Length header and the mp4 format
- video_url = compat_urlparse.urljoin(
- 'http://mvod1.akcdn.rtve.es/', video_path)
subtitles = None
if info.get('sbtFile') is not None:
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index 2151f8338..ee35b7227 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -12,7 +12,8 @@ from ..compat import compat_urlparse
class UDNEmbedIE(InfoExtractor):
IE_DESC = '聯合影音'
- _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
+ _PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
+ _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
_TESTS = [{
'url': 'http://video.udn.com/embed/news/300040',
'md5': 'de06b4c90b042c128395a88f0384817e',
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index b72341a2b..057c72f39 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -189,6 +189,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'note': 'Video not completely processed, "failed" seed status',
'only_matching': True,
},
+ {
+ 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
+ 'only_matching': True,
+ },
]
@staticmethod
@@ -486,8 +490,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_request, list_id,
'Verifying the password', 'Wrong password')
- def _extract_videos(self, list_id, base_url):
- video_ids = []
+ def _title_and_entries(self, list_id, base_url):
for pagenum in itertools.count(1):
page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage(
@@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
if pagenum == 1:
webpage = self._login_list_password(page_url, list_id, webpage)
+ yield self._extract_list_title(webpage)
+
+ for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
+ yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
- video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
- entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
- for video_id in video_ids]
- return {'_type': 'playlist',
- 'id': list_id,
- 'title': self._extract_list_title(webpage),
- 'entries': entries,
- }
+ def _extract_videos(self, list_id, base_url):
+ title_and_entries = self._title_and_entries(list_id, base_url)
+ list_title = next(title_and_entries)
+ return self.playlist_result(title_and_entries, list_id, list_title)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE):
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
- _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{
'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': {
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 687e0b4db..364ca102a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1615,7 +1615,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract
self.report_warning('Youtube gives an alert message: ' + match)
playlist_title = self._html_search_regex(
- r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
+ r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
page, 'title')
return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)