aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/prosiebensat1.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/prosiebensat1.py')
-rw-r--r--youtube_dl/extractor/prosiebensat1.py65
1 files changed, 51 insertions, 14 deletions
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 7335dc2af..873d4f981 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -122,7 +122,17 @@ class ProSiebenSat1BaseIE(InfoExtractor):
class ProSiebenSat1IE(ProSiebenSat1BaseIE):
IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital'
- _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ (?:
+ prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
+ )\.(?:de|at|ch)|
+ ran\.de|fem\.com|advopedia\.de
+ )
+ /(?P<id>.+)
+ '''
_TESTS = [
{
@@ -290,6 +300,24 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'skip_download': True,
},
},
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+ 'only_matching': True,
+ },
]
_TOKEN = 'prosieben'
@@ -361,19 +389,28 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
def _extract_playlist(self, url, webpage):
playlist_id = self._html_search_regex(
self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
- for regex in self._PLAYLIST_CLIP_REGEXES:
- playlist_clips = re.findall(regex, webpage)
- if playlist_clips:
- title = self._html_search_regex(
- self._TITLE_REGEXES, webpage, 'title')
- description = self._html_search_regex(
- self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
- entries = [
- self.url_result(
- re.match('(.+?//.+?)/', url).group(1) + clip_path,
- 'ProSiebenSat1')
- for clip_path in playlist_clips]
- return self.playlist_result(entries, playlist_id, title, description)
+ playlist = self._parse_json(
+ self._search_regex(
+ 'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+ webpage, 'playlist'),
+ playlist_id)
+ entries = []
+ for item in playlist:
+ clip_id = item.get('id') or item.get('upc')
+ if not clip_id:
+ continue
+ info = self._extract_video_info(url, clip_id)
+ info.update({
+ 'id': clip_id,
+ 'title': item.get('title') or item.get('teaser', {}).get('headline'),
+ 'description': item.get('teaser', {}).get('description'),
+ 'thumbnail': item.get('poster'),
+ 'duration': float_or_none(item.get('duration')),
+ 'series': item.get('tvShowTitle'),
+ 'uploader': item.get('broadcastPublisher'),
+ })
+ entries.append(info)
+ return self.playlist_result(entries, playlist_id)
def _real_extract(self, url):
video_id = self._match_id(url)