aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2015-07-17 00:34:24 +0600
committerSergey M․ <dstftw@gmail.com>2015-07-17 00:34:24 +0600
commit611ac379bb466267aded6726f9c85e79b08168c4 (patch)
tree601b919405fb5e9aae897b623b53170af0e1c4ee
parent03f32a7eadf9d832aef55673edf38023a8daff95 (diff)
[vpro] Fix extraction and add support for vpro playlists
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/npo.py35
2 files changed, 26 insertions, 11 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 1d55275dc..06f21064b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -389,7 +389,7 @@ from .npo import (
NPOLiveIE,
NPORadioIE,
NPORadioFragmentIE,
- TegenlichtVproIE,
+ VPROIE,
WNLIE
)
from .nrk import (
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index c6bf7619d..28d5c90b3 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -404,9 +404,8 @@ class NPORadioFragmentIE(InfoExtractor):
}
-class TegenlichtVproIE(NPOIE):
- IE_NAME = 'tegenlicht.vpro.nl'
- _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+class VPROIE(NPOIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
_TESTS = [
{
@@ -416,19 +415,35 @@ class TegenlichtVproIE(NPOIE):
'id': 'VPWON_1169289',
'ext': 'm4v',
'title': 'Tegenlicht',
- 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
'upload_date': '20130225',
},
},
+ {
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+ 'info_dict': {
+ 'id': 'sergio-herman',
+ 'title': 'Sergio Herman: Fucking perfect',
+ },
+ 'playlist_count': 2,
+ }
]
def _real_extract(self, url):
- name = url_basename(url)
- webpage = self._download_webpage(url, name)
- urn = self._html_search_meta('mediaurn', webpage)
- info_page = self._download_json(
- 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
- return self._get_info(info_page['mid'])
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id, 'NPO')
+ for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
+ ]
+
+ playlist_title = self._search_regex(
+ r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
+ webpage, 'playlist title', default=None) or self._og_search_title(webpage)
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
class WNLIE(InfoExtractor):