aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/npo.py83
2 files changed, 83 insertions, 2 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 095feabf9..a4fab540b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -318,6 +318,8 @@ from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
TegenlichtVproIE,
)
from .nrk import (
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 54be06a4e..c075618e8 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,6 +1,7 @@
from __future__ import unicode_literals
from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import (
fix_xml_ampersands,
parse_duration,
@@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor):
class NPOIE(NPOBaseIE):
IE_NAME = 'npo.nl'
- _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
_TESTS = [
{
@@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE):
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
- _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
_TEST = {
'url': 'http://www.npo.nl/live/npo-1',
@@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE):
}
+class NPORadioIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-1',
+ 'info_dict': {
+ 'id': 'radio-1',
+ 'ext': 'mp3',
+ 'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ @staticmethod
+ def _html_get_attribute_regex(attribute):
+ return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+ stream = self._parse_json(
+ self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+ video_id)
+
+ codec = stream.get('codec')
+
+ return {
+ 'id': video_id,
+ 'url': stream['url'],
+ 'title': self._live_title(title),
+ 'acodec': codec,
+ 'ext': codec,
+ 'is_live': True,
+ }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio:fragment'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+ 'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+ 'info_dict': {
+ 'id': '174356',
+ 'ext': 'mp3',
+ 'title': 'Jubileumconcert Willeke Alberti',
+ },
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._html_search_regex(
+ r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+ webpage, 'title')
+
+ audio_url = self._search_regex(
+ r"data-streams='([^']+)'", webpage, 'audio url')
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ }
+
+
class TegenlichtVproIE(NPOIE):
IE_NAME = 'tegenlicht.vpro.nl'
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'