diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 3 |
| -rw-r--r-- | youtube_dl/extractor/npo.py | 83 |
| -rw-r--r-- | youtube_dl/extractor/nporadio.py | 39 |
3 files changed, 83 insertions, 42 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b118c3d1d..a4fab540b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -318,9 +318,10 @@ from .nowvideo import NowVideoIE  from .npo import (      NPOIE,      NPOLiveIE, +    NPORadioIE, +    NPORadioFragmentIE,      TegenlichtVproIE,  ) -from .nporadio import NPORadioIE  from .nrk import (      NRKIE,      NRKTVIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 54be06a4e..c075618e8 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -1,6 +1,7 @@  from __future__ import unicode_literals  from .subtitles import SubtitlesInfoExtractor +from .common import InfoExtractor  from ..utils import (      fix_xml_ampersands,      parse_duration, @@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor):  class NPOIE(NPOBaseIE):      IE_NAME = 'npo.nl' -    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)' +    _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'      _TESTS = [          { @@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE):  class NPOLiveIE(NPOBaseIE):      IE_NAME = 'npo.nl:live' -    _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)' +    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'      _TEST = {          'url': 'http://www.npo.nl/live/npo-1', @@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE):          } +class NPORadioIE(InfoExtractor): +    IE_NAME = 'npo.nl:radio' +    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$' + +    _TEST = { +        'url': 'http://www.npo.nl/radio/radio-1', +        'info_dict': { +            'id': 'radio-1', +            'ext': 'mp3', +            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +            'is_live': True, +        }, +        'params': { +            'skip_download': True, +        } +    } + +    @staticmethod +    def 
_html_get_attribute_regex(attribute): +        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute) + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_regex( +            self._html_get_attribute_regex('data-channel'), webpage, 'title') + +        stream = self._parse_json( +            self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'), +            video_id) + +        codec = stream.get('codec') + +        return { +            'id': video_id, +            'url': stream['url'], +            'title': self._live_title(title), +            'acodec': codec, +            'ext': codec, +            'is_live': True, +        } + + +class NPORadioFragmentIE(InfoExtractor): +    IE_NAME = 'npo.nl:radio:fragment' +    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)' + +    _TEST = { +        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356', +        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2', +        'info_dict': { +            'id': '174356', +            'ext': 'mp3', +            'title': 'Jubileumconcert Willeke Alberti', +        }, +    } + +    def _real_extract(self, url): +        audio_id = self._match_id(url) + +        webpage = self._download_webpage(url, audio_id) + +        title = self._html_search_regex( +            r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id, +            webpage, 'title') + +        audio_url = self._search_regex( +            r"data-streams='([^']+)'", webpage, 'audio url') + +        return { +            'id': audio_id, +            'url': audio_url, +            'title': title, +        } + +  class TegenlichtVproIE(NPOIE):      IE_NAME = 'tegenlicht.vpro.nl'      _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' 
diff --git a/youtube_dl/extractor/nporadio.py b/youtube_dl/extractor/nporadio.py deleted file mode 100644 index d1c4ec167..000000000 --- a/youtube_dl/extractor/nporadio.py +++ /dev/null @@ -1,39 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor - - -class NPORadioIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>.*)' -    _TEST = { -        'url': 'http://www.npo.nl/radio/radio-1', -        'info_dict': { -            'id': 'radio-1', -            'ext': 'mp3', -            'title': 'NPO Radio 1', -        } -    } - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - -        title = self._html_search_regex( -            self._html_get_attribute_regex('data-channel'), webpage, 'title') - -        json_data = json.loads( -            self._html_search_regex( -                self._html_get_attribute_regex('data-streams'), webpage, 'data-streams')) - -        return { -            'id': video_id, -            'title': title, -            'ext': json_data['codec'], -            'url': json_data['url'] -        } - -    def _html_get_attribute_regex(self, attribute): -        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute) | 
