diff options
Diffstat (limited to 'youtube_dl/extractor/nrk.py')
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 126 | 
1 files changed, 103 insertions, 23 deletions
| diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 43e8e619f..321ce5ce7 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):  class NRKTVIE(InfoExtractor): -    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})' +    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'      _TESTS = [          { @@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):                  'description': 'md5:bdea103bc35494c143c6a9acdd84887a',                  'upload_date': '20140523',                  'duration': 1741.52, -            } +            },          },          {              'url': 'http://tv.nrk.no/program/mdfp15000514', @@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):                  'description': 'md5:654c12511f035aed1e42bdf5db3b206a',                  'upload_date': '20140524',                  'duration': 4605.0, -            } +            },          }, +        { +            # single playlist video +            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', +            'md5': 'adbd1dbd813edaf532b0a253780719c2', +            'info_dict': { +                'id': 'MSPO40010515-part2', +                'ext': 'flv', +                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', +                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                'upload_date': '20150106', +            }, +            'skip': 'Only works from Norway', +        }, +        { +            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', +            'playlist': [ +                { +                    'md5': '9480285eff92d64f06e02a5367970a7a', +                    'info_dict': { +                        'id': 'MSPO40010515-part1', +                        'ext': 'flv', +                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', +                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                        'upload_date': '20150106', +                    }, +                }, +                { +                    'md5': 'adbd1dbd813edaf532b0a253780719c2', +                    'info_dict': { +                        'id': 'MSPO40010515-part2', +                        'ext': 'flv', +                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', +                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                        'upload_date': '20150106', +                    }, +                }, +            ], +            'info_dict': { +                'id': 'MSPO40010515', +                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', +                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                'upload_date': '20150106', +                'duration': 6947.5199999999995, +            }, +            'skip': 'Only works from Norway', +        }      ] +    def _extract_f4m(self, manifest_url, video_id): +        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') - -        page = self._download_webpage(url, video_id) - -        title = self._html_search_meta('title', page, 'title') -        description = self._html_search_meta('description', page, 'description') -        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False) -        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False)) -        duration = float_or_none( -            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)) +        part_id = mobj.group('part_id') + +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_meta( +            'title', webpage, 'title') +        description = self._html_search_meta( +            'description', webpage, 'description') + +        thumbnail = self._html_search_regex( +            r'data-posterimage="([^"]+)"', +            webpage, 'thumbnail', fatal=False) +        upload_date = unified_strdate(self._html_search_meta( +            'rightsfrom', webpage, 'upload date', fatal=False)) +        duration = float_or_none(self._html_search_regex( +            r'data-duration="([^"]+)"', +            webpage, 'duration', fatal=False)) + +        # playlist +        parts = re.findall( +            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage) +        if parts: +            entries = [] +            for current_part_id, stream_url, part_title in parts: +                if part_id and current_part_id != part_id: +                    continue +                video_part_id = '%s-part%s' % (video_id, current_part_id) +                formats = self._extract_f4m(stream_url, video_part_id) +                entries.append({ +                    'id': video_part_id, +                    'title': part_title, +                    'description': description, +                    'thumbnail': thumbnail, +                    'upload_date': upload_date, +                    'formats': formats, +                }) +            if part_id: +                if entries: +                    return entries[0] +            else: +                playlist = self.playlist_result(entries, video_id, title, description) +                playlist.update({ +                    'thumbnail': thumbnail, +                    'upload_date': upload_date, +                    'duration': duration, +                }) +                return playlist          formats = [] -        f4m_url = re.search(r'data-media="([^"]+)"', page) +        f4m_url = re.search(r'data-media="([^"]+)"', webpage)          if f4m_url: -            formats.append({ -                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', -                'format_id': 'f4m', -                'ext': 'flv', -            }) +            formats.extend(self._extract_f4m(f4m_url.group(1), video_id)) -        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page) +        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)          if m3u8_url: -            formats.append({ -                'url': m3u8_url.group(1), -                'format_id': 'm3u8', -            }) +            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))          self._sort_formats(formats) | 
