diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-09-22 12:53:41 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-09-22 12:53:41 +0200 | 
| commit | 45c85d7ba1dbca09c7ded9130fa5670b302e099b (patch) | |
| tree | 8352b3afcdb8f4599257533216c52b678b0b0d4a | |
| parent | df8f53f752c0f01577dcc5d63c6d9a81d924770b (diff) | |
| parent | d0df92928bc099775e18f6413e387713839012ba (diff) | |
Merge remote-tracking branch 'origin/master'
| -rw-r--r-- | youtube_dl/downloader/f4m.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/nbc.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/npo.py | 30 | ||||
| -rw-r--r-- | youtube_dl/extractor/sbs.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/theplatform.py | 53 | 
6 files changed, 73 insertions, 26 deletions
| diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 71353f607..b3be16ff1 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -16,6 +16,7 @@ from ..utils import (      format_bytes,      encodeFilename,      sanitize_open, +    xpath_text,  ) @@ -251,6 +252,8 @@ class F4mFD(FileDownloader):              # We only download the first fragment              fragments_list = fragments_list[:1]          total_frags = len(fragments_list) +        # For some akamai manifests we'll need to add a query to the fragment url +        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))          tmpfilename = self.temp_name(filename)          (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') @@ -290,6 +293,8 @@ class F4mFD(FileDownloader):          for (seg_i, frag_i) in fragments_list:              name = 'Seg%d-Frag%d' % (seg_i, frag_i)              url = base_url + name +            if akamai_pv: +                url += '?' + akamai_pv.strip(';')              frag_filename = '%s-%s' % (tmpfilename, name)              success = http_dl.download(frag_filename, {'url': url})              if not success: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8a5eb8cf1..244d22297 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE  from .novamov import NovaMovIE  from .nowness import NownessIE  from .nowvideo import NowVideoIE -from .npo import NPOIE +from .npo import ( +    NPOIE, +    TegenlichtVproIE, +)  from .nrk import (      NRKIE,      NRKTVIE, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2e4acbad..e75ab7c39 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):      _TEST = {          'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', -        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', +        # md5 checksum is not stable          'info_dict': { -            'id': 'u1RInQZRN7QJ', +            'id': 'bTmnLCvIbaaH',              'ext': 'flv',              'title': 'I Am a Firefighter',              'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7a154e94a..f36d446d2 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -7,6 +7,7 @@ from ..utils import (      unified_strdate,      parse_duration,      qualities, +    url_basename,  ) @@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') +        return self._get_info(video_id) +    def _get_info(self, video_id):          metadata = self._download_json(              'http://e.omroep.nl/metadata/aflevering/%s' % video_id,              video_id, @@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):              'duration': parse_duration(metadata.get('tijdsduur')),              'formats': formats,          } + + +class TegenlichtVproIE(NPOIE): +    IE_NAME = 'tegenlicht.vpro.nl' +    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' + +    _TESTS = [ +        { +            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html', +            'md5': 'f8065e4e5a7824068ed3c7e783178f2c', +            'info_dict': { +                'id': 'VPWON_1169289', +                'ext': 'm4v', +                'title': 'Tegenlicht', +                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', +                'upload_date': '20130225', +            }, +        }, +    ] + +    def _real_extract(self, url): +        name = url_basename(url) +        webpage = self._download_webpage(url, name) +        urn = self._html_search_meta('mediaurn', webpage) +        info_page = self._download_json( +            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) +        return self._get_info(info_page['mid']) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 34058fd4b..214990e7a 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):          'md5': '3150cf278965eeabb5b4cea1c963fe0a',          'info_dict': {              'id': '320403011771', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Dingo Conservation',              'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',              'thumbnail': 're:http://.*\.jpg', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index b6b2dba9c..0be793b1c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -5,6 +5,7 @@ import json  from .common import InfoExtractor  from ..utils import ( +    compat_str,      ExtractorError,      xpath_with_ns,  ) @@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):          body = meta.find(_x('smil:body'))          f4m_node = body.find(_x('smil:seq//smil:video')) -        if f4m_node is not None: +        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:              f4m_url = f4m_node.attrib['src']              if 'manifest.f4m?' not in f4m_url:                  f4m_url += '?'              # the parameters are from syfy.com, other sites may use others,              # they also work for nbc.com              f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' -            formats = [{ -                'ext': 'flv', -                'url': f4m_url, -            }] +            formats = self._extract_f4m_formats(f4m_url, video_id)          else: -            base_url = head.find(_x('smil:meta')).attrib['base'] -            switch = body.find(_x('smil:switch'))              formats = [] -            for f in switch.findall(_x('smil:video')): -                attr = f.attrib -                width = int(attr['width']) -                height = int(attr['height']) -                vbr = int(attr['system-bitrate']) // 1000 -                format_id = '%dx%d_%dk' % (width, height, vbr) -                formats.append({ -                    'format_id': format_id, -                    'url': base_url, -                    'play_path': 'mp4:' + attr['src'], -                    'ext': 'flv', -                    'width': width, -                    'height': height, -                    'vbr': vbr, -                }) +            switch = body.find(_x('smil:switch')) +            if switch is not None: +                base_url = head.find(_x('smil:meta')).attrib['base'] +                for f in switch.findall(_x('smil:video')): +                    attr = f.attrib +                    width = int(attr['width']) +                    height = int(attr['height']) +                    vbr = int(attr['system-bitrate']) // 1000 +                    format_id = '%dx%d_%dk' % (width, height, vbr) +                    formats.append({ +                        'format_id': format_id, +                        'url': base_url, +                        'play_path': 'mp4:' + attr['src'], +                        'ext': 'flv', +                        'width': width, +                        'height': height, +                        'vbr': vbr, +                    }) +            else: +                switch = body.find(_x('smil:seq//smil:switch')) +                for f in switch.findall(_x('smil:video')): +                    attr = f.attrib +                    vbr = int(attr['system-bitrate']) // 1000 +                    formats.append({ +                        'format_id': compat_str(vbr), +                        'url': attr['src'], +                        'vbr': vbr, +                    })              self._sort_formats(formats)          return { | 
