diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-09-12 20:44:31 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-09-12 20:44:31 +0600 | 
| commit | 3ebc121293bde30ffafed6731ee8507911b9f952 (patch) | |
| tree | df36153669c4625dbebea947ae1a22c48aa571b5 | |
| parent | 60ed60353b9ca57e8181f0b14d525ce487e673ed (diff) | |
| parent | 376e1ad081abbeb234419c08be121fe81eb9b51d (diff) | |
Merge branch 'ndr' of https://github.com/remitamine/youtube-dl into remitamine-ndr
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/ndr.py | 165 | 
2 files changed, 111 insertions, 56 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2e7272931..44ab7ce3c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -367,6 +367,8 @@ from .nbc import (  from .ndr import (      NDRIE,      NJoyIE, +    NDREmbedIE, +    NJoyEmbedIE,  )  from .ndtv import NDTVIE  from .netzkino import NetzkinoIE diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 79a13958b..87f3edbbe 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -1,77 +1,92 @@  # encoding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      ExtractorError,      int_or_none,      qualities, -    parse_duration,  ) -class NDRBaseIE(InfoExtractor): -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +preference = qualities(['xs', 's', 'm','l', 'xl']) -        page = self._download_webpage(url, video_id, 'Downloading page') -        title = self._og_search_title(page).strip() -        description = self._og_search_description(page) -        if description: -            description = description.strip() - -        duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None)) -        if not duration: -            duration = parse_duration(self._html_search_regex( -                r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)', -                page, 'duration', default=None)) +class NDRBaseIE(InfoExtractor): +    def extract_video_info(self, playlist, video_id):          formats = [] - -        mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page) -        if mp3_url: -            formats.append({ -                'url': mp3_url.group('audio'), -                'format_id': 'mp3', -            }) - -        thumbnail = None - -        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page) -        if video_url: -            thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page) -            if thumbnails: -                quality_key = qualities(['xs', 's', 'm', 'l', 'xl']) -                largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1])) -                thumbnail = 'http://www.ndr.de' + largest[0] - -            for format_id in 'lo', 'hi', 'hq': -                formats.append({ -                    'url': '%s.%s.mp4' % (video_url.group('video'), format_id), -                    'format_id': format_id, -                }) - -        if not formats: +        streamType = playlist.get('config').get('streamType') +        if streamType == 'httpVideo': +            for key, f in playlist.items(): +                if key != 'config': +                    src = f['src'] +                    if '.f4m' in src: +                        formats.extend(self._extract_f4m_formats(src, video_id)) +                    elif '.m3u8' in src: +                        formats.extend(self._extract_m3u8_formats(src, video_id, fatal=False)) +                    else: +                        quality = f.get('quality') +                        formats.append({ +                            'url': src, +                            'format_id': quality, +                            'preference': preference(quality), +                        }) +        elif streamType == 'httpAudio': +            for key, f in playlist.items(): +                if key != 'config': +                    formats.append({ +                        'url': f['src'], +                        'format_id': 'mp3', +                        'vcodec': 'none', +                    }) +        else:              raise ExtractorError('No media links available for %s' % video_id) +        self._sort_formats(formats) + +        config = playlist.get('config') + +        title = config['title'] +        duration = int_or_none(config.get('duration')) +        thumbnails = [{ +            'id': thumbnail.get('quality'), +            'url': thumbnail.get('src'), +            'preference': preference(thumbnail.get('quality')) +        } for thumbnail in config.get('poster').values()] +          return {              'id': video_id,              'title': title, -            'description': description, -            'thumbnail': thumbnail, +            'thumbnails': thumbnails,              'duration': duration,              'formats': formats,          } +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        json_data = self._download_json('http://www.ndr.de/%s-ppjson.json' % video_id, video_id, fatal=False) + +        if not json_data: +            webpage = self._download_webpage(url, video_id) +            embed_url = self._html_search_regex(r'<iframe[^>]+id="pp_\w+"[^>]+src="(/.*)"', webpage, 'embed url', None, False) +            if not embed_url: +                embed_url = self._html_search_meta('embedURL', webpage, fatal=False) +            if embed_url: +                if embed_url.startswith('/'): +                    return self.url_result('http://www.ndr.de%s' % embed_url, 'NDREmbed') +                else: +                    return self.url_result(embed_url, 'NDREmbed') +            raise ExtractorError('No media links available for %s' % video_id) + +        return self.extract_video_info(json_data['playlist'], video_id) +  class NDRIE(NDRBaseIE):      IE_NAME = 'ndr'      IE_DESC = 'NDR.de - Mediathek' -    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html' +    _VALID_URL = r'https?://www\.ndr\.de/.+?,(?P<id>\w+)\.html'      _TESTS = [          { @@ -79,10 +94,9 @@ class NDRIE(NDRBaseIE):              'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',              'note': 'Video file',              'info_dict': { -                'id': '25866', +                'id': 'nordmagazin25866',                  'ext': 'mp4',                  'title': 'Kartoffeltage in der Lewitz', -                'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',                  'duration': 166,              },              'skip': '404 Not found', @@ -91,22 +105,20 @@ class NDRIE(NDRBaseIE):              'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',              'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',              'info_dict': { -                'id': '988', +                'id': 'hafengeburtstag988',                  'ext': 'mp4',                  'title': 'Party, Pötte und Parade', -                'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',                  'duration': 3498,              },          },          { -            'url': 'http://www.ndr.de/info/audio51535.html', +            'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',              'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',              'note': 'Audio file',              'info_dict': { -                'id': '51535', +                'id': 'audio51535',                  'ext': 'mp3',                  'title': 'La Valette entgeht der Hinrichtung', -                'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',                  'duration': 884,              }          } @@ -115,16 +127,57 @@ class NDRIE(NDRBaseIE):  class NJoyIE(NDRBaseIE):      IE_NAME = 'N-JOY' -    _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html' +    _VALID_URL = r'https?://www\.n-joy\.de/.+?,(?P<id>\w+)\.html'      _TEST = {          'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',          'md5': 'cb63be60cd6f9dd75218803146d8dc67',          'info_dict': { -            'id': '2480', +            'id': 'comedycontest2480',              'ext': 'mp4',              'title': 'Benaissa beim NDR Comedy Contest', -            'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',              'duration': 654,          }      } + + +class NDREmbedBaseIE(NDRBaseIE): + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        json_data = self._download_json('http://www.ndr.de/%s-ppjson.json' % video_id, video_id, fatal=False) +        if not json_data: +            raise ExtractorError('No media links available for %s' % video_id) +        return self.extract_video_info(json_data['playlist'], video_id) + + +class NDREmbedIE(NDREmbedBaseIE): +    IE_NAME = 'ndr:embed' +    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[a-z0-9]+)-(?:player|externalPlayer)\.html' + +    _TEST = { +        'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', +        'md5': 'cb63be60cd6f9dd75218803146d8dc67', +        'info_dict': { +            'id': 'ndraktuell28488', +            'ext': 'mp4', +            'title': 'Norddeutschland begrüßt Flüchtlinge', +            'duration': 132, +        } +    } + + +class NJoyEmbedIE(NDREmbedBaseIE): +    IE_NAME = 'N-JOY:embed' +    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[a-z0-9]+)-(?:player|externalPlayer)\.html' + +    _TEST = { +        'url': 'http://www.n-joy.de/entertainment/film/portraet374-player_image-832d9b79-fa8a-4026-92e2-e0fd99deb2f9_theme-n-joy.html', +        'md5': 'cb63be60cd6f9dd75218803146d8dc67', +        'info_dict': { +            'id': 'portraet374', +            'ext': 'mp4', +            'title': 'Viviane Andereggen - "Schuld um Schuld"', +            'duration': 129, +        } +    }  | 
