diff options
| -rw-r--r-- | youtube_dl/extractor/br.py | 146 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | 
2 files changed, 147 insertions, 4 deletions
# Reconstructed from a collapsed diff of youtube_dl/extractor/br.py.
# The same patch also (a) renames BRIE.IE_DESC to 'Bayerischer Rundfunk',
# (b) relaxes BRIE's asset type checks to startswith('HDS') / startswith('HLS'),
# (c) imports json, determine_ext and parse_iso8601 in br.py, and
# (d) registers BRMediathekIE next to BRIE in youtube_dl/extractor/extractors.py.
class BRMediathekIE(InfoExtractor):
    """Extractor for single video pages under br.de/mediathek/video/.

    The clip id is taken from the page URL and all metadata (title,
    description, video files, caption files and teaser images) is requested
    from a GraphQL endpoint with a single query.
    """

    IE_DESC = 'Bayerischer Rundfunk Mediathek'
    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'

    _TESTS = [{
        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
        'md5': 'fdc3d485835966d1622587d08ba632ec',
        'info_dict': {
            'id': 'av:5a1e6a6e8fce6d001871cc8e',
            'ext': 'mp4',
            'title': 'Die Sendung vom 28.11.2017',
            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
            'timestamp': 1511942766,
            'upload_date': '20171129',
        }
    }]

    _GRAPHQL_URL = 'https://proxy-base.master.mango.express/graphql'

    # The single ``%s`` placeholder receives the clip id. Interpolating it
    # directly is safe because _VALID_URL only admits ``av:`` followed by
    # 24 hexadecimal characters.
    _CLIP_QUERY = """{
  viewer {
    clip(id: "%s") {
      title
      description
      duration
      createdAt
      ageRestriction
      videoFiles {
        edges {
          node {
            publicLocation
            fileSize
            videoProfile {
              width
              height
              bitrate
              encoding
            }
          }
        }
      }
      captionFiles {
        edges {
          node {
            publicLocation
          }
        }
      }
      teaserImages {
        edges {
          node {
            imageFiles {
              edges {
                node {
                  publicLocation
                  width
                  height
                }
              }
            }
          }
        }
      }
    }
  }
}"""

    @staticmethod
    def _connection_nodes(connection):
        """Yield every dict-valued ``node`` found under ``connection['edges']``.

        Anything that does not have the expected shape (a missing key, a
        JSON ``null``, a non-dict edge or node) is skipped instead of
        raising, so one malformed entry does not abort the extraction.
        """
        if not isinstance(connection, dict):
            return
        for edge in connection.get('edges') or []:
            node = edge.get('node') if isinstance(edge, dict) else None
            if isinstance(node, dict):
                yield node

    def _clip_formats(self, clip, clip_id):
        """Build the sorted format list from the clip's ``videoFiles``."""
        formats = []
        for node in self._connection_nodes(clip.get('videoFiles')):
            video_url = node.get('publicLocation')
            if not video_url:
                continue
            if determine_ext(video_url) == 'm3u8':
                # Non-fatal: a broken playlist must not hide the plain
                # HTTP downloads listed alongside it.
                formats.extend(self._extract_m3u8_formats(
                    video_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            # ``or {}`` also covers an explicit null videoProfile.
            profile = node.get('videoProfile') or {}
            tbr = int_or_none(profile.get('bitrate'))
            formats.append({
                'format_id': 'http-%d' % tbr if tbr else 'http',
                'url': video_url,
                'width': int_or_none(profile.get('width')),
                'height': int_or_none(profile.get('height')),
                'tbr': tbr,
                'filesize': int_or_none(node.get('fileSize')),
            })
        self._sort_formats(formats)
        return formats

    def _clip_subtitles(self, clip):
        """Collect the clip's ``captionFiles`` URLs into a subtitles dict."""
        subtitles = {}
        for node in self._connection_nodes(clip.get('captionFiles')):
            caption_url = node.get('publicLocation')
            if not caption_url:
                continue
            # The query does not request a caption language; every caption
            # file is filed under 'de'.
            subtitles.setdefault('de', []).append({
                'url': caption_url,
            })
        return subtitles

    def _clip_thumbnails(self, clip):
        """Flatten ``teaserImages`` -> ``imageFiles`` into a thumbnail list."""
        thumbnails = []
        for teaser in self._connection_nodes(clip.get('teaserImages')):
            for image in self._connection_nodes(teaser.get('imageFiles')):
                image_url = image.get('publicLocation')
                if not image_url:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })
        return thumbnails

    def _real_extract(self, url):
        """Return the info dict for the clip referenced by ``url``.

        Raises ExtractorError when the response carries no clip object for
        the requested id. A clip without a ``title`` key still raises
        KeyError, because the title is treated as mandatory.
        """
        clip_id = self._match_id(url)

        response = self._download_json(
            self._GRAPHQL_URL, clip_id,
            data=json.dumps({
                'query': self._CLIP_QUERY % clip_id,
            }).encode(),
            headers={
                'Content-Type': 'application/json',
            })

        # Walk data -> viewer -> clip step by step so that a missing or
        # null level ends in a readable error instead of a bare
        # KeyError/TypeError.
        clip = response
        for key in ('data', 'viewer', 'clip'):
            clip = clip.get(key) if isinstance(clip, dict) else None
        if not isinstance(clip, dict):
            raise ExtractorError('Unable to find clip %s' % clip_id)

        title = clip['title']

        return {
            'id': clip_id,
            'title': title,
            'description': clip.get('description'),
            'duration': int_or_none(clip.get('duration')),
            'timestamp': parse_iso8601(clip.get('createdAt')),
            'age_limit': int_or_none(clip.get('ageRestriction')),
            'formats': self._clip_formats(clip, clip_id),
            'subtitles': self._clip_subtitles(clip),
            'thumbnails': self._clip_thumbnails(clip),
        }
