diff options
Diffstat (limited to 'youtube_dl/extractor/br.py')
| -rw-r--r-- | youtube_dl/extractor/br.py | 123 | 
1 files changed, 60 insertions, 63 deletions
| diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 41e16f6cb..2256f47bb 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -1,82 +1,79 @@  # coding: utf-8 +from __future__ import unicode_literals + +import re  from .common import InfoExtractor -class BRIE(InfoExtractor): -    IE_DESC = u"Bayerischer Rundfunk Mediathek" -    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-]+\.html)$" -    _BASE_URL = u"http://www.br.de" +class BRIE(InfoExtractor): +    IE_DESC = "Bayerischer Rundfunk Mediathek" +    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$" +    _BASE_URL = "http://www.br.de" -    _TESTS = [ -        { -            u"url": u"http://www.br.de/mediathek/video/anselm-gruen-114.html", -            u"file": u"2c8d81c5-6fb7-4a74-88d4-e768e5856532.mp4", -            u"md5": u"c4f83cf0f023ba5875aba0bf46860df2", -            u"info_dict": { -                u"title": u"Feiern und Verzichten", -                u"description": u"Anselm Grün: Feiern und Verzichten", -                u"uploader": u"BR/Birgit Baier", -                u"upload_date": u"20140301" -            } +    _TEST = { +        "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", +        "md5": "c4f83cf0f023ba5875aba0bf46860df2", +        "info_dict": { +            "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", +            "ext": "mp4", +            "title": "Feiern und Verzichten", +            "description": "Anselm Grün: Feiern und Verzichten", +            "uploader": "BR/Birgit Baier", +            "upload_date": "20140301"          } -    ] +    }      def _real_extract(self, url): -        page = self._download_webpage(url, None) -        xml_url = self._search_regex(r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") +        mobj = re.match(self._VALID_URL, url) +        display_id = mobj.group('id') +        page = self._download_webpage(url, display_id) +        xml_url = self._search_regex( +            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")          xml = self._download_xml(self._BASE_URL + xml_url, None) -        videos = [] -        for xml_video in xml.findall("video"): -            video = {} -            video["id"] = xml_video.get("externalId") -            video["title"] = xml_video.find("title").text -            video["formats"] = self._extract_formats(xml_video.find("assets")) -            video["thumbnails"] = self._extract_thumbnails(xml_video.find("teaserImage/variants")) -            video["thumbnail"] = video["thumbnails"][0]["url"] -            video["description"] = " ".join(xml_video.find("shareTitle").text.splitlines()) -            video["uploader"] = xml_video.find("author").text -            video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split("."))) -            video["webpage_url"] = xml_video.find("permalink").text -            videos.append(video) +        videos = [{ +            "id": xml_video.get("externalId"), +            "title": xml_video.find("title").text, +            "formats": self._extract_formats(xml_video.find("assets")), +            "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), +            "description": " ".join(xml_video.find("shareTitle").text.splitlines()), +            "uploader": xml_video.find("author").text, +            "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))), +            "webpage_url": xml_video.find("permalink").text, +        } for xml_video in xml.findall("video")]          if len(videos) > 1: -            self._downloader.report_warning(u'found multiple videos; please' -                u'report this with the video URL to http://yt-dl.org/bug') +            self._downloader.report_warning( +                'found multiple videos; please ' +                'report this with the video URL to http://yt-dl.org/bug') +        if not videos: +            raise ExtractorError('No video entries found')          return videos[0]      def _extract_formats(self, assets): -        vformats = [] -        for asset in assets.findall("asset"): -            if asset.find("downloadUrl") is None: -                continue -            vformat = {} -            vformat["url"] = asset.find("downloadUrl").text -            vformat["ext"] = asset.find("mediaType").text -            vformat["format_id"] = asset.get("type") -            vformat["width"] = int(asset.find("frameWidth").text) -            vformat["height"] = int(asset.find("frameHeight").text) -            vformat["resolution"] = "%ix%i" % (vformat["width"], vformat["height"]) -            vformat["tbr"] = int(asset.find("bitrateVideo").text) -            vformat["abr"] = int(asset.find("bitrateAudio").text) -            vformat["vcodec"] = asset.find("codecVideo").text -            vformat["container"] = vformat["ext"] -            vformat["filesize"] = int(asset.find("size").text) -            vformat["preference"] = vformat["quality"] = -1 -            vformat["format"] = "%s container with %i Kbps %s" % (vformat["container"], vformat["tbr"], vformat["vcodec"]) -            vformats.append(vformat) -        self._sort_formats(vformats) -        return vformats +        formats = [{ +            "url": asset.find("downloadUrl").text, +            "ext": asset.find("mediaType").text, +            "format_id": asset.get("type"), +            "width": int(asset.find("frameWidth").text), +            "height": int(asset.find("frameHeight").text), +            "tbr": int(asset.find("bitrateVideo").text), +            "abr": int(asset.find("bitrateAudio").text), +            "vcodec": asset.find("codecVideo").text, +            "container": asset.find("mediaType").text, +            "filesize": int(asset.find("size").text), +        } for asset in assets.findall("asset") +            if asset.find("downloadUrl") is not None] + +        self._sort_formats(formats) +        return formats      def _extract_thumbnails(self, variants): -        thumbnails = [] -        for variant in variants.findall("variant"): -            thumbnail = {} -            thumbnail["url"] = self._BASE_URL + variant.find("url").text -            thumbnail["width"] = int(variant.find("width").text) -            thumbnail["height"] = int(variant.find("height").text) -            thumbnail["resolution"] = "%ix%i" % (thumbnail["width"], thumbnail["height"]) -            thumbnails.append(thumbnail) -        thumbnails.sort(key = lambda x: x["width"] * x["height"], reverse=True) +        thumbnails = [{ +            "url": self._BASE_URL + variant.find("url").text, +            "width": int(variant.find("width").text), +            "height": int(variant.find("height").text), +        } for variant in variants.findall("variant")] +        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)          return thumbnails | 
