diff options
author | David Triendl <david@triendl.name> | 2014-03-13 14:01:20 +0100 |
---|---|---|
committer | David Triendl <david@triendl.name> | 2014-03-13 14:08:34 +0100 |
commit | c21215b421e32ab3cc9e4672e98f9e50bc6714e1 (patch) | |
tree | 7ff1c753851bbdd515d17d9b49985dadef1c6de3 | |
parent | 98ff9d82d40fef70c2a2bdaecc2a66c9b7506cc8 (diff) |
[br] Allow '/' in URL, allow empty author + broadcastDate fields
* Allow URLs that have a 'subdirectory' before the actual program name, e.g.
'xyz/xyz-episode-1'.
* The author and broadcastDate fields in the XML file may be empty.
* Add test case for the two problems above.
-rw-r--r-- | youtube_dl/extractor/br.py | 63 |
1 file changed, 41 insertions, 22 deletions
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 5fcc1084a..7cc159e20 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -9,21 +9,35 @@ from ..utils import ExtractorError class BRIE(InfoExtractor): IE_DESC = "Bayerischer Rundfunk Mediathek" - _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$" + _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" _BASE_URL = "http://www.br.de" - _TEST = { - "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", - "md5": "c4f83cf0f023ba5875aba0bf46860df2", - "info_dict": { - "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", - "ext": "mp4", - "title": "Feiern und Verzichten", - "description": "Anselm Grün: Feiern und Verzichten", - "uploader": "BR/Birgit Baier", - "upload_date": "20140301" + _TESTS = [ + { + "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", + "md5": "c4f83cf0f023ba5875aba0bf46860df2", + "info_dict": { + "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", + "ext": "mp4", + "title": "Feiern und Verzichten", + "description": "Anselm Grün: Feiern und Verzichten", + "uploader": "BR/Birgit Baier", + "upload_date": "20140301" + } + }, + { + "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", + "md5": "ab451b09d861dbed7d7cc9ab0be19ebe", + "info_dict": { + "id": "2c060e69-3a27-4e13-b0f0-668fac17d812", + "ext": "mp4", + "title": "Über den Pass", + "description": "Die Eroberung der Alpen: Über den Pass", + "uploader": None, + "upload_date": None + } } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -33,16 +47,21 @@ class BRIE(InfoExtractor): r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") xml = self._download_xml(self._BASE_URL + xml_url, 
None) - videos = [{ - "id": xml_video.get("externalId"), - "title": xml_video.find("title").text, - "formats": self._extract_formats(xml_video.find("assets")), - "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), - "description": " ".join(xml_video.find("shareTitle").text.splitlines()), - "uploader": xml_video.find("author").text, - "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))), - "webpage_url": xml_video.find("permalink").text, - } for xml_video in xml.findall("video")] + videos = [] + for xml_video in xml.findall("video"): + video = { + "id": xml_video.get("externalId"), + "title": xml_video.find("title").text, + "formats": self._extract_formats(xml_video.find("assets")), + "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), + "description": " ".join(xml_video.find("shareTitle").text.splitlines()), + "webpage_url": xml_video.find("permalink").text + } + if xml_video.find("author").text: + video["uploader"] = xml_video.find("author").text + if xml_video.find("broadcastDate").text: + video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split("."))) + videos.append(video) if len(videos) > 1: self._downloader.report_warning( |