diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-07-18 01:45:36 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-07-18 01:45:36 +0600 |
commit | 9441f77faaf078aca2947b7613f61f46b6d0aea2 (patch) | |
tree | 852061855dd2720e89314ff177d920e7a58434b1 | |
parent | 3cc8b4c327585ff4dbb045d17d6f5c160df6fdb8 (diff) |
[bliptv] Use xpath for extraction and fix test
-rw-r--r-- | youtube_dl/extractor/bliptv.py | 51 |
1 files changed, 26 insertions, 25 deletions
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index fb56cd78d..cd06dbcea 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -14,6 +14,8 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     unescapeHTML,
+    xpath_text,
+    xpath_with_ns,
 )
 
 
@@ -23,10 +25,10 @@ class BlipTVIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
-            'md5': 'c6934ad0b6acf2bd920720ec888eb812',
+            'md5': '80baf1ec5c3d2019037c1c707d676b9f',
             'info_dict': {
                 'id': '5779306',
-                'ext': 'mov',
+                'ext': 'm4v',
                 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
                 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
                 'timestamp': 1323138843,
@@ -128,35 +130,34 @@ class BlipTVIE(InfoExtractor):
 
         rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
 
-        def blip(s):
-            return '{http://blip.tv/dtd/blip/1.0}%s' % s
-
-        def media(s):
-            return '{http://search.yahoo.com/mrss/}%s' % s
-
-        def itunes(s):
-            return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
+        def _x(p):
+            return xpath_with_ns(p, {
+                'blip': 'http://blip.tv/dtd/blip/1.0',
+                'media': 'http://search.yahoo.com/mrss/',
+                'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+            })
 
         item = rss.find('channel/item')
 
-        video_id = item.find(blip('item_id')).text
-        title = item.find('./title').text
-        description = clean_html(compat_str(item.find(blip('puredescription')).text))
-        timestamp = parse_iso8601(item.find(blip('datestamp')).text)
-        uploader = item.find(blip('user')).text
-        uploader_id = item.find(blip('userid')).text
-        duration = int(item.find(blip('runtime')).text)
-        media_thumbnail = item.find(media('thumbnail'))
-        thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
-        categories = [category.text for category in item.findall('category')]
+        video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
+        title = xpath_text(item, 'title', 'title', fatal=True)
+        description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
+        timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
+        uploader = xpath_text(item, _x('blip:user'), 'uploader')
+        uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
+        duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
+        media_thumbnail = item.find(_x('media:thumbnail'))
+        thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
+                     else xpath_text(item, 'image', 'thumbnail'))
+        categories = [category.text for category in item.findall('category') if category is not None]
 
         formats = []
         subtitles_urls = {}
 
-        media_group = item.find(media('group'))
-        for media_content in media_group.findall(media('content')):
+        media_group = item.find(_x('media:group'))
+        for media_content in media_group.findall(_x('media:content')):
             url = media_content.get('url')
-            role = media_content.get(blip('role'))
+            role = media_content.get(_x('blip:role'))
             msg = self._download_webpage(
                 url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                 video_id, 'Resolving URL for %s' % role)
@@ -175,8 +176,8 @@ class BlipTVIE(InfoExtractor):
                     'url': real_url,
                     'format_id': role,
                     'format_note': media_type,
-                    'vcodec': media_content.get(blip('vcodec')) or 'none',
-                    'acodec': media_content.get(blip('acodec')),
+                    'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
+                    'acodec': media_content.get(_x('blip:acodec')),
                     'filesize': media_content.get('filesize'),
                     'width': int_or_none(media_content.get('width')),
                     'height': int_or_none(media_content.get('height')),