diff options
Diffstat (limited to 'youtube_dl/extractor/appletrailers.py')
-rw-r--r-- | youtube_dl/extractor/appletrailers.py | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 4359b88d1..7cd0482c7 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,8 +4,8 @@ import re import json from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( - compat_urlparse, int_or_none, ) @@ -70,15 +70,17 @@ class AppleTrailersIE(InfoExtractor): uploader_id = mobj.group('company') playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + def fix_html(s): s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s) # The ' in the onClick attributes are not escaped, it couldn't be parsed # like: http://trailers.apple.com/trailers/wb/gravity/ + def _clean_json(m): return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') s = re.sub(self._JSON_RE, _clean_json, s) - s = '<html>' + s + u'</html>' + s = '<html>%s</html>' % s return s doc = self._download_xml(playlist_url, movie, transform_source=fix_html) @@ -86,7 +88,7 @@ class AppleTrailersIE(InfoExtractor): for li in doc.findall('./div/ul/li'): on_click = li.find('.//a').attrib['onClick'] trailer_info_json = self._search_regex(self._JSON_RE, - on_click, 'trailer info') + on_click, 'trailer info') trailer_info = json.loads(trailer_info_json) title = trailer_info['title'] video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() |