diff options
Diffstat (limited to 'youtube_dl/extractor/appletrailers.py')
| -rw-r--r-- | youtube_dl/extractor/appletrailers.py | 11 | 
1 files changed, 7 insertions, 4 deletions
| diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index dc8657b67..0c01fa1a1 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -6,6 +6,7 @@ import json  from .common import InfoExtractor  from ..utils import (      compat_urlparse, +    int_or_none,  ) @@ -69,15 +70,17 @@ class AppleTrailersIE(InfoExtractor):          uploader_id = mobj.group('company')          playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') +          def fix_html(s):              s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)              s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)              # The ' in the onClick attributes are not escaped, it couldn't be parsed              # like: http://trailers.apple.com/trailers/wb/gravity/ +              def _clean_json(m):                  return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')              s = re.sub(self._JSON_RE, _clean_json, s) -            s = '<html>' + s + u'</html>' +            s = '<html>%s</html>' % s              return s          doc = self._download_xml(playlist_url, movie, transform_source=fix_html) @@ -85,7 +88,7 @@ class AppleTrailersIE(InfoExtractor):          for li in doc.findall('./div/ul/li'):              on_click = li.find('.//a').attrib['onClick']              trailer_info_json = self._search_regex(self._JSON_RE, -                on_click, 'trailer info') +                                                   on_click, 'trailer info')              trailer_info = json.loads(trailer_info_json)              title = trailer_info['title']              video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() @@ -110,8 +113,8 @@ class AppleTrailersIE(InfoExtractor):                  formats.append({                      'url': format_url,                      'format': format['type'], -                    'width': format['width'], -                    'height': 
int(format['height']), +                    'width': int_or_none(format['width']), +                    'height': int_or_none(format['height']),                  })              self._sort_formats(formats) | 
