diff options
Diffstat (limited to 'youtube_dl/extractor/appletrailers.py')
| -rw-r--r-- | youtube_dl/extractor/appletrailers.py | 86 | 
1 files changed, 84 insertions, 2 deletions
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index f68dc3236..62ed0c918 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -11,6 +11,7 @@ from ..utils import (  class AppleTrailersIE(InfoExtractor): +    IE_NAME = 'appletrailers'      _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'      _TESTS = [{          'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', @@ -64,6 +65,12 @@ class AppleTrailersIE(InfoExtractor):              },          ]      }, { +        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', +        'info_dict': { +            'id': 'blackthorn', +        }, +        'playlist_mincount': 2, +    }, {          'url': 'http://trailers.apple.com/ca/metropole/autrui/',          'only_matching': True,      }] @@ -79,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):          def fix_html(s):              s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) -            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s) +            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)              # The ' in the onClick attributes are not escaped, it couldn't be parsed              # like: http://trailers.apple.com/trailers/wb/gravity/ @@ -96,6 +103,9 @@ class AppleTrailersIE(InfoExtractor):              trailer_info_json = self._search_regex(self._JSON_RE,                                                     on_click, 'trailer info')              trailer_info = json.loads(trailer_info_json) +            first_url = trailer_info.get('url') +            if not first_url: +                continue              title = trailer_info['title']              video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()              thumbnail = li.find('.//img').attrib['src'] @@ -107,7 +117,6 @@ class AppleTrailersIE(InfoExtractor):              if m:                  duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) -            first_url = trailer_info['url']              trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()              settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)              settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') @@ -144,3 +153,76 @@ class AppleTrailersIE(InfoExtractor):              'id': movie,              'entries': playlist,          } + + +class AppleTrailersSectionIE(InfoExtractor): +    IE_NAME = 'appletrailers:section' +    _SECTIONS = { +        'justadded': { +            'feed_path': 'just_added', +            'title': 'Just Added', +        }, +        'exclusive': { +            'feed_path': 'exclusive', +            'title': 'Exclusive', +        }, +        'justhd': { +            'feed_path': 'just_hd', +            'title': 'Just HD', +        }, +        'mostpopular': { +            'feed_path': 'most_pop', +            'title': 'Most Popular', +        }, +        'moviestudios': { +            'feed_path': 'studios', +            'title': 'Movie Studios', +        }, +    } +    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS) +    _TESTS = [{ +        'url': 'http://trailers.apple.com/#section=justadded', +        'info_dict': { +            'title': 'Just Added', +            'id': 'justadded', +        }, +        'playlist_mincount': 80, +    }, { +        'url': 'http://trailers.apple.com/#section=exclusive', +        'info_dict': { +            'title': 'Exclusive', +            'id': 'exclusive', +        }, +        'playlist_mincount': 80, +    }, { +        'url': 'http://trailers.apple.com/#section=justhd', +        'info_dict': { +            'title': 'Just HD', +            'id': 'justhd', +        }, +        'playlist_mincount': 80, +    }, { +        'url': 'http://trailers.apple.com/#section=mostpopular', +        'info_dict': { +            'title': 'Most Popular', +            'id': 'mostpopular', +        }, +        'playlist_mincount': 80, +    }, { +        'url': 'http://trailers.apple.com/#section=moviestudios', +        'info_dict': { +            'title': 'Movie Studios', +            'id': 'moviestudios', +        }, +        'playlist_mincount': 80, +    }] + +    def _real_extract(self, url): +        section = self._match_id(url) +        section_data = self._download_json( +            'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], +            section) +        entries = [ +            self.url_result('http://trailers.apple.com' + e['location']) +            for e in section_data] +        return self.playlist_result(entries, section, self._SECTIONS[section]['title'])  | 
