diff options
Diffstat (limited to 'youtube_dl/extractor/gogoanime.py')
-rw-r--r-- | youtube_dl/extractor/gogoanime.py | 76 |
1 files changed, 0 insertions, 76 deletions
diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py deleted file mode 100644 index d4f4ecc58..000000000 --- a/youtube_dl/extractor/gogoanime.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - compat_urllib_parse, - get_element_by_attribute, - unescapeHTML -) - - -class GoGoAnimeIE(InfoExtractor): - IE_NAME = 'gogoanime' - IE_DESC = 'GoGoAnime' - _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)' - - _TEST = { - 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-movie-1' - }, - 'playlist_count': 3 - } - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - if 'Oops! Page Not Found</font>' in page: - raise ExtractorError('Video does not exist', expected=True) - - content = get_element_by_attribute("class", "postcontent", page) - vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content) - vids = [ - unescapeHTML(compat_urllib_parse.unquote(x)) - for x in vids if not re.search(r".*videofun.*", x)] - - if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page): - return self.playlist_result([self.url_result(vid) for vid in vids], video_id) - - title = self._html_search_regex( - r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title') - - return { - '_type': 'url', - 'id': video_id, - 'url': vids[0], - 'title': title, - } - - -class GoGoAnimeSearchIE(InfoExtractor): - IE_NAME = 'gogoanime:search' - IE_DESC = 'GoGoAnime Search' - - _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)' - _TEST = { - 'url': 'http://www.gogoanime.com/?s=bokusatsu', - 'info_dict': { - 'id': 'bokusatsu' - }, - 'playlist_count': 6 - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - posts = re.findall( - r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"', - webpage) - - return self.playlist_result( - [self.url_result(p) for p in posts], playlist_id) |