aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/gogoanime.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/gogoanime.py')
-rw-r--r--youtube_dl/extractor/gogoanime.py76
1 files changed, 76 insertions, 0 deletions
diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py
new file mode 100644
index 000000000..d4f4ecc58
--- /dev/null
+++ b/youtube_dl/extractor/gogoanime.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_parse,
+ get_element_by_attribute,
+ unescapeHTML
+)
+
+
+class GoGoAnimeIE(InfoExtractor):
+ IE_NAME = 'gogoanime'
+ IE_DESC = 'GoGoAnime'
+ _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)'
+
+ _TEST = {
+ 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
+ 'info_dict': {
+ 'id': 'mahou-shoujo-madoka-magica-movie-1'
+ },
+ 'playlist_count': 3
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ page = self._download_webpage(url, video_id)
+
+ if 'Oops! Page Not Found</font>' in page:
+ raise ExtractorError('Video does not exist', expected=True)
+
+ content = get_element_by_attribute("class", "postcontent", page)
+ vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
+ vids = [
+ unescapeHTML(compat_urllib_parse.unquote(x))
+ for x in vids if not re.search(r".*videofun.*", x)]
+
+ if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
+ return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
+
+ title = self._html_search_regex(
+ r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
+
+ return {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': vids[0],
+ 'title': title,
+ }
+
+
+class GoGoAnimeSearchIE(InfoExtractor):
+ IE_NAME = 'gogoanime:search'
+ IE_DESC = 'GoGoAnime Search'
+
+ _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
+ _TEST = {
+ 'url': 'http://www.gogoanime.com/?s=bokusatsu',
+ 'info_dict': {
+ 'id': 'bokusatsu'
+ },
+ 'playlist_count': 6
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ posts = re.findall(
+ r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
+ webpage)
+
+ return self.playlist_result(
+ [self.url_result(p) for p in posts], playlist_id)