aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2015-01-04 02:05:26 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2015-01-04 02:05:26 +0100
commitb68ff259170711574a1fd779e86bdfea9aaafcf5 (patch)
treea30f3511341e3e2d62f392fa7793597b97d6c4e5
parent19b05d886ef0bd23f4a4c6ee90f171c48cb55ace (diff)
Add various anime sites (Closes #4554)
-rw-r--r--AUTHORS1
-rw-r--r--youtube_dl/extractor/__init__.py19
-rw-r--r--youtube_dl/extractor/gogoanime.py76
-rw-r--r--youtube_dl/extractor/play44.py149
-rw-r--r--youtube_dl/extractor/soulanime.py74
-rw-r--r--youtube_dl/extractor/videofun.py36
6 files changed, 355 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
index 37306ac83..9b548cf25 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -98,3 +98,4 @@ Will Glynn
Max Reimann
Cédric Luthi
Thijs Vermeir
+Joel Leclerc
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 9ccd1b32e..c3dc09f75 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -164,6 +164,10 @@ from .globo import GloboIE
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
+from .gogoanime import (
+ GoGoAnimeIE,
+ GoGoAnimeSearchIE
+)
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
@@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
from .played import PlayedIE
+from .play44 import (
+ Play44IE,
+ ByZooIE,
+ Video44IE,
+ VideoWingIE,
+ PlayPandaIE,
+ VideoZooIE,
+ PlayBBIE,
+ EasyVideoIE
+)
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
@@ -373,6 +387,10 @@ from .smotri import (
from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE
+from .soulanime import (
+ SoulAnimeWatchingIE,
+ SoulAnimeSeriesIE
+)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@@ -467,6 +485,7 @@ from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
+from .videofun import VideoFunIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE
diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py
new file mode 100644
index 000000000..d4f4ecc58
--- /dev/null
+++ b/youtube_dl/extractor/gogoanime.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_parse,
+ get_element_by_attribute,
+ unescapeHTML
+)
+
+
+class GoGoAnimeIE(InfoExtractor):
+    """Extractor for gogoanime.com post pages.
+
+    A post embeds third-party players in iframes. The iframe URLs are handed
+    back as URL results so that other extractors can resolve them.
+    """
+    IE_NAME = 'gogoanime'
+    IE_DESC = 'GoGoAnime'
+    # Dots are escaped so that they only match the literal host name.
+    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
+
+    _TEST = {
+        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-movie-1'
+        },
+        'playlist_count': 3
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist or a single URL result for the post at *url*.
+
+        Raises ExtractorError when the site reports a missing page, when the
+        post content container cannot be found, or when a single-video post
+        contains no usable iframe.
+        """
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+
+        if 'Oops! Page Not Found</font>' in page:
+            raise ExtractorError('Video does not exist', expected=True)
+
+        content = get_element_by_attribute("class", "postcontent", page)
+        if content is None:
+            # Without this guard re.findall() would fail with a TypeError.
+            raise ExtractorError('Unable to find post content')
+
+        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
+        # videofun embeds are skipped; the check runs on the raw attribute
+        # value, before unquoting.
+        vids = [
+            unescapeHTML(compat_urllib_parse.unquote(x))
+            for x in vids if 'videofun' not in x]
+
+        # An iframe directly followed by <br /> at the start of the post
+        # content is treated as a multi-part post.
+        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
+            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
+
+        title = self._html_search_regex(
+            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
+
+        if not vids:
+            # Previously this surfaced as an IndexError on vids[0].
+            raise ExtractorError('No supported video embed found')
+
+        return {
+            '_type': 'url',
+            'id': video_id,
+            'url': vids[0],
+            'title': title,
+        }
+
+
+class GoGoAnimeSearchIE(InfoExtractor):
+    """Extractor turning a gogoanime.com search result page into a playlist."""
+    IE_NAME = 'gogoanime:search'
+    IE_DESC = 'GoGoAnime Search'
+
+    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
+    _TEST = {
+        'url': 'http://www.gogoanime.com/?s=bokusatsu',
+        'info_dict': {
+            'id': 'bokusatsu'
+        },
+        'playlist_count': 6
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist with one URL result per post link on the page."""
+        # The search term doubles as the playlist id.
+        query = self._match_id(url)
+        results_page = self._download_webpage(url, query)
+
+        entries = []
+        post_links = re.findall(
+            r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
+            results_page)
+        for post_link in post_links:
+            entries.append(self.url_result(post_link))
+
+        return self.playlist_result(entries, query)
diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py
new file mode 100644
index 000000000..b8696e516
--- /dev/null
+++ b/youtube_dl/extractor/play44.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse
+)
+
+
+class Play44IE(InfoExtractor):
+    """Extractor for play44.net embed pages.
+
+    Other embed-host extractors in this module subclass it and only override
+    ``_VALID_URL`` and ``_TESTS``.
+    """
+    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-07',
+            'ext': 'flv',
+            'title': 'mahou-shoujo-madoka-magica-07',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the media URL found in the embed page's ``_url = "..."``.
+
+        The file name of that URL, without its extension, is used as both
+        id and title; the id matched from *url* is only used while
+        downloading the page.
+        """
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'_url = "(https?://[^"]+?)";', page, 'url'))
+        # The name ends at a literal dot or at the end of the URL. The
+        # previous unescaped '.' matched any character and could eat the
+        # last character of an extension-less name.
+        title = self._search_regex(
+            r'.*/(?P<title>[^.]*)(?:\.|$)', video_url, 'title')
+
+        return {
+            'id': title,
+            'url': video_url,
+            'title': title,
+        }
+
+
+class ByZooIE(Play44IE):
+    """byzoo.org embed pages; extraction is inherited from Play44IE."""
+
+    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [
+        {
+            'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
+            'md5': '455c83dabe2cd9fd74a87612b01fe017',
+            'info_dict': {
+                'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+                'ext': 'mp4',
+                'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+            },
+        },
+    ]
+
+
+class Video44IE(Play44IE):
+    """video44.net player pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole ``file`` value up to the extension or the next
+    # parameter. The previous ``[^&].`` captured only its first two
+    # characters.
+    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&.]+)'
+
+    _TESTS = [{
+        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
+        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
+        'info_dict': {
+            'id': 'chaoshead-12',
+            'ext': 'mp4',
+            'title': 'chaoshead-12',
+        }
+    }]
+
+
+class VideoWingIE(Play44IE):
+    """videowing embed pages; extraction is inherited from Play44IE."""
+
+    # Verbose-mode pattern: the id follows either a ``video=`` parameter or
+    # an ``embed/`` path segment.
+    _VALID_URL = r'''(?x)
+        http://[w.]*videowing\.[^/]*/
+        (?:
+            .*video=/*
+            |embed/
+        )
+        (?P<id>[^&?.]+)
+    '''
+
+    _TESTS = [
+        {
+            'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+            'md5': '4ed320e353ed26c742c4f12a9c210b60',
+            'info_dict': {
+                'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+                'ext': 'mp4',
+                'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            },
+        },
+        {
+            'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
+            'md5': '33fdd71581357018c226f95c5cedcfd7',
+            'info_dict': {
+                'id': 'mahoushoujomadokamagicamovie1part1',
+                'ext': 'flv',
+                'title': 'mahoushoujomadokamagicamovie1part1',
+            },
+        },
+    ]
+
+
+class PlayPandaIE(Play44IE):
+    """playpanda embed pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole ``vid`` value (leading slashes dropped) up to the
+    # extension or the next parameter. The previous ``[^&].`` captured only
+    # its first two characters.
+    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
+
+    # No 'description' is expected here: the inherited _real_extract only
+    # returns id, url and title.
+    _TESTS = [{
+        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class VideoZooIE(Play44IE):
+    """videozoo embed pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole ``vid`` value (leading slashes dropped) up to the
+    # extension or the next parameter. The previous ``[^&].`` captured only
+    # its first two characters.
+    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
+
+    _TESTS = [{
+        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class PlayBBIE(Play44IE):
+    """playbb embed pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole ``vid`` value (leading slashes dropped) up to the
+    # extension or the next parameter. The previous ``[^&].`` captured only
+    # its first two characters.
+    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
+
+    _TESTS = [{
+        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class EasyVideoIE(Play44IE):
+    """easyvideo player pages; extraction is inherited from Play44IE."""
+
+    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
+            'md5': '26178b57629b7650106d72b191137176',
+            'info_dict': {
+                'id': 'bokuwatomodachigasukunai-04',
+                'ext': 'mp4',
+                'title': 'bokuwatomodachigasukunai-04',
+            },
+            'skip': 'Blocked in Germany',
+        },
+    ]
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
new file mode 100644
index 000000000..7adb10c03
--- /dev/null
+++ b/youtube_dl/extractor/soulanime.py
@@ -0,0 +1,74 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SoulAnimeWatchingIE(InfoExtractor):
+    """Extractor for single-episode "watching" pages on soul-anime."""
+    IE_NAME = "soulanime:watching"
+    IE_DESC = "SoulAnime video"
+    _TEST = {
+        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+        'md5': '05fae04abf72298098b528e98abf4298',
+        'info_dict': {
+            'id': 'seirei-tsukai-no-blade-dance-episode-9',
+            'ext': 'mp4',
+            'title': 'seirei-tsukai-no-blade-dance-episode-9',
+            'description': 'seirei-tsukai-no-blade-dance-episode-9'
+        }
+    }
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
+
+    def _real_extract(self, url):
+        """Return the info dict for the episode page at *url*.
+
+        The media URL is the href of the first link inside the page's
+        ``<div id="download">``, resolved against the same soul-anime
+        domain. The extension comes from the Content-Type header of a
+        request to that media URL and is None when the header is missing
+        or malformed.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        domain = mobj.group('domain')
+
+        page = self._download_webpage(url, video_id)
+
+        video_url_encoded = self._html_search_regex(
+            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
+        video_url = "http://www.soul-anime." + domain + video_url_encoded
+
+        vid = self._request_webpage(video_url, video_id)
+        # gettype() only exists on Python 2's message class. Reading the
+        # header with get() works on Python 2 and Python 3 alike.
+        content_type = vid.info().get('Content-Type') or ''
+        # Drop parameters such as "; charset=..." before splitting.
+        mime_type = content_type.split(';')[0].strip().lower()
+        ext = mime_type.split('/', 1)[1] if '/' in mime_type else None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': ext,
+            'title': video_id,
+            'description': video_id
+        }
+
+
+class SoulAnimeSeriesIE(InfoExtractor):
+    """Extractor turning a soul-anime series page into a playlist."""
+    IE_NAME = "soulanime:series"
+    IE_DESC = "SoulAnime Series"
+
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
+
+    # Episode paths are read from the <option value="/watch..."> entries.
+    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+        'info_dict': {
+            'id': 'black-rock-shooter-tv'
+        },
+        'playlist_count': 8
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist with one URL result per episode option."""
+        url_match = re.match(self._VALID_URL, url)
+        domain, series_id = url_match.group('domain', 'id')
+
+        page = self._download_webpage(url, series_id, "Downloading series page")
+
+        # Episode paths are site-relative; rebuild them on the same domain.
+        site_root = "http://www.soul-anime." + domain
+        entries = []
+        for episode_path in re.findall(self._EPISODE_REGEX, page):
+            entries.append(self.url_result(site_root + episode_path))
+
+        return self.playlist_result(entries, series_id)
diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py
new file mode 100644
index 000000000..0364b9d32
--- /dev/null
+++ b/youtube_dl/extractor/videofun.py
@@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse
+)
+
+
+class VideoFunIE(InfoExtractor):
+    """Extractor for videofun.me embed pages."""
+    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'
+
+    _TEST = {
+        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'Mahou-Shoujo-Madoka-Magica-07',
+            'ext': 'flv',
+            'title': 'Mahou-Shoujo-Madoka-Magica-07',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the gateway media URL found in the embed page.
+
+        The file name of that URL, without its extension, is used as both
+        id and title; the hex id from *url* is only used while downloading
+        the page.
+        """
+        embed_id = self._match_id(url)
+        embed_page = self._download_webpage(
+            url, embed_id, 'Downloading video page')
+
+        # The page stores the media URL percent-encoded.
+        media_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'url: "(http://gateway\.videofun\.me[^"]+)"', embed_page, 'video url'))
+        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')
+
+        info = {'id': name, 'title': name}
+        info['url'] = media_url
+        return info