diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2015-01-04 02:05:26 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2015-01-04 02:05:26 +0100 | 
| commit | b68ff259170711574a1fd779e86bdfea9aaafcf5 (patch) | |
| tree | a30f3511341e3e2d62f392fa7793597b97d6c4e5 | |
| parent | 19b05d886ef0bd23f4a4c6ee90f171c48cb55ace (diff) | |
Add various anime sites (Closes #4554)
| -rw-r--r-- | AUTHORS | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/gogoanime.py | 76 | ||||
| -rw-r--r-- | youtube_dl/extractor/play44.py | 149 | ||||
| -rw-r--r-- | youtube_dl/extractor/soulanime.py | 74 | ||||
| -rw-r--r-- | youtube_dl/extractor/videofun.py | 36 | 
6 files changed, 355 insertions, 0 deletions
| @@ -98,3 +98,4 @@ Will Glynn  Max Reimann  Cédric Luthi  Thijs Vermeir +Joel Leclerc diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9ccd1b32e..c3dc09f75 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -164,6 +164,10 @@ from .globo import GloboIE  from .godtube import GodTubeIE  from .goldenmoustache import GoldenMoustacheIE  from .golem import GolemIE +from .gogoanime import ( +    GoGoAnimeIE, +    GoGoAnimeSearchIE +)  from .googleplus import GooglePlusIE  from .googlesearch import GoogleSearchIE  from .gorillavid import GorillaVidIE @@ -313,6 +317,16 @@ from .phoenix import PhoenixIE  from .photobucket import PhotobucketIE  from .planetaplay import PlanetaPlayIE  from .played import PlayedIE +from .play44 import ( +    Play44IE, +    ByZooIE, +    Video44IE, +    VideoWingIE, +    PlayPandaIE, +    VideoZooIE, +    PlayBBIE, +    EasyVideoIE +)  from .playfm import PlayFMIE  from .playvid import PlayvidIE  from .podomatic import PodomaticIE @@ -373,6 +387,10 @@ from .smotri import (  from .snotr import SnotrIE  from .sockshare import SockshareIE  from .sohu import SohuIE +from .soulanime import ( +    SoulAnimeWatchingIE, +    SoulAnimeSeriesIE +)  from .soundcloud import (      SoundcloudIE,      SoundcloudSetIE, @@ -467,6 +485,7 @@ from .viddler import ViddlerIE  from .videobam import VideoBamIE  from .videodetective import VideoDetectiveIE  from .videolecturesnet import VideoLecturesNetIE +from .videofun import VideoFunIE  from .videofyme import VideofyMeIE  from .videomega import VideoMegaIE  from .videopremium import VideoPremiumIE diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py new file mode 100644 index 000000000..d4f4ecc58 --- /dev/null +++ b/youtube_dl/extractor/gogoanime.py @@ -0,0 +1,76 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    compat_urllib_parse, +    get_element_by_attribute, +    unescapeHTML +) + + +class GoGoAnimeIE(InfoExtractor): +    IE_NAME = 'gogoanime' +    IE_DESC = 'GoGoAnime' +    _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)' + +    _TEST = { +        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', +        'info_dict': { +            'id': 'mahou-shoujo-madoka-magica-movie-1' +        }, +        'playlist_count': 3 +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        page = self._download_webpage(url, video_id) + +        if 'Oops! Page Not Found</font>' in page: +            raise ExtractorError('Video does not exist', expected=True) + +        content = get_element_by_attribute("class", "postcontent", page) +        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content) +        vids = [ +            unescapeHTML(compat_urllib_parse.unquote(x)) +            for x in vids if not re.search(r".*videofun.*", x)] + +        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page): +            return self.playlist_result([self.url_result(vid) for vid in vids], video_id) + +        title = self._html_search_regex( +            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title') + +        return { +            '_type': 'url', +            'id': video_id, +            'url': vids[0], +            'title': title, +        } + + +class GoGoAnimeSearchIE(InfoExtractor): +    IE_NAME = 'gogoanime:search' +    IE_DESC = 'GoGoAnime Search' + +    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)' +    _TEST = { +        'url': 'http://www.gogoanime.com/?s=bokusatsu', +        'info_dict': { +            'id': 'bokusatsu' +        }, +        'playlist_count': 6 +    } + +    def _real_extract(self, url): +        playlist_id = self._match_id(url) +        webpage = self._download_webpage(url, playlist_id) + +        posts = re.findall( +            r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"', +            webpage) + +        return self.playlist_result( +            [self.url_result(p) for p in posts], playlist_id) diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py new file mode 100644 index 000000000..b8696e516 --- /dev/null +++ b/youtube_dl/extractor/play44.py @@ -0,0 +1,149 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse +) + + +class Play44IE(InfoExtractor): +    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)' + +    _TESTS = [{ +        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', +        'md5': 'e37e99d665f503dd2db952f7c4dba9e6', +        'info_dict': { +            'id': 'mahou-shoujo-madoka-magica-07', +            'ext': 'flv', +            'title': 'mahou-shoujo-madoka-magica-07', +        } +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        page = self._download_webpage(url, video_id) + +        video_url = compat_urllib_parse.unquote(self._html_search_regex( +            r'_url = "(https?://[^"]+?)";', page, 'url')) +        title = self._search_regex(r'.*/(?P<title>[^.]*).', video_url, 'title') + +        return { +            'id': title, +            'url': video_url, +            'title': title, +        } + + +class ByZooIE(Play44IE): +    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' + +    _TESTS = [{ +        'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', +        'md5': '455c83dabe2cd9fd74a87612b01fe017', +        'info_dict': { +            'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', +            'ext': 'mp4', +            'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', +        } +    }] + + +class Video44IE(Play44IE): +    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' + +    _TESTS = [{ +        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', +        'md5': '43eaec6d0beb10e8d42459b9f108aff3', +        'info_dict': { +            'id': 'chaoshead-12', +            'ext': 'mp4', +            'title': 'chaoshead-12', +        } +    }] + + +class VideoWingIE(Play44IE): +    _VALID_URL = r'''(?x) +        http://[w.]*videowing\.[^/]*/ +        (?: +            .*video=/* +            |embed/ +        ) +        (?P<id>[^&?.]+) +    ''' + +    _TESTS = [{ +        'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', +        'md5': '4ed320e353ed26c742c4f12a9c210b60', +        'info_dict': { +            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', +            'ext': 'mp4', +            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', +        } +    }, { +        'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', +        'md5': '33fdd71581357018c226f95c5cedcfd7', +        'info_dict': { +            'id': 'mahoushoujomadokamagicamovie1part1', +            'ext': 'flv', +            'title': 'mahoushoujomadokamagicamovie1part1', +        } +    }] + + +class PlayPandaIE(Play44IE): +    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' + +    _TESTS = [{ +        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', +        'md5': '4ed320e353ed26c742c4f12a9c210b60', +        'info_dict': { +            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', +            'ext': 'mp4', +            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', +            'description': 'boku_wa_tomodachi_ga_sukunai_-_05' +        } +    }] + + +class VideoZooIE(Play44IE): +    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' + +    _TESTS = [{ +        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', +        'md5': '4ed320e353ed26c742c4f12a9c210b60', +        'info_dict': { +            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', +            'ext': 'mp4', +            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', +        } +    }] + + +class PlayBBIE(Play44IE): +    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' + +    _TESTS = [{ +        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', +        'md5': '4ed320e353ed26c742c4f12a9c210b60', +        'info_dict': { +            'id': 'boku_wa_tomodachi_ga_sukunai_-_05', +            'ext': 'mp4', +            'title': 'boku_wa_tomodachi_ga_sukunai_-_05', +        } +    }] + + +class EasyVideoIE(Play44IE): +    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' + +    _TESTS = [{ +        'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', +        'md5': '26178b57629b7650106d72b191137176', +        'info_dict': { +            'id': 'bokuwatomodachigasukunai-04', +            'ext': 'mp4', +            'title': 'bokuwatomodachigasukunai-04', +        }, +        'skip': 'Blocked in Germany', +    }] diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..7adb10c03 --- /dev/null +++ b/youtube_dl/extractor/soulanime.py @@ -0,0 +1,74 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SoulAnimeWatchingIE(InfoExtractor): +    IE_NAME = "soulanime:watching" +    IE_DESC = "SoulAnime video" +    _TEST = { +        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', +        'md5': '05fae04abf72298098b528e98abf4298', +        'info_dict': { +            'id': 'seirei-tsukai-no-blade-dance-episode-9', +            'ext': 'mp4', +            'title': 'seirei-tsukai-no-blade-dance-episode-9', +            'description': 'seirei-tsukai-no-blade-dance-episode-9' +        } +    } +    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        domain = mobj.group('domain') + +        page = self._download_webpage(url, video_id) + +        video_url_encoded = self._html_search_regex( +            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') +        video_url = "http://www.soul-anime." + domain + video_url_encoded + +        vid = self._request_webpage(video_url, video_id) +        ext = vid.info().gettype().split("/")[1] + +        return { +            'id': video_id, +            'url': video_url, +            'ext': ext, +            'title': video_id, +            'description': video_id +        } + + +class SoulAnimeSeriesIE(InfoExtractor): +    IE_NAME = "soulanime:series" +    IE_DESC = "SoulAnime Series" + +    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' + +    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' + +    _TEST = { +        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', +        'info_dict': { +            'id': 'black-rock-shooter-tv' +        }, +        'playlist_count': 8 +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        series_id = mobj.group('id') +        domain = mobj.group('domain') + +        pattern = re.compile(self._EPISODE_REGEX) + +        page = self._download_webpage(url, series_id, "Downloading series page") +        mobj = pattern.findall(page) + +        entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj] + +        return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py new file mode 100644 index 000000000..0364b9d32 --- /dev/null +++ b/youtube_dl/extractor/videofun.py @@ -0,0 +1,36 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse +) + + +class VideoFunIE(InfoExtractor): +    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' + +    _TEST = { +        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', +        'md5': 'e37e99d665f503dd2db952f7c4dba9e6', +        'info_dict': { +            'id': 'Mahou-Shoujo-Madoka-Magica-07', +            'ext': 'flv', +            'title': 'Mahou-Shoujo-Madoka-Magica-07', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage( +            url, video_id, 'Downloading video page') + +        video_url_encoded = self._html_search_regex( +            r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') +        video_url = compat_urllib_parse.unquote(video_url_encoded) +        title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') + +        return { +            'id': title, +            'url': video_url, +            'title': title, +        } | 
