diff options
| -rw-r--r-- | test/test_playlists.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/toypics.py | 77 | 
3 files changed, 59 insertions, 29 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index fbeed1c8c..4c9c34057 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -37,6 +37,7 @@ from youtube_dl.extractor import (
     GoogleSearchIE,
     GenericIE,
     TEDIE,
+    ToypicsUserIE,
 )
 
 
@@ -269,5 +270,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'Who are the hackers?')
         self.assertTrue(len(result['entries']) >= 6)
 
+    def test_toypics_user(self):
+        dl = FakeYDL()
+        ie = ToypicsUserIE(dl)
+        result = ie.extract('http://videos.toypics.net/Mikey')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'Mikey')
+        self.assertTrue(len(result['entries']) >= 17)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 5ca6eb16c..b8c843515 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -239,7 +239,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .toutv import TouTvIE
-from .toypics import ToypicsIE
+from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trutube import TruTubeIE
diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py
index 3cbfe2e7e..33a6988ca 100644
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@@ -2,43 +2,26 @@ from .common import InfoExtractor
 from math import ceil
 import re
 
+
 class ToypicsIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?videos\.toypics\.net/.*'
+    IE_DESC = 'Toypics video'
+    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
     _TEST = {
         'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
-        #'md5': '8a8b546956bbd0e769dbe28f6e80abb3', == $head -c10K 12929646011616163504.mp4 |md5sum //no idea why it fails
+        'md5': '16e806ad6d6f58079d210fe30985e08b',
         'info_dict': {
             'id': '514',
             'ext': 'mp4',
             'title': 'Chance-Bulge\'d, 2',
-            'age_limit': 18
+            'age_limit': 18,
+            'uploader': 'kidsune',
         }
     }
-    PAGINATED=8
 
     def _real_extract(self, url):
-        mobj = re.match(r'(http://)?videos\.toypics\.net/(?P<username>[^/?]+)$', url)
-        if not mobj:
-            return self.extract_one(url)
-        return [self.extract_one(u) for u in self.process_paginated(url,
-            r'public/">Public Videos \((?P<videos_count>[0-9]+)\)</a></li>',
-            r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">'
-        )]
-
-    def process_paginated(self, profile_url, re_total, re_video_page):
-        profile_page = self._download_webpage(profile_url, 'profile' , 'getting profile page: '+profile_url)
-        videos_count = self._html_search_regex(re_total, profile_page, 'videos count')
-        lst = []
-        for n in xrange(1,int(ceil(float(videos_count)/self.PAGINATED)) +1):
-            lpage_url = profile_url +'/public/%d'%n
-            lpage = self._download_webpage(lpage_url, 'page %d'%n)
-            lst.extend(re.findall(re_video_page, lpage))
-        return lst
-
-    def extract_one(self,url):
-        mobj = re.match(r'(http://)?videos\.toypics\.net/view/(?P<videoid>[0-9]+)/.*', url)
-        video_id = mobj.group('videoid')
-        page = self._download_webpage(url, video_id, 'getting page: '+url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        page = self._download_webpage(url, video_id)
         video_url = self._html_search_regex(
             r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
         title = self._html_search_regex(
@@ -48,8 +31,46 @@ class ToypicsIE(InfoExtractor):
         return {
             'id': video_id,
             'url': video_url,
-            'ext': video_url[-3:],
             'title': title,
             'uploader': username,
-            'age_limit': 18
+            'age_limit': 18,
+        }
+
+
+class ToypicsUserIE(InfoExtractor):
+    IE_DESC = 'Toypics user profile'
+    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?#]+)(?:$|[?#])'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        username = mobj.group('username')
+
+        profile_page = self._download_webpage(
+            url, username, note='Retrieving profile page')
+
+        video_count = int(self._search_regex(
+            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
+            'video count'))
+
+        PAGE_SIZE = 8
+        urls = []
+        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE  # ceiling division
+        for n in range(1, page_count + 1):
+            lpage_url = 'http://videos.toypics.net/%s/public/%d' % (username, n)  # ignore any ?query/#fragment in url
+            lpage = self._download_webpage(
+                lpage_url, username,
+                note='Downloading page %d/%d' % (n, page_count))
+            urls.extend(
+                re.findall(
+                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
+                    lpage))
+
+        return {
+            '_type': 'playlist',
+            'id': username,
+            'entries': [{
+                '_type': 'url',
+                'url': video_url,
+                'ie_key': 'Toypics',
+            } for video_url in urls]
         }
