diff options
| -rw-r--r-- | test/helper.py | 18 | ||||
| -rw-r--r-- | test/test_download.py | 19 | ||||
| -rw-r--r-- | test/test_playlists.py | 30 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/instagram.py | 68 | 
6 files changed, 124 insertions, 29 deletions
| diff --git a/test/helper.py b/test/helper.py index 9e255878f..8739f816c 100644 --- a/test/helper.py +++ b/test/helper.py @@ -110,3 +110,21 @@ def expect_info_dict(self, expected_dict, got_dict):              self.assertEqual(expected, got,                  u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) +    # Check for the presence of mandatory fields +    for key in ('id', 'url', 'title', 'ext'): +        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) +    # Check for mandatory fields that are automatically set by YoutubeDL +    for key in ['webpage_url', 'extractor', 'extractor_key']: +        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key) + +    # Are checkable fields missing from the test case definition? +    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) +        for key, value in got_dict.items() +        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) +    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) +    if missing_keys: +        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') +        self.assertFalse( +            missing_keys, +            'Missing keys in test definition: %s' % ( +                ', '.join(sorted(missing_keys)))) diff --git a/test/test_download.py b/test/test_download.py index f4e5d120e..f171c10ba 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -137,25 +137,6 @@ def generator(test_case):                      info_dict = json.load(infof)                  expect_info_dict(self, tc.get('info_dict', {}), info_dict) - -                # Check for the presence of mandatory fields -                for key in ('id', 'url', 'title', 'ext'): -                    self.assertTrue(key in info_dict.keys() and info_dict[key]) -                # Check for mandatory fields that are automatically set by YoutubeDL -                for key in ['webpage_url', 'extractor', 'extractor_key']: -                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) - -                # Are checkable fields missing from the test case definition? -                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) -                    for key, value in info_dict.items() -                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) -                missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys()) -                if missing_keys: -                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') -                    self.assertFalse( -                        missing_keys, -                        'Missing keys in test definition: %s' % ( -                            ','.join(sorted(missing_keys))))          finally:              try_rm_tcs_files() diff --git a/test/test_playlists.py b/test/test_playlists.py index 2b1a7e849..b1e38e7e9 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -9,8 +9,10 @@ import sys  import unittest  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL - +from test.helper import ( +    expect_info_dict, +    FakeYDL, +)  from youtube_dl.extractor import (      AcademicEarthCourseIE, @@ -39,6 +41,7 @@ from youtube_dl.extractor import (      TEDIE,      ToypicsUserIE,      XTubeUserIE, +    InstagramUserIE,  ) @@ -287,5 +290,28 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['id'], 'greenshowers')          self.assertTrue(len(result['entries']) >= 155) +    def test_InstagramUser(self): +        dl = FakeYDL() +        ie = InstagramUserIE(dl) +        result = ie.extract('http://instagram.com/porsche') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], 'porsche') +        self.assertTrue(len(result['entries']) >= 2) +        test_video = next( +            e for e in result['entries'] +            if e['id'] == '614605558512799803_462752227') +        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)') +        dl.process_video_result(test_video, download=False) +        EXPECTED = { +            'id': '614605558512799803_462752227', +            'ext': 'mp4', +            'title': '#Porsche Intelligent Performance.', +            'thumbnail': 're:^https?://.*\.jpg', +            'uploader': 'Porsche', +            'uploader_id': 'porsche', +        } +        expect_info_dict(self, EXPECTED, test_video) + +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c5d08b0bb..d18d6dd00 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -512,13 +512,7 @@ class YoutubeDL(object):                          '_type': 'compat_list',                          'entries': ie_result,                      } -                self.add_extra_info(ie_result, -                    { -                        'extractor': ie.IE_NAME, -                        'webpage_url': url, -                        'webpage_url_basename': url_basename(url), -                        'extractor_key': ie.ie_key(), -                    }) +                self.add_default_extra_info(ie_result, ie, url)                  if process:                      return self.process_ie_result(ie_result, download, extra_info)                  else: @@ -537,6 +531,14 @@ class YoutubeDL(object):          else:              self.report_error('no suitable InfoExtractor for URL %s' % url) +    def add_default_extra_info(self, ie_result, ie, url): +        self.add_extra_info(ie_result, { +            'extractor': ie.IE_NAME, +            'webpage_url': url, +            'webpage_url_basename': url_basename(url), +            'extractor_key': ie.ie_key(), +        }) +      def process_ie_result(self, ie_result, download=True, extra_info={}):          """          Take the result of the ie(may be modified) and resolve all unresolved diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b5c8ef682..3e728e876 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,7 +112,7 @@ from .imdb import (  )  from .ina import InaIE  from .infoq import InfoQIE -from .instagram import InstagramIE +from .instagram import InstagramIE, InstagramUserIE  from .internetvideoarchive import InternetVideoArchiveIE  from .iprima import IPrimaIE  from .ivi import ( diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 63141af27..994f0e4ae 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,6 +3,9 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..utils import ( +    int_or_none, +)  class InstagramIE(InfoExtractor): @@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):              'uploader_id': uploader_id,              'description': desc,          } + + +class InstagramUserIE(InfoExtractor): +    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])' +    IE_DESC = 'Instagram user profile' +    IE_NAME = 'instagram:user' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        uploader_id = mobj.group('username') + +        entries = [] +        page_count = 0 +        media_url = 'http://instagram.com/%s/media' % uploader_id +        while True: +            page = self._download_json( +                media_url, uploader_id, +                note='Downloading page %d ' % (page_count + 1), +            ) +            page_count += 1 + +            for it in page['items']: +                if it.get('type') != 'video': +                    continue +                like_count = int_or_none(it.get('likes', {}).get('count')) +                user = it.get('user', {}) + +                formats = [{ +                    'format_id': k, +                    'height': v.get('height'), +                    'width': v.get('width'), +                    'url': v['url'], +                } for k, v in it['videos'].items()] +                self._sort_formats(formats) + +                thumbnails_el = it.get('images', {}) +                thumbnail = thumbnails_el.get('thumbnail', {}).get('url') + +                title = it.get('caption', {}).get('text', it['id']) + +                entries.append({ +                    'id': it['id'], +                    'title': title, +                    'formats': formats, +                    'thumbnail': thumbnail, +                    'webpage_url': it.get('link'), +                    'uploader': user.get('full_name'), +                    'uploader_id': user.get('username'), +                    'like_count': like_count, +                    'upload_timestamp': int_or_none(it.get('created_time')), +                }) + +            if not page['items']: +                break +            max_id = page['items'][-1]['id'] +            media_url = ( +                'http://instagram.com/%s/media?max_id=%s' % ( +                    uploader_id, max_id)) + +        return { +            '_type': 'playlist', +            'entries': entries, +            'id': uploader_id, +            'title': uploader_id, +        } | 
