diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-03-23 16:06:03 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-03-23 16:06:07 +0100 |
commit | ea38e55fff639545394e32208a7dabc7e6258166 (patch) | |
tree | bed0ba9d1538d95a111ace924c9b9074e2c87e41 /youtube_dl | |
parent | 257cfebfe6833a41b8ec2c3882b8666c15e454a1 (diff) |
[instagram] Add support for user profiles (Fixes #2606)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/YoutubeDL.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/instagram.py | 68 |
3 files changed, 78 insertions, 8 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c5d08b0bb..d18d6dd00 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -512,13 +512,7 @@ class YoutubeDL(object):
                         '_type': 'compat_list',
                         'entries': ie_result,
                     }
-                self.add_extra_info(ie_result,
-                    {
-                        'extractor': ie.IE_NAME,
-                        'webpage_url': url,
-                        'webpage_url_basename': url_basename(url),
-                        'extractor_key': ie.ie_key(),
-                    })
+                self.add_default_extra_info(ie_result, ie, url)
                 if process:
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
@@ -537,6 +531,14 @@ class YoutubeDL(object):
         else:
             self.report_error('no suitable InfoExtractor for URL %s' % url)
 
+    def add_default_extra_info(self, ie_result, ie, url):
+        self.add_extra_info(ie_result, {
+            'extractor': ie.IE_NAME,
+            'webpage_url': url,
+            'webpage_url_basename': url_basename(url),
+            'extractor_key': ie.ie_key(),
+        })
+
     def process_ie_result(self, ie_result, download=True, extra_info={}):
         """
         Take the result of the ie(may be modified) and resolve all unresolved
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b5c8ef682..3e728e876 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,7 +112,7 @@ from .imdb import (
 )
 from .ina import InaIE
 from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 63141af27..994f0e4ae 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
 
 
 class InstagramIE(InfoExtractor):
@@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
             'uploader_id': uploader_id,
             'description': desc,
         }
+
+
+class InstagramUserIE(InfoExtractor):
+    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uploader_id = mobj.group('username')
+
+        entries = []
+        page_count = 0
+        media_url = 'http://instagram.com/%s/media' % uploader_id
+        while True:
+            page = self._download_json(
+                media_url, uploader_id,
+                note='Downloading page %d ' % (page_count + 1),
+            )
+            page_count += 1
+
+            for it in page['items']:
+                if it.get('type') != 'video':
+                    continue
+                like_count = int_or_none(it.get('likes', {}).get('count'))
+                user = it.get('user', {})
+
+                formats = [{
+                    'format_id': k,
+                    'height': v.get('height'),
+                    'width': v.get('width'),
+                    'url': v['url'],
+                } for k, v in it['videos'].items()]
+                self._sort_formats(formats)
+
+                thumbnails_el = it.get('images', {})
+                thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
+
+                title = it.get('caption', {}).get('text', it['id'])
+
+                entries.append({
+                    'id': it['id'],
+                    'title': title,
+                    'formats': formats,
+                    'thumbnail': thumbnail,
+                    'webpage_url': it.get('link'),
+                    'uploader': user.get('full_name'),
+                    'uploader_id': user.get('username'),
+                    'like_count': like_count,
+                    'upload_timestamp': int_or_none(it.get('created_time')),
+                })
+
+            if not page['items']:
+                break
+            max_id = page['items'][-1]['id']
+            media_url = (
+                'http://instagram.com/%s/media?max_id=%s' % (
+                    uploader_id, max_id))
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': uploader_id,
+            'title': uploader_id,
+        }