about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-03-23 16:06:03 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2014-03-23 16:06:07 +0100
commit ea38e55fff639545394e32208a7dabc7e6258166 (patch)
tree bed0ba9d1538d95a111ace924c9b9074e2c87e41 /youtube_dl/extractor
parent 257cfebfe6833a41b8ec2c3882b8666c15e454a1 (diff)
[instagram] Add support for user profiles (Fixes #2606)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 2
-rw-r--r-- youtube_dl/extractor/instagram.py 68
2 files changed, 69 insertions, 1 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b5c8ef682..3e728e876 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,7 +112,7 @@ from .imdb import (
)
from .ina import InaIE
from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .ivi import (
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 63141af27..994f0e4ae 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+)
class InstagramIE(InfoExtractor):
@@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
'uploader_id': uploader_id,
'description': desc,
}
+
+
+class InstagramUserIE(InfoExtractor):
+    """Collect every video posted by an Instagram user into one playlist."""
+    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+
+    def _real_extract(self, url):
+        """Return a playlist of all videos in the feed of the user in url.
+
+        dict.get() only uses its default when the key is missing, so nested
+        objects are read with ``or {}`` to also tolerate an explicit null.
+        """
+        uploader_id = re.match(self._VALID_URL, url).group('username')
+        entries = []
+        page_count = 0
+        media_url = 'http://instagram.com/%s/media' % uploader_id
+        while True:
+            page = self._download_json(
+                media_url, uploader_id,
+                note='Downloading page %d ' % (page_count + 1))
+            page_count += 1
+            for it in page['items']:
+                if it.get('type') != 'video':
+                    continue
+                likes = it.get('likes') or {}
+                user = it.get('user') or {}
+                images = it.get('images') or {}
+                formats = [{
+                    'format_id': k,
+                    'height': v.get('height'),
+                    'width': v.get('width'),
+                    'url': v['url'],
+                } for k, v in it['videos'].items()]
+                self._sort_formats(formats)
+                # Items without caption text fall back to the media id.
+                title = (it.get('caption') or {}).get('text', it['id'])
+                entries.append({
+                    'id': it['id'],
+                    'title': title,
+                    'formats': formats,
+                    'thumbnail': (images.get('thumbnail') or {}).get('url'),
+                    'webpage_url': it.get('link'),
+                    'uploader': user.get('full_name'),
+                    'uploader_id': user.get('username'),
+                    'like_count': int_or_none(likes.get('count')),
+                    'upload_timestamp': int_or_none(it.get('created_time')),
+                })
+
+            # An empty page ends the pagination loop.
+            if not page['items']:
+                break
+            # The next request passes the id of this page's last item.
+            max_id = page['items'][-1]['id']
+            media_url = (
+                'http://instagram.com/%s/media?max_id=%s' % (
+                    uploader_id, max_id))
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': uploader_id,
+            'title': uploader_id,
+        }