about summary refs log tree commit diff
path: root/youtube_dl/extractor/instagram.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/instagram.py')
-rw-r--r-- youtube_dl/extractor/instagram.py | 26
1 files changed, 16 insertions, 10 deletions
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index b020e2621..c158f2064 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -5,13 +5,14 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ limit_length,
)
class InstagramIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
- _TEST = {
- 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'info_dict': {
'id': 'aye83DjauH',
@@ -20,11 +21,14 @@ class InstagramIE(InfoExtractor):
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
- }
+ }, {
+ 'url': 'https://instagram.com/p/-Cmh1cukG2/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, 'uploader id', fatal=False)
@@ -43,11 +47,11 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
- 'url': 'http://instagram.com/porsche',
+ 'url': 'https://instagram.com/porsche',
'info_dict': {
'id': 'porsche',
'title': 'porsche',
@@ -102,11 +106,13 @@ class InstagramUserIE(InfoExtractor):
thumbnails_el = it.get('images', {})
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
- title = it.get('caption', {}).get('text', it['id'])
+ # The caption may be present but null (None in Python); in that case
+ # it.get('caption', {}) returns None instead of the {} default
+ title = (it.get('caption') or {}).get('text', it['id'])
entries.append({
'id': it['id'],
- 'title': title,
+ 'title': limit_length(title, 80),
'formats': formats,
'thumbnail': thumbnail,
'webpage_url': it.get('link'),