[instagram] Improve title extraction and extract duration (#28469)

Co-authored-by: Sergey M. <dstftw@gmail.com>
author: Chris Hranj <christopher.hranj@gmail.com> 2021-03-29 15:05:19 -0400
committer: GitHub <noreply@github.com> 2021-03-30 02:05:19 +0700
commit: da762c4e329f6158956ddd51ac294e9183e5ce89 (patch)
tree: bedfab482251864fd0b80e98236044cfbc221084
parent: 87a8bde7775ebc31175ebb111015b4052b50b7db (diff)
1 files changed, 27 insertions, 2 deletions
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 1eeddc3b6..12e10143c 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -12,6 +12,7 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    float_or_none,
     get_element_by_attribute,
     int_or_none,
     lowercase_escape,
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
             'title': 'Video by naomipq',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
             'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
             'timestamp': 1371748545,
             'upload_date': '20130620',
             'uploader_id': 'naomipq',
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Video by britneyspears',
             'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
             'timestamp': 1453760977,
             'upload_date': '20160125',
             'uploader_id': 'britneyspears',
@@ -87,6 +90,24 @@ class InstagramIE(InfoExtractor):
             'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
         },
     }, {
+        # IGTV
+        'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+        'info_dict': {
+            'id': 'BkfuX9UB-eK',
+            'ext': 'mp4',
+            'title': 'Fingerboarding Tricks with @cass.fb',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 53.83,
+            'timestamp': 1530032919,
+            'upload_date': '20180626',
+            'uploader_id': 'instagram',
+            'uploader': 'Instagram',
+            'like_count': int,
+            'comment_count': int,
+            'comments': list,
+            'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+        }
+    }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
     }, {
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
             description = try_get(
                 media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                 compat_str) or media.get('caption')
+            title = media.get('title')
             thumbnail = media.get('display_src') or media.get('display_url')
+            duration = float_or_none(media.get('video_duration'))
             timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
             uploader = media.get('owner', {}).get('full_name')
             uploader_id = media.get('owner', {}).get('username')
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
                             continue
                         entries.append({
                             'id': node.get('shortcode') or node['id'],
-                            'title': 'Video %d' % edge_num,
+                            'title': node.get('title') or 'Video %d' % edge_num,
                             'url': node_video_url,
                             'thumbnail': node.get('display_url'),
+                            'duration': float_or_none(node.get('video_duration')),
                             'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
                             'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
                             'view_count': int_or_none(node.get('video_view_count')),
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
             'id': video_id,
             'formats': formats,
             'ext': 'mp4',
-            'title': 'Video by %s' % uploader_id,
+            'title': title or 'Video by %s' % uploader_id,
             'description': description,
+            'duration': duration,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'uploader_id': uploader_id,
author	Chris Hranj <christopher.hranj@gmail.com>	2021-03-29 15:05:19 -0400
committer	GitHub <noreply@github.com>	2021-03-30 02:05:19 +0700
commit	da762c4e329f6158956ddd51ac294e9183e5ce89 (patch)
tree	bedfab482251864fd0b80e98236044cfbc221084
parent	87a8bde7775ebc31175ebb111015b4052b50b7db (diff)