about summary refs log tree commit diff
path: root/youtube_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/twitter.py')
-rw-r--r-- youtube_dl/extractor/twitter.py 97
1 files changed, 70 insertions, 27 deletions
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 1f32ea2eb..ac0b221b4 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
+ determine_ext,
float_or_none,
xpath_text,
remove_end,
@@ -12,6 +14,8 @@ from ..utils import (
ExtractorError,
)
+from .periscope import PeriscopeIE
+
class TwitterBaseIE(InfoExtractor):
def _get_vmap_video_url(self, vmap_url, video_id):
@@ -21,7 +25,7 @@ class TwitterBaseIE(InfoExtractor):
class TwitterCardIE(TwitterBaseIE):
IE_NAME = 'twitter:card'
- _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -47,12 +51,12 @@ class TwitterCardIE(TwitterBaseIE):
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
- 'md5': 'd4724ffe6d2437886d004fa5de1043b3',
+ 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
'info_dict': {
'id': 'dq4Oj5quskI',
'ext': 'mp4',
'title': 'Ubuntu 11.10 Overview',
- 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
+ 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
'upload_date': '20111013',
'uploader': 'OMG! Ubuntu!',
'uploader_id': 'omgubuntu',
@@ -80,6 +84,9 @@ class TwitterCardIE(TwitterBaseIE):
'title': 'Twitter web player',
'thumbnail': 're:^https?://.*\.jpg',
},
+ }, {
+ 'url': 'https://twitter.com/i/videos/752274308186120192',
+ 'only_matching': True,
},
]
@@ -99,12 +106,17 @@ class TwitterCardIE(TwitterBaseIE):
return self.url_result(iframe_url)
config = self._parse_json(self._html_search_regex(
- r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
+ r'data-(?:player-)?config="([^"]+)"', webpage,
+ 'data player config', default='{}'),
video_id)
if config.get('source_type') == 'vine':
return self.url_result(config['player_url'], 'Vine')
+ periscope_url = PeriscopeIE._extract_url(webpage)
+ if periscope_url:
+ return self.url_result(periscope_url, PeriscopeIE.ie_key())
+
def _search_dimensions_in_video_url(a_format, video_url):
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
if m:
@@ -116,13 +128,16 @@ class TwitterCardIE(TwitterBaseIE):
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
if video_url:
- f = {
- 'url': video_url,
- }
+ if determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
+ else:
+ f = {
+ 'url': video_url,
+ }
- _search_dimensions_in_video_url(f, video_url)
+ _search_dimensions_in_video_url(f, video_url)
- formats.append(f)
+ formats.append(f)
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
if vmap_url:
@@ -207,6 +222,7 @@ class TwitterIE(InfoExtractor):
'uploader_id': 'giphz',
},
'expected_warnings': ['height', 'width'],
+ 'skip': 'Account suspended',
}, {
'url': 'https://twitter.com/starwars/status/665052190608723968',
'md5': '39b7199856dee6cd4432e72c74bc69d4',
@@ -239,10 +255,10 @@ class TwitterIE(InfoExtractor):
'info_dict': {
'id': '700207533655363584',
'ext': 'mp4',
- 'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel',
- 'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+ 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
+ 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
'thumbnail': 're:^https?://.*\.jpg',
- 'uploader': 'jay',
+ 'uploader': 'JG',
'uploader_id': 'jaydingeer',
},
'params': {
@@ -260,6 +276,31 @@ class TwitterIE(InfoExtractor):
'upload_date': '20140615',
},
'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+ 'info_dict': {
+ 'id': '719944021058060289',
+ 'ext': 'mp4',
+ 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+ 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
+ 'uploader_id': 'captainamerica',
+ 'uploader': 'Captain America',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
+ 'info_dict': {
+ 'id': '1zqKVVlkqLaKB',
+ 'ext': 'mp4',
+ 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
+ 'upload_date': '20160923',
+ 'uploader_id': 'OPP_HSD',
+ 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
+ 'timestamp': 1474613214,
+ },
+ 'add_ie': ['Periscope'],
}]
def _real_extract(self, url):
@@ -267,7 +308,11 @@ class TwitterIE(InfoExtractor):
user_id = mobj.group('user_id')
twid = mobj.group('id')
- webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
+ webpage, urlh = self._download_webpage_handle(
+ self._TEMPLATE_URL % (user_id, twid), twid)
+
+ if 'twitter.com/account/suspended' in urlh.geturl():
+ raise ExtractorError('Account suspended by Twitter.', expected=True)
username = remove_end(self._og_search_title(webpage), ' on Twitter')
@@ -284,17 +329,6 @@ class TwitterIE(InfoExtractor):
'title': username + ' - ' + title,
}
- card_id = self._search_regex(
- r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
- if card_id:
- card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
- info.update({
- '_type': 'url_transparent',
- 'ie_key': 'TwitterCard',
- 'url': card_url,
- })
- return info
-
mobj = re.search(r'''(?x)
<video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
<source[^>]+video-src="(?P<url>[^"]+)"
@@ -317,13 +351,22 @@ class TwitterIE(InfoExtractor):
})
return info
+ twitter_card_url = None
if 'class="PlayableMedia' in webpage:
+ twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid)
+ else:
+ twitter_card_iframe_url = self._search_regex(
+ r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'Twitter card iframe URL', default=None, group='url')
+ if twitter_card_iframe_url:
+ twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url)
+
+ if twitter_card_url:
info.update({
'_type': 'url_transparent',
'ie_key': 'TwitterCard',
- 'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
+ 'url': twitter_card_url,
})
-
return info
raise ExtractorError('There\'s no video in this tweet.')
@@ -331,7 +374,7 @@ class TwitterIE(InfoExtractor):
class TwitterAmplifyIE(TwitterBaseIE):
IE_NAME = 'twitter:amplify'
- _VALID_URL = 'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
+ _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
_TEST = {
'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',