about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/twitter.py')
-rw-r--r-- youtube_dl/extractor/twitter.py | 104
1 files changed, 83 insertions, 21 deletions
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 9d3e46b94..055047340 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -9,6 +9,8 @@ from ..utils import (
float_or_none,
xpath_text,
remove_end,
+ int_or_none,
+ ExtractorError,
)
@@ -18,7 +20,7 @@ class TwitterCardIE(InfoExtractor):
_TESTS = [
{
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
- 'md5': '7d2f6b4d2eb841a7ccc893d479bfceb4',
+ 'md5': '4fa26a35f9d1bf4b646590ba8e84be19',
'info_dict': {
'id': '560070183650213889',
'ext': 'mp4',
@@ -50,6 +52,20 @@ class TwitterCardIE(InfoExtractor):
'uploader': 'OMG! Ubuntu!',
'uploader_id': 'omgubuntu',
},
+ 'add_ie': ['Youtube'],
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
+ 'md5': 'ab2745d0b0ce53319a534fccaa986439',
+ 'info_dict': {
+ 'id': 'iBb2x00UVlv',
+ 'ext': 'mp4',
+ 'upload_date': '20151113',
+ 'uploader_id': '1189339351084113920',
+ 'uploader': '@ArsenalTerje',
+ 'title': 'Vine by @ArsenalTerje',
+ },
+ 'add_ie': ['Vine'],
}
]
@@ -69,11 +85,11 @@ class TwitterCardIE(InfoExtractor):
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(request, video_id)
- youtube_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
- webpage, 'youtube iframe', default=None)
- if youtube_url:
- return self.url_result(youtube_url, 'Youtube')
+ iframe_url = self._html_search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+ webpage, 'video iframe', default=None)
+ if iframe_url:
+ return self.url_result(iframe_url)
config = self._parse_json(self._html_search_regex(
r'data-player-config="([^"]+)"', webpage, 'data player config'),
@@ -120,9 +136,9 @@ class TwitterIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
- _TEST = {
+ _TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
- 'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
+ 'md5': 'db6612ec5d03355953c3ca9250c97e5e',
'info_dict': {
'id': '643211948184596480',
'ext': 'mp4',
@@ -133,7 +149,30 @@ class TwitterIE(InfoExtractor):
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
},
- }
+ }, {
+ 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
+ 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
+ 'info_dict': {
+ 'id': '657991469417025536',
+ 'ext': 'mp4',
+ 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
+ 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
+ 'thumbnail': 're:^https?://.*\.png',
+ 'uploader': 'Gifs',
+ 'uploader_id': 'giphz',
+ },
+ }, {
+ 'url': 'https://twitter.com/starwars/status/665052190608723968',
+ 'md5': '39b7199856dee6cd4432e72c74bc69d4',
+ 'info_dict': {
+ 'id': '665052190608723968',
+ 'ext': 'mp4',
+ 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
+ 'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."',
+ 'uploader_id': 'starwars',
+ 'uploader': 'Star Wars',
+ },
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -144,23 +183,46 @@ class TwitterIE(InfoExtractor):
username = remove_end(self._og_search_title(webpage), ' on Twitter')
- title = self._og_search_description(webpage).strip('').replace('\n', ' ')
+ title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
- mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
- title, short_url = mobj.groups()
-
- card_id = self._search_regex(
- r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
- card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+ title = re.sub(r'\s+(https?://[^ ]+)', '', title)
- return {
- '_type': 'url_transparent',
- 'ie_key': 'TwitterCard',
+ info = {
'uploader_id': user_id,
'uploader': username,
- 'url': card_url,
'webpage_url': url,
- 'description': '%s on Twitter: "%s %s"' % (username, title, short_url),
+ 'description': '%s on Twitter: "%s"' % (username, description),
'title': username + ' - ' + title,
}
+
+ card_id = self._search_regex(
+ r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
+ if card_id:
+ card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'TwitterCard',
+ 'url': card_url,
+ })
+ return info
+
+ mobj = re.search(r'''(?x)
+ <video[^>]+class="animated-gif"[^>]+
+ (?:data-height="(?P<height>\d+)")?[^>]+
+ (?:data-width="(?P<width>\d+)")?[^>]+
+ (?:poster="(?P<poster>[^"]+)")?[^>]*>\s*
+ <source[^>]+video-src="(?P<url>[^"]+)"
+ ''', webpage)
+
+ if mobj:
+ info.update({
+ 'id': twid,
+ 'url': mobj.group('url'),
+ 'height': int_or_none(mobj.group('height')),
+ 'width': int_or_none(mobj.group('width')),
+ 'thumbnail': mobj.group('poster'),
+ })
+ return info
+
+ raise ExtractorError('There\'s not video in this tweet.')