diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-24 19:57:55 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-24 20:00:29 +0800 | 
| commit | f0bc5a8609786633d8b51ab4255c1f0fdb941f73 (patch) | |
| tree | b13de2bca46267ff79b37c41c0dfc735c4a9daf1 | |
| parent | a54ffb8aa778062901dd15b020576bc7d472ae40 (diff) | |
[twitter] Support Periscope embeds (closes #10737)
Also update _TESTS
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | ChangeLog | 1 |
| -rw-r--r-- | youtube_dl/extractor/periscope.py | 9 |
| -rw-r--r-- | youtube_dl/extractor/twitter.py | 45 |
3 files changed, 47 insertions, 8 deletions
| @@ -4,6 +4,7 @@ Core  + Improved support for HTML5 subtitles  Extractors ++ [twitter] Support Periscope embeds (#10737)  + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index eb1aeba46..e8b2f11c6 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -1,6 +1,8 @@  # coding: utf-8  from __future__ import unicode_literals +import re +  from .common import InfoExtractor  from ..utils import (      parse_iso8601, @@ -41,6 +43,13 @@ class PeriscopeIE(PeriscopeBaseIE):          'only_matching': True,      }] +    @staticmethod +    def _extract_url(webpage): +        mobj = re.search( +            r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage) +        if mobj: +            return mobj.group('url') +      def _real_extract(self, url):          token = self._match_id(url) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index c5a5843b6..3411fcf7e 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_urlparse  from ..utils import (      determine_ext,      float_or_none, @@ -13,6 +14,8 @@ from ..utils import (      ExtractorError,  ) +from .periscope import PeriscopeIE +  class TwitterBaseIE(InfoExtractor):      def _get_vmap_video_url(self, vmap_url, video_id): @@ -48,12 +51,12 @@ class TwitterCardIE(TwitterBaseIE):          },          {              'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', -            'md5': 'd4724ffe6d2437886d004fa5de1043b3', +            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',              'info_dict': {                  'id': 'dq4Oj5quskI',                  'ext': 'mp4',                  'title': 'Ubuntu 11.10 Overview', -                'description': 'Take a quick peek at 
what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', +                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',                  'upload_date': '20111013',                  'uploader': 'OMG! Ubuntu!',                  'uploader_id': 'omgubuntu', @@ -100,12 +103,17 @@ class TwitterCardIE(TwitterBaseIE):              return self.url_result(iframe_url)          config = self._parse_json(self._html_search_regex( -            r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'), +            r'data-(?:player-)?config="([^"]+)"', webpage, +            'data player config', default='{}'),              video_id)          if config.get('source_type') == 'vine':              return self.url_result(config['player_url'], 'Vine') +        periscope_url = PeriscopeIE._extract_url(webpage) +        if periscope_url: +            return self.url_result(periscope_url, PeriscopeIE.ie_key()) +          def _search_dimensions_in_video_url(a_format, video_url):              m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)              if m: @@ -244,10 +252,10 @@ class TwitterIE(InfoExtractor):          'info_dict': {              'id': '700207533655363584',              'ext': 'mp4', -            'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', -            'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', +            'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', +            'description': 'JG on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',              'thumbnail': 're:^https?://.*\.jpg', -            'uploader': 'Donte The Dumbass', +            'uploader': 'JG',              'uploader_id': 'jaydingeer',          },          'params': { @@ -278,6 +286,18 @@ class TwitterIE(InfoExtractor):       
   'params': {              'skip_download': True,  # requires ffmpeg          }, +    }, { +        'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', +        'info_dict': { +            'id': '1zqKVVlkqLaKB', +            'ext': 'mp4', +            'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', +            'upload_date': '20160923', +            'uploader_id': 'OPP_HSD', +            'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', +            'timestamp': 1474613214, +        }, +        'add_ie': ['Periscope'],      }]      def _real_extract(self, url): @@ -328,13 +348,22 @@ class TwitterIE(InfoExtractor):              })              return info +        twitter_card_url = None          if 'class="PlayableMedia' in webpage: +            twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) +        else: +            twitter_card_iframe_url = self._search_regex( +                r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1', +                webpage, 'Twitter card iframe URL', default=None, group='url') +            if twitter_card_iframe_url: +                twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) + +        if twitter_card_url:              info.update({                  '_type': 'url_transparent',                  'ie_key': 'TwitterCard', -                'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid), +                'url': twitter_card_url,              }) -              return info          raise ExtractorError('There\'s no video in this tweet.') | 
