diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-10 20:31:08 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-10 20:36:11 +0100 | 
| commit | 3501423dfec657892e54ffe00a02da3babdfe8e4 (patch) | |
| tree | da64fe5286dc7a8910ab7d7b1eb8a8b02bcd6dea | |
| parent | 0de668af512f87315dd9e15717ce6c97050e3606 (diff) | |
[googleplus] Modernize and simplify
| -rw-r--r-- | youtube_dl/extractor/googleplus.py | 59 | 
1 file changed, 27 insertions, 32 deletions
| diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 2570746b2..cc29a7e5d 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -1,4 +1,5 @@  # coding: utf-8 +from __future__ import unicode_literals  import datetime  import re @@ -10,32 +11,28 @@ from ..utils import (  class GooglePlusIE(InfoExtractor): -    IE_DESC = u'Google Plus' -    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' -    IE_NAME = u'plus.google' +    IE_DESC = 'Google Plus' +    _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' +    IE_NAME = 'plus.google'      _TEST = { -        u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", -        u"file": u"ZButuJc6CtH.flv", -        u"info_dict": { -            u"upload_date": u"20120613", -            u"uploader": u"井上ヨシマサ", -            u"title": u"嘆きの天使 降臨" +        'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', +        'info_dict': { +            'id': 'ZButuJc6CtH', +            'ext': 'flv', +            'upload_date': '20120613', +            'uploader': '井上ヨシマサ', +            'title': '嘆きの天使 降臨',          }      }      def _real_extract(self, url):          # Extract id from URL          mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) -        post_url = mobj.group(0) -        video_id = mobj.group(1) - -        video_extension = 'flv' +        video_id = mobj.group('id')          # Step 1, Retrieve post webpage to extract further information -        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage') +        webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')          self.report_extraction(video_id) @@ -43,7 +40,7 @@ class GooglePlusIE(InfoExtractor):          upload_date = self._html_search_regex(              
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>                      ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', -            webpage, u'upload date', fatal=False, flags=re.VERBOSE) +            webpage, 'upload date', fatal=False, flags=re.VERBOSE)          if upload_date:              # Convert timestring to a format suitable for filename              upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d") @@ -51,28 +48,27 @@ class GooglePlusIE(InfoExtractor):          # Extract uploader          uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>', -            webpage, u'uploader', fatal=False) +            webpage, 'uploader', fatal=False)          # Extract title          # Get the first line for title          video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]', -            webpage, 'title', default=u'NA') +            webpage, 'title', default='NA')          # Step 2, Simulate clicking the image box to launch video          DOMAIN = 'https://plus.google.com/'          video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN), -            webpage, u'video page URL') +            webpage, 'video page URL')          if not video_page.startswith(DOMAIN):              video_page = DOMAIN + video_page -        webpage = self._download_webpage(video_page, video_id, u'Downloading video page') +        webpage = self._download_webpage(video_page, video_id, 'Downloading video page') -        # Extract video links on video page -        """Extract video links of all sizes""" +        # Extract video links all sizes          pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'          mobj = re.findall(pattern, webpage)          if len(mobj) == 0: -            raise ExtractorError(u'Unable to extract video links') +            raise ExtractorError('Unable to extract video links')          # Sort in resolution          links = sorted(mobj) @@ -87,12 
+83,11 @@ class GooglePlusIE(InfoExtractor):          except AttributeError: # Python 3              video_url = bytes(video_url, 'ascii').decode('unicode-escape') - -        return [{ -            'id':       video_id, -            'url':      video_url, +        return { +            'id': video_id, +            'url': video_url,              'uploader': uploader, -            'upload_date':  upload_date, -            'title':    video_title, -            'ext':      video_extension, -        }] +            'upload_date': upload_date, +            'title': video_title, +            'ext': 'flv', +        } | 
