diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-10 20:31:08 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-10 20:36:11 +0100 |
commit | 3501423dfec657892e54ffe00a02da3babdfe8e4 (patch) | |
tree | da64fe5286dc7a8910ab7d7b1eb8a8b02bcd6dea | |
parent | 0de668af512f87315dd9e15717ce6c97050e3606 (diff) |
[googleplus] Modernize and simplify
-rw-r--r-- | youtube_dl/extractor/googleplus.py | 59 |
1 files changed, 27 insertions, 32 deletions
```diff
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index 2570746b2..cc29a7e5d 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals
 
 import datetime
 import re
@@ -10,32 +11,28 @@ from ..utils import (
 
 
 class GooglePlusIE(InfoExtractor):
-    IE_DESC = u'Google Plus'
-    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
-    IE_NAME = u'plus.google'
+    IE_DESC = 'Google Plus'
+    _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
+    IE_NAME = 'plus.google'
     _TEST = {
-        u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
-        u"file": u"ZButuJc6CtH.flv",
-        u"info_dict": {
-            u"upload_date": u"20120613",
-            u"uploader": u"井上ヨシマサ",
-            u"title": u"嘆きの天使 降臨"
+        'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
+        'info_dict': {
+            'id': 'ZButuJc6CtH',
+            'ext': 'flv',
+            'upload_date': '20120613',
+            'uploader': '井上ヨシマサ',
+            'title': '嘆きの天使 降臨',
         }
     }
 
     def _real_extract(self, url):
         # Extract id from URL
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
 
-        post_url = mobj.group(0)
-        video_id = mobj.group(1)
-
-        video_extension = 'flv'
+        video_id = mobj.group('id')
 
         # Step 1, Retrieve post webpage to extract further information
-        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
+        webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
 
         self.report_extraction(video_id)
 
@@ -43,7 +40,7 @@ class GooglePlusIE(InfoExtractor):
         upload_date = self._html_search_regex(
             r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
                     ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
-            webpage, u'upload date', fatal=False, flags=re.VERBOSE)
+            webpage, 'upload date', fatal=False, flags=re.VERBOSE)
         if upload_date:
             # Convert timestring to a format suitable for filename
             upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
@@ -51,28 +48,27 @@ class GooglePlusIE(InfoExtractor):
 
         # Extract uploader
         uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
-            webpage, u'uploader', fatal=False)
+            webpage, 'uploader', fatal=False)
 
         # Extract title
         # Get the first line for title
         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
-            webpage, 'title', default=u'NA')
+            webpage, 'title', default='NA')
 
         # Step 2, Simulate clicking the image box to launch video
         DOMAIN = 'https://plus.google.com/'
         video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
-            webpage, u'video page URL')
+            webpage, 'video page URL')
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page
 
-        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
+        webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
 
-        # Extract video links on video page
-        """Extract video links of all sizes"""
+        # Extract video links all sizes
         pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
         mobj = re.findall(pattern, webpage)
         if len(mobj) == 0:
-            raise ExtractorError(u'Unable to extract video links')
+            raise ExtractorError('Unable to extract video links')
 
         # Sort in resolution
         links = sorted(mobj)
@@ -87,12 +83,11 @@ class GooglePlusIE(InfoExtractor):
         except AttributeError: # Python 3
             video_url = bytes(video_url, 'ascii').decode('unicode-escape')
 
-
-        return [{
-            'id': video_id,
-            'url': video_url,
+        return {
+            'id': video_id,
+            'url': video_url,
             'uploader': uploader,
-            'upload_date': upload_date,
-            'title': video_title,
-            'ext': video_extension,
-        }]
+            'upload_date': upload_date,
+            'title': video_title,
+            'ext': 'flv',
+        }
```