diff options
Diffstat (limited to 'youtube_dl/extractor/bliptv.py')
| -rw-r--r-- | youtube_dl/extractor/bliptv.py | 83 | 
1 files changed, 66 insertions, 17 deletions
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 57d17bea3..14b814120 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -4,13 +4,17 @@ import re  from .common import InfoExtractor  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( + +from ..compat import ( +    compat_str,      compat_urllib_request, -    unescapeHTML, -    parse_iso8601,      compat_urlparse, +) +from ..utils import (      clean_html, -    compat_str, +    int_or_none, +    parse_iso8601, +    unescapeHTML,  ) @@ -64,20 +68,55 @@ class BlipTVIE(SubtitlesInfoExtractor):                  'uploader': 'redvsblue',                  'uploader_id': '792887',              } -        } +        }, +        { +            'url': 'http://blip.tv/play/gbk766dkj4Yn', +            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3', +            'info_dict': { +                'id': '1749452', +                'ext': 'mp4', +                'upload_date': '20090208', +                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.', +                'title': 'Nostalgia Critic: Transformers', +                'timestamp': 1234068723, +                'uploader': 'NostalgiaCritic', +                'uploader_id': '246467', +            } +        }, +        { +            # https://github.com/rg3/youtube-dl/pull/4404 +            'note': 'Audio only', +            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982', +            'md5': '76c0a56f24e769ceaab21fbb6416a351', +            'info_dict': { +                'id': '7103299', +                'ext': 'flv', +                'title': 'Weekly Manga Recap: Kingdom', +                'description': 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?', +                'timestamp': 1417660321, +                'upload_date': '20141204', +                'uploader': 'The Rollo T', +                'uploader_id': '407429', +                'duration': 7251, +                'vcodec': 'none', +            } +        },      ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          lookup_id = mobj.group('lookup_id') -        # See https://github.com/rg3/youtube-dl/issues/857 +        # See https://github.com/rg3/youtube-dl/issues/857 and +        # https://github.com/rg3/youtube-dl/issues/4197          if lookup_id: -            info_page = self._download_webpage( -                'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id') -            video_id = self._search_regex(r'data-episode-id="([0-9]+)', info_page, 'video_id') -        else: -            video_id = mobj.group('id') +            urlh = self._request_webpage( +                'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id') +            url = compat_urlparse.urlparse(urlh.geturl()) +            qs = compat_urlparse.parse_qs(url.query) +            mobj = re.match(self._VALID_URL, qs['file'][0]) + +        video_id = mobj.group('id')          rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS') @@ -113,7 +152,7 @@ class BlipTVIE(SubtitlesInfoExtractor):              msg = self._download_webpage(                  url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',                  video_id, 'Resolving URL for %s' % role) -            real_url = compat_urlparse.parse_qs(msg)['message'][0] +            real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]              media_type = media_content.get('type')              if media_type == 'text/srt' or url.endswith('.srt'): @@ -128,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):                      'url': real_url,                      'format_id': role,                      'format_note': media_type, -                    'vcodec': media_content.get(blip('vcodec')), +                    'vcodec': media_content.get(blip('vcodec')) or 'none',                      'acodec': media_content.get(blip('acodec')),                      'filesize': media_content.get('filesize'), -                    'width': int(media_content.get('width')), -                    'height': int(media_content.get('height')), +                    'width': int_or_none(media_content.get('width')), +                    'height': int_or_none(media_content.get('height')),                  })          self._sort_formats(formats) @@ -165,9 +204,17 @@ class BlipTVIE(SubtitlesInfoExtractor):  class BlipTVUserIE(InfoExtractor): -    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$' +    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'      _PAGE_SIZE = 12      IE_NAME = 'blip.tv:user' +    _TEST = { +        'url': 'http://blip.tv/actone', +        'info_dict': { +            'id': 'actone', +            'title': 'Act One: The Series', +        }, +        'playlist_count': 5, +    }      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -178,6 +225,7 @@ class BlipTVUserIE(InfoExtractor):          page = self._download_webpage(url, username, 'Downloading user page')          mobj = re.search(r'data-users-id="([^"]+)"', page)          page_base = page_base % mobj.group(1) +        title = self._og_search_title(page)          # Download video ids using BlipTV Ajax calls. Result size per          # query is limited (currently to 12 videos) so we need to query @@ -214,4 +262,5 @@ class BlipTVUserIE(InfoExtractor):          urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]          url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls] -        return [self.playlist_result(url_entries, playlist_title=username)] +        return self.playlist_result( +            url_entries, playlist_title=title, playlist_id=username)  | 
