diff options
Diffstat (limited to 'youtube_dl/extractor/bliptv.py')
| -rw-r--r-- | youtube_dl/extractor/bliptv.py | 75 | 
1 files changed, 41 insertions, 34 deletions
| diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 8a8c2e7a8..4f1272f29 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import datetime  import json  import re @@ -21,36 +23,35 @@ class BlipTVIE(InfoExtractor):      """Information extractor for blip.tv"""      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' -    _URL_EXT = r'^.*\.([a-z0-9]+)$' -    IE_NAME = u'blip.tv' +      _TEST = { -        u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', -        u'file': u'5779306.m4v', -        u'md5': u'80baf1ec5c3d2019037c1c707d676b9f', -        u'info_dict': { -            u"upload_date": u"20111205",  -            u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",  -            u"uploader": u"Comic Book Resources - CBR TV",  -            u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" +        'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', +        'file': '5779306.mov', +        'md5': 'c6934ad0b6acf2bd920720ec888eb812', +        'info_dict': { +            'upload_date': '20111205', +            'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', +            'uploader': 'Comic Book Resources - CBR TV', +            'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',          }      }      def report_direct_download(self, title):          """Report information extraction.""" -        self.to_screen(u'%s: Direct download detected' % title) +        self.to_screen('%s: Direct download detected' % title)      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          # See https://github.com/rg3/youtube-dl/issues/857          embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)          if embed_mobj:              info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)              info_page = self._download_webpage(info_url, embed_mobj.group(1)) -            video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, u'video_id') -            return self.url_result('http://blip.tv/a/a-'+video_id, 'BlipTV') +            video_id = self._search_regex(r'data-episode-id="(\d+)', info_page,  'video_id') +            return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')          if '?' in url:              cchar = '&' @@ -61,13 +62,13 @@ class BlipTVIE(InfoExtractor):          request.add_header('User-Agent', 'iTunes/10.6.1')          self.report_extraction(mobj.group(1))          urlh = self._request_webpage(request, None, False, -            u'unable to download video info webpage') +            'unable to download video info webpage')          try:              json_code_bytes = urlh.read()              json_code = json_code_bytes.decode('utf-8')          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -            raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err)) +            raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))          try:              json_data = json.loads(json_code) @@ -77,32 +78,38 @@ class BlipTVIE(InfoExtractor):                  data = json_data              upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') +            formats = []              if 'additionalMedia' in data: -                formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) -                best_format = formats[-1] -                video_url = best_format['url'] +                for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])): +                    if not int(f['media_width']): # filter m3u8 +                        continue +                    formats.append({ +                        'url': f['url'], +                        'format_id': f['role'], +                        'width': int(f['media_width']), +                        'height': int(f['media_height']), +                    })              else: -                video_url = data['media']['url'] -            umobj = re.match(self._URL_EXT, video_url) -            if umobj is None: -                raise ValueError('Can not determine filename extension') -            ext = umobj.group(1) +                formats.append({ +                    'url': data['media']['url'], +                    'width': int(data['media']['width']), +                    'height': int(data['media']['height']), +                }) + +            self._sort_formats(formats)              return {                  'id': compat_str(data['item_id']), -                'url': video_url,                  'uploader': data['display_name'],                  'upload_date': upload_date,                  'title': data['title'], -                'ext': ext, -                'format': data['media']['mimeType'],                  'thumbnail': data['thumbnailUrl'],                  'description': data['description'], -                'player_url': data['embedUrl'],                  'user_agent': 'iTunes/10.6.1', +                'formats': formats,              }          except (ValueError, KeyError) as err: -            raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) +            raise ExtractorError('Unable to parse video information: %s' % repr(err))  class BlipTVUserIE(InfoExtractor): @@ -110,19 +117,19 @@ class BlipTVUserIE(InfoExtractor):      _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'      _PAGE_SIZE = 12 -    IE_NAME = u'blip.tv:user' +    IE_NAME = 'blip.tv:user'      def _real_extract(self, url):          # Extract username          mobj = re.match(self._VALID_URL, url)          if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) +            raise ExtractorError('Invalid URL: %s' % url)          username = mobj.group(1)          page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' -        page = self._download_webpage(url, username, u'Downloading user page') +        page = self._download_webpage(url, username, 'Downloading user page')          mobj = re.search(r'data-users-id="([^"]+)"', page)          page_base = page_base % mobj.group(1) @@ -138,7 +145,7 @@ class BlipTVUserIE(InfoExtractor):          while True:              url = page_base + "&page=" + str(pagenum)              page = self._download_webpage(url, username, -                                          u'Downloading video ids from page %d' % pagenum) +                                          'Downloading video ids from page %d' % pagenum)              # Extract video identifiers              ids_in_page = [] @@ -160,6 +167,6 @@ class BlipTVUserIE(InfoExtractor):              pagenum += 1 -        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] +        urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]          url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]          return [self.playlist_result(url_entries, playlist_title = username)] | 
