diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/twitch.py (renamed from youtube_dl/extractor/justintv.py) | 102 | 
2 files changed, 28 insertions, 76 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5e38d2663..c29c29c10 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -173,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE  from .jeuxvideo import JeuxVideoIE  from .jove import JoveIE  from .jukebox import JukeboxIE -from .justintv import JustinTVIE  from .jpopsukitv import JpopsukiIE  from .kankan import KankanIE  from .keezmovies import KeezMoviesIE @@ -395,6 +394,7 @@ from .tutv import TutvIE  from .tvigle import TvigleIE  from .tvp import TvpIE  from .tvplay import TVPlayIE +from .twitch import TwitchIE  from .ubu import UbuIE  from .udemy import (      UdemyIE, diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/twitch.py index 411167d7d..42d38b224 100644 --- a/youtube_dl/extractor/justintv.py +++ b/youtube_dl/extractor/twitch.py @@ -1,26 +1,20 @@  from __future__ import unicode_literals  import itertools -import json -import os  import re  from .common import InfoExtractor  from ..utils import ( -    compat_str,      ExtractorError, -    formatSeconds,      parse_iso8601,  ) -class JustinTVIE(InfoExtractor): -    """Information extractor for justin.tv and twitch.tv""" +class TwitchIE(InfoExtractor):      # TODO: One broadcast may be split into multiple videos. The key      # 'broadcast_id' is the same for all parts, and 'broadcast_part'      # starts at 1 and increases. Can we treat all parts as one video? - -    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/ +    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/          (?:              (?P<channelid>[^/]+)|              (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| @@ -28,9 +22,8 @@ class JustinTVIE(InfoExtractor):          )          /?(?:\#.*)?$          """ -    _JUSTIN_PAGE_LIMIT = 100 -    IE_NAME = 'justin.tv' -    IE_DESC = 'justin.tv and twitch.tv' +    _PAGE_LIMIT = 100 +    _API_BASE = 'https://api.twitch.tv'      _TEST = {          'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',          'md5': 'ecaa8a790c22a40770901460af191c9a', @@ -44,39 +37,6 @@ class JustinTVIE(InfoExtractor):          }      } -    _API_BASE = 'https://api.twitch.tv' - -    # Return count of items, list of *valid* items -    def _parse_page(self, url, video_id, counter): -        info_json = self._download_webpage( -            url, video_id, -            'Downloading video info JSON on page %d' % counter, -            'Unable to download video info JSON %d' % counter) - -        response = json.loads(info_json) -        if type(response) != list: -            error_text = response.get('error', 'unknown error') -            raise ExtractorError('Justin.tv API: %s' % error_text) -        info = [] -        for clip in response: -            video_url = clip['video_file_url'] -            if video_url: -                video_extension = os.path.splitext(video_url)[1][1:] -                video_date = re.sub('-', '', clip['start_time'][:10]) -                video_uploader_id = clip.get('user_id', clip.get('channel_id')) -                video_id = clip['id'] -                video_title = clip.get('title', video_id) -                info.append({ -                    'id': compat_str(video_id), -                    'url': video_url, -                    'title': video_title, -                    'uploader': clip.get('channel_name', video_uploader_id), -                    'uploader_id': video_uploader_id, -                    'upload_date': video_date, -                    'ext': video_extension, -                }) -        return (len(response), info) -      def _handle_error(self, response):          if not isinstance(response, dict):              return @@ -87,25 +47,21 @@ class JustinTVIE(InfoExtractor):                  expected=True)      def _download_json(self, url, video_id, note='Downloading JSON metadata'): -        response = super(JustinTVIE, self)._download_json(url, video_id, note) +        response = super(TwitchIE, self)._download_json(url, video_id, note)          self._handle_error(response)          return response      def _extract_media(self, item, item_id): -          ITEMS = {              'a': 'video',              'c': 'chapter',          } -          info = self._extract_info(self._download_json(              '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,              'Downloading %s info JSON' % ITEMS[item])) -          response = self._download_json(              '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,              'Downloading %s playlist JSON' % ITEMS[item]) -          entries = []          chunks = response['chunks']          qualities = list(chunks.keys()) @@ -144,14 +100,7 @@ class JustinTVIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) - -        api_base = 'http://api.twitch.tv' -        paged = False -        if mobj.group('channelid'): -            paged = True -            video_id = mobj.group('channelid') -            api = api_base + '/channel/archives/%s.json' % video_id -        elif mobj.group('chapterid'): +        if mobj.group('chapterid'):              return self._extract_media('c', mobj.group('chapterid'))              """ @@ -203,22 +152,25 @@ class JustinTVIE(InfoExtractor):              }              return info              """ -        else: +        elif mobj.group('videoid'):              return self._extract_media('a', mobj.group('videoid')) - -        entries = [] -        offset = 0 -        limit = self._JUSTIN_PAGE_LIMIT -        for counter in itertools.count(1): -            page_url = api + ('?offset=%d&limit=%d' % (offset, limit)) -            page_count, page_info = self._parse_page( -                page_url, video_id, counter) -            entries.extend(page_info) -            if not paged or page_count != limit: -                break -            offset += limit -        return { -            '_type': 'playlist', -            'id': video_id, -            'entries': entries, -        } +        elif mobj.group('channelid'): +            channel_id = mobj.group('channelid') +            info = self._download_json( +                '%s/kraken/channels/%s' % (self._API_BASE, channel_id), +                channel_id, 'Downloading channel info JSON') +            channel_name = info.get('display_name') or info.get('name') +            entries = [] +            offset = 0 +            limit = self._PAGE_LIMIT +            for counter in itertools.count(1): +                response = self._download_json( +                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d' +                    % (self._API_BASE, channel_id, offset, limit), +                    channel_id, 'Downloading channel videos JSON page %d' % counter) +                videos = response['videos'] +                if not videos: +                    break +                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos]) +                offset += limit +            return self.playlist_result(entries, channel_id, channel_name) | 
