diff options
| -rw-r--r-- | youtube_dl/extractor/common.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/vlive.py | 98 | 
2 files changed, 83 insertions, 23 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a285ee7d8..2763d2ffe 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1061,7 +1061,7 @@ class InfoExtractor(object):      def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,                                entry_protocol='m3u8', preference=None,                                m3u8_id=None, note=None, errnote=None, -                              fatal=True): +                              fatal=True, live=False):          formats = [{              'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), @@ -1139,7 +1139,11 @@ class InfoExtractor(object):                  if m3u8_id:                      format_id.append(m3u8_id)                  last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None -                format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) +                # Bandwidth of live streams may differ over time thus making +                # format_id unpredictable. So it's better to keep provided +                # format_id intact. +                if last_media_name and not live: +                    format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))                  f = {                      'format_id': '-'.join(format_id),                      'url': format_url(line.strip()), diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index baf39bb2c..2151696ea 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -1,8 +1,11 @@  # coding: utf-8 -from __future__ import unicode_literals +from __future__ import division, unicode_literals +import re +import time  from .common import InfoExtractor  from ..utils import ( +    ExtractorError,      dict_get,      float_or_none,      int_or_none, @@ -31,16 +34,77 @@ class VLiveIE(InfoExtractor):          webpage = self._download_webpage(              'http://www.vlive.tv/video/%s' % video_id, video_id) -        long_video_id = self._search_regex( -            r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"', -            webpage, 'long video id') +        # UTC+x - UTC+9 (KST) +        tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone +        tz_offset = -tz // 60 - 9 * 60 +        self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset) -        key = self._search_regex( -            r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"', -            webpage, 'key') +        status_params = self._download_json( +            'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, +            video_id, 'Downloading JSON status', +            headers={'Referer': url}) +        status = status_params.get('status') +        air_start = status_params.get('onAirStartAt', '') +        is_live = status_params.get('isLive') +        video_params = self._search_regex( +            r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)', +            webpage, 'video params') +        live_params, long_video_id, key = re.split( +            r'"\s*,\s*"', video_params)[1:4] + +        if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': +            live_params = self._parse_json('"%s"' % live_params, video_id) +            live_params = self._parse_json(live_params, video_id) +            return self._live(video_id, webpage, live_params) +        elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO': +            if long_video_id and key: +                return self._replay(video_id, webpage, long_video_id, key) +            elif is_live: +                status = 'LIVE_END' +            else: +                status = 'COMING_SOON' + +        if status == 'LIVE_END': +            raise ExtractorError('Uploading for replay. Please wait...', +                                 expected=True) +        elif status == 'COMING_SOON': +            raise ExtractorError('Coming soon! %s' % air_start, expected=True) +        elif status == 'CANCELED': +            raise ExtractorError('We are sorry, ' +                                 'but the live broadcast has been canceled.', +                                 expected=True) +        else: +            raise ExtractorError('Unknown status %s' % status) + +    def _get_common_fields(self, webpage):          title = self._og_search_title(webpage) +        creator = self._html_search_regex( +            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)', +            webpage, 'creator', fatal=False) +        thumbnail = self._og_search_thumbnail(webpage) +        return { +            'title': title, +            'creator': creator, +            'thumbnail': thumbnail, +        } +    def _live(self, video_id, webpage, live_params): +        formats = [] +        for vid in live_params.get('resolutions', []): +            formats.extend(self._extract_m3u8_formats( +                vid['cdnUrl'], video_id, 'mp4', +                m3u8_id=vid.get('name'), +                fatal=False, live=True)) +        self._sort_formats(formats) + +        return dict(self._get_common_fields(webpage), +            id=video_id, +            formats=formats, +            is_live=True, +        ) + +    def _replay(self, video_id, webpage, long_video_id, key):          playinfo = self._download_json(              'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'              % compat_urllib_parse_urlencode({ @@ -62,11 +126,6 @@ class VLiveIE(InfoExtractor):          } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]          self._sort_formats(formats) -        thumbnail = self._og_search_thumbnail(webpage) -        creator = self._html_search_regex( -            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)', -            webpage, 'creator', fatal=False) -          view_count = int_or_none(playinfo.get('meta', {}).get('count'))          subtitles = {} @@ -77,12 +136,9 @@ class VLiveIE(InfoExtractor):                      'ext': 'vtt',                      'url': caption['source']}] -        return { -            'id': video_id, -            'title': title, -            'creator': creator, -            'thumbnail': thumbnail, -            'view_count': view_count, -            'formats': formats, -            'subtitles': subtitles, -        } +        return dict(self._get_common_fields(webpage), +            id=video_id, +            formats=formats, +            view_count=view_count, +            subtitles=subtitles, +        ) | 
