diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-03-03 01:24:36 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-03-03 01:24:36 +0700 | 
| commit | 4c780fbd0ab1938a0fa98af6a66cfe5f08c3cda7 (patch) | |
| tree | 4becfb8e971a4df74fe954f7d871d0cfab1fbe70 | |
| parent | 7773a92800900413af0846a3da580749483be346 (diff) | |
[yapfiles] Add extractor (closes #15726, refs #11085)
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/yapfiles.py | 101 | 
3 files changed, 120 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index a0bb773e1..46ca7bec3 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1369,6 +1369,7 @@ from .yandexmusic import (
     YandexMusicPlaylistIE,
 )
 from .yandexdisk import YandexDiskIE
+from .yapfiles import YapFilesIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
 from .ynet import YnetIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b66e623f8..bc350bcf7 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -102,6 +102,7 @@ from .channel9 import Channel9IE
 from .vshare import VShareIE
 from .mediasite import MediasiteIE
 from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
 
 
 class GenericIE(InfoExtractor):
@@ -1970,6 +1971,18 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+        },
+        {
+            'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+            'info_dict': {
+                'id': 'vMDE4NzI1Mjgt690b',
+                'ext': 'mp4',
+                'title': 'Котята',
+            },
+            'add_ie': [YapFilesIE.ie_key()],
+            'params': {
+                'skip_download': True,
+            },
         }
         # {
         #     # TODO: find another test
@@ -2947,6 +2960,11 @@ class GenericIE(InfoExtractor):
                 springboardplatform_urls, video_id, video_title,
                 ie=SpringboardPlatformIE.ie_key())
 
+        yapfiles_urls = YapFilesIE._extract_urls(webpage)
+        if yapfiles_urls:
+            return self.playlist_from_matches(
+                yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
diff --git 
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    qualities,
    unescapeHTML,
)


# Extractor for yapfiles.ru "get_player" URLs (www. and api. hosts) and for
# <iframe> embeds of that player found on other pages.
class YapFilesIE(InfoExtractor):
    # Scheme-less player URL pattern; shared by _VALID_URL and _extract_urls.
    _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
    _VALID_URL = r'https?:%s' % _YAPFILES_URL
    _TESTS = [{
        # with hd
        'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
        'md5': '2db19e2bfa2450568868548a1aa1956c',
        'info_dict': {
            'id': 'vMDE1NjcyNDUt0413',
            'ext': 'mp4',
            'title': 'Самый худший пароль WIFI',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 72,
        },
    }, {
        # without hd
        'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the HTML-unescaped player URLs of all YapFiles iframes in webpage."""
        # The scheme is optional so that protocol-relative src values match too.
        iframe_re = (
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
            % YapFilesIE._YAPFILES_URL)
        embed_urls = []
        for match in re.finditer(iframe_re, webpage):
            embed_urls.append(unescapeHTML(match.group('url')))
        return embed_urls

    def _real_extract(self, url):
        """Build the info dict (id, title, thumbnail, duration, formats) for url.

        Raises ExtractorError (expected=True) when the player metadata carries
        the deleted-video title or a 'deleted.jpg' poster.
        """
        video_id = self._match_id(url)

        # Requested with fatal=False and checked for truthiness below, so the
        # player page is treated as optional.
        page = self._download_webpage(url, video_id, fatal=False)

        player_url = None
        if page:
            player_url = self._search_regex(
                r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', page,
                'player url', default=None, group='url')

        if player_url:
            query = {}
        else:
            # No player URL could be taken from the page: fall back to the
            # load endpoint, passing the original URL as 'ref'.
            player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
            query = {
                'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
                'type': 'json',
                'ref': url,
            }

        player_json = self._download_json(player_url, video_id, query=query)
        player = player_json['player']

        playlist_url = player['playlist']
        title = player['title']
        thumbnail = player.get('poster')

        # 'Ролик удален' is the title served for deleted videos.
        is_removed = (
            title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''))
        if is_removed:
            raise ExtractorError(
                'Video %s has been removed' % video_id, expected=True)

        playlist_json = self._download_json(playlist_url, video_id)
        sources = playlist_json['player']['main']

        # The player's 'hd' field is used as the height of the hd format.
        hd_height = int_or_none(player.get('hd'))

        format_ids = ('sd', 'hd')
        quality_of = qualities(format_ids)
        formats = []
        for format_id in format_ids:
            is_hd = format_id == 'hd'
            # sd is stored under 'file', hd under 'file_hd'.
            format_url = sources.get('file%s' % ('_hd' if is_hd else ''))
            if format_url and isinstance(format_url, compat_str):
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'quality': quality_of(format_id),
                    'height': hd_height if is_hd else None,
                })
        self._sort_formats(formats)

        duration = int_or_none(player.get('length'))
        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
