diff options
Diffstat (limited to 'youtube_dl/extractor/mailru.py')
| -rw-r--r-- | youtube_dl/extractor/mailru.py | 155 | 
1 files changed, 155 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 6b7c5e3e0..6b0e64b7f 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -1,12 +1,17 @@  # coding: utf-8  from __future__ import unicode_literals +import itertools +import json  import re  from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote  from ..utils import (      int_or_none, +    parse_duration,      remove_end, +    try_get,  ) @@ -157,3 +162,153 @@ class MailRuIE(InfoExtractor):              'view_count': view_count,              'formats': formats,          } + + +class MailRuMusicSearchBaseIE(InfoExtractor): +    def _search(self, query, url, audio_id, limit=100, offset=0): +        search = self._download_json( +            'https://my.mail.ru/cgi-bin/my/ajax', audio_id, +            'Downloading songs JSON page %d' % (offset // limit + 1), +            headers={ +                'Referer': url, +                'X-Requested-With': 'XMLHttpRequest', +            }, query={ +                'xemail': '', +                'ajax_call': '1', +                'func_name': 'music.search', +                'mna': '', +                'mnb': '', +                'arg_query': query, +                'arg_extended': '1', +                'arg_search_params': json.dumps({ +                    'music': { +                        'limit': limit, +                        'offset': offset, +                    }, +                }), +                'arg_limit': limit, +                'arg_offset': offset, +            }) +        return next(e for e in search if isinstance(e, dict)) + +    @staticmethod +    def _extract_track(t, fatal=True): +        audio_url = t['URL'] if fatal else t.get('URL') +        if not audio_url: +            return + +        audio_id = t['File'] if fatal else t.get('File') +        if not audio_id: +            return + +        thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover') +        uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML') +        uploader_id = t.get('UploaderID') +        duration = int_or_none(t.get('DurationInSeconds')) or parse_duration( +            t.get('Duration') or t.get('DurationStr')) +        view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr')) + +        track = t.get('Name') or t.get('Name_Text_HTML') +        artist = t.get('Author') or t.get('Author_Text_HTML') + +        if track: +            title = '%s - %s' % (artist, track) if artist else track +        else: +            title = audio_id + +        return { +            'extractor_key': MailRuMusicIE.ie_key(), +            'id': audio_id, +            'title': title, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'duration': duration, +            'view_count': view_count, +            'vcodec': 'none', +            'abr': int_or_none(t.get('BitRate')), +            'track': track, +            'artist': artist, +            'album': t.get('Album'), +            'url': audio_url, +        } + + +class MailRuMusicIE(MailRuMusicSearchBaseIE): +    IE_NAME = 'mailru:music' +    IE_DESC = 'Музыка@Mail.Ru' +    _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)' +    _TESTS = [{ +        'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893', +        'md5': '0f8c22ef8c5d665b13ac709e63025610', +        'info_dict': { +            'id': '4e31f7125d0dfaef505d947642366893', +            'ext': 'mp3', +            'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ', +            'uploader': 'Игорь Мудрый', +            'uploader_id': '1459196328', +            'duration': 280, +            'view_count': int, +            'vcodec': 'none', +            'abr': 320, +            'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017', +            'artist': 'М8Л8ТХ', +        }, +    }] + +    def _real_extract(self, url): +        audio_id = self._match_id(url) + +        webpage = self._download_webpage(url, audio_id) + +        title = self._og_search_title(webpage) +        music_data = self._search(title, url, audio_id)['MusicData'] +        t = next(t for t in music_data if t.get('File') == audio_id) + +        info = self._extract_track(t) +        info['title'] = title +        return info + + +class MailRuMusicSearchIE(MailRuMusicSearchBaseIE): +    IE_NAME = 'mailru:music:search' +    IE_DESC = 'Музыка@Mail.Ru' +    _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)' +    _TESTS = [{ +        'url': 'https://my.mail.ru/music/search/black%20shadow', +        'info_dict': { +            'id': 'black shadow', +        }, +        'playlist_mincount': 532, +    }] + +    def _real_extract(self, url): +        query = compat_urllib_parse_unquote(self._match_id(url)) + +        entries = [] + +        LIMIT = 100 +        offset = 0 + +        for _ in itertools.count(1): +            search = self._search(query, url, query, LIMIT, offset) + +            music_data = search.get('MusicData') +            if not music_data or not isinstance(music_data, list): +                break + +            for t in music_data: +                track = self._extract_track(t, fatal=False) +                if track: +                    entries.append(track) + +            total = try_get( +                search, lambda x: x['Results']['music']['Total'], int) + +            if total is not None: +                if offset > total: +                    break + +            offset += LIMIT + +        return self.playlist_result(entries, query) | 
