diff options
| -rw-r--r-- | youtube_dl/extractor/chirbit.py | 54 | 
1 files changed, 30 insertions, 24 deletions
diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py
index b1eeaf101..b43518652 100644
--- a/youtube_dl/extractor/chirbit.py
+++ b/youtube_dl/extractor/chirbit.py
@@ -1,30 +1,33 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import base64
+
 from .common import InfoExtractor
-from ..utils import (
-    parse_duration,
-    int_or_none,
-)
+from ..utils import parse_duration
 
 
 class ChirbitIE(InfoExtractor):
     IE_NAME = 'chirbit'
     _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
     _TESTS = [{
-        'url': 'http://chirb.it/PrIPv5',
-        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
+        'url': 'http://chirb.it/be2abG',
         'info_dict': {
-            'id': 'PrIPv5',
+            'id': 'be2abG',
             'ext': 'mp3',
-            'title': 'Фасадстрой',
-            'duration': 52,
-            'view_count': int,
-            'comment_count': int,
+            'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
+            'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
+            'duration': 306,
+        },
+        'params': {
+            'skip_download': True,
         }
     }, {
         'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
         'only_matching': True,
+    }, {
+        'url': 'https://chirb.it/wp/MN58c2',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -33,27 +36,30 @@ class ChirbitIE(InfoExtractor):
         webpage = self._download_webpage(
             'http://chirb.it/%s' % audio_id, audio_id)
 
-        audio_url = self._search_regex(
-            r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
+        data_fd = self._search_regex(
+            r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            webpage, 'data fd', group='url')
+
+        # Reverse engineered from https://chirb.it/js/chirbit.player.js (look
+        # for soundURL)
+        audio_url = base64.b64decode(
+            data_fd[::-1].encode('ascii')).decode('utf-8')
 
         title = self._search_regex(
-            r'itemprop="name">([^<]+)', webpage, 'title')
-        duration = parse_duration(self._html_search_meta(
-            'duration', webpage, 'duration', fatal=False))
-        view_count = int_or_none(self._search_regex(
-            r'itemprop="playCount"\s*>(\d+)', webpage,
-            'listen count', fatal=False))
-        comment_count = int_or_none(self._search_regex(
-            r'>(\d+) Comments?:', webpage,
-            'comment count', fatal=False))
+            r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
+        description = self._search_regex(
+            r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
+            webpage, 'description', default=None)
+        duration = parse_duration(self._search_regex(
+            r'class=["\']c-length["\'][^>]*>([^<]+)',
+            webpage, 'duration', fatal=False))
 
         return {
             'id': audio_id,
             'url': audio_url,
             'title': title,
+            'description': description,
             'duration': duration,
-            'view_count': view_count,
-            'comment_count': comment_count,
         }
 
 
