diff options
| -rw-r--r-- | test/test_utils.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/xminus.py | 54 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 51 | 
3 files changed, 95 insertions, 20 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index 8307599b3..9dc879e0d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -47,6 +47,7 @@ from youtube_dl.utils import (      js_to_json,      intlist_to_bytes,      args_to_str, +    parse_filesize,  ) @@ -367,5 +368,14 @@ class TestUtil(unittest.TestCase):              'foo ba/r -baz \'2 be\' \'\''          ) +    def test_parse_filesize(self): +        self.assertEqual(parse_filesize(None), None) +        self.assertEqual(parse_filesize(''), None) +        self.assertEqual(parse_filesize('91 B'), 91) +        self.assertEqual(parse_filesize('foobar'), None) +        self.assertEqual(parse_filesize('2 MiB'), 2097152) +        self.assertEqual(parse_filesize('5 GB'), 5000000000) +        self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py index c92c8451a..a3fd8f48c 100644 --- a/youtube_dl/extractor/xminus.py +++ b/youtube_dl/extractor/xminus.py @@ -2,7 +2,14 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import ( +    compat_chr, +    compat_ord, +) +from ..utils import ( +    int_or_none, +    parse_filesize, +)  class XMinusIE(InfoExtractor): @@ -15,39 +22,46 @@ class XMinusIE(InfoExtractor):              'ext': 'mp3',              'title': 'Леонид Агутин-Песенка шофера',              'duration': 156, +            'tbr': 320, +            'filesize_approx': 5900000, +            'view_count': int,          }      }      def _real_extract(self, url):          video_id = self._match_id(url) - -        # TODO more code goes here, for example ...          webpage = self._download_webpage(url, video_id) +          artist = self._html_search_regex( -            r'minus_track.artist="(.+?)"', webpage, 'artist') +            r'minus_track\.artist="(.+?)"', webpage, 'artist')          title = artist + '-' + self._html_search_regex( -            r'minus_track.title="(.+?)"', webpage, 'title') +            r'minus_track\.title="(.+?)"', webpage, 'title')          duration = int_or_none(self._html_search_regex( -            r'minus_track.dur_sec=\'([0-9]+?)\'', webpage, 'duration')) +            r'minus_track\.dur_sec=\'([0-9]*?)\'', +            webpage, 'duration', fatal=False)) +        filesize_approx = parse_filesize(self._html_search_regex( +            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])', +            webpage, 'approximate filesize', fatal=False)) +        tbr = int_or_none(self._html_search_regex( +            r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps', +            webpage, 'bitrate', fatal=False)) +        view_count = int_or_none(self._html_search_regex( +            r'<div class="quality.*?► ([0-9]+)', +            webpage, 'view count', fatal=False)) +          enc_token = self._html_search_regex(              r'data-mt="(.*?)"', webpage, 'enc_token') -        token = self._decode_token(enc_token) -        url = 'http://x-minus.org/dwlf/{}/{}.mp3'.format(video_id, token) +        token = ''.join( +            c if pos == 3 else compat_chr(compat_ord(c) - 1) +            for pos, c in enumerate(reversed(enc_token))) +        video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)          return {              'id': video_id,              'title': title, -            'url': url, +            'url': video_url,              'duration': duration, +            'filesize_approx': filesize_approx, +            'tbr': tbr, +            'view_count': view_count,          } - -    def _decode_token(self, enc_token): -        token = '' -        pos = 0 -        for c in reversed(enc_token): -            if pos != 3: -                token += chr(ord(c) - 1) -            else: -                token += c -            pos += 1 -        return token diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dd1023eef..f9b5f9867 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1046,6 +1046,57 @@ def format_bytes(bytes):      return '%.2f%s' % (converted, suffix) +def parse_filesize(s): +    if s is None: +        return None + +    # The lower-case forms are of course incorrect and inofficial, +    # but we support those too +    _UNIT_TABLE = { +        'B': 1, +        'b': 1, +        'KiB': 1024, +        'KB': 1000, +        'kB': 1024, +        'Kb': 1000, +        'MiB': 1024 ** 2, +        'MB': 1000 ** 2, +        'mB': 1024 ** 2, +        'Mb': 1000 ** 2, +        'GiB': 1024 ** 3, +        'GB': 1000 ** 3, +        'gB': 1024 ** 3, +        'Gb': 1000 ** 3, +        'TiB': 1024 ** 4, +        'TB': 1000 ** 4, +        'tB': 1024 ** 4, +        'Tb': 1000 ** 4, +        'PiB': 1024 ** 5, +        'PB': 1000 ** 5, +        'pB': 1024 ** 5, +        'Pb': 1000 ** 5, +        'EiB': 1024 ** 6, +        'EB': 1000 ** 6, +        'eB': 1024 ** 6, +        'Eb': 1000 ** 6, +        'ZiB': 1024 ** 7, +        'ZB': 1000 ** 7, +        'zB': 1024 ** 7, +        'Zb': 1000 ** 7, +        'YiB': 1024 ** 8, +        'YB': 1000 ** 8, +        'yB': 1024 ** 8, +        'Yb': 1000 ** 8, +    } + +    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE) +    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s) +    if not m: +        return None + +    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) + +  def get_term_width():      columns = compat_getenv('COLUMNS', None)      if columns: | 
