diff options
| -rw-r--r-- | README.md | 3 | ||||
| -rw-r--r-- | test/test_playlists.py | 21 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 21 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/aes.py | 144 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/aparat.py | 56 | ||||
| -rw-r--r-- | youtube_dl/extractor/blinkx.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/bliptv.py | 87 | ||||
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 171 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 48 | ||||
| -rw-r--r-- | youtube_dl/extractor/imdb.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/ivi.py | 154 | ||||
| -rw-r--r-- | youtube_dl/extractor/mdr.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/ooyala.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/smotri.py | 59 | ||||
| -rw-r--r-- | youtube_dl/extractor/soundcloud.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/vbox7.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 22 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 5 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
24 files changed, 721 insertions, 141 deletions
| @@ -39,7 +39,8 @@ which means you can modify it, redistribute it or use it however you like.                                 /youtube-dl .      --no-cache-dir             Disable filesystem caching      --bidi-workaround          Work around terminals that lack bidirectional -                               text support. Requires fribidi executable in PATH +                               text support. Requires bidiv or fribidi +                               executable in PATH  ## Video Selection:      --playlist-start NUMBER    playlist video to start at (default is 1) diff --git a/test/test_playlists.py b/test/test_playlists.py index 5004d0464..1b7b4e3d8 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -27,7 +27,8 @@ from youtube_dl.extractor import (      BambuserChannelIE,      BandcampAlbumIE,      SmotriCommunityIE, -    SmotriUserIE +    SmotriUserIE, +    IviCompilationIE  ) @@ -168,6 +169,24 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['title'], u'Building Dynamic Websites')          self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")          self.assertEqual(len(result['entries']), 10) +         +    def test_ivi_compilation(self): +        dl = FakeYDL() +        ie = IviCompilationIE(dl) +        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], u'dezhurnyi_angel') +        self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)') +        self.assertTrue(len(result['entries']) >= 36) +         +    def test_ivi_compilation_season(self): +        dl = FakeYDL() +        ie = IviCompilationIE(dl) +        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], u'dezhurnyi_angel/season2') +        self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') +        self.assertTrue(len(result['entries']) >= 20)  if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a93dd41a3..e705c410b 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -183,12 +183,18 @@ class YoutubeDL(object):                      width_args = []                  else:                      width_args = ['-w', str(width)] -                self._fribidi = subprocess.Popen( -                    ['fribidi', '-c', 'UTF-8'] + width_args, +                sp_kwargs = dict(                      stdin=subprocess.PIPE,                      stdout=slave,                      stderr=self._err_file) -                self._fribidi_channel = os.fdopen(master, 'rb') +                try: +                    self._output_process = subprocess.Popen( +                        ['bidiv'] + width_args, **sp_kwargs +                    ) +                except OSError: +                    self._output_process = subprocess.Popen( +                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) +                self._output_channel = os.fdopen(master, 'rb')              except OSError as ose:                  if ose.errno == 2:                      self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.') @@ -243,14 +249,15 @@ class YoutubeDL(object):          pp.set_downloader(self)      def _bidi_workaround(self, message): -        if not hasattr(self, '_fribidi_channel'): +        if not hasattr(self, '_output_channel'):              return message +        assert hasattr(self, '_output_process')          assert type(message) == type(u'')          line_count = message.count(u'\n') + 1 -        self._fribidi.stdin.write((message + u'\n').encode('utf-8')) -        self._fribidi.stdin.flush() -        res = u''.join(self._fribidi_channel.readline().decode('utf-8') +        self._output_process.stdin.write((message + u'\n').encode('utf-8')) +        self._output_process.stdin.flush() +        res = u''.join(self._output_channel.readline().decode('utf-8')                         for _ in range(line_count))          return res[:-len(u'\n')] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6df44020b..c37d28c59 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -56,7 +56,6 @@ from .utils import (      compat_print,      DateRange,      decodeOption, -    determine_ext,      get_term_width,      DownloadError,      get_cachedir, @@ -195,7 +194,7 @@ def parseOpts(overrideArguments=None):          type=float, default=None, help=optparse.SUPPRESS_HELP)      general.add_option(          '--bidi-workaround', dest='bidi_workaround', action='store_true', -        help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH') +        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')      selection.add_option( @@ -525,7 +524,6 @@ def _real_main(argv=None):          for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):              compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))              matchedUrls = [url for url in all_urls if ie.suitable(url)] -            all_urls = [url for url in all_urls if url not in matchedUrls]              for mu in matchedUrls:                  compat_print(u'  ' + mu)          sys.exit(0) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 9a0c93fa6..e9c5e2152 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -1,4 +1,4 @@ -__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text'] +__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']  import base64  from math import ceil @@ -32,6 +32,31 @@ def aes_ctr_decrypt(data, key, counter):      return decrypted_data +def aes_cbc_decrypt(data, key, iv): +    """ +    Decrypt with aes in CBC mode +     +    @param {int[]} data        cipher +    @param {int[]} key         16/24/32-Byte cipher key +    @param {int[]} iv          16-Byte IV +    @returns {int[]}           decrypted data +    """ +    expanded_key = key_expansion(key) +    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) +     +    decrypted_data=[] +    previous_cipher_block = iv +    for i in range(block_count): +        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] +        block += [0]*(BLOCK_SIZE_BYTES - len(block)) +         +        decrypted_block = aes_decrypt(block, expanded_key) +        decrypted_data += xor(decrypted_block, previous_cipher_block) +        previous_cipher_block = block +    decrypted_data = decrypted_data[:len(data)] +     +    return decrypted_data +  def key_expansion(data):      """      Generate key schedule @@ -75,7 +100,7 @@ def aes_encrypt(data, expanded_key):      @returns {int[]}             16-Byte cipher      """      rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 -     +      data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])      for i in range(1, rounds+1):          data = sub_bytes(data) @@ -83,6 +108,26 @@ def aes_encrypt(data, expanded_key):          if i != rounds:              data = mix_columns(data)          data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + +    return data + +def aes_decrypt(data, expanded_key): +    """ +    Decrypt one block with aes +     +    @param {int[]} data          16-Byte cipher +    @param {int[]} expanded_key  176/208/240-Byte expanded key +    @returns {int[]}             16-Byte state +    """ +    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 +     +    for i in range(rounds, 0, -1): +        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) +        if i != rounds: +            data = mix_columns_inv(data) +        data = shift_rows_inv(data) +        data = sub_bytes_inv(data) +    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])      return data @@ -139,14 +184,69 @@ SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,          0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,          0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,          0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) -MIX_COLUMN_MATRIX = ((2,3,1,1), -                     (1,2,3,1), -                     (1,1,2,3), -                     (3,1,1,2)) +SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, +            0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, +            0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, +            0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, +            0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, +            0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, +            0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, +            0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, +            0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, +            0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, +            0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, +            0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, +            0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, +            0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, +            0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, +            0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) +MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1), +                     (0x1,0x2,0x3,0x1), +                     (0x1,0x1,0x2,0x3), +                     (0x3,0x1,0x1,0x2)) +MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9), +                         (0x9,0xE,0xB,0xD), +                         (0xD,0x9,0xE,0xB), +                         (0xB,0xD,0x9,0xE)) +RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, +                      0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, +                      0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, +                      0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, +                      0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, +                      0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, +                      0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, +                      0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, +                      0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, +                      0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, +                      0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, +                      0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, +                      0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, +                      0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, +                      0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, +                      0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) +RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, +                      0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, +                      0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, +                      0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, +                      0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, +                      0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, +                      0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, +                      0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, +                      0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, +                      0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, +                      0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, +                      0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, +                      0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, +                      0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, +                      0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, +                      0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)  def sub_bytes(data):      return [SBOX[x] for x in data] +def sub_bytes_inv(data): +    return [SBOX_INV[x] for x in data] +  def rotate(data):      return data[1:] + [data[0]] @@ -160,30 +260,31 @@ def key_schedule_core(data, rcon_iteration):  def xor(data1, data2):      return [x^y for x, y in zip(data1, data2)] -def mix_column(data): +def rijndael_mul(a, b): +    if(a==0 or b==0): +        return 0 +    return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] + +def mix_column(data, matrix):      data_mixed = []      for row in range(4):          mixed = 0          for column in range(4): -            addend = data[column] -            if MIX_COLUMN_MATRIX[row][column] in (2,3): -                addend <<= 1 -                if addend > 0xff: -                    addend &= 0xff -                    addend ^= 0x1b -                if MIX_COLUMN_MATRIX[row][column] == 3: -                    addend ^= data[column] -            mixed ^= addend & 0xff +            # xor is (+) and (-) +            mixed ^= rijndael_mul(data[column], matrix[row][column])          data_mixed.append(mixed)      return data_mixed -def mix_columns(data): +def mix_columns(data, matrix=MIX_COLUMN_MATRIX):      data_mixed = []      for i in range(4):          column = data[i*4 : (i+1)*4] -        data_mixed += mix_column(column) +        data_mixed += mix_column(column, matrix)      return data_mixed +def mix_columns_inv(data): +    return mix_columns(data, MIX_COLUMN_MATRIX_INV) +  def shift_rows(data):      data_shifted = []      for column in range(4): @@ -191,6 +292,13 @@ def shift_rows(data):              data_shifted.append( data[((column + row) & 0b11) * 4 + row] )      return data_shifted +def shift_rows_inv(data): +    data_shifted = [] +    for column in range(4): +        for row in range(4): +            data_shifted.append( data[((column - row) & 0b11) * 4 + row] ) +    return data_shifted +  def inc(data):      data = data[:] # copy      for i in range(len(data)-1,-1,-1): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f2f8806e..a39a1e2f4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,6 +1,7 @@  from .academicearth import AcademicEarthCourseIE  from .addanime import AddAnimeIE  from .anitube import AnitubeIE +from .aparat import AparatIE  from .appletrailers import AppleTrailersIE  from .archiveorg import ArchiveOrgIE  from .ard import ARDIE @@ -32,6 +33,7 @@ from .collegehumor import CollegeHumorIE  from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE  from .condenast import CondeNastIE  from .criterion import CriterionIE +from .crunchyroll import CrunchyrollIE  from .cspan import CSpanIE  from .d8 import D8IE  from .dailymotion import ( @@ -82,6 +84,10 @@ from .ina import InaIE  from .infoq import InfoQIE  from .instagram import InstagramIE  from .internetvideoarchive import InternetVideoArchiveIE +from .ivi import ( +    IviIE, +    IviCompilationIE +)  from .jeuxvideo import JeuxVideoIE  from .jukebox import JukeboxIE  from .justintv import JustinTVIE diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py new file mode 100644 index 000000000..7e93bc4df --- /dev/null +++ b/youtube_dl/extractor/aparat.py @@ -0,0 +1,56 @@ +#coding: utf-8 + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    HEADRequest, +) + + +class AparatIE(InfoExtractor): +    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' + +    _TEST = { +        u'url': u'http://www.aparat.com/v/wP8On', +        u'file': u'wP8On.mp4', +        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1', +        u'info_dict': { +            u"title": u"تیم گلکسی 11 - زومیت", +        }, +        #u'skip': u'Extremely unreliable', +    } + +    def _real_extract(self, url): +        m = re.match(self._VALID_URL, url) +        video_id = m.group('id') + +        # Note: There is an easier-to-parse configuration at +        # http://www.aparat.com/video/video/config/videohash/%video_id +        # but the URL in there does not work +        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' + +                     video_id + u'/vt/frame') +        webpage = self._download_webpage(embed_url, video_id) + +        video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) +        for i, video_url in enumerate(video_urls): +            req = HEADRequest(video_url) +            res = self._request_webpage( +                req, video_id, note=u'Testing video URL %d' % i, errnote=False) +            if res: +                break +        else: +            raise ExtractorError(u'No working video URLs found') + +        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title') +        thumbnail = self._search_regex( +            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False) + +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +            'ext': 'mp4', +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 48f16b692..144ce64cc 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -9,7 +9,7 @@ from ..utils import (  class BlinkxIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)' +    _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'      _IE_NAME = u'blinkx'      _TEST = { @@ -54,6 +54,10 @@ class BlinkxIE(InfoExtractor):                  })              elif m['type'] == 'original':                  duration = m['d'] +            elif m['type'] == 'youtube': +                yt_id = m['link'] +                self.to_screen(u'Youtube video detected: %s' % yt_id) +                return self.url_result(yt_id, 'Youtube', video_id=yt_id)              elif m['type'] in ('flv', 'mp4'):                  vcodec = remove_start(m['vcodec'], 'ff')                  acodec = remove_start(m['acodec'], 'ff') diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 5e33a69df..0e63208df 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -70,13 +70,14 @@ class BlipTVIE(InfoExtractor):          info = None          urlh = self._request_webpage(request, None, False,              u'unable to download video info webpage') +          if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download              basename = url.split('/')[-1]              title,ext = os.path.splitext(basename)              title = title.decode('UTF-8')              ext = ext.replace('.', '')              self.report_direct_download(title) -            info = { +            return {                  'id': title,                  'url': url,                  'uploader': None, @@ -85,49 +86,47 @@ class BlipTVIE(InfoExtractor):                  'ext': ext,                  'urlhandle': urlh              } -        if info is None: # Regular URL -            try: -                json_code_bytes = urlh.read() -                json_code = json_code_bytes.decode('utf-8') -            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -                raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err)) - -            try: -                json_data = json.loads(json_code) -                if 'Post' in json_data: -                    data = json_data['Post'] -                else: -                    data = json_data - -                upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') -                if 'additionalMedia' in data: -                    formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) -                    best_format = formats[-1] -                    video_url = best_format['url'] -                else: -                    video_url = data['media']['url'] -                umobj = re.match(self._URL_EXT, video_url) -                if umobj is None: -                    raise ValueError('Can not determine filename extension') -                ext = umobj.group(1) - -                info = { -                    'id': compat_str(data['item_id']), -                    'url': video_url, -                    'uploader': data['display_name'], -                    'upload_date': upload_date, -                    'title': data['title'], -                    'ext': ext, -                    'format': data['media']['mimeType'], -                    'thumbnail': data['thumbnailUrl'], -                    'description': data['description'], -                    'player_url': data['embedUrl'], -                    'user_agent': 'iTunes/10.6.1', -                } -            except (ValueError,KeyError) as err: -                raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) - -        return [info] + +        try: +            json_code_bytes = urlh.read() +            json_code = json_code_bytes.decode('utf-8') +        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +            raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err)) + +        try: +            json_data = json.loads(json_code) +            if 'Post' in json_data: +                data = json_data['Post'] +            else: +                data = json_data + +            upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') +            if 'additionalMedia' in data: +                formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) +                best_format = formats[-1] +                video_url = best_format['url'] +            else: +                video_url = data['media']['url'] +            umobj = re.match(self._URL_EXT, video_url) +            if umobj is None: +                raise ValueError('Can not determine filename extension') +            ext = umobj.group(1) + +            return { +                'id': compat_str(data['item_id']), +                'url': video_url, +                'uploader': data['display_name'], +                'upload_date': upload_date, +                'title': data['title'], +                'ext': ext, +                'format': data['media']['mimeType'], +                'thumbnail': data['thumbnailUrl'], +                'description': data['description'], +                'player_url': data['embedUrl'], +                'user_agent': 'iTunes/10.6.1', +            } +        except (ValueError, KeyError) as err: +            raise ExtractorError(u'Unable to parse video information: %s' % repr(err))  class BlipTVUserIE(InfoExtractor): diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index b1b7526ca..f7f0041c0 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -26,7 +26,7 @@ class BrightcoveIE(InfoExtractor):              # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/              u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',              u'file': u'2371591881001.mp4', -            u'md5': u'8eccab865181d29ec2958f32a6a754f5', +            u'md5': u'5423e113865d26e40624dce2e4b45d95',              u'note': u'Test Brightcove downloads and detection in GenericIE',              u'info_dict': {                  u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 939249d7b..ba46a7bc7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -170,6 +170,8 @@ class InfoExtractor(object):          try:              return self._downloader.urlopen(url_or_request)          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +            if errnote is False: +                return False              if errnote is None:                  errnote = u'Unable to download webpage'              errmsg = u'%s: %s' % (errnote, compat_str(err)) @@ -263,7 +265,8 @@ class InfoExtractor(object):          self.to_screen(u'Logging in')      #Methods for following #608 -    def url_result(self, url, ie=None, video_id=None): +    @staticmethod +    def url_result(url, ie=None, video_id=None):          """Returns a url that points to a page that should be processed"""          #TODO: ie should be the class used for getting the info          video_info = {'_type': 'url', @@ -272,7 +275,8 @@ class InfoExtractor(object):          if video_id is not None:              video_info['id'] = video_id          return video_info -    def playlist_result(self, entries, playlist_id=None, playlist_title=None): +    @staticmethod +    def playlist_result(entries, playlist_id=None, playlist_title=None):          """Returns a playlist"""          video_info = {'_type': 'playlist',                        'entries': entries} diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py new file mode 100644 index 000000000..2b66bddbb --- /dev/null +++ b/youtube_dl/extractor/crunchyroll.py @@ -0,0 +1,171 @@ +# encoding: utf-8 +import re, base64, zlib +from hashlib import sha1 +from math import pow, sqrt, floor +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    compat_urllib_parse, +    compat_urllib_request, +    bytes_to_intlist, +    intlist_to_bytes, +    unified_strdate, +    clean_html, +) +from ..aes import ( +    aes_cbc_decrypt, +    inc, +) + +class CrunchyrollIE(InfoExtractor): +    _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)' +    _TESTS = [{ +        u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', +        u'file': u'645513.flv', +        #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412', +        u'info_dict': { +            u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', +            u'description': u'md5:2d17137920c64f2f49981a7797d275ef', +            u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', +            u'uploader': u'Yomiuri Telecasting Corporation (YTV)', +            u'upload_date': u'20131013', +        }, +        u'params': { +            # rtmp +            u'skip_download': True, +        }, +    }] + +    _FORMAT_IDS = { +        u'360': (u'60', u'106'), +        u'480': (u'61', u'106'), +        u'720': (u'62', u'106'), +        u'1080': (u'80', u'108'), +    } + +    def _decrypt_subtitles(self, data, iv, id): +        data = bytes_to_intlist(data) +        iv = bytes_to_intlist(iv) +        id = int(id) + +        def obfuscate_key_aux(count, modulo, start): +            output = list(start) +            for _ in range(count): +                output.append(output[-1] + output[-2]) +            # cut off start values +            output = output[2:] +            output = list(map(lambda x: x % modulo + 33, output)) +            return output + +        def obfuscate_key(key): +            num1 = int(floor(pow(2, 25) * sqrt(6.9))) +            num2 = (num1 ^ key) << 5 +            num3 = key ^ num1 +            num4 = num3 ^ (num3 >> 3) ^ num2 +            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2))) +            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest()) +            # Extend 160 Bit hash to 256 Bit +            return shaHash + [0] * 12 +         +        key = obfuscate_key(id) +        class Counter: +            __value = iv +            def next_value(self): +                temp = self.__value +                self.__value = inc(self.__value) +                return temp +        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) +        return zlib.decompress(decrypted_data) + +    def _convert_subtitles_to_srt(self, subtitles): +        i=1 +        output = u'' +        for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): +            start = start.replace(u'.', u',') +            end = end.replace(u'.', u',') +            text = clean_html(text) +            text = text.replace(u'\\N', u'\n') +            if not text: +                continue +            output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text) +            i+=1 +        return output + +    def _real_extract(self,url): +        mobj = re.match(self._VALID_URL, url) + +        webpage_url = u'http://www.' + mobj.group('url') +        video_id = mobj.group(u'video_id') +        webpage = self._download_webpage(webpage_url, video_id) +        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'') +        if note_m: +            raise ExtractorError(note_m) + +        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL) +        video_title = re.sub(r' {2,}', u' ', video_title) +        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'') +        if not video_description: +            video_description = None +        video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL) +        if video_upload_date: +            video_upload_date = unified_strdate(video_upload_date) +        video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL) + +        playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url')) +        playerdata_req = compat_urllib_request.Request(playerdata_url) +        playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url}) +        playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded') +        playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info') +         +        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id') +        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False) + +        formats = [] +        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): +            stream_quality, stream_format = self._FORMAT_IDS[fmt] +            video_format = fmt+u'p' +            streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/') +            # urlencode doesn't work! +            streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format +            streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded') +            streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data))) +            streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format) +            video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url') +            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path') +            formats.append({ +                u'url': video_url, +                u'play_path':   video_play_path, +                u'ext': 'flv', +                u'format': video_format, +                u'format_id': video_format, +            }) + +        subtitles = {} +        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): +            sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ +                                              video_id, note=u'Downloading subtitles for '+sub_name) +            id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False) +            iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False) +            data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False) +            if not id or not iv or not data: +                continue +            id = int(id) +            iv = base64.b64decode(iv) +            data = base64.b64decode(data) + +            subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8') +            lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False) +            if not lang_code: +                continue +            subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) + +        return { +            u'id':          video_id, +            u'title':       video_title, +            u'description': video_description, +            u'thumbnail':   video_thumbnail, +            u'uploader':    video_uploader, +            u'upload_date': video_upload_date, +            u'subtitles':   subtitles, +            u'formats':     formats, +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index fd32370c2..7a14c98f9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,12 +11,14 @@ from ..utils import (      compat_urlparse,      ExtractorError, +    HEADRequest,      smuggle_url,      unescapeHTML,      unified_strdate,      url_basename,  )  from .brightcove import BrightcoveIE +from .ooyala import OoyalaIE  class GenericIE(InfoExtractor): @@ -83,7 +85,17 @@ class GenericIE(InfoExtractor):                  u'title': u'trailer',                  u'upload_date': u'20100513',              } -        } +        }, +        # ooyala video +        { +            u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', +            u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c', +            u'info_dict': { +                u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', +                u'ext': u'mp4', +                u'title': u'2cc213299525360.mov', #that's what we get +            }, +        },      ]      def report_download_webpage(self, video_id): @@ -98,21 +110,18 @@ class GenericIE(InfoExtractor):      def _send_head(self, url):          """Check if it is a redirect, like url shorteners, in case return the new url.""" -        class HeadRequest(compat_urllib_request.Request): -            def get_method(self): -                return "HEAD"          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):              """              Subclass the HTTPRedirectHandler to make it use our -            HeadRequest also on the redirected URL +            HEADRequest also on the redirected URL              """              def redirect_request(self, req, fp, code, msg, headers, newurl):                  if code in (301, 302, 303, 307):                      newurl = newurl.replace(' ', '%20')                      newheaders = dict((k,v) for k,v in req.headers.items()                                        if k.lower() not in ("content-length", "content-type")) -                    return HeadRequest(newurl, +                    return HEADRequest(newurl,                                         headers=newheaders,                                         origin_req_host=req.get_origin_req_host(),                                         unverifiable=True) @@ -141,7 +150,7 @@ class GenericIE(InfoExtractor):                          compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:              opener.add_handler(handler()) -        response = opener.open(HeadRequest(url)) +        response = opener.open(HEADRequest(url))          if response is None:              raise ExtractorError(u'Invalid URL protocol')          return response @@ -213,7 +222,7 @@ class GenericIE(InfoExtractor):              self.to_screen(u'Brightcove video detected.')              return self.url_result(bc_url, 'Brightcove') -        # Look for embedded Vimeo player +        # Look for embedded (iframe) Vimeo player          mobj = re.search(              r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)          if mobj: @@ -221,9 +230,18 @@ class GenericIE(InfoExtractor):              surl = smuggle_url(player_url, {'Referer': url})              return self.url_result(surl, 'Vimeo') +        # Look for embedded (swf embed) Vimeo player +        mobj = re.search( +            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage) +        if mobj: +            return self.url_result(mobj.group(1), 'Vimeo') +          # Look for embedded YouTube player -        matches = re.findall( -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage) +        matches = re.findall(r'''(?x) +            (?:<iframe[^>]+?src=|embedSWF\(\s*) +            (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ +                (?:embed|v)/.+?) +            \1''', webpage)          if matches:              urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')                       for tuppl in matches] @@ -277,6 +295,16 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url')) +        # Look for Ooyala videos +        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage) +        if mobj is not None: +            return OoyalaIE._build_url_result(mobj.group(1)) + +        # Look for Aparat videos +        mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage) +        if mobj is not None: +            return self.url_result(mobj.group(1), 'Aparat') +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None: diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 6fb373db2..e5332cce8 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -11,7 +11,7 @@ from ..utils import (  class ImdbIE(InfoExtractor):      IE_NAME = u'imdb'      IE_DESC = u'Internet Movie Database trailers' -    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)' +    _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'      _TEST = {          u'url': u'http://www.imdb.com/video/imdb/vi2524815897', @@ -27,7 +27,7 @@ class ImdbIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        webpage = self._download_webpage(url,video_id) +        webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)          descr = get_element_by_attribute('itemprop', 'description', webpage)          available_formats = re.findall(              r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage, diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py new file mode 100644 index 000000000..4bdf55f93 --- /dev/null +++ b/youtube_dl/extractor/ivi.py @@ -0,0 +1,154 @@ +# encoding: utf-8 + +import re +import json + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_request, +    ExtractorError, +) + + +class IviIE(InfoExtractor): +    IE_DESC = u'ivi.ru' +    IE_NAME = u'ivi' +    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' + +    _TESTS = [ +        # Single movie +        { +            u'url': u'http://www.ivi.ru/watch/53141', +            u'file': u'53141.mp4', +            u'md5': u'6ff5be2254e796ed346251d117196cf4', +            u'info_dict': { +                u'title': u'Иван Васильевич меняет профессию', +                u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346', +                u'duration': 5498, +                u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', +            }, +            u'skip': u'Only works from Russia', +        }, +        # Serial's serie +        { +            u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791', +            u'file': u'74791.mp4', +            u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9', +            u'info_dict': { +                u'title': u'Дежурный ангел - 1 серия', +                u'duration': 2490, +                u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', +            }, +            u'skip': u'Only works from Russia', +         } +    ] +     +    # Sorted by quality +    _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] + +    # Sorted by size +    _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480'] + +    def _extract_description(self, html): +        m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html) +        return m.group('description') if m is not None else None + +    def _extract_comment_count(self, html): +        m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html) +        return int(m.group('commentcount')) if m is not None else 0 + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('videoid') + +        api_url = 'http://api.digitalaccess.ru/api/json/' + +        data = {u'method': u'da.content.get', +                u'params': [video_id, {u'site': u's183', +                                       u'referrer': u'http://www.ivi.ru/watch/%s' % video_id, +                                       u'contentid': video_id +                                    } +                            ] +                } + +        request = compat_urllib_request.Request(api_url, json.dumps(data)) + +        video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON') +        video_json = json.loads(video_json_page) + +        if u'error' in video_json: +            error = video_json[u'error'] +            if error[u'origin'] == u'NoRedisValidData': +                raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) +            raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True) + +        result = video_json[u'result'] + +        formats = [{'url': x[u'url'], +                    'format_id': x[u'content_format'] +                    } for x in result[u'files'] if x[u'content_format'] in self._known_formats] +        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) + +        if len(formats) == 0: +            self._downloader.report_warning(u'No media links available for %s' % video_id) +            return + +        duration = result[u'duration'] +        compilation = result[u'compilation'] +        title = result[u'title'] + +        title = '%s - %s' % (compilation, title) if compilation is not None else title   + +        previews = result[u'preview'] +        previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) +        thumbnail = previews[-1][u'url'] if len(previews) > 0 else None + +        video_page = self._download_webpage(url, video_id, u'Downloading video page') +        description = self._extract_description(video_page) +        comment_count = self._extract_comment_count(video_page) + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'description': description, +            'duration': duration, +            'comment_count': comment_count, +            'formats': formats, +        } + + +class IviCompilationIE(InfoExtractor): +    IE_DESC = u'ivi.ru compilations' +    IE_NAME = u'ivi:compilation' +    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$' + +    def _extract_entries(self, html, compilation_id): +        return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi') +                for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        compilation_id = mobj.group('compilationid') +        season_id = mobj.group('seasonid') + +        if season_id is not None: # Season link +            season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id) +            playlist_id = '%s/season%s' % (compilation_id, season_id) +            playlist_title = self._html_search_meta(u'title', season_page, u'title') +            entries = self._extract_entries(season_page, compilation_id) +        else: # Compilation link             +            compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page') +            playlist_id = compilation_id +            playlist_title = self._html_search_meta(u'title', compilation_page, u'title') +            seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page) +            if len(seasons) == 0: # No seasons in this compilation +                entries = self._extract_entries(compilation_page, compilation_id) +            else: +                entries = [] +                for season_id in seasons: +                    season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id), +                                                         compilation_id, u'Downloading season %s web page' % season_id) +                    entries.extend(self._extract_entries(season_page, compilation_id)) + +        return self.playlist_result(entries, playlist_id, playlist_title)
\ No newline at end of file diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index d29cf2c07..08ce0647f 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -8,23 +8,8 @@ from ..utils import (  class MDRIE(InfoExtractor):      _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*' - -    _TESTS = [{ -        u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html', -        u'file': u'165624.mp4', -        u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815', -        u'info_dict': { -            u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr" -        }, -    }, -    { -        u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html', -        u'file': u'718370.mp3', -        u'md5': u'a9d21345a234c7b45dee612f290fd8d7', -        u'info_dict': { -            u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr" -        }, -    }] +     +    # No tests, MDR regularily deletes its videos      def _real_extract(self, url):          m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 1f7b4d2e7..d08e47734 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -22,6 +22,11 @@ class OoyalaIE(InfoExtractor):      def _url_for_embed_code(embed_code):          return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code +    @classmethod +    def _build_url_result(cls, embed_code): +        return cls.url_result(cls._url_for_embed_code(embed_code), +            ie=cls.ie_key()) +      def _extract_result(self, info, more_info):          return {'id': info['embedCode'],                  'ext': 'mp4', diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 4ea89bf85..beea58d63 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,5 +1,6 @@  # encoding: utf-8 +import os.path  import re  import json  import hashlib @@ -10,6 +11,7 @@ from ..utils import (      compat_urllib_parse,      compat_urllib_request,      ExtractorError, +    url_basename,  ) @@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor):          # We will extract some from the video web page instead          video_page_url = 'http://' + mobj.group('url')          video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') -         + +        # Warning if video is unavailable +        warning = self._html_search_regex( +            r'<div class="videoUnModer">(.*?)</div>', video_page, +            u'warning messagef', default=None) +        if warning is not None: +            self._downloader.report_warning( +                u'Video %s may not be available; smotri said: %s ' % +                (video_id, warning)) +          # Adult content          if re.search(u'EroConfirmText">', video_page) is not None:              self.report_age_confirmation() @@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor):          # Extract the rest of meta data          video_title = self._search_meta(u'name', video_page, u'title')          if not video_title: -            video_title = video_url.rsplit('/', 1)[-1] +            video_title = os.path.splitext(url_basename(video_url))[0]          video_description = self._search_meta(u'description', video_page)          END_TEXT = u' на сайте Smotri.com' -        if video_description.endswith(END_TEXT): +        if video_description and video_description.endswith(END_TEXT):              video_description = video_description[:-len(END_TEXT)]          START_TEXT = u'Смотреть онлайн ролик ' -        if video_description.startswith(START_TEXT): +        if video_description and video_description.startswith(START_TEXT):              video_description = video_description[len(START_TEXT):]          video_thumbnail = self._search_meta(u'thumbnail', video_page)          upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') -        upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) -        video_upload_date = ( -            ( -                upload_date_m.group('year') + -                upload_date_m.group('month') + -                upload_date_m.group('day') +        if upload_date_str: +            upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) +            video_upload_date = ( +                ( +                    upload_date_m.group('year') + +                    upload_date_m.group('month') + +                    upload_date_m.group('day') +                ) +                if upload_date_m else None              ) -            if upload_date_m else None -        ) +        else: +            video_upload_date = None          duration_str = self._search_meta(u'duration', video_page) -        duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) -        video_duration = ( -            ( -                (int(duration_m.group('hours')) * 60 * 60) + -                (int(duration_m.group('minutes')) * 60) + -                int(duration_m.group('seconds')) +        if duration_str: +            duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) +            video_duration = ( +                ( +                    (int(duration_m.group('hours')) * 60 * 60) + +                    (int(duration_m.group('minutes')) * 60) + +                    int(duration_m.group('seconds')) +                ) +                if duration_m else None              ) -            if duration_m else None -        ) +        else: +            video_duration = None          video_uploader = self._html_search_regex(              u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', @@ -202,7 +219,7 @@ class SmotriIE(InfoExtractor):              'uploader': video_uploader,              'upload_date': video_upload_date,              'uploader_id': video_uploader_id, -            'video_duration': video_duration, +            'duration': video_duration,              'view_count': video_view_count,              'age_limit': 18 if adult_content else 0,              'video_page_url': video_page_url diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index cbba4094b..e22ff9c38 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -24,7 +24,7 @@ class SoundcloudIE(InfoExtractor):       """      _VALID_URL = r'''^(?:https?://)? -                    (?:(?:(?:www\.)?soundcloud\.com/ +                    (?:(?:(?:www\.|m\.)?soundcloud\.com/                              (?P<uploader>[\w\d-]+)/                              (?!sets/)(?P<title>[\w\d-]+)/?                              (?P<token>[^?]+?)?(?:[?].*)?$) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 4f803bcd3..5a136a952 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -15,7 +15,7 @@ class Vbox7IE(InfoExtractor):      _TEST = {          u'url': u'http://vbox7.com/play:249bb972c2',          u'file': u'249bb972c2.flv', -        u'md5': u'9c70d6d956f888bdc08c124acc120cfe', +        u'md5': u'99f65c0c9ef9b682b97313e052734c3f',          u'info_dict': {              u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"          } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ea4409528..c3623fcbe 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,11 +16,20 @@ from ..utils import (      unsmuggle_url,  ) +  class VimeoIE(InfoExtractor):      """Information extractor for vimeo.com."""      # _VALID_URL matches Vimeo URLs -    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' +    _VALID_URL = r'''(?x) +        (?P<proto>https?://)? +        (?:(?:www|(?P<player>player))\.)? +        vimeo(?P<pro>pro)?\.com/ +        (?:.*?/)? +        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? +        (?:videos?/)? +        (?P<id>[0-9]+) +        /?(?:[?&].*)?(?:[#].*)?$'''      _NETRC_MACHINE = 'vimeo'      IE_NAME = u'vimeo'      _TESTS = [ diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 58d274970..9fb07b366 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1666,7 +1666,7 @@ class YoutubeUserIE(InfoExtractor):          # page by page until there are no video ids - it means we got          # all of them. -        video_ids = [] +        url_results = []          for pagenum in itertools.count(0):              start_index = pagenum * self._GDATA_PAGE_SIZE + 1 @@ -1684,10 +1684,17 @@ class YoutubeUserIE(InfoExtractor):                  break              # Extract video identifiers -            ids_in_page = [] -            for entry in response['feed']['entry']: -                ids_in_page.append(entry['id']['$t'].split('/')[-1]) -            video_ids.extend(ids_in_page) +            entries = response['feed']['entry'] +            for entry in entries: +                title = entry['title']['$t'] +                video_id = entry['id']['$t'].split('/')[-1] +                url_results.append({ +                    '_type': 'url', +                    'url': video_id, +                    'ie_key': 'Youtube', +                    'id': 'video_id', +                    'title': title, +                })              # A little optimization - if current page is not              # "full", ie. does not contain PAGE_SIZE video ids then @@ -1695,12 +1702,9 @@ class YoutubeUserIE(InfoExtractor):              # are no more ids on further pages - no need to query              # again. -            if len(ids_in_page) < self._GDATA_PAGE_SIZE: +            if len(entries) < self._GDATA_PAGE_SIZE:                  break -        url_results = [ -            self.url_result(video_id, 'Youtube', video_id=video_id) -            for video_id in video_ids]          return self.playlist_result(url_results, playlist_title=username) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cc391bddd..2e48f187e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1093,3 +1093,8 @@ def remove_start(s, start):  def url_basename(url):      path = compat_urlparse.urlparse(url).path      return path.strip(u'/').split(u'/')[-1] + + +class HEADRequest(compat_urllib_request.Request): +    def get_method(self): +        return "HEAD" diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7cbee7335..80b722608 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.17.2' +__version__ = '2013.12.23.2' | 
