aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/YoutubeDL.py21
-rw-r--r--youtube_dl/__init__.py4
-rw-r--r--youtube_dl/aes.py144
-rw-r--r--youtube_dl/extractor/__init__.py6
-rw-r--r--youtube_dl/extractor/aparat.py56
-rw-r--r--youtube_dl/extractor/blinkx.py6
-rw-r--r--youtube_dl/extractor/bliptv.py87
-rw-r--r--youtube_dl/extractor/brightcove.py2
-rw-r--r--youtube_dl/extractor/common.py8
-rw-r--r--youtube_dl/extractor/crunchyroll.py171
-rw-r--r--youtube_dl/extractor/generic.py48
-rw-r--r--youtube_dl/extractor/imdb.py4
-rw-r--r--youtube_dl/extractor/ivi.py154
-rw-r--r--youtube_dl/extractor/mdr.py19
-rw-r--r--youtube_dl/extractor/ooyala.py5
-rw-r--r--youtube_dl/extractor/smotri.py59
-rw-r--r--youtube_dl/extractor/soundcloud.py2
-rw-r--r--youtube_dl/extractor/vbox7.py2
-rw-r--r--youtube_dl/extractor/vimeo.py11
-rw-r--r--youtube_dl/extractor/youtube.py22
-rw-r--r--youtube_dl/utils.py5
-rw-r--r--youtube_dl/version.py2
22 files changed, 699 insertions, 139 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a93dd41a3..e705c410b 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -183,12 +183,18 @@ class YoutubeDL(object):
width_args = []
else:
width_args = ['-w', str(width)]
- self._fribidi = subprocess.Popen(
- ['fribidi', '-c', 'UTF-8'] + width_args,
+ sp_kwargs = dict(
stdin=subprocess.PIPE,
stdout=slave,
stderr=self._err_file)
- self._fribidi_channel = os.fdopen(master, 'rb')
+ try:
+ self._output_process = subprocess.Popen(
+ ['bidiv'] + width_args, **sp_kwargs
+ )
+ except OSError:
+ self._output_process = subprocess.Popen(
+ ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
@@ -243,14 +249,15 @@ class YoutubeDL(object):
pp.set_downloader(self)
def _bidi_workaround(self, message):
- if not hasattr(self, '_fribidi_channel'):
+ if not hasattr(self, '_output_channel'):
return message
+ assert hasattr(self, '_output_process')
assert type(message) == type(u'')
line_count = message.count(u'\n') + 1
- self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
- self._fribidi.stdin.flush()
- res = u''.join(self._fribidi_channel.readline().decode('utf-8')
+ self._output_process.stdin.write((message + u'\n').encode('utf-8'))
+ self._output_process.stdin.flush()
+ res = u''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len(u'\n')]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 6df44020b..c37d28c59 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -56,7 +56,6 @@ from .utils import (
compat_print,
DateRange,
decodeOption,
- determine_ext,
get_term_width,
DownloadError,
get_cachedir,
@@ -195,7 +194,7 @@ def parseOpts(overrideArguments=None):
type=float, default=None, help=optparse.SUPPRESS_HELP)
general.add_option(
'--bidi-workaround', dest='bidi_workaround', action='store_true',
- help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
+ help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
selection.add_option(
@@ -525,7 +524,6 @@ def _real_main(argv=None):
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)]
- all_urls = [url for url in all_urls if url not in matchedUrls]
for mu in matchedUrls:
compat_print(u' ' + mu)
sys.exit(0)
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index 9a0c93fa6..e9c5e2152 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -1,4 +1,4 @@
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
@@ -32,6 +32,31 @@ def aes_ctr_decrypt(data, key, counter):
return decrypted_data
+def aes_cbc_decrypt(data, key, iv):
+ """
+ Decrypt with aes in CBC mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data=[]
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
+ block += [0]*(BLOCK_SIZE_BYTES - len(block))
+
+ decrypted_block = aes_decrypt(block, expanded_key)
+ decrypted_data += xor(decrypted_block, previous_cipher_block)
+ previous_cipher_block = block
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
def key_expansion(data):
"""
Generate key schedule
@@ -75,7 +100,7 @@ def aes_encrypt(data, expanded_key):
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-
+
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1):
data = sub_bytes(data)
@@ -83,6 +108,26 @@ def aes_encrypt(data, expanded_key):
if i != rounds:
data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+
+ return data
+
+def aes_decrypt(data, expanded_key):
+ """
+ Decrypt one block with aes
+
+ @param {int[]} data 16-Byte cipher
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte state
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ for i in range(rounds, 0, -1):
+ data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+ if i != rounds:
+ data = mix_columns_inv(data)
+ data = shift_rows_inv(data)
+ data = sub_bytes_inv(data)
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
return data
@@ -139,14 +184,69 @@ SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
-MIX_COLUMN_MATRIX = ((2,3,1,1),
- (1,2,3,1),
- (1,1,2,3),
- (3,1,1,2))
+SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
+MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
+ (0x1,0x2,0x3,0x1),
+ (0x1,0x1,0x2,0x3),
+ (0x3,0x1,0x1,0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
+ (0x9,0xE,0xB,0xD),
+ (0xD,0x9,0xE,0xB),
+ (0xB,0xD,0x9,0xE))
+RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
+ 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
+ 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
+ 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
+ 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
+ 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
+ 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
+ 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
+ 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
+ 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
+ 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
+ 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
+ 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
+ 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
+ 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
+RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def sub_bytes(data):
return [SBOX[x] for x in data]
+def sub_bytes_inv(data):
+ return [SBOX_INV[x] for x in data]
+
def rotate(data):
return data[1:] + [data[0]]
@@ -160,30 +260,31 @@ def key_schedule_core(data, rcon_iteration):
def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)]
-def mix_column(data):
+def rijndael_mul(a, b):
+ if(a==0 or b==0):
+ return 0
+ return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
+
+def mix_column(data, matrix):
data_mixed = []
for row in range(4):
mixed = 0
for column in range(4):
- addend = data[column]
- if MIX_COLUMN_MATRIX[row][column] in (2,3):
- addend <<= 1
- if addend > 0xff:
- addend &= 0xff
- addend ^= 0x1b
- if MIX_COLUMN_MATRIX[row][column] == 3:
- addend ^= data[column]
- mixed ^= addend & 0xff
+ # xor is (+) and (-)
+ mixed ^= rijndael_mul(data[column], matrix[row][column])
data_mixed.append(mixed)
return data_mixed
-def mix_columns(data):
+def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed = []
for i in range(4):
column = data[i*4 : (i+1)*4]
- data_mixed += mix_column(column)
+ data_mixed += mix_column(column, matrix)
return data_mixed
+def mix_columns_inv(data):
+ return mix_columns(data, MIX_COLUMN_MATRIX_INV)
+
def shift_rows(data):
data_shifted = []
for column in range(4):
@@ -191,6 +292,13 @@ def shift_rows(data):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted
+def shift_rows_inv(data):
+ data_shifted = []
+ for column in range(4):
+ for row in range(4):
+ data_shifted.append( data[((column - row) & 0b11) * 4 + row] )
+ return data_shifted
+
def inc(data):
data = data[:] # copy
for i in range(len(data)-1,-1,-1):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 7f2f8806e..a39a1e2f4 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,6 +1,7 @@
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .anitube import AnitubeIE
+from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
@@ -32,6 +33,7 @@ from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
+from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
@@ -82,6 +84,10 @@ from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .internetvideoarchive import InternetVideoArchiveIE
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
new file mode 100644
index 000000000..7e93bc4df
--- /dev/null
+++ b/youtube_dl/extractor/aparat.py
@@ -0,0 +1,56 @@
+#coding: utf-8
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ HEADRequest,
+)
+
+
+class AparatIE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+
+ _TEST = {
+ u'url': u'http://www.aparat.com/v/wP8On',
+ u'file': u'wP8On.mp4',
+ u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
+ u'info_dict': {
+ u"title": u"تیم گلکسی 11 - زومیت",
+ },
+ #u'skip': u'Extremely unreliable',
+ }
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('id')
+
+ # Note: There is an easier-to-parse configuration at
+ # http://www.aparat.com/video/video/config/videohash/%video_id
+ # but the URL in there does not work
+ embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
+ video_id + u'/vt/frame')
+ webpage = self._download_webpage(embed_url, video_id)
+
+ video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+ for i, video_url in enumerate(video_urls):
+ req = HEADRequest(video_url)
+ res = self._request_webpage(
+ req, video_id, note=u'Testing video URL %d' % i, errnote=False)
+ if res:
+ break
+ else:
+ raise ExtractorError(u'No working video URLs found')
+
+ title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
+ thumbnail = self._search_regex(
+ r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py
index 48f16b692..144ce64cc 100644
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -9,7 +9,7 @@ from ..utils import (
class BlinkxIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
+ _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
_IE_NAME = u'blinkx'
_TEST = {
@@ -54,6 +54,10 @@ class BlinkxIE(InfoExtractor):
})
elif m['type'] == 'original':
duration = m['d']
+ elif m['type'] == 'youtube':
+ yt_id = m['link']
+ self.to_screen(u'Youtube video detected: %s' % yt_id)
+ return self.url_result(yt_id, 'Youtube', video_id=yt_id)
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 5e33a69df..0e63208df 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -70,13 +70,14 @@ class BlipTVIE(InfoExtractor):
info = None
urlh = self._request_webpage(request, None, False,
u'unable to download video info webpage')
+
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
- info = {
+ return {
'id': title,
'url': url,
'uploader': None,
@@ -85,49 +86,47 @@ class BlipTVIE(InfoExtractor):
'ext': ext,
'urlhandle': urlh
}
- if info is None: # Regular URL
- try:
- json_code_bytes = urlh.read()
- json_code = json_code_bytes.decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
-
- try:
- json_data = json.loads(json_code)
- if 'Post' in json_data:
- data = json_data['Post']
- else:
- data = json_data
-
- upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
- if 'additionalMedia' in data:
- formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
- best_format = formats[-1]
- video_url = best_format['url']
- else:
- video_url = data['media']['url']
- umobj = re.match(self._URL_EXT, video_url)
- if umobj is None:
- raise ValueError('Can not determine filename extension')
- ext = umobj.group(1)
-
- info = {
- 'id': compat_str(data['item_id']),
- 'url': video_url,
- 'uploader': data['display_name'],
- 'upload_date': upload_date,
- 'title': data['title'],
- 'ext': ext,
- 'format': data['media']['mimeType'],
- 'thumbnail': data['thumbnailUrl'],
- 'description': data['description'],
- 'player_url': data['embedUrl'],
- 'user_agent': 'iTunes/10.6.1',
- }
- except (ValueError,KeyError) as err:
- raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
-
- return [info]
+
+ try:
+ json_code_bytes = urlh.read()
+ json_code = json_code_bytes.decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
+
+ try:
+ json_data = json.loads(json_code)
+ if 'Post' in json_data:
+ data = json_data['Post']
+ else:
+ data = json_data
+
+ upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
+ if 'additionalMedia' in data:
+ formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
+ best_format = formats[-1]
+ video_url = best_format['url']
+ else:
+ video_url = data['media']['url']
+ umobj = re.match(self._URL_EXT, video_url)
+ if umobj is None:
+ raise ValueError('Can not determine filename extension')
+ ext = umobj.group(1)
+
+ return {
+ 'id': compat_str(data['item_id']),
+ 'url': video_url,
+ 'uploader': data['display_name'],
+ 'upload_date': upload_date,
+ 'title': data['title'],
+ 'ext': ext,
+ 'format': data['media']['mimeType'],
+ 'thumbnail': data['thumbnailUrl'],
+ 'description': data['description'],
+ 'player_url': data['embedUrl'],
+ 'user_agent': 'iTunes/10.6.1',
+ }
+ except (ValueError, KeyError) as err:
+ raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
class BlipTVUserIE(InfoExtractor):
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index b1b7526ca..f7f0041c0 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -26,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
- u'md5': u'8eccab865181d29ec2958f32a6a754f5',
+ u'md5': u'5423e113865d26e40624dce2e4b45d95',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 939249d7b..ba46a7bc7 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -170,6 +170,8 @@ class InfoExtractor(object):
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ if errnote is False:
+ return False
if errnote is None:
errnote = u'Unable to download webpage'
errmsg = u'%s: %s' % (errnote, compat_str(err))
@@ -263,7 +265,8 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
- def url_result(self, url, ie=None, video_id=None):
+ @staticmethod
+ def url_result(url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
@@ -272,7 +275,8 @@ class InfoExtractor(object):
if video_id is not None:
video_info['id'] = video_id
return video_info
- def playlist_result(self, entries, playlist_id=None, playlist_title=None):
+ @staticmethod
+ def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
new file mode 100644
index 000000000..2b66bddbb
--- /dev/null
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -0,0 +1,171 @@
+# encoding: utf-8
+import re, base64, zlib
+from hashlib import sha1
+from math import pow, sqrt, floor
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_parse,
+ compat_urllib_request,
+ bytes_to_intlist,
+ intlist_to_bytes,
+ unified_strdate,
+ clean_html,
+)
+from ..aes import (
+ aes_cbc_decrypt,
+ inc,
+)
+
+class CrunchyrollIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _TESTS = [{
+ u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+ u'file': u'645513.flv',
+ #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
+ u'info_dict': {
+ u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+ u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
+ u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+ u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
+ u'upload_date': u'20131013',
+ },
+ u'params': {
+ # rtmp
+ u'skip_download': True,
+ },
+ }]
+
+ _FORMAT_IDS = {
+ u'360': (u'60', u'106'),
+ u'480': (u'61', u'106'),
+ u'720': (u'62', u'106'),
+ u'1080': (u'80', u'108'),
+ }
+
+ def _decrypt_subtitles(self, data, iv, id):
+ data = bytes_to_intlist(data)
+ iv = bytes_to_intlist(iv)
+ id = int(id)
+
+ def obfuscate_key_aux(count, modulo, start):
+ output = list(start)
+ for _ in range(count):
+ output.append(output[-1] + output[-2])
+ # cut off start values
+ output = output[2:]
+ output = list(map(lambda x: x % modulo + 33, output))
+ return output
+
+ def obfuscate_key(key):
+ num1 = int(floor(pow(2, 25) * sqrt(6.9)))
+ num2 = (num1 ^ key) << 5
+ num3 = key ^ num1
+ num4 = num3 ^ (num3 >> 3) ^ num2
+ prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
+ shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
+ # Extend 160 Bit hash to 256 Bit
+ return shaHash + [0] * 12
+
+ key = obfuscate_key(id)
+ class Counter:
+ __value = iv
+ def next_value(self):
+ temp = self.__value
+ self.__value = inc(self.__value)
+ return temp
+ decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
+ return zlib.decompress(decrypted_data)
+
+ def _convert_subtitles_to_srt(self, subtitles):
+ i=1
+ output = u''
+ for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
+ start = start.replace(u'.', u',')
+ end = end.replace(u'.', u',')
+ text = clean_html(text)
+ text = text.replace(u'\\N', u'\n')
+ if not text:
+ continue
+ output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
+ i+=1
+ return output
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+
+ webpage_url = u'http://www.' + mobj.group('url')
+ video_id = mobj.group(u'video_id')
+ webpage = self._download_webpage(webpage_url, video_id)
+ note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
+ if note_m:
+ raise ExtractorError(note_m)
+
+ video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
+ video_title = re.sub(r' {2,}', u' ', video_title)
+ video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
+ if not video_description:
+ video_description = None
+ video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
+ if video_upload_date:
+ video_upload_date = unified_strdate(video_upload_date)
+ video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
+
+ playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
+ playerdata_req = compat_urllib_request.Request(playerdata_url)
+ playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
+ playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
+ playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
+
+ stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
+ video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
+
+ formats = []
+ for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
+ stream_quality, stream_format = self._FORMAT_IDS[fmt]
+ video_format = fmt+u'p'
+ streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
+ # urlencode doesn't work!
+ streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
+ streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
+ streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
+ streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
+ video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
+ video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
+ formats.append({
+ u'url': video_url,
+ u'play_path': video_play_path,
+ u'ext': 'flv',
+ u'format': video_format,
+ u'format_id': video_format,
+ })
+
+ subtitles = {}
+ for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+ sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
+ video_id, note=u'Downloading subtitles for '+sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
+ if not id or not iv or not data:
+ continue
+ id = int(id)
+ iv = base64.b64decode(iv)
+ data = base64.b64decode(data)
+
+ subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
+ lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
+ if not lang_code:
+ continue
+ subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
+
+ return {
+ u'id': video_id,
+ u'title': video_title,
+ u'description': video_description,
+ u'thumbnail': video_thumbnail,
+ u'uploader': video_uploader,
+ u'upload_date': video_upload_date,
+ u'subtitles': subtitles,
+ u'formats': formats,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index fd32370c2..7a14c98f9 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -11,12 +11,14 @@ from ..utils import (
compat_urlparse,
ExtractorError,
+ HEADRequest,
smuggle_url,
unescapeHTML,
unified_strdate,
url_basename,
)
from .brightcove import BrightcoveIE
+from .ooyala import OoyalaIE
class GenericIE(InfoExtractor):
@@ -83,7 +85,17 @@ class GenericIE(InfoExtractor):
u'title': u'trailer',
u'upload_date': u'20100513',
}
- }
+ },
+ # ooyala video
+ {
+ u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+ u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c',
+ u'info_dict': {
+ u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+ u'ext': u'mp4',
+ u'title': u'2cc213299525360.mov', #that's what we get
+ },
+ },
]
def report_download_webpage(self, video_id):
@@ -98,21 +110,18 @@ class GenericIE(InfoExtractor):
def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url."""
- class HeadRequest(compat_urllib_request.Request):
- def get_method(self):
- return "HEAD"
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
"""
Subclass the HTTPRedirectHandler to make it use our
- HeadRequest also on the redirected URL
+ HEADRequest also on the redirected URL
"""
def redirect_request(self, req, fp, code, msg, headers, newurl):
if code in (301, 302, 303, 307):
newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
- return HeadRequest(newurl,
+ return HEADRequest(newurl,
headers=newheaders,
origin_req_host=req.get_origin_req_host(),
unverifiable=True)
@@ -141,7 +150,7 @@ class GenericIE(InfoExtractor):
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
- response = opener.open(HeadRequest(url))
+ response = opener.open(HEADRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
return response
@@ -213,7 +222,7 @@ class GenericIE(InfoExtractor):
self.to_screen(u'Brightcove video detected.')
return self.url_result(bc_url, 'Brightcove')
- # Look for embedded Vimeo player
+ # Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
if mobj:
@@ -221,9 +230,18 @@ class GenericIE(InfoExtractor):
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl, 'Vimeo')
+ # Look for embedded (swf embed) Vimeo player
+ mobj = re.search(
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+ if mobj:
+ return self.url_result(mobj.group(1), 'Vimeo')
+
# Look for embedded YouTube player
- matches = re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
+ matches = re.findall(r'''(?x)
+ (?:<iframe[^>]+?src=|embedSWF\(\s*)
+ (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+ (?:embed|v)/.+?)
+ \1''', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
@@ -277,6 +295,16 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for Ooyala videos
+ mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+ if mobj is not None:
+ return OoyalaIE._build_url_result(mobj.group(1))
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 6fb373db2..e5332cce8 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -11,7 +11,7 @@ from ..utils import (
class ImdbIE(InfoExtractor):
IE_NAME = u'imdb'
IE_DESC = u'Internet Movie Database trailers'
- _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+ _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = {
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
@@ -27,7 +27,7 @@ class ImdbIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- webpage = self._download_webpage(url,video_id)
+ webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
descr = get_element_by_attribute('itemprop', 'description', webpage)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
new file mode 100644
index 000000000..4bdf55f93
--- /dev/null
+++ b/youtube_dl/extractor/ivi.py
@@ -0,0 +1,154 @@
+# encoding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_request,
+ ExtractorError,
+)
+
+
+class IviIE(InfoExtractor):
+ IE_DESC = u'ivi.ru'
+ IE_NAME = u'ivi'
+ _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+
+ _TESTS = [
+ # Single movie
+ {
+ u'url': u'http://www.ivi.ru/watch/53141',
+ u'file': u'53141.mp4',
+ u'md5': u'6ff5be2254e796ed346251d117196cf4',
+ u'info_dict': {
+ u'title': u'Иван Васильевич меняет профессию',
+ u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
+ u'duration': 5498,
+ u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
+ },
+ u'skip': u'Only works from Russia',
+ },
+ # Serial's serie
+ {
+ u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
+ u'file': u'74791.mp4',
+ u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
+ u'info_dict': {
+ u'title': u'Дежурный ангел - 1 серия',
+ u'duration': 2490,
+ u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+ },
+ u'skip': u'Only works from Russia',
+ }
+ ]
+
+ # Sorted by quality
+ _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
+
+ # Sorted by size
+ _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
+
+ def _extract_description(self, html):
+ m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
+ return m.group('description') if m is not None else None
+
+ def _extract_comment_count(self, html):
+ m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
+ return int(m.group('commentcount')) if m is not None else 0
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('videoid')
+
+ api_url = 'http://api.digitalaccess.ru/api/json/'
+
+ data = {u'method': u'da.content.get',
+ u'params': [video_id, {u'site': u's183',
+ u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
+ u'contentid': video_id
+ }
+ ]
+ }
+
+ request = compat_urllib_request.Request(api_url, json.dumps(data))
+
+ video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
+ video_json = json.loads(video_json_page)
+
+ if u'error' in video_json:
+ error = video_json[u'error']
+ if error[u'origin'] == u'NoRedisValidData':
+ raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+ raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
+
+ result = video_json[u'result']
+
+ formats = [{'url': x[u'url'],
+ 'format_id': x[u'content_format']
+ } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
+ formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
+
+ if len(formats) == 0:
+ self._downloader.report_warning(u'No media links available for %s' % video_id)
+ return
+
+ duration = result[u'duration']
+ compilation = result[u'compilation']
+ title = result[u'title']
+
+ title = '%s - %s' % (compilation, title) if compilation is not None else title
+
+ previews = result[u'preview']
+ previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
+ thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
+
+ video_page = self._download_webpage(url, video_id, u'Downloading video page')
+ description = self._extract_description(video_page)
+ comment_count = self._extract_comment_count(video_page)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
+
+
+class IviCompilationIE(InfoExtractor):
+ IE_DESC = u'ivi.ru compilations'
+ IE_NAME = u'ivi:compilation'
+ _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+
+ def _extract_entries(self, html, compilation_id):
+ return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
+ for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ compilation_id = mobj.group('compilationid')
+ season_id = mobj.group('seasonid')
+
+ if season_id is not None: # Season link
+ season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
+ playlist_id = '%s/season%s' % (compilation_id, season_id)
+ playlist_title = self._html_search_meta(u'title', season_page, u'title')
+ entries = self._extract_entries(season_page, compilation_id)
+ else: # Compilation link
+ compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
+ playlist_id = compilation_id
+ playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
+ seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
+ if len(seasons) == 0: # No seasons in this compilation
+ entries = self._extract_entries(compilation_page, compilation_id)
+ else:
+ entries = []
+ for season_id in seasons:
+ season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
+ compilation_id, u'Downloading season %s web page' % season_id)
+ entries.extend(self._extract_entries(season_page, compilation_id))
+
+ return self.playlist_result(entries, playlist_id, playlist_title) \ No newline at end of file
diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py
index d29cf2c07..08ce0647f 100644
--- a/youtube_dl/extractor/mdr.py
+++ b/youtube_dl/extractor/mdr.py
@@ -8,23 +8,8 @@ from ..utils import (
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
-
- _TESTS = [{
- u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
- u'file': u'165624.mp4',
- u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
- u'info_dict': {
- u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
- },
- },
- {
- u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
- u'file': u'718370.mp3',
- u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
- u'info_dict': {
- u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
- },
- }]
+
+ # No tests, MDR regularly deletes its videos
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index 1f7b4d2e7..d08e47734 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -22,6 +22,11 @@ class OoyalaIE(InfoExtractor):
def _url_for_embed_code(embed_code):
return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+ @classmethod
+ def _build_url_result(cls, embed_code):
+ return cls.url_result(cls._url_for_embed_code(embed_code),
+ ie=cls.ie_key())
+
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 4ea89bf85..beea58d63 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -1,5 +1,6 @@
# encoding: utf-8
+import os.path
import re
import json
import hashlib
@@ -10,6 +11,7 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
+ url_basename,
)
@@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor):
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
-
+
+ # Warning if video is unavailable
+ warning = self._html_search_regex(
+ r'<div class="videoUnModer">(.*?)</div>', video_page,
+ u'warning messagef', default=None)
+ if warning is not None:
+ self._downloader.report_warning(
+ u'Video %s may not be available; smotri said: %s ' %
+ (video_id, warning))
+
# Adult content
if re.search(u'EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
@@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor):
# Extract the rest of meta data
video_title = self._search_meta(u'name', video_page, u'title')
if not video_title:
- video_title = video_url.rsplit('/', 1)[-1]
+ video_title = os.path.splitext(url_basename(video_url))[0]
video_description = self._search_meta(u'description', video_page)
END_TEXT = u' на сайте Smotri.com'
- if video_description.endswith(END_TEXT):
+ if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = u'Смотреть онлайн ролик '
- if video_description.startswith(START_TEXT):
+ if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta(u'thumbnail', video_page)
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
- upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
- video_upload_date = (
- (
- upload_date_m.group('year') +
- upload_date_m.group('month') +
- upload_date_m.group('day')
+ if upload_date_str:
+ upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
+ video_upload_date = (
+ (
+ upload_date_m.group('year') +
+ upload_date_m.group('month') +
+ upload_date_m.group('day')
+ )
+ if upload_date_m else None
)
- if upload_date_m else None
- )
+ else:
+ video_upload_date = None
duration_str = self._search_meta(u'duration', video_page)
- duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
- video_duration = (
- (
- (int(duration_m.group('hours')) * 60 * 60) +
- (int(duration_m.group('minutes')) * 60) +
- int(duration_m.group('seconds'))
+ if duration_str:
+ duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
+ video_duration = (
+ (
+ (int(duration_m.group('hours')) * 60 * 60) +
+ (int(duration_m.group('minutes')) * 60) +
+ int(duration_m.group('seconds'))
+ )
+ if duration_m else None
)
- if duration_m else None
- )
+ else:
+ video_duration = None
video_uploader = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
@@ -202,7 +219,7 @@ class SmotriIE(InfoExtractor):
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
- 'video_duration': video_duration,
+ 'duration': video_duration,
'view_count': video_view_count,
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index cbba4094b..e22ff9c38 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -24,7 +24,7 @@ class SoundcloudIE(InfoExtractor):
"""
_VALID_URL = r'''^(?:https?://)?
- (?:(?:(?:www\.)?soundcloud\.com/
+ (?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
(?!sets/)(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index 4f803bcd3..5a136a952 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -15,7 +15,7 @@ class Vbox7IE(InfoExtractor):
_TEST = {
u'url': u'http://vbox7.com/play:249bb972c2',
u'file': u'249bb972c2.flv',
- u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
+ u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
u'info_dict': {
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
}
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index ea4409528..c3623fcbe 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -16,11 +16,20 @@ from ..utils import (
unsmuggle_url,
)
+
class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?://)?
+ (?:(?:www|(?P<player>player))\.)?
+ vimeo(?P<pro>pro)?\.com/
+ (?:.*?/)?
+ (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
+ (?:videos?/)?
+ (?P<id>[0-9]+)
+ /?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 58d274970..9fb07b366 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1666,7 +1666,7 @@ class YoutubeUserIE(InfoExtractor):
# page by page until there are no video ids - it means we got
# all of them.
- video_ids = []
+ url_results = []
for pagenum in itertools.count(0):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
@@ -1684,10 +1684,17 @@ class YoutubeUserIE(InfoExtractor):
break
# Extract video identifiers
- ids_in_page = []
- for entry in response['feed']['entry']:
- ids_in_page.append(entry['id']['$t'].split('/')[-1])
- video_ids.extend(ids_in_page)
+ entries = response['feed']['entry']
+ for entry in entries:
+ title = entry['title']['$t']
+ video_id = entry['id']['$t'].split('/')[-1]
+ url_results.append({
+ '_type': 'url',
+ 'url': video_id,
+ 'ie_key': 'Youtube',
+ 'id': 'video_id',
+ 'title': title,
+ })
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
@@ -1695,12 +1702,9 @@ class YoutubeUserIE(InfoExtractor):
# are no more ids on further pages - no need to query
# again.
- if len(ids_in_page) < self._GDATA_PAGE_SIZE:
+ if len(entries) < self._GDATA_PAGE_SIZE:
break
- url_results = [
- self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index cc391bddd..2e48f187e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1093,3 +1093,8 @@ def remove_start(s, start):
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip(u'/').split(u'/')[-1]
+
+
+class HEADRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return "HEAD"
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 7cbee7335..80b722608 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.12.17.2'
+__version__ = '2013.12.23.2'