aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--devscripts/bash-completion.in6
-rw-r--r--devscripts/youtube_genalgo.py8
-rw-r--r--test/test_download.py7
-rw-r--r--youtube_dl/PostProcessor.py4
-rw-r--r--youtube_dl/YoutubeDL.py18
-rw-r--r--youtube_dl/__init__.py19
-rw-r--r--youtube_dl/aes.py202
-rw-r--r--youtube_dl/extractor/__init__.py3
-rw-r--r--youtube_dl/extractor/addanime.py1
-rw-r--r--youtube_dl/extractor/appletrailers.py1
-rw-r--r--youtube_dl/extractor/common.py14
-rw-r--r--youtube_dl/extractor/generic.py12
-rw-r--r--youtube_dl/extractor/ign.py8
-rw-r--r--youtube_dl/extractor/kankan.py6
-rw-r--r--youtube_dl/extractor/mit.py74
-rw-r--r--youtube_dl/extractor/orf.py67
-rw-r--r--youtube_dl/extractor/sohu.py90
-rw-r--r--youtube_dl/extractor/trilulilu.py3
-rw-r--r--youtube_dl/extractor/unistra.py2
-rw-r--r--youtube_dl/extractor/wat.py1
-rw-r--r--youtube_dl/extractor/youporn.py18
-rw-r--r--youtube_dl/extractor/youtube.py8
-rw-r--r--youtube_dl/utils.py37
-rw-r--r--youtube_dl/version.py2
24 files changed, 557 insertions, 54 deletions
diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
index 3b99a9614..bd10f63c2 100644
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -4,8 +4,12 @@ __youtube-dl()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
opts="{{flags}}"
+ keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
- if [[ ${cur} == * ]] ; then
+ if [[ ${cur} =~ : ]]; then
+ COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+ return 0
+ elif [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
return 0
fi
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 917e8f79d..97a0d7290 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -20,15 +20,15 @@ tests = [
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
- # 86 - vflh9ybst 2013/08/23
+ # 86 - vflHOr_nV 2013/08/30
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
- "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
+ "?;}|[{=+._)(*&^%$#@!MNBqCXZASDFGHJKLPOIUYTREWQ<987654321mnbvcxzasdfghjklpoiuytrew"),
# 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
- # 84 - vflh9ybst 2013/08/23 (sporadic)
+ # 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
- "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
+ ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
diff --git a/test/test_download.py b/test/test_download.py
index 21cb2e694..23a66254d 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -127,12 +127,11 @@ def generator(test_case):
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
- self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+ got = 'md5:' + md5(info_dict.get(info_field))
else:
got = info_dict.get(info_field)
- self.assertEqual(
- expected, got,
- u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+ self.assertEqual(expected, got,
+ u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index c02ed7148..ae56d2082 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
- raise AudioConversionError(err.message)
+ raise AudioConversionError(err.msg)
def run(self, information):
path = information['filepath']
@@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
except:
etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError):
- msg = u'audio conversion failed: ' + e.message
+ msg = u'audio conversion failed: ' + e.msg
else:
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
raise PostProcessingError(msg)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index d5f7c81eb..b289bd9e2 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -97,6 +97,7 @@ class YoutubeDL(object):
def __init__(self, params):
"""Create a FileDownloader object with the given options."""
self._ies = []
+ self._ies_instances = {}
self._pps = []
self._progress_hooks = []
self._download_retcode = 0
@@ -111,8 +112,21 @@ class YoutubeDL(object):
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
+ self._ies_instances[ie.ie_key()] = ie
ie.set_downloader(self)
+ def get_info_extractor(self, ie_key):
+ """
+ Get an instance of an IE with name ie_key, it will try to get one from
+ the _ies list, if there's no instance it will create a new one and add
+ it to the extractor list.
+ """
+ ie = self._ies_instances.get(ie_key)
+ if ie is None:
+ ie = get_info_extractor(ie_key)()
+ self.add_info_extractor(ie)
+ return ie
+
def add_default_info_extractors(self):
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ class YoutubeDL(object):
'''
if ie_key:
- ie = get_info_extractor(ie_key)()
- ie.set_downloader(self)
- ies = [ie]
+ ies = [self.get_info_extractor(ie_key)]
else:
ies = self._ies
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index bc6a6d180..431460c57 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -45,6 +45,7 @@ import sys
import warnings
import platform
+
from .utils import *
from .update import update_self
from .version import __version__
@@ -99,6 +100,16 @@ def parseOpts(overrideArguments=None):
pass
return None
+ def _hide_login_info(opts):
+ opts = list(opts)
+ for private_opt in ['-p', '--password', '-u', '--username']:
+ try:
+ i = opts.index(private_opt)
+ opts[i+1] = '<PRIVATE>'
+ except ValueError:
+ pass
+ return opts
+
max_width = 80
max_help_position = 80
@@ -357,9 +368,9 @@ def parseOpts(overrideArguments=None):
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
- sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
- sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
- sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+ sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+ sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+ sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args
@@ -611,7 +622,7 @@ def _real_main(argv=None):
sys.exc_clear()
except:
pass
- sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+ sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
ydl.add_default_info_extractors()
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
new file mode 100644
index 000000000..9a0c93fa6
--- /dev/null
+++ b/youtube_dl/aes.py
@@ -0,0 +1,202 @@
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+
+import base64
+from math import ceil
+
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+def aes_ctr_decrypt(data, key, counter):
+ """
+ Decrypt with aes in counter mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
+ returns the next counter block
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data=[]
+ for i in range(block_count):
+ counter_block = counter.next_value()
+ block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
+ block += [0]*(BLOCK_SIZE_BYTES - len(block))
+
+ cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+ decrypted_data += xor(block, cipher_counter_block)
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+def key_expansion(data):
+ """
+ Generate key schedule
+
+ @param {int[]} data 16/24/32-Byte cipher key
+ @returns {int[]} 176/208/240-Byte expanded key
+ """
+ data = data[:] # copy
+ rcon_iteration = 1
+ key_size_bytes = len(data)
+ expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+ while len(data) < expanded_key_size_bytes:
+ temp = data[-4:]
+ temp = key_schedule_core(temp, rcon_iteration)
+ rcon_iteration += 1
+ data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+
+ for _ in range(3):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+
+ if key_size_bytes == 32:
+ temp = data[-4:]
+ temp = sub_bytes(temp)
+ data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+
+ for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+ data = data[:expanded_key_size_bytes]
+
+ return data
+
+def aes_encrypt(data, expanded_key):
+ """
+ Encrypt one block with aes
+
+ @param {int[]} data 16-Byte state
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte cipher
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+ for i in range(1, rounds+1):
+ data = sub_bytes(data)
+ data = shift_rows(data)
+ if i != rounds:
+ data = mix_columns(data)
+ data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+
+ return data
+
+def aes_decrypt_text(data, password, key_size_bytes):
+ """
+ Decrypt text
+ - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+ - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+ with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+ - Mode of operation is 'counter'
+
+ @param {str} data Base64 encoded string
+ @param {str,unicode} password Password (will be encoded with utf-8)
+ @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+ @returns {str} Decrypted data
+ """
+ NONCE_LENGTH_BYTES = 8
+
+ data = bytes_to_intlist(base64.b64decode(data))
+ password = bytes_to_intlist(password.encode('utf-8'))
+
+ key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
+ key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+
+ nonce = data[:NONCE_LENGTH_BYTES]
+ cipher = data[NONCE_LENGTH_BYTES:]
+
+ class Counter:
+ __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+ def next_value(self):
+ temp = self.__value
+ self.__value = inc(self.__value)
+ return temp
+
+ decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+ plaintext = intlist_to_bytes(decrypted_data)
+
+ return plaintext
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+MIX_COLUMN_MATRIX = ((2,3,1,1),
+ (1,2,3,1),
+ (1,1,2,3),
+ (3,1,1,2))
+
+def sub_bytes(data):
+ return [SBOX[x] for x in data]
+
+def rotate(data):
+ return data[1:] + [data[0]]
+
+def key_schedule_core(data, rcon_iteration):
+ data = rotate(data)
+ data = sub_bytes(data)
+ data[0] = data[0] ^ RCON[rcon_iteration]
+
+ return data
+
+def xor(data1, data2):
+ return [x^y for x, y in zip(data1, data2)]
+
+def mix_column(data):
+ data_mixed = []
+ for row in range(4):
+ mixed = 0
+ for column in range(4):
+ addend = data[column]
+ if MIX_COLUMN_MATRIX[row][column] in (2,3):
+ addend <<= 1
+ if addend > 0xff:
+ addend &= 0xff
+ addend ^= 0x1b
+ if MIX_COLUMN_MATRIX[row][column] == 3:
+ addend ^= data[column]
+ mixed ^= addend & 0xff
+ data_mixed.append(mixed)
+ return data_mixed
+
+def mix_columns(data):
+ data_mixed = []
+ for i in range(4):
+ column = data[i*4 : (i+1)*4]
+ data_mixed += mix_column(column)
+ return data_mixed
+
+def shift_rows(data):
+ data_shifted = []
+ for column in range(4):
+ for row in range(4):
+ data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
+ return data_shifted
+
+def inc(data):
+ data = data[:] # copy
+ for i in range(len(data)-1,-1,-1):
+ if data[i] == 255:
+ data[i] = 0
+ else:
+ data[i] = data[i] + 1
+ break
+ return data
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index c76b99a81..90f1a4418 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -50,6 +50,7 @@ from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .metacafe import MetacafeIE
+from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
@@ -58,6 +59,7 @@ from .myvideo import MyVideoIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .ooyala import OoyalaIE
+from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE
@@ -69,6 +71,7 @@ from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
from .slashdot import SlashdotIE
+from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
index 46db8262f..82a785a19 100644
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -1,4 +1,3 @@
-import ast
import re
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index b3bdb2955..8b191c196 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -4,7 +4,6 @@ import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
determine_ext,
- ExtractorError,
)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 12169b2bb..77726ee24 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -114,6 +114,11 @@ class InfoExtractor(object):
"""Real extraction process. Redefine in subclasses."""
pass
+ @classmethod
+ def ie_key(cls):
+ """A string for getting the InfoExtractor with get_info_extractor"""
+ return cls.__name__[:-2]
+
@property
def IE_NAME(self):
return type(self).__name__[:-2]
@@ -140,12 +145,17 @@ class InfoExtractor(object):
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
content_type = urlh.headers.get('Content-Type', '')
+ webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m:
encoding = m.group(1)
else:
- encoding = 'utf-8'
- webpage_bytes = urlh.read()
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+ webpage_bytes[:1024])
+ if m:
+ encoding = m.group(1).decode('ascii')
+ else:
+ encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False):
try:
url = url_or_request.get_full_url()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index bfc9bff49..dc4dea4ad 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_error,
compat_urllib_parse,
- compat_urllib_parse_urlparse,
compat_urllib_request,
+ compat_urlparse,
ExtractorError,
)
@@ -163,15 +163,7 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1))
- if video_url.startswith('//'):
- video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
- if '://' not in video_url:
- up = compat_urllib_parse_urlparse(url)
- if video_url.startswith('/'):
- video_url = up.scheme + '://' + up.netloc + video_url
- else: # relative path
- video_url = (up.scheme + '://' + up.netloc +
- up.path.rpartition('/')[0] + '/' + video_url)
+ video_url = compat_urlparse.urljoin(url, video_url)
video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py
index 62abab655..b1c84278a 100644
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
Some videos of it.ign.com are also supported
"""
- _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
+ _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = u'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name_or_id = mobj.group('name_or_id')
+ page_type = mobj.group('type')
webpage = self._download_webpage(url, name_or_id)
+ if page_type == 'articles':
+ video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+ return self.url_result(video_url, ie='IGN')
video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system."""
- _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+ _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
index 8537ba584..445d46501 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
- gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+ title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+ surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+ gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+ gcid = gcids[-1]
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
video_id, u'Downloading video url info')
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
new file mode 100644
index 000000000..52be9232f
--- /dev/null
+++ b/youtube_dl/extractor/mit.py
@@ -0,0 +1,74 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+ IE_NAME = u'techtv.mit.edu'
+ _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+
+ _TEST = {
+ u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+ u'file': u'25418.mp4',
+ u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
+ u'info_dict': {
+ u'title': u'MIT DNA Learning Center Set',
+ u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ raw_page = self._download_webpage(
+ 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
+ clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
+
+ base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
+ raw_page, u'base url')
+ formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
+ u'video formats')
+ formats = json.loads(formats_json)
+ formats = sorted(formats, key=lambda f: f['bitrate'])
+
+ title = get_element_by_id('edit-title', clean_page)
+ description = clean_html(get_element_by_id('edit-description', clean_page))
+ thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
+ raw_page, u'thumbnail', flags=re.DOTALL)
+
+ return {'id': video_id,
+ 'title': title,
+ 'url': base_url + formats[-1]['url'].replace('mp4:', ''),
+ 'ext': 'mp4',
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
+
+
+class MITIE(TechTVMITIE):
+ IE_NAME = u'video.mit.edu'
+ _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
+
+ _TEST = {
+ u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+ u'file': u'21783.mp4',
+ u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
+ u'info_dict': {
+ u'title': u'The Government is Profiling You',
+ u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ page_title = mobj.group('title')
+ webpage = self._download_webpage(url, page_title)
+ self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
+ embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
+ u'embed url')
+ return self.url_result(embed_url, ie='TechTVMIT')
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
new file mode 100644
index 000000000..41ef8e992
--- /dev/null
+++ b/youtube_dl/extractor/orf.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+ ExtractorError,
+ find_xpath_attr,
+)
+
+class ORFIE(InfoExtractor):
+ _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
+
+ _TEST = {
+ u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
+ u'file': u'6566957.flv',
+ u'info_dict': {
+ u'title': u'Wetter',
+ u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
+ },
+ u'params': {
+ # It uses rtmp
+ u'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ webpage = self._download_webpage(url, playlist_id)
+
+ flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
+ flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
+ flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
+ playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
+ playlist = json.loads(playlist_json)
+
+ videos = []
+ ns = '{http://tempuri.org/XMLSchema.xsd}'
+ xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
+ webpage_description = self._og_search_description(webpage)
+ for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
+ # Get best quality url
+ rtmp_url = None
+ for q in ['Q6A', 'Q4A', 'Q1A']:
+ video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
+ if video_url is not None:
+ rtmp_url = video_url.text
+ break
+ if rtmp_url is None:
+ raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
+ description = self._html_search_regex(
+ r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
+ u'description', default=webpage_description, flags=re.DOTALL)
+ videos.append({
+ '_type': 'video',
+ 'id': info['id'],
+ 'title': info['title'],
+ 'url': rtmp_url,
+ 'ext': 'flv',
+ 'description': description,
+ })
+
+ return videos
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
new file mode 100644
index 000000000..77bb0a8dc
--- /dev/null
+++ b/youtube_dl/extractor/sohu.py
@@ -0,0 +1,90 @@
+# encoding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SohuIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
+
+ _TEST = {
+ u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
+ u'file': u'382479172.mp4',
+ u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
+ u'info_dict': {
+ u'title': u'MV:Far East Movement《The Illest》',
+ },
+ }
+
+ def _real_extract(self, url):
+
+ def _fetch_data(vid_id):
+ base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+ data_url = base_data_url + str(vid_id)
+ data_json = self._download_webpage(
+ data_url, video_id,
+ note=u'Downloading JSON data for ' + str(vid_id))
+ return json.loads(data_json)
+
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
+ webpage, u'video title')
+ title = raw_title.partition('-')[0].strip()
+
+ vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
+ u'video path')
+ data = _fetch_data(vid)
+
+ QUALITIES = ('ori', 'super', 'high', 'nor')
+ vid_ids = [data['data'][q + 'Vid']
+ for q in QUALITIES
+ if data['data'][q + 'Vid'] != 0]
+ if not vid_ids:
+ raise ExtractorError(u'No formats available for this video')
+
+ # For now, we just pick the highest available quality
+ vid_id = vid_ids[-1]
+
+ format_data = data if vid == vid_id else _fetch_data(vid_id)
+ part_count = format_data['data']['totalBlocks']
+ allot = format_data['allot']
+ prot = format_data['prot']
+ clipsURL = format_data['data']['clipsURL']
+ su = format_data['data']['su']
+
+ playlist = []
+ for i in range(part_count):
+ part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
+ (allot, prot, clipsURL[i], su[i]))
+ part_str = self._download_webpage(
+ part_url, video_id,
+ note=u'Downloading part %d of %d' % (i+1, part_count))
+
+ part_info = part_str.split('|')
+ video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+ video_info = {
+ 'id': '%s_part%02d' % (video_id, i + 1),
+ 'title': title,
+ 'url': video_url,
+ 'ext': 'mp4',
+ }
+ playlist.append(video_info)
+
+ if len(playlist) == 1:
+ info = playlist[0]
+ info['id'] = video_id
+ else:
+ info = {
+ '_type': 'playlist',
+ 'entries': playlist,
+ 'id': video_id,
+ }
+
+ return info
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
index 1c46156c7..f278951ba 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -3,9 +3,6 @@ import re
import xml.etree.ElementTree
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
class TriluliluIE(InfoExtractor):
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py
index 5ba0a9061..516e18914 100644
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
u'info_dict': {
u'title': u'M!ss Yella',
- u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
+ u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
},
}
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py
index 7d228edac..29c25f0e3 100644
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -6,7 +6,6 @@ import re
from .common import InfoExtractor
from ..utils import (
- compat_urllib_parse,
unified_strdate,
)
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index d1156bf42..c85fd4b5a 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -12,14 +12,16 @@ from ..utils import (
unescapeHTML,
unified_strdate,
)
-
+from ..aes import (
+ aes_decrypt_text
+)
class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
_TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4',
- u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
+ u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
u'info_dict': {
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
links = re.findall(LINK_RE, download_list_html)
- if(len(links) == 0):
+
+ # Get link of hd video if available
+ mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
+ if mobj != None:
+ encrypted_video_url = mobj.group(u'encrypted_video_url')
+ video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
+ links = [video_url] + links
+
+ if not links:
raise ExtractorError(u'ERROR: no known formats available for video')
self.to_screen(u'Links found: %d' % len(links))
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
self._print_formats(formats)
return
- req_format = self._downloader.params.get('format', None)
+ req_format = self._downloader.params.get('format', 'best')
self.to_screen(u'Format: %s' % req_format)
if req_format is None or req_format == 'best':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 8e486afd0..810ce6f5d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -335,7 +335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
- u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+ u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
}
@@ -423,11 +423,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86:
- return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
+ return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]
elif len(s) == 85:
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
elif len(s) == 84:
- return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
+ return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82:
@@ -1161,7 +1161,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+ _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
_LOGIN_REQUIRED = True
def _real_extract(self, url):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index be788cf5a..201802cee 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,19 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import datetime
+import email.utils
import errno
import gzip
import io
import json
import locale
import os
+import platform
import re
+import socket
import sys
import traceback
import zlib
-import email.utils
-import socket
-import datetime
try:
import urllib.request as compat_urllib_request
@@ -212,7 +213,7 @@ if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z]+$', key)
- assert re.match(r'^[a-zA-Z@\s]*$', val)
+ assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
expr = xpath + u"[@%s='%s']" % (key, val)
return node.find(expr)
else:
@@ -732,3 +733,31 @@ class DateRange(object):
return self.start <= date <= self.end
def __str__(self):
return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+ """ Returns the platform name as a compat_str """
+ res = platform.platform()
+ if isinstance(res, bytes):
+ res = res.decode(preferredencoding())
+
+ assert isinstance(res, compat_str)
+ return res
+
+
+def bytes_to_intlist(bs):
+ if not bs:
+ return []
+ if isinstance(bs[0], int): # Python 3
+ return list(bs)
+ else:
+ return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+ if not xs:
+ return b''
+ if isinstance(chr(0), bytes): # Python 2
+ return ''.join([chr(x) for x in xs])
+ else:
+ return bytes(xs)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 0b56e48dc..b6284c6d6 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.08.28'
+__version__ = '2013.08.30'