about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- youtube_dl/YoutubeDL.py 8
-rw-r--r-- youtube_dl/extractor/__init__.py 3
-rw-r--r-- youtube_dl/extractor/depositfiles.py 60
-rw-r--r-- youtube_dl/extractor/generic.py 4
-rw-r--r-- youtube_dl/extractor/vevo.py 2
-rw-r--r-- youtube_dl/extractor/vgtrk.py (renamed from youtube_dl/extractor/vesti.py) 10
-rw-r--r-- youtube_dl/utils.py 11
7 files changed, 22 insertions, 76 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 523eba3c1..1db27315c 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1167,7 +1167,7 @@ class YoutubeDL(object):
def urlopen(self, req):
""" Start an HTTP download """
- return self._opener.open(req)
+ return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
if not self.params.get('verbose'):
@@ -1198,7 +1198,7 @@ class YoutubeDL(object):
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
- timeout = 600 if timeout_val is None else float(timeout_val)
+ self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy')
@@ -1236,7 +1236,3 @@ class YoutubeDL(object):
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
-
- # TODO remove this global modification
- compat_urllib_request.install_opener(opener)
- socket.setdefaulttimeout(timeout)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 4a14a2be6..49617a5e0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -53,7 +53,6 @@ from .dailymotion import (
DailymotionUserIE,
)
from .daum import DaumIE
-from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
@@ -248,8 +247,8 @@ from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
-from .vesti import VestiIE
from .vevo import VevoIE
+from .vgtrk import VGTRKIE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
diff --git a/youtube_dl/extractor/depositfiles.py b/youtube_dl/extractor/depositfiles.py
deleted file mode 100644
index 2c9fb5f2e..000000000
--- a/youtube_dl/extractor/depositfiles.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import re
-import os
-import socket
-
-from .common import InfoExtractor
-from ..utils import (
- compat_http_client,
- compat_str,
- compat_urllib_error,
- compat_urllib_parse,
- compat_urllib_request,
-
- ExtractorError,
-)
-
-
-class DepositFilesIE(InfoExtractor):
- """Information extractor for depositfiles.com"""
-
- _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
-
- def _real_extract(self, url):
- file_id = url.split('/')[-1]
- # Rebuild url in english locale
- url = 'http://depositfiles.com/en/files/' + file_id
-
- # Retrieve file webpage with 'Free download' button pressed
- free_download_indication = {'gateway_result' : '1'}
- request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
- try:
- self.report_download_webpage(file_id)
- webpage = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
-
- # Search for the real file URL
- mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
- if (mobj is None) or (mobj.group(1) is None):
- # Try to figure out reason of the error.
- mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
- if (mobj is not None) and (mobj.group(1) is not None):
- restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
- raise ExtractorError(u'%s' % restriction_message)
- else:
- raise ExtractorError(u'Unable to extract download URL from: %s' % url)
-
- file_url = mobj.group(1)
- file_extension = os.path.splitext(file_url)[1][1:]
-
- # Search for file title
- file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
-
- return [{
- 'id': file_id.decode('utf-8'),
- 'url': file_url.decode('utf-8'),
- 'uploader': None,
- 'upload_date': None,
- 'title': file_title,
- 'ext': file_extension.decode('utf-8'),
- }]
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7a2e5dee0..7666cf207 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import os
import re
-import xml.etree.ElementTree
from .common import InfoExtractor
from .youtube import YoutubeIE
@@ -17,6 +16,7 @@ from ..utils import (
ExtractorError,
HEADRequest,
+ parse_xml,
smuggle_url,
unescapeHTML,
unified_strdate,
@@ -274,7 +274,7 @@ class GenericIE(InfoExtractor):
# Is it an RSS feed?
try:
- doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
+ doc = parse_xml(webpage)
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
except compat_xml_parse_error:
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 98a663496..888eb7402 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -57,7 +57,7 @@ class VevoIE(InfoExtractor):
'age_limit': 18,
'title': 'Tunnel Vision (Explicit)',
'uploader': 'Justin Timberlake',
- 'upload_date': '20130704',
+ 'upload_date': '20130703',
},
'params': {
'skip_download': 'true',
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vgtrk.py
index 916681fae..429b8bc72 100644
--- a/youtube_dl/extractor/vesti.py
+++ b/youtube_dl/extractor/vgtrk.py
@@ -10,10 +10,9 @@ from ..utils import (
)
-class VestiIE(InfoExtractor):
- IE_NAME = 'vesti'
- IE_DESC = 'Вести.Ru'
- _VALID_URL = r'http://(?:(?:.+?\.)?vesti\.ru|(?:2\.)?russia\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
+class VGTRKIE(InfoExtractor):
+ IE_DESC = 'ВГТРК'
+ _VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia2?\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
_TESTS = [
{
@@ -85,7 +84,7 @@ class VestiIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
- 'skip': 'Blocked outside Russia'
+ 'skip': 'Blocked outside Russia',
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
@@ -142,6 +141,7 @@ class VestiIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'Blocked outside Russia',
},
{
'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d4abd4031..3943cc9c5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -22,6 +22,7 @@ import struct
import subprocess
import sys
import traceback
+import xml.etree.ElementTree
import zlib
try:
@@ -1267,3 +1268,13 @@ def read_batch_urls(batch_fd):
def urlencode_postdata(*args, **kargs):
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+
+
+def parse_xml(s):
+ class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass # Ignore doctypes
+
+ parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
+ kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
+ return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)