diff options
-rw-r--r-- | youtube_dl/YoutubeDL.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/depositfiles.py | 60 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/vevo.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/vgtrk.py (renamed from youtube_dl/extractor/vesti.py) | 10 | ||||
-rw-r--r-- | youtube_dl/utils.py | 11 |
7 files changed, 22 insertions, 76 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 523eba3c1..1db27315c 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1167,7 +1167,7 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
-        return self._opener.open(req)
+        return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
         if not self.params.get('verbose'):
@@ -1198,7 +1198,7 @@ class YoutubeDL(object):
 
     def _setup_opener(self):
         timeout_val = self.params.get('socket_timeout')
-        timeout = 600 if timeout_val is None else float(timeout_val)
+        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
 
         opts_cookiefile = self.params.get('cookiefile')
         opts_proxy = self.params.get('proxy')
@@ -1236,7 +1236,3 @@ class YoutubeDL(object):
         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
         opener.addheaders = []
         self._opener = opener
-
-        # TODO remove this global modification
-        compat_urllib_request.install_opener(opener)
-        socket.setdefaulttimeout(timeout)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 4a14a2be6..49617a5e0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -53,7 +53,6 @@ from .dailymotion import (
     DailymotionUserIE,
 )
 from .daum import DaumIE
-from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
@@ -248,8 +247,8 @@ from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
-from .vesti import VestiIE
 from .vevo import VevoIE
+from .vgtrk import VGTRKIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
diff --git a/youtube_dl/extractor/depositfiles.py b/youtube_dl/extractor/depositfiles.py
deleted file mode 100644
index 2c9fb5f2e..000000000
--- a/youtube_dl/extractor/depositfiles.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import re
-import os
-import socket
-
-from .common import InfoExtractor
-from ..utils import (
-    compat_http_client,
-    compat_str,
-    compat_urllib_error,
-    compat_urllib_parse,
-    compat_urllib_request,
-
-    ExtractorError,
-)
-
-
-class DepositFilesIE(InfoExtractor):
-    """Information extractor for depositfiles.com"""
-
-    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
-
-    def _real_extract(self, url):
-        file_id = url.split('/')[-1]
-        # Rebuild url in english locale
-        url = 'http://depositfiles.com/en/files/' + file_id
-
-        # Retrieve file webpage with 'Free download' button pressed
-        free_download_indication = {'gateway_result' : '1'}
-        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
-        try:
-            self.report_download_webpage(file_id)
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
-
-        # Search for the real file URL
-        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
-        if (mobj is None) or (mobj.group(1) is None):
-            # Try to figure out reason of the error.
-            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
-            if (mobj is not None) and (mobj.group(1) is not None):
-                restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
-                raise ExtractorError(u'%s' % restriction_message)
-            else:
-                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
-
-        file_url = mobj.group(1)
-        file_extension = os.path.splitext(file_url)[1][1:]
-
-        # Search for file title
-        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
-
-        return [{
-            'id': file_id.decode('utf-8'),
-            'url': file_url.decode('utf-8'),
-            'uploader': None,
-            'upload_date': None,
-            'title': file_title,
-            'ext': file_extension.decode('utf-8'),
-        }]
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7a2e5dee0..7666cf207 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 
 import os
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
@@ -17,6 +16,7 @@ from ..utils import (
 
     ExtractorError,
     HEADRequest,
+    parse_xml,
     smuggle_url,
     unescapeHTML,
     unified_strdate,
@@ -274,7 +274,7 @@ class GenericIE(InfoExtractor):
 
         # Is it an RSS feed?
         try:
-            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
+            doc = parse_xml(webpage)
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
         except compat_xml_parse_error:
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 98a663496..888eb7402 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -57,7 +57,7 @@ class VevoIE(InfoExtractor):
             'age_limit': 18,
             'title': 'Tunnel Vision (Explicit)',
             'uploader': 'Justin Timberlake',
-            'upload_date': '20130704',
+            'upload_date': '20130703',
         },
         'params': {
             'skip_download': 'true',
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vgtrk.py
index 916681fae..429b8bc72 100644
--- a/youtube_dl/extractor/vesti.py
+++ b/youtube_dl/extractor/vgtrk.py
@@ -10,10 +10,9 @@ from ..utils import (
 )
 
 
-class VestiIE(InfoExtractor):
-    IE_NAME = 'vesti'
-    IE_DESC = 'Вести.Ru'
-    _VALID_URL = r'http://(?:(?:.+?\.)?vesti\.ru|(?:2\.)?russia\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
+class VGTRKIE(InfoExtractor):
+    IE_DESC = 'ВГТРК'
+    _VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia2?\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
 
     _TESTS = [
         {
@@ -85,7 +84,7 @@ class VestiIE(InfoExtractor):
                 # m3u8 download
                 'skip_download': True,
             },
-            'skip': 'Blocked outside Russia'
+            'skip': 'Blocked outside Russia',
         },
         {
             'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
@@ -142,6 +141,7 @@ class VestiIE(InfoExtractor):
                 # m3u8 download
                 'skip_download': True,
             },
+            'skip': 'Blocked outside Russia',
         },
         {
             'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d4abd4031..3943cc9c5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -22,6 +22,7 @@ import struct
 import subprocess
 import sys
 import traceback
+import xml.etree.ElementTree
 import zlib
 
 try:
@@ -1267,3 +1268,13 @@ def read_batch_urls(batch_fd):
 
 def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+
+
+def parse_xml(s):
+    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+        def doctype(self, name, pubid, system):
+            pass  # Ignore doctypes
+
+    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
+    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
+    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)