aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/extractor/generic.py52
-rw-r--r--youtube_dl/utils.py5
2 files changed, 46 insertions, 11 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index da933067a..209f68204 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -13,6 +13,8 @@ from ..utils import (
ExtractorError,
smuggle_url,
unescapeHTML,
+ unified_strdate,
+ url_basename,
)
from .brightcove import BrightcoveIE
@@ -71,6 +73,17 @@ class GenericIE(InfoExtractor):
u'skip_download': True,
},
},
+ # Direct link to a video
+ {
+ u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4',
+ u'file': u'trailer.mp4',
+ u'md5': u'67d406c2bcb6af27fa886f31aa934bbe',
+ u'info_dict': {
+ u'id': u'trailer',
+ u'title': u'trailer',
+ u'upload_date': u'20100513',
+ }
+ }
]
def report_download_webpage(self, video_id):
@@ -83,7 +96,7 @@ class GenericIE(InfoExtractor):
"""Report information extraction."""
self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
- def _test_redirect(self, url):
+ def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url."""
class HeadRequest(compat_urllib_request.Request):
def get_method(self):
@@ -131,29 +144,46 @@ class GenericIE(InfoExtractor):
response = opener.open(HeadRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
- new_url = response.geturl()
-
- if url == new_url:
- return False
-
- self.report_following_redirect(new_url)
- return new_url
+ return response
def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
+ video_id = os.path.splitext(url.split('/')[-1])[0]
try:
- new_url = self._test_redirect(url)
- if new_url:
+ response = self._send_head(url)
+
+ # Check for redirect
+ new_url = response.geturl()
+ if url != new_url:
+ self.report_following_redirect(new_url)
return self.url_result(new_url)
+
+ # Check for direct link to a video
+ content_type = response.headers.get('Content-Type', '')
+ m = re.match(r'^(?:audio|video)/(?P<format_id>.+)$', content_type)
+ if m:
+ upload_date = response.headers.get('Last-Modified')
+ if upload_date:
+ upload_date = unified_strdate(upload_date)
+ assert (url_basename(url) == 'trailer.mp4')
+ return {
+ 'id': video_id,
+ 'title': os.path.splitext(url_basename(url))[0],
+ 'formats': [{
+ 'format_id': m.group('format_id'),
+ 'url': url,
+ }],
+ 'upload_date': upload_date,
+ }
+
except compat_urllib_error.HTTPError:
# This may be a stupid server that doesn't like HEAD, our UA, or so
pass
- video_id = url.split('/')[-1]
try:
webpage = self._download_webpage(url, video_id)
except ValueError:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2d12e2df9..d5069dcca 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -761,12 +761,17 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S',
]
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except:
pass
+ if upload_date is None:
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
return upload_date
def determine_ext(url, default_ext=u'unknown_video'):