aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r--youtube_dl/extractor/common.py65
1 files changed, 53 insertions, 12 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index db472aace..2e6eeac08 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,11 +1,12 @@
import base64
import hashlib
import json
+import netrc
import os
import re
import socket
import sys
-import netrc
+import time
import xml.etree.ElementTree
from ..utils import (
@@ -17,6 +18,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
+ int_or_none,
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
@@ -68,6 +70,7 @@ class InfoExtractor(object):
* vcodec Name of the video codec in use
* container Name of the container format
* filesize The number of bytes, if known in advance
+ * filesize_approx An estimate for the number of bytes
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
@@ -92,8 +95,12 @@ class InfoExtractor(object):
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats"
- thumbnails: A list of dictionaries (with the entries "resolution" and
- "url") for the varying thumbnails
+ thumbnails: A list of dictionaries, with the following entries:
+ * "url"
+ * "width" (optional, int)
+ * "height" (optional, int)
+ * "resolution" (optional, string "{width}x{height"},
+ deprecated)
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
uploader: Full name of the video uploader.
@@ -295,8 +302,12 @@ class InfoExtractor(object):
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
errnote=u'Unable to download JSON metadata',
- transform_source=None):
- json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+ transform_source=None,
+ fatal=True):
+ json_string = self._download_webpage(
+ url_or_request, video_id, note, errnote, fatal=fatal)
+ if (not fatal) and json_string is False:
+ return None
if transform_source:
json_string = transform_source(json_string)
try:
@@ -363,7 +374,8 @@ class InfoExtractor(object):
else:
for p in pattern:
mobj = re.search(p, string, flags)
- if mobj: break
+ if mobj:
+ break
if os.name != 'nt' and sys.stderr.isatty():
_name = u'\033[0;34m%s\033[0m' % name
@@ -451,18 +463,22 @@ class InfoExtractor(object):
return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
- regexes = self._og_regexes('video')
- if secure: regexes = self._og_regexes('video:secure_url') + regexes
+ regexes = self._og_regexes('video') + self._og_regexes('video:url')
+ if secure:
+ regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
- def _html_search_meta(self, name, html, display_name=None, fatal=False):
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
+ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
- (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
+ (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
- html, display_name, fatal=fatal)
+ html, display_name, fatal=fatal, **kwargs)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
@@ -547,6 +563,7 @@ class InfoExtractor(object):
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1,
+ f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('format_id'),
)
formats.sort(key=_formats_key)
@@ -568,6 +585,31 @@ class InfoExtractor(object):
else:
return url
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
+ def _extract_f4m_formats(self, manifest_url, video_id):
+ manifest = self._download_xml(
+ manifest_url, video_id, 'Downloading f4m manifest',
+ 'Unable to download f4m manifest')
+
+ formats = []
+ for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+ formats.append({
+ 'url': manifest_url,
+ 'ext': 'flv',
+ 'tbr': int_or_none(media_el.attrib.get('bitrate')),
+ 'width': int_or_none(media_el.attrib.get('width')),
+ 'height': int_or_none(media_el.attrib.get('height')),
+ })
+ self._sort_formats(formats)
+
+ return formats
+
class SearchInfoExtractor(InfoExtractor):
"""
@@ -611,4 +653,3 @@ class SearchInfoExtractor(InfoExtractor):
@property
def SEARCH_KEY(self):
return self._SEARCH_KEY
-