about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/francetv.py 2
-rw-r--r-- youtube_dl/extractor/vbox7.py 21
-rw-r--r-- youtube_dl/extractor/vk.py 23
-rw-r--r-- youtube_dl/extractor/youtube.py 2
4 files changed, 35 insertions, 13 deletions
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index edf555b29..db0bbec1e 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -60,7 +60,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
continue
video_url_parsed = compat_urllib_parse_urlparse(video_url)
f4m_url = self._download_webpage(
- 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
+ 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path,
video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index dd026748d..722eb5236 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
+ compat_urlparse,
)
from ..utils import (
ExtractorError,
@@ -26,11 +27,21 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- redirect_page, urlh = self._download_webpage_handle(url, video_id)
- new_location = self._search_regex(r'window\.location = \'(.*)\';',
- redirect_page, 'redirect location')
- redirect_url = urlh.geturl() + new_location
- webpage = self._download_webpage(redirect_url, video_id,
+ # need to get the page 3 times for the correct jsSecretToken cookie
+ # which is necessary for the correct title
+ def get_session_id():
+ redirect_page = self._download_webpage(url, video_id)
+ session_id_url = self._search_regex(
+ r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page,
+ 'session id url')
+ self._download_webpage(
+ compat_urlparse.urljoin(url, session_id_url), video_id,
+ 'Getting session id')
+
+ get_session_id()
+ get_session_id()
+
+ webpage = self._download_webpage(url, video_id,
'Downloading redirect page')
title = self._html_search_regex(r'<title>(.*)</title>',
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index d0e772108..38ff3c1a9 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -13,6 +13,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
orderedSet,
+ str_to_int,
unescapeHTML,
unified_strdate,
)
@@ -34,6 +35,7 @@ class VKIE(InfoExtractor):
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
'upload_date': '20120212',
+ 'view_count': int,
},
},
{
@@ -45,7 +47,8 @@ class VKIE(InfoExtractor):
'uploader': 'Tom Cruise',
'title': 'No name',
'duration': 9,
- 'upload_date': '20130721'
+ 'upload_date': '20130721',
+ 'view_count': int,
}
},
{
@@ -59,6 +62,7 @@ class VKIE(InfoExtractor):
'title': 'Lin Dan',
'duration': 101,
'upload_date': '20120730',
+ 'view_count': int,
}
},
{
@@ -73,7 +77,8 @@ class VKIE(InfoExtractor):
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
- 'upload_date': '20121218'
+ 'upload_date': '20121218',
+ 'view_count': int,
},
'skip': 'Requires vk account credentials',
},
@@ -100,6 +105,7 @@ class VKIE(InfoExtractor):
'title': 'Книга Илая',
'duration': 6771,
'upload_date': '20140626',
+ 'view_count': int,
},
'skip': 'Only works from Russia',
},
@@ -175,25 +181,29 @@ class VKIE(InfoExtractor):
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
- m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
+ m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts:
- m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
+ m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
if m_opts_url:
opts_url = m_opts_url.group(1)
if opts_url.startswith('//'):
opts_url = 'http:' + opts_url
return self.url_result(opts_url)
- data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
+ data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
data = json.loads(data_json)
# Extract upload date
upload_date = None
- mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+ mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
if mobj is not None:
mobj.group(1) + ' ' + mobj.group(2)
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+ view_count = str_to_int(self._search_regex(
+ r'"mv_views_count_number"[^>]*>([\d,.]+) views<',
+ info_page, 'view count', fatal=False))
+
formats = [{
'format_id': k,
'url': v,
@@ -210,6 +220,7 @@ class VKIE(InfoExtractor):
'uploader': data.get('md_author'),
'duration': data.get('duration'),
'upload_date': upload_date,
+ 'view_count': view_count,
}
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3448bec4f..9e2671192 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1504,7 +1504,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
for pagenum in itertools.count(1):
url_query = {
- 'search_query': query,
+ 'search_query': query.encode('utf-8'),
'page': pagenum,
'spf': 'navigate',
}