aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/generic.py2
-rw-r--r--youtube_dl/extractor/netzkino.py3
-rw-r--r--youtube_dl/extractor/sockshare.py5
-rw-r--r--youtube_dl/extractor/theonion.py17
-rw-r--r--youtube_dl/extractor/webofstories.py22
5 files changed, 21 insertions, 28 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 36a1f65bf..8dce96a64 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': 'Mrj4DVp2zeA',
'ext': 'mp4',
- 'upload_date': '20150204',
+ 'upload_date': '20150212',
'uploader': 'The National Archives UK',
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
'uploader_id': 'NationalArchives08',
diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py
index 93567d1e3..bc17e20aa 100644
--- a/youtube_dl/extractor/netzkino.py
+++ b/youtube_dl/extractor/netzkino.py
@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
'timestamp': 1344858571,
'age_limit': 12,
},
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+ }
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py
index 7d3c0e937..b5fa6f1da 100644
--- a/youtube_dl/extractor/sockshare.py
+++ b/youtube_dl/extractor/sockshare.py
@@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
'id': '437BE28B89D799D7',
'title': 'big_buck_bunny_720p_surround.avi',
'ext': 'avi',
- 'thumbnail': 're:^http://.*\.jpg$',
}
}
@@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
''', webpage, 'hash')
fields = {
- "hash": confirm_hash,
+ "hash": confirm_hash.encode('utf-8'),
"confirm": "Continue as Free User"
}
@@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
webpage, 'title', default=None)
thumbnail = self._html_search_regex(
r'<img\s+src="([^"]*)".+?name="bg"',
- webpage, 'thumbnail')
+ webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',
diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py
index b65d8e03f..10239c906 100644
--- a/youtube_dl/extractor/theonion.py
+++ b/youtube_dl/extractor/theonion.py
@@ -4,11 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
class TheOnionIE(InfoExtractor):
- _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+ _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
_TEST = {
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- article_id = mobj.group('article_id')
-
- webpage = self._download_webpage(url, article_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'"videoId":\s(\d+),', webpage, 'video ID')
@@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
- if not sources:
- raise ExtractorError(
- 'No sources found for video %s' % video_id, expected=True)
-
formats = []
for src, type_ in sources:
if type_ == 'video/mp4':
@@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
})
elif type_ == 'application/x-mpegURL':
formats.extend(
- self._extract_m3u8_formats(src, video_id, preference=-1))
+ self._extract_m3u8_formats(src, display_id, preference=-1))
else:
self.report_warning(
'Encountered unexpected format: %s' % type_)
-
self._sort_formats(formats)
return {
'id': video_id,
+ 'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
index 396cf4e83..73077a312 100644
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)
- story_filename = self._search_regex(
- r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
- speaker_id = self._search_regex(
- r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
- story_id = self._search_regex(
- r'\.storyId\((\d+)\)', webpage, 'story ID')
- speaker_type = self._search_regex(
- r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
- great_life = self._search_regex(
- r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+ embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+ r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+ webpage, 'embed params').split(',')]
+
+ (
+ _, speaker_id, story_id, story_duration,
+ speaker_type, great_life, _thumbnail, _has_subtitles,
+ story_filename, _story_order) = embed_params
+
is_great_life_series = great_life == 'true'
- duration = int_or_none(self._search_regex(
- r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+ duration = int_or_none(story_duration)
# URL building, see: http://www.webofstories.com/scripts/player.js
ms_prefix = ''