aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-xyoutube_dl/YoutubeDL.py6
-rw-r--r--youtube_dl/extractor/__init__.py3
-rw-r--r--youtube_dl/extractor/cultureunplugged.py63
-rw-r--r--youtube_dl/extractor/cwtv.py88
-rw-r--r--youtube_dl/extractor/zippcast.py94
-rw-r--r--youtube_dl/version.py2
6 files changed, 255 insertions, 1 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 6b73b8e06..0748fbba0 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1244,6 +1244,12 @@ class YoutubeDL(object):
except (ValueError, OverflowError, OSError):
pass
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
subtitles = info_dict.get('subtitles')
if subtitles:
for _, subtitle in subtitles.items():
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3975354da..2c56797a5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -130,6 +130,8 @@ from .crunchyroll import (
)
from .cspan import CSpanIE
from .ctsnews import CtsNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .cwtv import CWTVIE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
@@ -914,6 +916,7 @@ from .zingmp3 import (
ZingMp3SongIE,
ZingMp3AlbumIE,
)
+from .zippcast import ZippCastIE
_ALL_CLASSES = [
klass
diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py
new file mode 100644
index 000000000..9c764fe68
--- /dev/null
+++ b/youtube_dl/extractor/cultureunplugged.py
@@ -0,0 +1,63 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class CultureUnpluggedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'
+ _TESTS = [{
+ 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West',
+ 'md5': 'ac6c093b089f7d05e79934dcb3d228fc',
+ 'info_dict': {
+ 'id': '53662',
+ 'display_id': 'The-Next--Best-West',
+ 'ext': 'mp4',
+ 'title': 'The Next, Best West',
+ 'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'creator': 'Coldstream Creative',
+ 'duration': 2203,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ movie_data = self._download_json(
+ 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id)
+
+ video_url = movie_data['url']
+ title = movie_data['title']
+
+ description = movie_data.get('synopsis')
+ creator = movie_data.get('producer')
+ duration = int_or_none(movie_data.get('duration'))
+ view_count = int_or_none(movie_data.get('views'))
+
+ thumbnails = [{
+ 'url': movie_data['%s_thumb' % size],
+ 'id': size,
+ 'preference': preference,
+ } for preference, size in enumerate((
+ 'small', 'large')) if movie_data.get('%s_thumb' % size)]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'creator': creator,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'thumbnails': thumbnails,
+ }
diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py
new file mode 100644
index 000000000..36af67013
--- /dev/null
+++ b/youtube_dl/extractor/cwtv.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class CWTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
+ _TESTS = [{
+ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'info_dict': {
+ 'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'ext': 'mp4',
+ 'title': 'Legends of Yesterday',
+ 'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
+ 'duration': 2665,
+ 'series': 'Arrow',
+ 'season_number': 4,
+ 'season': '4',
+ 'episode_number': 8,
+ 'upload_date': '20151203',
+ 'timestamp': 1449122100,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
+ 'info_dict': {
+ 'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
+ 'ext': 'mp4',
+ 'title': 'Jeff Davis 4',
+ 'description': 'Jeff Davis is back to make you laugh.',
+ 'duration': 1263,
+ 'series': 'Whose Line Is It Anyway?',
+ 'season_number': 11,
+ 'season': '11',
+ 'episode_number': 20,
+ 'upload_date': '20151006',
+ 'timestamp': 1444107300,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id)
+
+ formats = self._extract_m3u8_formats(
+ video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
+
+ thumbnails = [{
+ 'url': image['uri'],
+ 'width': image.get('width'),
+ 'height': image.get('height'),
+ } for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
+
+ video_metadata = video_data['assetFields']
+
+ subtitles = {
+ 'en': [{
+ 'url': video_metadata['UnicornCcUrl'],
+ }],
+ } if video_metadata.get('UnicornCcUrl') else None
+
+ return {
+ 'id': video_id,
+ 'title': video_metadata['title'],
+ 'description': video_metadata.get('description'),
+ 'duration': int_or_none(video_metadata.get('duration')),
+ 'series': video_metadata.get('seriesName'),
+ 'season_number': int_or_none(video_metadata.get('seasonNumber')),
+ 'season': video_metadata.get('seasonName'),
+ 'episode_number': int_or_none(video_metadata.get('episodeNumber')),
+ 'timestamp': parse_iso8601(video_data.get('startTime')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/zippcast.py b/youtube_dl/extractor/zippcast.py
new file mode 100644
index 000000000..de819376d
--- /dev/null
+++ b/youtube_dl/extractor/zippcast.py
@@ -0,0 +1,94 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ str_to_int,
+)
+
+
+class ZippCastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zippcast\.com/(?:video/|videoview\.php\?.*\bvplay=)(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ # m3u8, hq direct link
+ 'url': 'http://www.zippcast.com/video/c9cfd5c7e44dbc29c81',
+ 'md5': '5ea0263b5606866c4d6cda0fc5e8c6b6',
+ 'info_dict': {
+ 'id': 'c9cfd5c7e44dbc29c81',
+ 'ext': 'mp4',
+ 'title': '[Vinesauce] Vinny - Digital Space Traveler',
+ 'description': 'Muted on youtube, but now uploaded in it\'s original form.',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'vinesauce',
+ 'view_count': int,
+ 'categories': ['Entertainment'],
+ 'tags': list,
+ },
+ }, {
+ # f4m, lq ipod direct link
+ 'url': 'http://www.zippcast.com/video/b79c0a233e9c6581775',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zippcast.com/videoview.php?vplay=c9cfd5c7e44dbc29c81&auto=no',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.zippcast.com/video/%s' % video_id, video_id)
+
+ formats = []
+ video_url = self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?)\1', webpage,
+ 'video url', default=None, group='url')
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ 'preference': 0, # direct link is almost always of worse quality
+ })
+ src_url = self._search_regex(
+ r'src\s*:\s*(?:escape\()?(["\'])(?P<url>http://.+?)\1',
+ webpage, 'src', default=None, group='url')
+ ext = determine_ext(src_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src_url, video_id, f4m_id='hds', fatal=False))
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage)
+ uploader = self._search_regex(
+ r'<a[^>]+href="https?://[^/]+/profile/[^>]+>([^<]+)</a>',
+ webpage, 'uploader', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+ view_count = str_to_int(self._search_regex(
+ r'>([\d,.]+) views!', webpage, 'view count', fatal=False))
+
+ categories = re.findall(
+ r'<a[^>]+href="https?://[^/]+/categories/[^"]+">([^<]+),?<',
+ webpage)
+ tags = re.findall(
+ r'<a[^>]+href="https?://[^/]+/search/tags/[^"]+">([^<]+),?<',
+ webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'categories': categories,
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 4d433b667..02c438f3a 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.01.14'
+__version__ = '2016.01.15'