about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Remita Amine <remitamine@gmail.com> 2017-02-03 10:15:03 +0100
committer Remita Amine <remitamine@gmail.com> 2017-02-03 10:15:03 +0100
commit 4ce3407d089ae8c34341e6d68267910683d4b500 (patch)
tree db424a97c7ea8b319e202a54918f9f021d546f3b
parent a0758dfa1afd5b04773ba3b3b17ac71d22054821 (diff)
download youtube-dl-4ce3407d089ae8c34341e6d68267910683d4b500.tar.xz
[filmon] improve extraction
-rw-r--r-- youtube_dl/extractor/extractors.py 5
-rw-r--r-- youtube_dl/extractor/filmon.py 222
2 files changed, 132 insertions, 95 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c9b9ebd23..e4ee43ee3 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -287,7 +287,10 @@ from .fc2 import (
FC2EmbedIE,
)
from .fczenit import FczenitIE
-from .filmon import FilmOnIE, FilmOnVODIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py
index 987792fec..f775fe0ba 100644
--- a/youtube_dl/extractor/filmon.py
+++ b/youtube_dl/extractor/filmon.py
@@ -2,143 +2,177 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import qualities
-from ..compat import compat_urllib_request
-
-
-_QUALITY = qualities(('low', 'high'))
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ qualities,
+ strip_or_none,
+ int_or_none,
+ ExtractorError,
+)
class FilmOnIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
+ IE_NAME = 'filmon'
+ _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://www.filmon.com/channel/filmon-sports',
- 'only_matching': True,
+ 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
+ 'info_dict': {
+ 'id': '24869',
+ 'ext': 'mp4',
+ 'title': 'Plan 9 From Outer Space',
+ 'description': 'Dead human, zombies and vampires',
+ },
}, {
- 'url': 'https://www.filmon.com/tv/2894',
- 'only_matching': True,
+ 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
+ 'info_dict': {
+ 'id': '2825',
+ 'title': 'Popeye Series 1',
+ 'description': 'The original series of Popeye.',
+ },
+ 'playlist_mincount': 8,
}]
def _real_extract(self, url):
- channel_id = self._match_id(url)
+ video_id = self._match_id(url)
- request = compat_urllib_request.Request('https://www.filmon.com/channel/%s' % (channel_id))
- request.add_header('X-Requested-With', 'XMLHttpRequest')
- channel_info = self._download_json(request, channel_id)
- now_playing = channel_info['now_playing']
+ try:
+ response = self._download_json(
+ 'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
+ video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
- thumbnails = []
- for thumb in now_playing.get('images', ()):
- if thumb['type'] != '2':
- continue
- thumbnails.append({
- 'url': thumb['url'],
- 'width': int(thumb['width']),
- 'height': int(thumb['height']),
- })
+ title = response['title']
+ description = strip_or_none(response.get('description'))
- formats = []
+ if response.get('type_id') == 1:
+ entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])]
+ return self.playlist_result(entries, video_id, title, description)
- for stream in channel_info['streams']:
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for format_id, stream in response.get('streams', {}).items():
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
formats.append({
- 'format_id': str(stream['id']),
- # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats
- # because 0) it doesn't have bitrate variants anyway, and 1) the ids generated
- # by that method are highly unstable (because the bitrate is variable)
- 'url': stream['url'],
- 'resolution': stream['name'],
- 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']),
+ 'format_id': format_id,
+ 'url': stream_url,
'ext': 'mp4',
- 'quality': _QUALITY(stream['quality']),
- 'preference': int(stream['watch-timeout']),
+ 'quality': QUALITY(stream.get('quality')),
+ 'protocol': 'm3u8_native',
})
self._sort_formats(formats)
+ thumbnails = []
+ poster = response.get('poster', {})
+ thumbs = poster.get('thumbs', {})
+ thumbs['poster'] = poster
+ for thumb_id, thumb in thumbs.items():
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb_id,
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
return {
- 'id': str(channel_info['id']),
- 'display_id': channel_info['alias'],
+ 'id': video_id,
+ 'title': title,
'formats': formats,
- # XXX: use the channel description (channel_info['description'])?
- 'uploader_id': channel_info['alias'],
- 'uploader': channel_info['title'], # XXX: kinda stretching it...
- 'title': now_playing.get('programme_name') or channel_info['title'],
- 'description': now_playing.get('programme_description'),
+ 'description': description,
'thumbnails': thumbnails,
- 'is_live': True,
}
-class FilmOnVODIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?filmon\.com/vod/view/(?P<id>\d+)'
+class FilmOnChannelIE(InfoExtractor):
+ IE_NAME = 'filmon:channel'
+ _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
_TESTS = [{
- 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
+ # VOD
+ 'url': 'http://www.filmon.com/tv/sports-haters',
'info_dict': {
- 'id': '24869',
+ 'id': '4190',
'ext': 'mp4',
- 'title': 'Plan 9 From Outer Space',
- 'description': 'Dead human, zombies and vampires',
+ 'title': 'Sports Haters',
+ 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
},
}, {
- 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
- 'info_dict': {
- 'id': '2825',
- 'title': 'Popeye Series 1',
- },
- 'playlist_count': 8,
+ # LIVE
+ 'url': 'https://www.filmon.com/channel/filmon-sports',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.filmon.com/tv/2894',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
+ _THUMBNAIL_RES = [
+ ('logo', 56, 28),
+ ('big_logo', 106, 106),
+ ('extra_big_logo', 300, 300),
+ ]
- result = self._download_json('https://www.filmon.com/api/vod/movie?id=%s' % (video_id), video_id)
- if result['code'] != 200:
- raise ExtractorError('FilmOn said: %s' % (result['reason']), expected=True)
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
- response = result['response']
+ try:
+ channel_data = self._download_json(
+ 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
- if response.get('episodes'):
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'title': response['title'],
- 'entries': [{
- '_type': 'url',
- 'url': 'https://www.filmon.com/vod/view/%s' % (ep),
- } for ep in response['episodes']]
- }
+ channel_id = compat_str(channel_data['id'])
+ is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox')
+ title = channel_data['title']
+ QUALITY = qualities(('low', 'high'))
formats = []
- for (id, stream) in response['streams'].items():
+ for stream in channel_data.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ if not is_live:
+ formats.extend(self._extract_wowza_formats(
+ stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
+ continue
+ quality = stream.get('quality')
formats.append({
- 'format_id': id,
- 'url': stream['url'],
- 'resolution': stream['name'],
- 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']),
+ 'format_id': quality,
+ # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats
+ # because it doesn't have bitrate variants anyway
+ 'url': stream_url,
'ext': 'mp4',
- 'quality': _QUALITY(stream['quality']),
- 'preference': int(stream['watch-timeout']),
+ 'quality': QUALITY(quality),
})
self._sort_formats(formats)
- poster = response['poster']
- thumbnails = [{
- 'id': 'poster',
- 'url': poster['url'],
- 'width': poster['width'],
- 'height': poster['height'],
- }]
- for (id, thumb) in poster['thumbs'].items():
+ thumbnails = []
+ for name, width, height in self._THUMBNAIL_RES:
thumbnails.append({
- 'id': id,
- 'url': thumb['url'],
- 'width': thumb['width'],
- 'height': thumb['height'],
+ 'id': name,
+ 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
+ 'width': width,
+ 'height': height,
})
return {
- 'id': video_id,
- 'title': response['title'],
- 'formats': formats,
- 'description': response['description'],
+ 'id': channel_id,
+ 'display_id': channel_data.get('alias'),
+ 'title': self._live_title(title) if is_live else title,
+ 'description': channel_data.get('description'),
'thumbnails': thumbnails,
+ 'formats': formats,
+ 'is_live': is_live,
}