about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Remita Amine <remitamine@gmail.com> 2017-01-06 11:23:43 +0100
committer Remita Amine <remitamine@gmail.com> 2017-01-06 11:23:43 +0100
commit e5dfdc8164608d084dd4f788ac40c895f4dc1150 (patch)
tree aca40cd6e3f2cd2811dddff11fd99fd97966b6ce
parent a814da3f62640dfb993093958b2635a2afb39d15 (diff)
[sendtonews] improve info extraction
-rw-r--r-- youtube_dl/extractor/sendtonews.py 36
1 files changed, 26 insertions, 10 deletions
diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py
index 941a82e1a..9880a5a78 100644
--- a/youtube_dl/extractor/sendtonews.py
+++ b/youtube_dl/extractor/sendtonews.py
@@ -8,6 +8,9 @@ from ..utils import (
float_or_none,
parse_iso8601,
update_url_query,
+ int_or_none,
+ determine_protocol,
+ unescapeHTML,
)
@@ -20,18 +23,18 @@ class SendtoNewsIE(JWPlatformBaseIE):
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588'
},
- 'playlist_count': 9,
+ 'playlist_count': 8,
# test the first video only to prevent lengthy tests
'playlist': [{
'info_dict': {
- 'id': '198180',
+ 'id': '240385',
'ext': 'mp4',
- 'title': 'Recap: CLE 5, LAA 4',
- 'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win',
- 'duration': 57.343,
+ 'title': 'Indians introduce Encarnacion',
+ 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
+ 'duration': 137.898,
'thumbnail': r're:https?://.*\.jpg$',
- 'upload_date': '20160815',
- 'timestamp': 1471221961,
+ 'upload_date': '20170105',
+ 'timestamp': 1483649762,
},
}],
'params': {
@@ -64,7 +67,20 @@ class SendtoNewsIE(JWPlatformBaseIE):
for video in playlist_data['playlistData'][0]:
info_dict = self._parse_jwplayer_data(
video['jwconfiguration'],
- require_title=False, rtmp_params={'no_resume': True})
+ require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})
+
+ for f in info_dict['formats']:
+ if f.get('tbr'):
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'/(\d+)k/', f['url'], 'bitrate', default=None))
+ if not tbr:
+ continue
+ f.update({
+ 'format_id': '%s-%d' % (determine_protocol(f), tbr),
+ 'tbr': tbr,
+ })
+ self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id'))
thumbnails = []
if video.get('thumbnailUrl'):
@@ -78,8 +94,8 @@ class SendtoNewsIE(JWPlatformBaseIE):
'url': video['smThumbnailUrl'],
})
info_dict.update({
- 'title': video['S_headLine'],
- 'description': video.get('S_fullStory'),
+ 'title': video['S_headLine'].strip(),
+ 'description': unescapeHTML(video.get('S_fullStory')),
'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),