aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorremitamine <remitamine@gmail.com>2016-03-30 21:18:07 +0100
committerremitamine <remitamine@gmail.com>2016-03-30 21:18:07 +0100
commit6b820a2376a953657578f9a477ff7768d3633512 (patch)
treeabe4a07c8df8a899c120e7daddfeb9e701eeaf26 /youtube_dl/extractor
parente621a344e6ec3518420f0b13577726615c2f4485 (diff)
[myspace] improve extraction
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/myspace.py80
1 files changed, 53 insertions, 27 deletions
diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py
index 83414a232..0d5238d77 100644
--- a/youtube_dl/extractor/myspace.py
+++ b/youtube_dl/extractor/myspace.py
@@ -2,13 +2,13 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_str,
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
)
-from ..utils import ExtractorError
class MySpaceIE(InfoExtractor):
@@ -24,6 +24,8 @@ class MySpaceIE(InfoExtractor):
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage',
'uploader_id': 'fiveminutestothestage',
+ 'timestamp': 1414108751,
+ 'upload_date': '20141023',
},
'params': {
# rtmp download
@@ -64,7 +66,7 @@ class MySpaceIE(InfoExtractor):
'ext': 'mp4',
'title': 'Starset - First Light',
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
- 'uploader': 'Jacob Soren',
+ 'uploader': 'Yumi K',
'uploader_id': 'SorenPromotions',
'upload_date': '20140725',
}
@@ -78,6 +80,19 @@ class MySpaceIE(InfoExtractor):
player_url = self._search_regex(
r'playerSwf":"([^"?]*)', webpage, 'player URL')
+ def rtmp_format_from_stream_url(stream_url, width=None, height=None):
+ rtmp_url, play_path = stream_url.split(';', 1)
+ return {
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': play_path,
+ 'player_url': player_url,
+ 'protocol': 'rtmp',
+ 'ext': 'flv',
+ 'width': width,
+ 'height': height,
+ }
+
if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable
song_data = self._search_regex(
@@ -93,8 +108,8 @@ class MySpaceIE(InfoExtractor):
return self._search_regex(
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
song_data, name, default='', group='data')
- streamUrl = search_data('stream-url')
- if not streamUrl:
+ stream_url = search_data('stream-url')
+ if not stream_url:
vevo_id = search_data('vevo-id')
youtube_id = search_data('youtube-id')
if vevo_id:
@@ -106,36 +121,47 @@ class MySpaceIE(InfoExtractor):
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
- info = {
+ return {
'id': video_id,
'title': self._og_search_title(webpage),
'uploader': search_data('artist-name'),
'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(search_data('duration')),
+ 'formats': [rtmp_format_from_stream_url(stream_url)]
}
else:
- context = json.loads(self._search_regex(
- r'context = ({.*?});', webpage, 'context'))
- video = context['video']
- streamUrl = video['streamUrl']
- info = {
- 'id': compat_str(video['mediaId']),
+ video = self._parse_json(self._search_regex(
+ r'context = ({.*?});', webpage, 'context'),
+ video_id)['video']
+ formats = []
+ hls_stream_url = video.get('hlsStreamUrl')
+ if hls_stream_url:
+ formats.append({
+ 'format_id': 'hls',
+ 'url': hls_stream_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'mp4',
+ })
+ stream_url = video.get('streamUrl')
+ if stream_url:
+ formats.append(rtmp_format_from_stream_url(
+ stream_url,
+ int_or_none(video.get('width')),
+ int_or_none(video.get('height'))))
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
'title': video['title'],
- 'description': video['description'],
- 'thumbnail': video['imageUrl'],
- 'uploader': video['artistName'],
- 'uploader_id': video['artistUsername'],
+ 'description': video.get('description'),
+ 'thumbnail': video.get('imageUrl'),
+ 'uploader': video.get('artistName'),
+ 'uploader_id': video.get('artistUsername'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('dateAdded')),
+ 'formats': formats,
}
- rtmp_url, play_path = streamUrl.split(';', 1)
- info.update({
- 'url': rtmp_url,
- 'play_path': play_path,
- 'player_url': player_url,
- 'ext': 'flv',
- })
- return info
-
class MySpaceAlbumIE(InfoExtractor):
IE_NAME = 'MySpace:album'