From 17b598d30cae2c287f3556f874ddf0fc5d028aec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 21:05:55 +0800 Subject: [common] _parse_dash_manifest() from youtube.py --- youtube_dl/extractor/common.py | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b3d57dfce..7ad255672 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,6 +1330,58 @@ class InfoExtractor(object): }) return entries + def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): + formats = [] + for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + mime_type = a.attrib.get('mimeType') + for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + if mime_type == 'text/vtt': + # TODO implement WebVTT downloading + pass + elif mime_type.startswith('audio/') or mime_type.startswith('video/'): + segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') + format_id = r.attrib['id'] + video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'height': int_or_none(r.attrib.get('height')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + if segment_list is not None: + f.update({ + 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'protocol': 'http_dash_segments', + }) + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + full_info = self._formats.get(format_id, {}).copy() + full_info.update(f) + codecs = r.attrib.get('codecs') + if codecs: + if full_info.get('acodec') == 'none': + full_info['vcodec'] = codecs + elif full_info.get('vcodec') == 'none': + full_info['acodec'] = codecs + formats.append(full_info) + else: + existing_format.update(f) + else: + self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() -- cgit v1.2.3 From b323e1707d8f058b88a5f15f3418b31cf969399d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 21:27:43 +0800 Subject: [common] Modify _parse_dash_manifest for use in Facebook --- youtube_dl/extractor/common.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7ad255672..83628a68f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,22 +1330,24 @@ class InfoExtractor(object): }) return entries - def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): + def _parse_dash_manifest(self, video_id, dash_doc, default_ns='urn:mpeg:DASH:schema:MPD:2011', formats_dict={}, fatal=True): + def _add_ns(tag): + return '{%s}%s' % (default_ns, tag) + formats = [] - for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')): mime_type = a.attrib.get('mimeType') - for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue + for r in a.findall(_add_ns('Representation')): + mime_type = r.attrib.get('mimeType') or mime_type + url_el = r.find(_add_ns('BaseURL')) if mime_type == 'text/vtt': # TODO implement WebVTT downloading pass elif mime_type.startswith('audio/') or mime_type.startswith('video/'): - segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') + segment_list = r.find(_add_ns('SegmentList')) format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + video_url = url_el.text if url_el else None + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el else None) f = { 'format_id': format_id, 'url': video_url, @@ -1357,17 +1359,20 @@ class InfoExtractor(object): 'fps': int_or_none(r.attrib.get('frameRate')), } if segment_list is not None: + initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL'] f.update({ - 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], - 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'initialization_url': initialization_url, + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))], 'protocol': 'http_dash_segments', }) + if not f.get('url'): + f['url'] = initialization_url try: existing_format = next( fo for fo in formats if fo['format_id'] == format_id) except StopIteration: - full_info = self._formats.get(format_id, {}).copy() + full_info = formats_dict.get(format_id, {}).copy() full_info.update(f) codecs = r.attrib.get('codecs') if codecs: -- cgit v1.2.3 From 5ea1eb78f5e8d9b2ede35504dc3b999f5f89bbaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 21:36:01 +0800 Subject: [common] Fix for youtube --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 83628a68f..243db71dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1346,8 +1346,8 @@ class InfoExtractor(object): elif mime_type.startswith('audio/') or mime_type.startswith('video/'): segment_list = r.find(_add_ns('SegmentList')) format_id = r.attrib['id'] - video_url = url_el.text if url_el else None - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el else None) + video_url = url_el.text if url_el is not None else None + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) f = { 'format_id': format_id, 'url': video_url, -- cgit v1.2.3 From df374b52228e8a083d045f9bc56847e418ef452e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 21:42:27 +0800 Subject: [common] Prefer the manifest than formats_dict in determining codecs --- youtube_dl/extractor/common.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 243db71dc..f1313ef04 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1376,10 +1376,15 @@ class InfoExtractor(object): full_info.update(f) codecs = r.attrib.get('codecs') if codecs: - if full_info.get('acodec') == 'none': - full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none': - full_info['acodec'] = codecs + if mime_type.startswith('video/'): + vcodec, acodec = codecs, 'none' + else: # mime_type.startswith('audio/') + vcodec, acodec = 'none', codecs + + full_info.update({ + 'vcodec': vcodec, + 'acodec': acodec, + }) formats.append(full_info) else: existing_format.update(f) -- cgit v1.2.3 From 16f38a699f7c6d2820720d704a2373ba28c568b1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 22:40:13 +0800 Subject: [common] Rename to namespace For consistency with _parse_smil_* --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f1313ef04..a05efec9e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,9 +1330,9 @@ class InfoExtractor(object): }) return entries - def _parse_dash_manifest(self, video_id, dash_doc, default_ns='urn:mpeg:DASH:schema:MPD:2011', formats_dict={}, fatal=True): - def _add_ns(tag): - return '{%s}%s' % (default_ns, tag) + def _parse_dash_manifest(self, video_id, dash_doc, namespace=None, formats_dict={}, fatal=True): + def _add_ns(path): + return self._xpath_ns(path, namespace) formats = [] for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')): -- cgit v1.2.3 From c94678957fbe4483b2b7c8b3e6824cb7a215d42d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 22:45:16 +0800 Subject: [common] Remove unused arguments --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a05efec9e..5a2b7a721 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,7 +1330,7 @@ class InfoExtractor(object): }) return entries - def _parse_dash_manifest(self, video_id, dash_doc, namespace=None, formats_dict={}, fatal=True): + def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}): def _add_ns(path): return self._xpath_ns(path, namespace) -- cgit v1.2.3 From 2d2fa82d172a10a49fb5449fa35bc409de778f05 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 Jan 2016 22:52:23 +0800 Subject: [common] Add _extract_dash_manifest_formats --- youtube_dl/extractor/common.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a2b7a721..199a04d1c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,6 +1330,21 @@ class InfoExtractor(object): }) return entries + def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True): + return self._download_xml( + dash_manifest_url, video_id, + note='Downloading DASH manifest', + errnote='Could not download DASH manifest', + fatal=fatal) + + def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}): + dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal) + if dash_doc is False: + return [] + + return self._parse_dash_manifest( + dash_doc, namespace=namespace, formats_dict=formats_dict) + def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}): def _add_ns(path): return self._xpath_ns(path, namespace) -- cgit v1.2.3