diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-01-30 21:27:43 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-01-30 21:27:43 +0800 | 
| commit | b323e1707d8f058b88a5f15f3418b31cf969399d (patch) | |
| tree | 957717be82b21d869e732e93be324fc47d21765f | |
| parent | 17b598d30cae2c287f3556f874ddf0fc5d028aec (diff) | |
[common] Modify _parse_dash_manifest for use in Facebook
| -rw-r--r-- | youtube_dl/extractor/common.py | 29 | 
1 file changed, 17 insertions, 12 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7ad255672..83628a68f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1330,22 +1330,24 @@ class InfoExtractor(object):
             })
         return entries
 
-    def _parse_dash_manifest(self, video_id, dash_doc, fatal=True):
+    def _parse_dash_manifest(self, video_id, dash_doc, default_ns='urn:mpeg:DASH:schema:MPD:2011', formats_dict={}, fatal=True):
+        def _add_ns(tag):
+            return '{%s}%s' % (default_ns, tag)
+
         formats = []
-        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+        for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')):
             mime_type = a.attrib.get('mimeType')
-            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-                if url_el is None:
-                    continue
+            for r in a.findall(_add_ns('Representation')):
+                mime_type = r.attrib.get('mimeType') or mime_type
+                url_el = r.find(_add_ns('BaseURL'))
                 if mime_type == 'text/vtt':
                     # TODO implement WebVTT downloading
                     pass
                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
-                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
+                    segment_list = r.find(_add_ns('SegmentList'))
                     format_id = r.attrib['id']
-                    video_url = url_el.text
-                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+                    video_url = url_el.text if url_el else None
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el else None)
                     f = {
                         'format_id': format_id,
                         'url': video_url,
@@ -1357,17 +1359,20 @@ class InfoExtractor(object):
                         'fps': int_or_none(r.attrib.get('frameRate')),
                     }
                     if segment_list is not None:
+                        initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
                         f.update({
-                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
-                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
+                            'initialization_url': initialization_url,
+                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
                             'protocol': 'http_dash_segments',
                         })
+                        if not f.get('url'):
+                            f['url'] = initialization_url
                     try:
                         existing_format = next(
                             fo for fo in formats
                             if fo['format_id'] == format_id)
                     except StopIteration:
-                        full_info = self._formats.get(format_id, {}).copy()
+                        full_info = formats_dict.get(format_id, {}).copy()
                         full_info.update(f)
                         codecs = r.attrib.get('codecs')
                         if codecs:
```
