diff options
| -rw-r--r-- | youtube_dl/downloader/__init__.py | 2 |
| -rw-r--r-- | youtube_dl/downloader/dash.py | 66 |
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 27 |
3 files changed, 94 insertions, 1 deletion
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index f110830c4..dccc59212 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -8,6 +8,7 @@ from .hls import NativeHlsFD
 from .http import HttpFD
 from .rtsp import RtspFD
 from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
 
 from ..utils import (
     determine_protocol,
@@ -20,6 +21,7 @@ PROTOCOL_MAP = {
     'mms': RtspFD,
     'rtsp': RtspFD,
     'f4m': F4mFD,
+    'http_dash_segments': DashSegmentsFD,
 }
 
 
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644
index 000000000..a4685d307
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import FileDownloader
+from ..compat import compat_urllib_request
+
+
+class DashSegmentsFD(FileDownloader):
+    """
+    Download segments in a DASH manifest
+    """
+    def real_download(self, filename, info_dict):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+        base_url = info_dict['url']
+        segment_urls = info_dict['segment_urls']
+
+        is_test = self.params.get('test', False)
+        remaining_bytes = self._TEST_FILE_SIZE if is_test else None
+        byte_counter = 0
+
+        def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
+            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+            req = compat_urllib_request.Request(target_url)
+            if remaining_bytes is not None:
+                req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+
+            data = self.ydl.urlopen(req).read()
+
+            if remaining_bytes is not None:
+                data = data[:remaining_bytes]
+
+            outf.write(data)
+            return len(data)
+
+        def combine_url(base_url, target_url):
+            if re.match(r'^https?://', target_url):
+                return target_url
+            return '%s/%s' % (base_url, target_url)
+
+        with open(tmpfilename, 'wb') as outf:
+            append_url_to_file(
+                outf, combine_url(base_url, info_dict['initialization_url']),
+                'initialization segment')
+            for i, segment_url in enumerate(segment_urls):
+                segment_len = append_url_to_file(
+                    outf, combine_url(base_url, segment_url),
+                    'segment %d / %d' % (i + 1, len(segment_urls)),
+                    remaining_bytes)
+                byte_counter += segment_len
+                if remaining_bytes is not None:
+                    remaining_bytes -= segment_len
+                    if remaining_bytes <= 0:
+                        break
+
+        self.try_rename(tmpfilename, filename)
+
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
+        return True
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e7f5c7861..9a08924ef 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -535,7 +535,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'dorappi2000',
                 'formats': 'mincount:33',
             },
-        }
+        },
+        # DASH manifest with segment_list
+        {
+            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+            'md5': '8ce563a1d667b599d21064e982ab9e31',
+            'info_dict': {
+                'id': 'CsmdDsKjzN8',
+                'ext': 'mp4',
+                'upload_date': '20150510',
+                'uploader': 'Airtek',
+                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+            },
+            'params': {
+                'youtube_include_dash_manifest': True,
+                'format': '135',  # bestvideo
+            }
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -826,6 +844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # TODO implement WebVTT downloading
                     pass
                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                     format_id = r.attrib['id']
                     video_url = url_el.text
                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -839,6 +858,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         'filesize': filesize,
                         'fps': int_or_none(r.attrib.get('frameRate')),
                     }
+                    if segment_list is not None:
+                        f.update({
+                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
+                            'protocol': 'http_dash_segments',
+                        })
                     try:
                         existing_format = next(
                             fo for fo in formats
