[YoutubeDL] Support DASH manifest downloading

author: Yen Chi Hsuan <yan12125@gmail.com> 2015-06-03 23:10:18 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2015-06-03 23:10:18 +0800
commit: 6800d3372f35e08dcc4d34d06601815bf0cb0a3d (patch)
tree: f619f599f021410b83367e80517de198f2556185
parent: 8f9478412424b87e4fb77be53d239c13932b078a (diff)
3 files changed, 60 insertions, 0 deletions
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644
index 000000000..18eca2c04
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+from .common import FileDownloader
+from ..compat import compat_urllib_request
+
+import re
+
+
+class DashSegmentsFD(FileDownloader):
+    """
+    Download info_dict['initialization_url'] and then each of info_dict['segment_urls'], appending all of them to one file
+    """
+    def real_download(self, filename, info_dict):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+        base_url = info_dict['url']  # prefix that relative segment URLs are joined to
+        segment_urls = info_dict['segment_urls']
+
+        self.byte_counter = 0  # total bytes written so far; passed to the progress hooks at the end
+
+        def append_url_to_file(outf, target_url, target_name):
+            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+            req = compat_urllib_request.Request(target_url)
+            data = self.ydl.urlopen(req).read()  # the whole response is read into memory before writing
+            outf.write(data)
+            self.byte_counter += len(data)
+
+        def combine_url(base_url, target_url):  # absolute http(s) URLs pass through unchanged
+            if re.match(r'^https?://', target_url):
+                return target_url
+            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)  # exactly one '/' separator
+
+        with open(tmpfilename, 'wb') as outf:  # written under the temporary name, renamed to filename below
+            append_url_to_file(
+                outf, combine_url(base_url, info_dict['initialization_url']),
+                'initialization segment')
+            for i, segment_url in enumerate(segment_urls):
+                append_url_to_file(
+                    outf, combine_url(base_url, segment_url),
+                    'segment %d / %d' % (i + 1, len(segment_urls)))
+
+        self.try_rename(tmpfilename, filename)
+
+        self._hook_progress({
+            'downloaded_bytes': self.byte_counter,
+            'total_bytes': self.byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
+        return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index b7f144af9..ceacb8522 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -6,6 +6,7 @@ import socket
 import time
 
 from .common import FileDownloader
+from .dash import DashSegmentsFD
 from ..compat import (
     compat_urllib_request,
     compat_urllib_error,
@@ -19,6 +20,9 @@ from ..utils import (
 
 class HttpFD(FileDownloader):
     def real_download(self, filename, info_dict):
+        if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))):
+            return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict)
+
         url = info_dict['url']
         tmpfilename = self.temp_name(filename)
         stream = None
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index aacb999ce..5d1297e0d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # TODO implement WebVTT downloading
                     pass
                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                     format_id = r.attrib['id']
                     video_url = url_el.text
                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -815,6 +816,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         'filesize': filesize,
                         'fps': int_or_none(r.attrib.get('frameRate')),
                     }
+                    if segment_list:
+                        f.update({
+                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')]
+                        })
                     try:
                         existing_format = next(
                             fo for fo in formats
author	Yen Chi Hsuan <yan12125@gmail.com>	2015-06-03 23:10:18 +0800
committer	Yen Chi Hsuan <yan12125@gmail.com>	2015-06-03 23:10:18 +0800
commit	6800d3372f35e08dcc4d34d06601815bf0cb0a3d (patch)
tree	f619f599f021410b83367e80517de198f2556185
parent	8f9478412424b87e4fb77be53d239c13932b078a (diff)