diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-09-04 22:06:50 +0200 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-09-04 22:06:50 +0200 |
commit | 150f20828be552763dddce1c45b9a4e642cff599 (patch) | |
tree | cfba8bf6406a8161481a86653e138346f91d4315 /youtube_dl/extractor/daum.py | |
parent | 08523ee20a57e7ac28d895165f3b759b311e8495 (diff) |
Add extractor for daum.net (closes #1330)
Diffstat (limited to 'youtube_dl/extractor/daum.py')
-rw-r--r-- | youtube_dl/extractor/daum.py | 71 |
1 files changed, 71 insertions, 0 deletions
# encoding: utf-8
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
    ExtractorError,
)


class DaumIE(InfoExtractor):
    """Information extractor for clips served from tvpot.daum.net."""

    # The numeric clip id is taken from the ``clipid`` query parameter.
    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _extract_formats(self, full_id, video_id, urls_doc):
        """Resolve the download URL of every profile listed in *urls_doc*.

        One MovieLocation request is made per ``output_list`` element.
        Returns a list of dicts with the keys ``url``, ``ext`` and
        ``format_id``, in the order the profiles appear in the document.
        """
        formats = []
        for format_el in urls_doc.findall('result/output_list/output_list'):
            profile = format_el.attrib['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            # note=False: no per-profile progress message is requested.
            url_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?'
                + format_query,
                video_id, note=False)
            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })
        return formats

    def _real_extract(self, url):
        """Build the info dict for the clip at *url*.

        Downloads the clip page, reads the ``vid`` value from its
        ``<link rel="video_src">`` tag, then queries the ClipInfoXml and
        MovieData endpoints with it. The result contains ``id``, ``title``,
        ``formats``, ``thumbnail``, ``duration`` and ``upload_date``, with
        the fields of the last listed format merged in at the top level.

        Raises ExtractorError when the formats document lists no profile.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        full_id = self._search_regex(
            r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        # The downloaded text is re-encoded to bytes before being parsed.
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = self._extract_formats(full_id, video_id, urls_doc)
        if not formats:
            # Fail with an extractor-level error instead of an IndexError
            # from formats[-1] below.
            raise ExtractorError(u'%s: no video formats found' % video_id)

        info = {
            'id': video_id,
            'title': info_doc.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': int(info_doc.find('DURATION').text),
            # Only the first eight characters of REGDTTM are kept.
            'upload_date': info_doc.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info