[afreecatv] Add new extractor for afreecatv.com VODs

author: Peter Rowlands <peter@pmrowla.com> 2016-05-05 03:11:04 +0900
committer: Peter Rowlands <peter@pmrowla.com> 2016-05-05 03:59:23 +0900
commit: 57cf9b7f0689ed7b643ce863427c3211e407e5bf (patch)
tree: 3422b12e452066251ae696d3840ff6a1327dada2
parent: bc7e77a04be6094e64263f9c622cff3cd1fc13cb (diff)
2 files changed, 85 insertions, 0 deletions
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
new file mode 100644
index 000000000..d57546e49
--- /dev/null
+++ b/youtube_dl/extractor/afreecatv.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class AfreecaTVIE(InfoExtractor):
+    """Extractor for afreecatv.com VODs, which are identified by nTitleNo."""
+    IE_DESC = 'afreecatv.com'
+    _VALID_URL = r'''(?x)^
+        https?://(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
+        (?:
+            /app/(?:index|read_ucc_bbs)\.cgi|
+            /player/[Pp]layer\.(?:swf|html))
+        \?.*?\bnTitleNo=(?P<id>\d+)'''
+    _TEST = {
+        'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
+        'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
+        'info_dict': {
+            'id': '36164052',
+            'ext': 'mp4',
+            'title': '데일리 에이프릴 요정들의 시상식!',
+            'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$',
+            'uploader': 'dailyapril',
+            'uploader_id': 'dailyapril',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for a VOD (multi_video if it has several files).
+        Raises ExtractorError when the API reports no such video or no files."""
+        video_id = self._match_id(url)
+        # The info API takes the page's own query string (it has nTitleNo).
+        parsed_url = compat_urllib_parse_urlparse(url)
+        info_url = compat_urlparse.urlunparse(parsed_url._replace(
+            netloc='afbbs.afreecatv.com:8080',
+            path='/api/video/get_video_info.php'))
+        video_xml = self._download_xml(info_url, video_id)
+        track = video_xml.find('track')
+
+        def text_of(tag):
+            # A missing element gives None rather than an AttributeError.
+            node = track.find(tag)
+            return None if node is None else node.text
+
+        if track is None or text_of('flag') != 'SUCCEED':
+            raise ExtractorError('Specified AfreecaTV video does not exist',
+                                 expected=True)
+        title = text_of('title')
+        # One entry per <file> element; a file without a URL is unusable.
+        entries = [{
+            'id': video_file.get('key'),
+            'title': title,
+            'duration': int_or_none(video_file.get('duration')),
+            'formats': [{'url': video_file.text}],
+        } for video in track.findall('video')
+            for video_file in video.findall('file') if video_file.text]
+        if not entries:
+            raise ExtractorError(
+                'No files found for the specified AfreecaTV video, either'
+                ' the URL is incorrect or the video has been made private.',
+                expected=True)
+        info = {
+            'id': video_id,
+            'title': title,
+            'uploader': text_of('nickname'),
+            'uploader_id': text_of('bj_id'),
+            'duration': int_or_none(text_of('duration')),
+            'thumbnail': text_of('titleImage'),
+        }
+        if len(entries) > 1:
+            info['_type'] = 'multi_video'
+            info['entries'] = entries
+        else:
+            info['formats'] = entries[0]['formats']
+        return info
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ef4431364..f85d75933 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -16,6 +16,7 @@ from .adobetv import (
     AdobeTVVideoIE,
 )
 from .adultswim import AdultSwimIE
+from .afreecatv import AfreecaTVIE
 from .aenetworks import AENetworksIE
 from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
author	Peter Rowlands <peter@pmrowla.com>	2016-05-05 03:11:04 +0900
committer	Peter Rowlands <peter@pmrowla.com>	2016-05-05 03:59:23 +0900
commit	57cf9b7f0689ed7b643ce863427c3211e407e5bf (patch)
tree	3422b12e452066251ae696d3840ff6a1327dada2
parent	bc7e77a04be6094e64263f9c622cff3cd1fc13cb (diff)