[appleconnect] Add new extractor (fixes #6189)

Closes #6190.
author: Zach Bruggeman <mail@bruggie.com> 2015-07-10 19:27:48 -0700
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-07-19 13:00:30 +0200
commit: 1dc31c2786b34f833acc5fc646afcf992f71444d (patch)
tree: b956b80e740dc296a7bec2c912f981b3fca1b420 /youtube_dl
parent: 32470bf619d31605dc9c51ad107839a097f829f4 (diff)
2 files changed, 51 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d70de7690..5033d67ed 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -19,6 +19,7 @@ from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
 from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE, ARDMediathekIE
diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py
new file mode 100644
index 000000000..ea7a70393
--- /dev/null
+++ b/youtube_dl/extractor/appleconnect.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    str_to_int,
+    ExtractorError
+)
+
+
+class AppleConnectIE(InfoExtractor):
+    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
+    _TEST = {
+        'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+        'md5': '10d0f2799111df4cb1c924520ca78f98',
+        'info_dict': {
+            'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+            'ext': 'm4v',
+            'title': 'Energy',
+            'uploader': 'Drake',
+            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+            'upload_date': '20150710',
+            'timestamp': 1436545535,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        try:
+            video_json = self._html_search_regex(
+                r'class="auc-video-data">(\{.*?\})', webpage, 'json')
+        except ExtractorError:
+            raise ExtractorError('This post doesn\'t contain a video', expected=True)
+
+        video_data = self._parse_json(video_json, video_id)
+        timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
+        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+
+        return {
+            'id': video_id,
+            'url': video_data['sslSrc'],
+            'title': video_data['title'],
+            'description': video_data['description'],
+            'uploader': video_data['artistName'],
+            'thumbnail': video_data['artworkUrl'],
+            'timestamp': timestamp,
+            'like_count': like_count,
+        }
author	Zach Bruggeman <mail@bruggie.com>	2015-07-10 19:27:48 -0700
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-07-19 13:00:30 +0200
commit	1dc31c2786b34f833acc5fc646afcf992f71444d (patch)
tree	b956b80e740dc296a7bec2c912f981b3fca1b420 /youtube_dl
parent	32470bf619d31605dc9c51ad107839a097f829f4 (diff)