diff options
| author | Zach Bruggeman <mail@bruggie.com> | 2015-07-10 19:27:48 -0700 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-07-19 13:00:30 +0200 | 
| commit | 1dc31c2786b34f833acc5fc646afcf992f71444d (patch) | |
| tree | b956b80e740dc296a7bec2c912f981b3fca1b420 | |
| parent | 32470bf619d31605dc9c51ad107839a097f829f4 (diff) | |
[appleconnect] Add new extractor (fixes #6189)
Closes #6190.
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/appleconnect.py | 50 | 
2 files changed, 51 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d70de7690..5033d67ed 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -19,6 +19,7 @@ from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
 from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE, ARDMediathekIE
diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py
new file mode 100644
index 000000000..ea7a70393
--- /dev/null
+++ b/youtube_dl/extractor/appleconnect.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    str_to_int,
+    ExtractorError
+)
+
+
+class AppleConnectIE(InfoExtractor):
+    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
+    _TEST = {
+        'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+        'md5': '10d0f2799111df4cb1c924520ca78f98',
+        'info_dict': {
+            'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+            'ext': 'm4v',
+            'title': 'Energy',
+            'uploader': 'Drake',
+            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+            'upload_date': '20150710',
+            'timestamp': 1436545535,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        try:
+            video_json = self._html_search_regex(
+                r'class="auc-video-data">(\{.*?\})', webpage, 'json')
+        except ExtractorError:
+            raise ExtractorError('This post doesn\'t contain a video', expected=True)
+
+        video_data = self._parse_json(video_json, video_id)
+        timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
+        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+
+        return {
+            'id': video_id,
+            'url': video_data['sslSrc'],
+            'title': video_data['title'],
+            'description': video_data['description'],
+            'uploader': video_data['artistName'],
+            'thumbnail': video_data['artworkUrl'],
+            'timestamp': timestamp,
+            'like_count': like_count,
+        }
```
