diff options
| author | remitamine <remitamine@gmail.com> | 2016-02-10 22:16:21 +0100 | 
|---|---|---|
| committer | remitamine <remitamine@gmail.com> | 2016-02-10 22:16:21 +0100 | 
| commit | 80f772c28a3277376620ed7f50308e12437e358d (patch) | |
| tree | 98904acd87d0c1490be3380b48dc2eb16a9e6336 | |
| parent | f817d9bec180a932541564bdb9baf3db43e47faa (diff) | |
[crackle] Add new extractor
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/comcarcoff.py | 16 | ||||
| -rw-r--r-- | youtube_dl/extractor/crackle.py | 92 | 
3 files changed, 102 insertions, 7 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 18951c287..f08f27480 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -126,6 +126,7 @@ from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
+from .crackle import CrackleIE
 from .criterion import CriterionIE
 from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 2efa200b5..7dff68492 100644
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     parse_duration,
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
         'info_dict': {
-            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'id': '2494164',
             'ext': 'mp4',
             'upload_date': '20141127',
             'timestamp': 1417107600,
             'duration': 1232,
             'title': 'Happy Thanksgiving Miranda',
             'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
-            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
         },
         'params': {
             'skip_download': 'requires ffmpeg',
@@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
                 r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
             display_id)['videoData']
 
-        video_id = full_data['activeVideo']['video']
-        video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
+        display_id = full_data['activeVideo']['video']
+        video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
+        video_id = compat_str(video_data['mediaId'])
         thumbnails = [{
             'url': video_data['images']['thumb'],
         }, {
             'url': video_data['images']['poster'],
         }]
-        formats = self._extract_m3u8_formats(
-            video_data['mediaUrl'], video_id, ext='mp4')
 
         timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
             video_data.get('pubDate'))
@@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
             video_data.get('duration'))
 
         return {
+            '_type': 'url_transparent',
+            'url': 'crackle:%s' % video_id,
             'id': video_id,
             'display_id': display_id,
             'title': video_data['title'],
@@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'thumbnails': thumbnails,
-            'formats': formats,
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episode')),
             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
         }
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py
new file mode 100644
index 000000000..a478333a2
--- /dev/null
+++ b/youtube_dl/extractor/crackle.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class CrackleIE(InfoExtractor):
+    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.crackle.com/the-art-of-more/2496419',
+        'info_dict': {
+            'id': '2496419',
+            'ext': 'mp4',
+            'title': 'Heavy Lies the Head',
+            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+
+    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
+    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
+    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
+    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
+
+    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
+    _MEDIA_FILE_SLOTS = {
+        'c544.flv': {
+            'width': 544,
+            'height': 306,
+        },
+        '360p.mp4': {
+            'width': 640,
+            'height': 360,
+        },
+        '480p.mp4': {
+            'width': 852,
+            'height': 478,
+        },
+        '480p_1mbps.mp4': {
+            'width': 852,
+            'height': 478,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        item = self._download_xml(
+            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, video_id).find('i')
+        title = item.attrib['t']
+
+        thumbnail = None
+        subtitles = {}
+        formats = self._extract_m3u8_formats('http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), video_id, 'mp4', fatal=None)
+        path = item.attrib.get('p')
+        if path:
+            thumbnail = self._THUMBNAIL_TEMPLATE % path
+            http_base_url = 'http://ahttp.crackle.com/' + path
+            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
+                formats.append({
+                    'url': http_base_url + mfs_path,
+                    'format_id': mfs_path.split('.')[0],
+                    'width': mfs_info['width'],
+                    'height': mfs_info['height'],
+                })
+            for cc in item.findall('cc'):
+                locale = cc.attrib.get('l')
+                v = cc.attrib.get('v')
+                if locale and v:
+                    if locale not in subtitles:
+                        subtitles[locale] = []
+                    subtitles[locale] = [{
+                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
+                        'ext': 'ttml',
+                    }]
+        self._sort_formats(formats, ('width', 'height', 'tbr'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': item.attrib.get('d'),
+            'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
+            'series': item.attrib.get('sn'),
+            'season_number': int_or_none(item.attrib.get('se')),
+            'episode_number': int_or_none(item.attrib.get('ep')),
+            'thumbnail': thumbnail,
+            'subtitles': subtitles,
+            'formats': formats,
+        }
```
