diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/allocine.py | 89 | 
2 files changed, 90 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 24b046173..12cca5c2e 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
 from .aol import AolIE
+from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
new file mode 100644
index 000000000..34f0cd49b
--- /dev/null
+++ b/youtube_dl/extractor/allocine.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    qualities,
+    determine_ext,
+)
+
+
+class AllocineIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
+
+    _TESTS = [{
+        'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
+        'md5': '0c9fcf59a841f65635fa300ac43d8269',
+        'info_dict': {
+            'id': '19546517',
+            'ext': 'mp4',
+            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
+            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
+        'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
+        'info_dict': {
+            'id': '19540403',
+            'ext': 'mp4',
+            'title': 'Planes 2 Bande-annonce VF',
+            'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
+        'md5': '101250fb127ef9ca3d73186ff22a47ce',
+        'info_dict': {
+            'id': '19544709',
+            'ext': 'mp4',
+            'title': 'Dragons 2 - Bande annonce finale VF',
+            'description': 'md5:e74a4dc750894bac300ece46c7036490',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        typ = mobj.group('typ')
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        if typ == 'film':
+            video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
+        else:
+            player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
+
+            player_data = json.loads(player)
+            video_id = compat_str(player_data['refMedia'])
+
+        xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
+
+        video = xml.find('.//AcVisionVideo').attrib
+        quality = qualities(['ld', 'md', 'hd'])
+
+        formats = []
+        for k, v in video.items():
+            if re.match(r'.+_path', k):
+                format_id = k.split('_')[0]
+                formats.append({
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                    'url': v,
+                    'ext': determine_ext(v),
+                })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video['videoTitle'],
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+        }
