diff options
Diffstat (limited to 'youtube_dl/extractor/la7.py')
| -rw-r--r-- | youtube_dl/extractor/la7.py | 94 | 
1 files changed, 54 insertions, 40 deletions
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index b08f6e3c9..826dc9476 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -1,60 +1,74 @@ +# coding: utf-8  from __future__ import unicode_literals  from .common import InfoExtractor  from ..utils import ( -    parse_duration, +    determine_ext, +    js_to_json,  )  class LA7IE(InfoExtractor): -    IE_NAME = 'la7.tv' -    _VALID_URL = r'''(?x) -        https?://(?:www\.)?la7\.tv/ -        (?: -            richplayer/\?assetid=| -            \?contentId= -        ) -        (?P<id>[0-9]+)''' - -    _TEST = { -        'url': 'http://www.la7.tv/richplayer/?assetid=50355319', -        'md5': 'ec7d1f0224d20ba293ab56cf2259651f', +    IE_NAME = 'la7.it' +    _VALID_URL = r'''(?x)(https?://)?(?: +        (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/| +        tg\.la7\.it/repliche-tgla7\?id= +    )(?P<id>.+)''' + +    _TESTS = [{ +        # 'src' is a plain URL +        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', +        'md5': '6054674766e7988d3e02f2148ff92180', +        'info_dict': { +            'id': 'inccool8-02-10-2015-163722', +            'ext': 'mp4', +            'title': 'Inc.Cool8', +            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico', +            'thumbnail': 're:^https?://.*', +        }, +    }, { +        # 'src' is a dictionary +        'url': 'http://tg.la7.it/repliche-tgla7?id=189080', +        'md5': '6b0d8888d286e39870208dfeceaf456b',          'info_dict': { -            'id': '50355319', +            'id': '189080',              'ext': 'mp4', -            'title': 'IL DIVO', -            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci', -            'duration': 6254, +            'title': 'TG LA7',          }, -        'skip': 'Blocked in the US', -    } +    }, { +        'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) -        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id -        doc = self._download_xml(xml_url, video_id) - -        video_title = doc.find('title').text -        description = doc.find('description').text -        duration = parse_duration(doc.find('duration').text) -        thumbnail = doc.find('img').text -        view_count = int(doc.find('views').text) - -        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') - -        formats = [{ -            'format': vnode.find('quality').text, -            'tbr': int(vnode.find('quality').text), -            'url': vnode.find('fms').text.strip().replace('mp4:', prefix), -        } for vnode in doc.findall('.//videos/video')] + +        webpage = self._download_webpage(url, video_id) + +        player_data = self._parse_json( +            self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'), +            video_id, transform_source=js_to_json) + +        source = player_data['src'] +        source_urls = source.values() if isinstance(source, dict) else [source] + +        formats = [] +        for source_url in source_urls: +            ext = determine_ext(source_url) +            if ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    source_url, video_id, ext='mp4', +                    entry_protocol='m3u8_native', m3u8_id='hls')) +            else: +                formats.append({ +                    'url': source_url, +                })          self._sort_formats(formats)          return {              'id': video_id, -            'title': video_title, -            'description': description, -            'thumbnail': thumbnail, -            'duration': duration, +            'title': player_data['title'], +            'description': self._og_search_description(webpage, default=None), +            'thumbnail': player_data.get('poster'),              'formats': formats, -            'view_count': view_count,          }  | 
