youtube_dl/extractor/ign.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

import re
import json

from .common import InfoExtractor
from ..utils import (
    determine_ext,
)

class IGNIE(InfoExtractor):
    _VALID_URL = r'http://www.ign.com/videos/.+/(?P<name>.+)'
    IE_NAME = u'ign.com'

    _TEST = {
        u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
        u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
        u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
        u'info_dict': {
            u'title': u'The Last of Us Review',
            u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        config_url = url + '.config'
        webpage = self._download_webpage(url, name)
        config = json.loads(self._download_webpage(config_url, name, u'Downloading video info'))

        self.report_extraction(name)
        description = self._html_search_regex(r'<span class="page-object-description">(.+?)</span>',
                                              webpage, 'video description', flags=re.DOTALL)
        media = config['playlist']['media']
        video_url = media['url']

        return {'id': media['metadata']['videoId'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'title': media['metadata']['title'],
                'description': description,
                'thumbnail': media['poster'][0]['url'].replace('{size}', 'small'),
                }