diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-07 23:07:50 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-07 23:07:50 +0800 | 
| commit | 84b91dd4e3cd45498fb6cd3c7aa611816f61a3d5 (patch) | |
| tree | 9496e6d65c8a1c4cce64ec5765d85c12fd9bbecb /youtube_dl/extractor/gamestar.py | |
| parent | 92c9c2a88b8bc6e19b6fc9ac5e3814aa75c437e9 (diff) | |
[gamestar] Fix metadata extraction (closes #10479)
Diffstat (limited to 'youtube_dl/extractor/gamestar.py')
| -rw-r--r-- | youtube_dl/extractor/gamestar.py | 51 | 
1 file changed, 17 insertions, 34 deletions
| diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 69058a583..341e72733 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -1,14 +1,10 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      int_or_none, -    parse_duration, -    str_to_int, -    unified_strdate, +    remove_end,  ) @@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor):              'id': '76110',              'ext': 'mp4',              'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', -            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.', -            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg', +            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...', +            'thumbnail': 're:^https?://.*\.jpg$', +            'timestamp': 1406542020,              'upload_date': '20140728',              'duration': 17          } @@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        og_title = self._og_search_title(webpage) -        title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title) -          url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id -        description = self._og_search_description(webpage).strip() - -        thumbnail = self._proto_relative_url( -            self._og_search_thumbnail(webpage), scheme='http:') - -        upload_date = unified_strdate(self._html_search_regex( -            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)  ', -            webpage, 
'upload_date', fatal=False)) - -        duration = parse_duration(self._html_search_regex( -            r'  Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration', -            fatal=False)) - -        view_count = str_to_int(self._html_search_regex( -            r'  Zuschauer: ([0-9\.]+)  ', webpage, -            'view_count', fatal=False)) +        # TODO: there are multiple ld+json objects in the webpage, +        # while _search_json_ld finds only the first one +        json_ld = self._parse_json(self._search_regex( +            r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>', +            webpage, 'JSON-LD', group='json_ld'), video_id) +        info_dict = self._json_ld(json_ld, video_id) +        info_dict['title'] = remove_end(info_dict['title'], ' - GameStar') +        view_count = json_ld.get('interactionCount')          comment_count = int_or_none(self._html_search_regex( -            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count', +            r'([0-9]+) Kommentare</span>', webpage, 'comment_count',              fatal=False)) -        return { +        info_dict.update({              'id': video_id, -            'title': title,              'url': url,              'ext': 'mp4', -            'thumbnail': thumbnail, -            'description': description, -            'upload_date': upload_date, -            'duration': duration,              'view_count': view_count,              'comment_count': comment_count -        } +        }) + +        return info_dict | 
