diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-07-03 03:12:15 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-07-03 03:17:35 +0700 | 
| commit | 1b734adb2d3bb17af67b5fef933a6fafe71e4cf4 (patch) | |
| tree | 82533db334a26fc17bd3e3ce13dce1f2af272a6a | |
| parent | 9b724d7277716fc6f0a0cdc5cdc857ac0ea2642b (diff) | |
[xtube] Fix extraction (Closes #9953, closes #9961)
| -rw-r--r-- | youtube_dl/extractor/xtube.py | 49 | 
1 file changed, 33 insertions, 16 deletions
```diff
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index 4075b8a4f..83bc1fef2 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -4,17 +4,23 @@ import itertools
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     int_or_none,
     orderedSet,
+    parse_duration,
     sanitized_Request,
     str_to_int,
 )
 
 
 class XTubeIE(InfoExtractor):
-    _VALID_URL = r'(?:xtube:|https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-))(?P<id>[^/?&#]+)'
+    _VALID_URL = r'''(?x)
+                        (?:
+                            xtube:|
+                            https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-)
+                        )
+                        (?P<id>[^/?&#]+)
+                    '''
 
     _TESTS = [{
         # old URL schema
@@ -27,6 +33,8 @@ class XTubeIE(InfoExtractor):
             'description': 'contains:an ET kind of thing',
             'uploader': 'greenshowers',
             'duration': 450,
+            'view_count': int,
+            'comment_count': int,
             'age_limit': 18,
         }
     }, {
@@ -51,21 +59,30 @@ class XTubeIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
         webpage = self._download_webpage(req, display_id)
 
-        flashvars = self._parse_json(
-            self._search_regex(
-                r'xt\.playerOps\s*=\s*({.+?});', webpage, 'player ops'),
-            video_id)['flashvars']
-
-        title = flashvars.get('title') or self._search_regex(
-            r'<h1>([^<]+)</h1>', webpage, 'title')
-        video_url = compat_urllib_parse_unquote(flashvars['video_url'])
-        duration = int_or_none(flashvars.get('video_duration'))
-
-        uploader = self._search_regex(
-            r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
-            webpage, 'uploader', fatal=False)
+        sources = self._parse_json(self._search_regex(
+            r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
+
+        formats = []
+        for format_id, format_url in sources.items():
+            formats.append({
+                'url': format_url,
+                'format_id': format_id,
+                'height': int_or_none(format_id),
+            })
+        self._sort_formats(formats)
+
+        title = self._search_regex(
+            (r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+            webpage, 'title', group='title')
         description = self._search_regex(
             r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
+        uploader = self._search_regex(
+            (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
+             r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
+            webpage, 'uploader', fatal=False)
+        duration = parse_duration(self._search_regex(
+            r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
+            webpage, 'duration', fatal=False))
         view_count = str_to_int(self._search_regex(
             r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
             webpage, 'view count', fatal=False))
@@ -76,7 +93,6 @@ class XTubeIE(InfoExtractor):
         return {
             'id': video_id,
             'display_id': display_id,
-            'url': video_url,
             'title': title,
             'description': description,
             'uploader': uploader,
@@ -84,6 +100,7 @@ class XTubeIE(InfoExtractor):
             'view_count': view_count,
             'comment_count': comment_count,
             'age_limit': 18,
+            'formats': formats,
         }
 
 
```
