diff options
| -rw-r--r-- | youtube_dl/extractor/xvideos.py | 42 | 
1 file changed, 31 insertions, 11 deletions
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index eca603028..085c8d4f3 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -14,8 +14,16 @@ from ..utils import (
 
 
 class XVideosIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?xvideos\.com/video|
+                            flashservice\.xvideos\.com/embedframe/|
+                            static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
+                        )
+                        (?P<id>[0-9]+)
+                    '''
+    _TESTS = [{
         'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
         'md5': '14cea69fcb84db54293b1e971466c2e1',
         'info_dict': {
@@ -25,21 +33,33 @@ class XVideosIE(InfoExtractor):
             'duration': 108,
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'https://flashservice.xvideos.com/embedframe/4588838',
+        'only_matching': True,
+    }, {
+        'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+
+        webpage = self._download_webpage(
+            'http://www.xvideos.com/video%s/' % video_id, video_id)
 
         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
         if mobj:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 
-        video_title = self._html_search_regex(
-            r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
-        video_thumbnail = self._search_regex(
+        title = self._html_search_regex(
+            (r'<title>(?P<title>.+?)\s+-\s+XVID',
+             r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+            webpage, 'title', default=None,
+            group='title') or self._og_search_title(webpage)
+
+        thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
-        video_duration = int_or_none(self._og_search_property(
+        duration = int_or_none(self._og_search_property(
             'duration', webpage, default=None)) or parse_duration(
             self._search_regex(
                 r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
@@ -74,8 +94,8 @@ class XVideosIE(InfoExtractor):
         return {
             'id': video_id,
             'formats': formats,
-            'title': video_title,
-            'duration': video_duration,
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'duration': duration,
+            'thumbnail': thumbnail,
             'age_limit': 18,
         }
