diff options
| -rw-r--r-- | youtube_dl/extractor/xhamster.py | 87 | 
1 files changed, 83 insertions, 4 deletions
| diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index be3624ef2..52f8ded2f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -6,10 +6,12 @@ from .common import InfoExtractor  from ..compat import compat_str  from ..utils import (      clean_html, +    determine_ext,      dict_get,      ExtractorError,      int_or_none,      parse_duration, +    try_get,      unified_strdate,  ) @@ -32,6 +34,7 @@ class XHamsterIE(InfoExtractor):              'display_id': 'femaleagent_shy_beauty_takes_the_bait',              'ext': 'mp4',              'title': 'FemaleAgent Shy beauty takes the bait', +            'timestamp': 1350194821,              'upload_date': '20121014',              'uploader': 'Ruseful2011',              'duration': 893, @@ -45,6 +48,7 @@ class XHamsterIE(InfoExtractor):              'display_id': 'britney_spears_sexy_booty',              'ext': 'mp4',              'title': 'Britney Spears  Sexy Booty', +            'timestamp': 1379123460,              'upload_date': '20130914',              'uploader': 'jojo747400',              'duration': 200, @@ -61,6 +65,7 @@ class XHamsterIE(InfoExtractor):              'id': '5667973',              'ext': 'mp4',              'title': '....', +            'timestamp': 1454948101,              'upload_date': '20160208',              'uploader': 'parejafree',              'duration': 72, @@ -96,6 +101,83 @@ class XHamsterIE(InfoExtractor):          if error:              raise ExtractorError(error, expected=True) +        age_limit = self._rta_search(webpage) + +        def get_height(s): +            return int_or_none(self._search_regex( +                r'^(\d+)[pP]', s, 'height', default=None)) + +        initials = self._parse_json( +            self._search_regex( +                r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials', +                default='{}'), +            video_id, fatal=False) +        if initials: +            video = initials['videoModel'] +            title = video['title'] +            formats = [] +            for format_id, formats_dict in video['sources'].items(): +                if not isinstance(formats_dict, dict): +                    continue +                for quality, format_item in formats_dict.items(): +                    if format_id == 'download': +                        # Download link takes some time to be generated, +                        # skipping for now +                        continue +                        if not isinstance(format_item, dict): +                            continue +                        format_url = format_item.get('link') +                        filesize = int_or_none( +                            format_item.get('size'), invscale=1000000) +                    else: +                        format_url = format_item +                        filesize = None +                    if not isinstance(format_url, compat_str): +                        continue +                    formats.append({ +                        'format_id': '%s-%s' % (format_id, quality), +                        'url': format_url, +                        'ext': determine_ext(format_url, 'mp4'), +                        'height': get_height(quality), +                        'filesize': filesize, +                    }) +            self._sort_formats(formats) + +            categories_list = video.get('categories') +            if isinstance(categories_list, list): +                categories = [] +                for c in categories_list: +                    if not isinstance(c, dict): +                        continue +                    c_name = c.get('name') +                    if isinstance(c_name, compat_str): +                        categories.append(c_name) +            else: +                categories = None + +            return { +                'id': video_id, +                'display_id': display_id, +                'title': title, +                'description': video.get('description'), +                'timestamp': int_or_none(video.get('created')), +                'uploader': try_get( +                    video, lambda x: x['author']['name'], compat_str), +                'thumbnail': video.get('thumbURL'), +                'duration': int_or_none(video.get('duration')), +                'view_count': int_or_none(video.get('views')), +                'like_count': int_or_none(try_get( +                    video, lambda x: x['rating']['likes'], int)), +                'dislike_count': int_or_none(try_get( +                    video, lambda x: x['rating']['dislikes'], int)), +                'comment_count': int_or_none(video.get('views')), +                'age_limit': age_limit, +                'categories': categories, +                'formats': formats, +            } + +        # Old layout fallback +          title = self._html_search_regex(              [r'<h1[^>]*>([^<]+)</h1>',               r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"', @@ -119,8 +201,7 @@ class XHamsterIE(InfoExtractor):              formats.append({                  'format_id': format_id,                  'url': format_url, -                'height': int_or_none(self._search_regex( -                    r'^(\d+)[pP]', format_id, 'height', default=None)) +                'height': get_height(format_id),              })          video_url = self._search_regex( @@ -167,8 +248,6 @@ class XHamsterIE(InfoExtractor):          mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)          comment_count = mobj.group('commentcount') if mobj else 0 -        age_limit = self._rta_search(webpage) -          categories_html = self._search_regex(              r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,              'categories', default=None) | 
