diff options
| -rw-r--r-- | youtube_dl/extractor/xhamster.py | 80 | 
1 files changed, 68 insertions, 12 deletions
| diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 76aeaf9a4..f73b9778f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -11,11 +11,14 @@ from ..utils import (      dict_get,      extract_attributes,      ExtractorError, +    float_or_none,      int_or_none,      parse_duration, +    str_or_none,      try_get,      unified_strdate,      url_or_none, +    urljoin,  ) @@ -146,36 +149,89 @@ class XHamsterIE(InfoExtractor):              video = initials['videoModel']              title = video['title']              formats = [] -            for format_id, formats_dict in video['sources'].items(): +            format_urls = set() +            format_sizes = {} +            sources = try_get(video, lambda x: x['sources'], dict) or {} +            for format_id, formats_dict in sources.items():                  if not isinstance(formats_dict, dict):                      continue +                download_sources = try_get(sources, lambda x: x['download'], dict) or {} +                for quality, format_dict in download_sources.items(): +                    if not isinstance(format_dict, dict): +                        continue +                    format_sizes[quality] = float_or_none(format_dict.get('size'))                  for quality, format_item in formats_dict.items():                      if format_id == 'download':                          # Download link takes some time to be generated,                          # skipping for now                          continue -                        if not isinstance(format_item, dict): -                            continue -                        format_url = format_item.get('link') -                        filesize = int_or_none( -                            format_item.get('size'), invscale=1000000) -                    else: -                        format_url = format_item -                        filesize = None +                    format_url = format_item                      format_url = url_or_none(format_url) -                    if not format_url: +                    if not format_url or format_url in format_urls:                          continue +                    format_urls.add(format_url)                      formats.append({                          'format_id': '%s-%s' % (format_id, quality),                          'url': format_url,                          'ext': determine_ext(format_url, 'mp4'),                          'height': get_height(quality), -                        'filesize': filesize, +                        'filesize': format_sizes.get(quality),                          'http_headers': {                              'Referer': urlh.geturl(),                          },                      }) -            self._sort_formats(formats) +            xplayer_sources = try_get( +                initials, lambda x: x['xplayerSettings']['sources'], dict) +            if xplayer_sources: +                hls_sources = xplayer_sources.get('hls') +                if isinstance(hls_sources, dict): +                    for hls_format_key in ('url', 'fallback'): +                        hls_url = hls_sources.get(hls_format_key) +                        if not hls_url: +                            continue +                        hls_url = urljoin(url, hls_url) +                        if not hls_url or hls_url in format_urls: +                            continue +                        format_urls.add(hls_url) +                        formats.extend(self._extract_m3u8_formats( +                            hls_url, video_id, 'mp4', entry_protocol='m3u8_native', +                            m3u8_id='hls', fatal=False)) +                standard_sources = xplayer_sources.get('standard') +                if isinstance(standard_sources, dict): +                    for format_id, formats_list in standard_sources.items(): +                        if not isinstance(formats_list, list): +                            continue +                        for standard_format in formats_list: +                            if not isinstance(standard_format, dict): +                                continue +                            for standard_format_key in ('url', 'fallback'): +                                standard_url = standard_format.get(standard_format_key) +                                if not standard_url: +                                    continue +                                standard_url = urljoin(url, standard_url) +                                if not standard_url or standard_url in format_urls: +                                    continue +                                format_urls.add(standard_url) +                                ext = determine_ext(standard_url, 'mp4') +                                if ext == 'm3u8': +                                    formats.extend(self._extract_m3u8_formats( +                                        standard_url, video_id, 'mp4', entry_protocol='m3u8_native', +                                        m3u8_id='hls', fatal=False)) +                                    continue +                                quality = (str_or_none(standard_format.get('quality')) +                                           or str_or_none(standard_format.get('label')) +                                           or '') +                                formats.append({ +                                    'format_id': '%s-%s' % (format_id, quality), +                                    'url': standard_url, +                                    'ext': ext, +                                    'height': get_height(quality), +                                    'filesize': format_sizes.get(quality), +                                    'http_headers': { +                                        'Referer': standard_url, +                                    }, +                                }) +            self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))              categories_list = video.get('categories')              if isinstance(categories_list, list): | 
