diff options
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/aol.py | 28 | ||||
| -rw-r--r-- | youtube_dl/extractor/dispeak.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/gdcvault.py | 100 | ||||
| -rw-r--r-- | youtube_dl/extractor/jwplatform.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/kaltura.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/linkedin.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/mgtv.py | 46 | ||||
| -rw-r--r-- | youtube_dl/extractor/stv.py | 94 | ||||
| -rw-r--r-- | youtube_dl/extractor/tiktok.py | 35 | ||||
| -rw-r--r-- | youtube_dl/extractor/yourporn.py | 9 | 
12 files changed, 258 insertions, 81 deletions
| @@ -700,7 +700,7 @@ Note that on Windows you may need to use double quotes instead of single.  # Download best mp4 format available or any other best if no mp4 available  $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' -# Download best format available but not better that 480p +# Download best format available but no better than 480p  $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'  # Download best video only format but no bigger than 50 MB diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index cb9279193..dffa9733d 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_parse_qs, +    compat_urllib_parse_urlparse, +)  from ..utils import (      ExtractorError,      int_or_none, @@ -12,12 +16,12 @@ from ..utils import (  class AolIE(InfoExtractor): -    IE_NAME = 'on.aol.com' -    _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)' +    IE_NAME = 'aol.com' +    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.com/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'      _TESTS = [{          # video with 5min ID -        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', +        'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',          'md5': '18ef68f48740e86ae94b98da815eec42',          'info_dict': {              'id': '518167793', @@ -34,7 +38,7 @@ class AolIE(InfoExtractor):          }      }, {          # video with vidible ID -        'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', +        'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',          'info_dict': {              'id': '5707d6b8e4b090497b04f706',              'ext': 'mp4', @@ -49,16 +53,16 @@ class AolIE(InfoExtractor):              'skip_download': True,          }      }, { -        'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', +        'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',          'only_matching': True,      }, { -        'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', +        'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',          'only_matching': True,      }, { -        'url': 'http://on.aol.com/video/519442220', +        'url': 'aol-video:5707d6b8e4b090497b04f706',          'only_matching': True,      }, { -        'url': 'aol-video:5707d6b8e4b090497b04f706', +        'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',          'only_matching': True,      }] @@ -73,7 +77,7 @@ class AolIE(InfoExtractor):          video_data = response['data']          formats = [] -        m3u8_url = video_data.get('videoMasterPlaylist') +        m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))          if m3u8_url:              formats.extend(self._extract_m3u8_formats(                  m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) @@ -96,6 +100,12 @@ class AolIE(InfoExtractor):                          'width': int(mobj.group(1)),                          'height': int(mobj.group(2)),                      }) +                else: +                    qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) +                    f.update({ +                        'width': int_or_none(qs.get('w', [None])[0]), +                        'height': int_or_none(qs.get('h', [None])[0]), +                    })                  formats.append(f)          self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index c05f601e2..c345e0274 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor):              stream_name = xpath_text(a_format, 'streamName', fatal=True)              video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')              url = video_root + video_path -            vbr = xpath_text(a_format, 'bitrate') +            bitrate = xpath_text(a_format, 'bitrate') +            tbr = int_or_none(bitrate) +            vbr = int_or_none(self._search_regex( +                r'-(\d+)\.mp4', video_path, 'vbr', default=None)) +            abr = tbr - vbr if tbr and vbr else None              video_formats.append({ +                'format_id': bitrate,                  'url': url, -                'vbr': int_or_none(vbr), +                'tbr': tbr, +                'vbr': vbr, +                'abr': abr,              })          return video_formats diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 41ab36213..cc19af5c4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1093,6 +1093,7 @@ from .streamcloud import StreamcloudIE  from .streamcz import StreamCZIE  from .streetvoice import StreetVoiceIE  from .stretchinternet import StretchInternetIE +from .stv import STVPlayerIE  from .sunporno import SunPornoIE  from .svt import (      SVTIE, diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 8806dc48a..2f555c1d4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,22 +3,24 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from .kaltura import KalturaIE  from ..utils import (      HEADRequest,      sanitized_Request, +    smuggle_url,      urlencode_postdata,  )  class GDCVaultIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?' +    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'      _NETRC_MACHINE = 'gdcvault'      _TESTS = [          {              'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',              'md5': '7ce8388f544c88b7ac11c7ab1b593704',              'info_dict': { -                'id': '1019721', +                'id': '201311826596_AWNY',                  'display_id': 'Doki-Doki-Universe-Sweet-Simple',                  'ext': 'mp4',                  'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' @@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor):          {              'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',              'info_dict': { -                'id': '1015683', +                'id': '201203272_1330951438328RSXR',                  'display_id': 'Embracing-the-Dark-Art-of',                  'ext': 'flv',                  'title': 'Embracing the Dark Art of Mathematical Modeling in AI' @@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor):              'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',              'md5': 'a8efb6c31ed06ca8739294960b2dbabd',              'info_dict': { -                'id': '1023460', +                'id': '840376_BQRC',                  'ext': 'mp4',                  'display_id': 'Tenacious-Design-and-The-Interface',                  'title': 'Tenacious Design and The Interface of \'Destiny\'', @@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor):              # Multiple audios              'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',              'info_dict': { -                'id': '1014631', -                'ext': 'flv', +                'id': '12396_1299111843500GMPX', +                'ext': 'mp4',                  'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',              }, -            'params': { -                'skip_download': True,  # Requires rtmpdump -                'format': 'jp',  # The japanese audio -            } +            # 'params': { +            #     'skip_download': True,  # Requires rtmpdump +            #     'format': 'jp',  # The japanese audio +            # }          },          {              # gdc-player.html              'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',              'info_dict': { -                'id': '1435', +                'id': '9350_1238021887562UHXB',                  'display_id': 'An-American-engine-in-Tokyo', -                'ext': 'flv', +                'ext': 'mp4',                  'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',              }, +        }, +        { +            # Kaltura Embed +            'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling', +            'info_dict': { +                'id': '0_h1fg8j3p', +                'ext': 'mp4', +                'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)', +                'timestamp': 1554401811, +                'upload_date': '20190404', +                'uploader_id': 'joe@blazestreaming.com', +            },              'params': { -                'skip_download': True,  # Requires rtmpdump +                'format': 'mp4-408',              },          },      ] @@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor):          return start_page      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) - -        video_id = mobj.group('id') -        display_id = mobj.group('name') or video_id +        video_id, name = re.match(self._VALID_URL, url).groups() +        display_id = name or video_id          webpage_url = 'http://www.gdcvault.com/play/' + video_id          start_page = self._download_webpage(webpage_url, display_id) @@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor):              start_page, 'url', default=None)          if direct_url:              title = self._html_search_regex( -                r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>', +                r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',                  start_page, 'title')              video_url = 'http://www.gdcvault.com' + direct_url              # resolve the url so that we can detect the correct extension -            head = self._request_webpage(HEADRequest(video_url), video_id) -            video_url = head.geturl() +            video_url = self._request_webpage( +                HEADRequest(video_url), video_id).geturl()              return {                  'id': video_id, @@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor):                  'title': title,              } -        PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>' - -        xml_root = self._html_search_regex( -            PLAYER_REGEX, start_page, 'xml root', default=None) -        if xml_root is None: -            # Probably need to authenticate -            login_res = self._login(webpage_url, display_id) -            if login_res is None: -                self.report_warning('Could not login.') -            else: -                start_page = login_res -                # Grab the url from the authenticated page -                xml_root = self._html_search_regex( -                    PLAYER_REGEX, start_page, 'xml root') - -        xml_name = self._html_search_regex( -            r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>', -            start_page, 'xml filename', default=None) -        if xml_name is None: -            # Fallback to the older format +        embed_url = KalturaIE._extract_url(start_page) +        if embed_url: +            embed_url = smuggle_url(embed_url, {'source_url': url}) +            ie_key = 'Kaltura' +        else: +            PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>' + +            xml_root = self._html_search_regex( +                PLAYER_REGEX, start_page, 'xml root', default=None) +            if xml_root is None: +                # Probably need to authenticate +                login_res = self._login(webpage_url, display_id) +                if login_res is None: +                    self.report_warning('Could not login.') +                else: +                    start_page = login_res +                    # Grab the url from the authenticated page +                    xml_root = self._html_search_regex( +                        PLAYER_REGEX, start_page, 'xml root') +              xml_name = self._html_search_regex( -                r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', +                r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',                  start_page, 'xml filename') +            embed_url = '%s/xml/%s' % (xml_root, xml_name) +            ie_key = 'DigitallySpeaking'          return {              '_type': 'url_transparent',              'id': video_id,              'display_id': display_id, -            'url': '%s/xml/%s' % (xml_root, xml_name), -            'ie_key': 'DigitallySpeaking', +            'url': embed_url, +            'ie_key': ie_key,          } diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index d19a6a774..647b905f1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor  class JWPlatformIE(InfoExtractor): -    _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' +    _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'      _TESTS = [{          'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',          'md5': 'fa8899fa601eb7c83a64e9d568bdf325', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index fdf7f5bbc..79162f665 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -145,6 +145,8 @@ class KalturaIE(InfoExtractor):          )          if mobj:              embed_info = mobj.groupdict() +            for k, v in embed_info.items(): +                embed_info[k] = v.strip()              url = 'kaltura:%(partner_id)s:%(id)s' % embed_info              escaped_pid = re.escape(embed_info['partner_id'])              service_url = re.search( diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 5a86b0064..26fc703d1 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -9,11 +9,13 @@ from ..utils import (      float_or_none,      int_or_none,      urlencode_postdata, +    urljoin,  )  class LinkedInLearningBaseIE(InfoExtractor):      _NETRC_MACHINE = 'linkedin' +    _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'      def _call_api(self, course_slug, fields, video_slug=None, resolution=None):          query = { @@ -50,11 +52,10 @@ class LinkedInLearningBaseIE(InfoExtractor):              return          login_page = self._download_webpage( -            'https://www.linkedin.com/uas/login?trk=learning', -            None, 'Downloading login page') -        action_url = self._search_regex( +            self._LOGIN_URL, None, 'Downloading login page') +        action_url = urljoin(self._LOGIN_URL, self._search_regex(              r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url', -            default='https://www.linkedin.com/uas/login-submit', group='url') +            default='https://www.linkedin.com/uas/login-submit', group='url'))          data = self._hidden_inputs(login_page)          data.update({              'session_key': email, diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index d53d96aae..84137df50 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -1,22 +1,32 @@  # coding: utf-8  from __future__ import unicode_literals +import base64 +import time +import uuid +  from .common import InfoExtractor -from ..compat import compat_str -from ..utils import int_or_none +from ..compat import ( +    compat_HTTPError, +    compat_str, +) +from ..utils import ( +    ExtractorError, +    int_or_none, +)  class MGTVIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'      IE_DESC = '芒果TV' +    _GEO_COUNTRIES = ['CN']      _TESTS = [{          'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', -        'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',          'info_dict': {              'id': '3116640',              'ext': 'mp4', -            'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗', +            'title': '我是歌手 第四季',              'description': '我是歌手第四季双年巅峰会',              'duration': 7461,              'thumbnail': r're:^https?://.*\.jpg$', @@ -28,16 +38,30 @@ class MGTVIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) -        api_data = self._download_json( -            'http://pcweb.api.mgtv.com/player/video', video_id, -            query={'video_id': video_id}, -            headers=self.geo_verification_headers())['data'] +        try: +            api_data = self._download_json( +                'https://pcweb.api.mgtv.com/player/video', video_id, query={ +                    'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1], +                    'video_id': video_id, +                }, headers=self.geo_verification_headers())['data'] +        except ExtractorError as e: +            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: +                error = self._parse_json(e.cause.read().decode(), None) +                if error.get('code') == 40005: +                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES) +                raise ExtractorError(error['msg'], expected=True) +            raise          info = api_data['info']          title = info['title'].strip() -        stream_domain = api_data['stream_domain'][0] +        stream_data = self._download_json( +            'https://pcweb.api.mgtv.com/player/getSource', video_id, query={ +                'pm2': api_data['atc']['pm2'], +                'video_id': video_id, +            }, headers=self.geo_verification_headers())['data'] +        stream_domain = stream_data['stream_domain'][0]          formats = [] -        for idx, stream in enumerate(api_data['stream']): +        for idx, stream in enumerate(stream_data['stream']):              stream_path = stream.get('url')              if not stream_path:                  continue @@ -47,7 +71,7 @@ class MGTVIE(InfoExtractor):              format_url = format_data.get('info')              if not format_url:                  continue -            tbr = int_or_none(self._search_regex( +            tbr = int_or_none(stream.get('filebitrate') or self._search_regex(                  r'_(\d+)_mp4/', format_url, 'tbr', default=None))              formats.append({                  'format_id': compat_str(tbr or idx), diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py new file mode 100644 index 000000000..ccb074cd4 --- /dev/null +++ b/youtube_dl/extractor/stv.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( +    compat_parse_qs, +    compat_urllib_parse_urlparse +) +from ..utils import ( +    extract_attributes, +    float_or_none, +    int_or_none, +    str_or_none, +) + + +class STVPlayerIE(InfoExtractor): +    IE_NAME = 'stv:player' +    _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})' +    _TEST = { +        'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', +        'md5': '2ad867d4afd641fa14187596e0fbc91b', +        'info_dict': { +            'id': '6016487034001', +            'ext': 'mp4', +            'upload_date': '20190321', +            'title': 'Interview with the cast ahead of new Victoria', +            'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', +            'timestamp': 1553179628, +            'uploader_id': '1486976045', +        }, +        'skip': 'this resource is unavailable outside of the UK', +    } +    _PUBLISHER_ID = '1486976045' +    _PTYPE_MAP = { +        'episode': 'episodes', +        'video': 'shortform', +    } + +    def _real_extract(self, url): +        ptype, video_id = re.match(self._VALID_URL, url).groups() +        webpage = self._download_webpage(url, video_id) + +        qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( +            r'itemprop="embedURL"[^>]+href="([^"]+)', +            webpage, 'embed URL', default=None)).query) +        publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + +        player_attr = extract_attributes(self._search_regex( +            r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + +        info = {} +        duration = ref_id = series = video_id = None +        api_ref_id = player_attr.get('data-player-api-refid') +        if api_ref_id: +            resp = self._download_json( +                'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), +                api_ref_id, fatal=False) +            if resp: +                result = resp.get('results') or {} +                video = result.get('video') or {} +                video_id = str_or_none(video.get('id')) +                ref_id = video.get('guid') +                duration = video.get('length') +                programme = result.get('programme') or {} +                series = programme.get('name') or programme.get('shortName') +                subtitles = {} +                _subtitles = result.get('_subtitles') or {} +                for ext, sub_url in _subtitles.items(): +                    subtitles.setdefault('en', []).append({ +                        'ext': 'vtt' if ext == 'webvtt' else ext, +                        'url': sub_url, +                    }) +                info.update({ +                    'description': result.get('summary'), +                    'subtitles': subtitles, +                    'view_count': int_or_none(result.get('views')), +                }) +        if not video_id: +            video_id = qs.get('videoId', [None])[0] or self._search_regex( +                r'<link\s+itemprop="url"\s+href="(\d+)"', +                webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid']) + +        info.update({ +            '_type': 'url_transparent', +            'duration': float_or_none(duration or player_attr.get('data-duration'), 1000), +            'id': video_id, +            'ie_key': 'BrightcoveNew', +            'series': series or player_attr.get('data-programme-name'), +            'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id), +        }) +        return info diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py index 083e9f36d..66088b9ab 100644 --- a/youtube_dl/extractor/tiktok.py +++ b/youtube_dl/extractor/tiktok.py @@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor):  class TikTokIE(TikTokBaseIE): -    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)' -    _TEST = { +    _VALID_URL = r'''(?x) +                        https?:// +                            (?: +                                (?:m\.)?tiktok\.com/v| +                                (?:www\.)?tiktok\.com/share/video +                            ) +                            /(?P<id>\d+) +                    ''' +    _TESTS = [{          'url': 'https://m.tiktok.com/v/6606727368545406213.html',          'md5': 'd584b572e92fcd48888051f238022420',          'info_dict': { @@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE):              'comment_count': int,              'repost_count': int,          } -    } +    }, { +        'url': 'https://www.tiktok.com/share/video/6606727368545406213', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) +        webpage = self._download_webpage( +            'https://m.tiktok.com/v/%s.html' % video_id, video_id)          data = self._parse_json(self._search_regex(              r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)          return self._extract_aweme(data)  class TikTokUserIE(TikTokBaseIE): -    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)' -    _TEST = { +    _VALID_URL = r'''(?x) +                        https?:// +                            (?: +                                (?:m\.)?tiktok\.com/h5/share/usr| +                                (?:www\.)?tiktok\.com/share/user +                            ) +                            /(?P<id>\d+) +                    ''' +    _TESTS = [{          'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',          'info_dict': {              'id': '188294915489964032',          },          'playlist_mincount': 24, -    } +    }, { +        'url': 'https://www.tiktok.com/share/user/188294915489964032', +        'only_matching': True, +    }]      def _real_extract(self, url):          user_id = self._match_id(url) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 2c63f9752..b1d1eb6b6 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -8,8 +8,8 @@ from ..utils import (  class YourPornIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?(?:yourporn\.sexy|sxyprn\.com)/post/(?P<id>[^/?#&.]+)' +    _TESTS = [{          'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',          'md5': '6f8682b6464033d87acaa7a8ff0c092e',          'info_dict': { @@ -23,7 +23,10 @@ class YourPornIE(InfoExtractor):          'params': {              'skip_download': True,          }, -    } +    }, { +        'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) | 
