diff options
| author | Remita Amine <remitamine@gmail.com> | 2020-12-26 09:37:40 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2020-12-26 09:37:40 +0100 | 
| commit | 4c7d7215cd069bd15239d5a43ecb764e60a49ce7 (patch) | |
| tree | cd170914c8f7847836010f467f7ed6a32f429c15 | |
| parent | 0370d9eb3dd3b6779cb88fe128fe870fdfb2ea58 (diff) | |
[theweatherchannel] fix extraction (closes #25930)(closes #26051)
| -rw-r--r-- | youtube_dl/extractor/theweatherchannel.py | 43 | 
1 file changed, 33 insertions, 10 deletions
diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py index c34a49d03..b2a8c3797 100644 --- a/youtube_dl/extractor/theweatherchannel.py +++ b/youtube_dl/extractor/theweatherchannel.py @@ -1,18 +1,22 @@  # coding: utf-8  from __future__ import unicode_literals +import json +import re +  from .theplatform import ThePlatformIE  from ..utils import (      determine_ext,      parse_duration, +    parse_iso8601,  )  class TheWeatherChannelIE(ThePlatformIE): -    _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)' +    _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'      _TESTS = [{          'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', -        'md5': 'ab924ac9574e79689c24c6b95e957def', +        'md5': 'c4cbe74c9c17c5676b704b950b73dd92',          'info_dict': {              'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',              'ext': 'mp4', @@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):              'description': 'md5:55606ce1378d4c72e6545e160c9d9695',              'uploader': 'TWC - Digital (No Distro)',              'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c', +            'upload_date': '20160720', +            'timestamp': 1469018835,          } +    }, { +        'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india', +        'only_matching': True,      }]      def _real_extract(self, url): -        display_id = self._match_id(url) -        webpage = self._download_webpage(url, display_id) -        drupal_settings = self._parse_json(self._search_regex( -            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', -            webpage, 'drupal settings'), display_id) -        video_id = drupal_settings['twc']['contexts']['node']['uuid'] -        video_data = self._download_json( -     
       'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) +        asset_name, locale, display_id = re.match(self._VALID_URL, url).groups() +        if not locale: +            locale = 'en-US' +        video_data = list(self._download_json( +            'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{ +                'name': 'getCMSAssetsUrlConfig', +                'params': { +                    'language': locale.replace('-', '_'), +                    'query': { +                        'assetName': { +                            '$in': asset_name, +                        }, +                    }, +                } +            }]).encode(), headers={ +                'Content-Type': 'application/json', +            })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0] +        video_id = video_data['id']          seo_meta = video_data.get('seometa', {})          title = video_data.get('title') or seo_meta['title'] @@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):                  })          self._sort_formats(formats) +        cc_url = video_data.get('cc_url') +          return {              'id': video_id,              'display_id': display_id, @@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):              'duration': parse_duration(video_data.get('duration')),              'uploader': video_data.get('providername'),              'uploader_id': video_data.get('providerid'), +            'timestamp': parse_iso8601(video_data.get('publishdate')), +            'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,              'thumbnails': thumbnails,              'formats': formats,          }  | 
