diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-11-24 20:32:17 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-11-24 20:32:17 +0800 | 
| commit | 44444f0d3ba8e448cc824d7722d865794fb6d5d3 (patch) | |
| tree | 3efee5d6499b668842bc0cb1f78f6184c81a580c | |
| parent | c867adc68c5dda0fafb2535c1a02ea32549b9d10 (diff) | |
[cbslocal] Support newyork.cbslocal.com
Closes #11285
| -rw-r--r-- | ChangeLog | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/cbslocal.py | 39 | 
2 files changed, 41 insertions, 4 deletions
```diff
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
++ [cbslocal] Recognize New York site (#11285)
+
+
 version 2016.11.22
 
 Extractors
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py
index 289709c97..8d5f11dd1 100644
--- a/youtube_dl/extractor/cbslocal.py
+++ b/youtube_dl/extractor/cbslocal.py
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
 from .anvato import AnvatoIE
 from .sendtonews import SendtoNewsIE
 from ..compat import compat_urlparse
-from ..utils import unified_timestamp
+from ..utils import (
+    parse_iso8601,
+    unified_timestamp,
+)
 
 
 class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
 
     _TESTS = [{
         # Anvato backend
@@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2’s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': 1479962220,
+            'upload_date': '20161124',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
     }]
 
     def _real_extract(self, url):
@@ -64,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
         info_dict = self._extract_anvato_videos(webpage, display_id)
 
         time_str = self._html_search_regex(
-            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
-        timestamp = unified_timestamp(time_str)
+            r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
+        if time_str:
+            timestamp = unified_timestamp(time_str)
+        else:
+            timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
 
         info_dict.update({
             'display_id': display_id,
```
