diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-06-30 20:52:26 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-06-30 20:52:26 +0600 | 
| commit | 9d0b581feaa3adc72ed3a53b7e8c86fadfddb6e1 (patch) | |
| tree | b46cfffce874db996e75ad2616bc74744400bbdb | |
| parent | f0714c9f869a8b4c2032566af12c107491472361 (diff) | |
[youtube] Prefer meta for upload date and modernize
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 19 | 
1 files changed, 10 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ed382e10b..6769a009d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -999,15 +999,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
 
         # upload date
-        upload_date = None
-        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
-        if mobj is None:
-            mobj = re.search(
-                r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.*?)</strong>',
-                video_webpage)
-        if mobj is not None:
-            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
-            upload_date = unified_strdate(upload_date)
+        upload_date = self._html_search_meta(
+            'datePublished', video_webpage, 'upload date', default=None)
+        if not upload_date:
+            upload_date = self._search_regex(
+                [r'(?s)id="eow-date.*?>(.*?)</span>',
+                 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+                video_webpage, 'upload date', default=None)
+            if upload_date:
+                upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
+        upload_date = unified_strdate(upload_date)
 
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
```
