From bf50b0383e4d6728bbbf1d0ee70cf586a90efb40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 27 Apr 2013 15:14:20 +0200 Subject: Fix some IEs that didn't return the upload_date in the YYYYMMDD format Create a function unified_strdate in utils.py to fix these problems --- youtube_dl/InfoExtractors.py | 16 ++++++---------- youtube_dl/utils.py | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 936af9cb4..88ea567f8 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor): mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) - format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] - for expression in format_expressions: - try: - upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') - except: - pass + upload_date = unified_strdate(upload_date) # description video_description = get_element_by_id("eow-description", video_webpage) @@ -2385,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor): shortMediaId = mediaId.split(':')[-1] showId = mediaId.split(':')[-2].replace('.com', '') officialTitle = itemEl.findall('./title')[0].text - officialDate = itemEl.findall('./pubDate')[0].text + officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ compat_urllib_parse.urlencode({'uri': mediaId})) @@ -2695,12 +2690,13 @@ class SoundcloudIE(InfoExtractor): streams = json.loads(stream_json) mediaURL = streams['http_mp3_128_url'] + upload_date = unified_strdate(info['created_at']) return [{ 'id': info['id'], 'url': mediaURL, 'uploader': info['user']['username'], - 'upload_date': info['created_at'], + 'upload_date': upload_date, 'title': info['title'], 'ext': u'mp3', 'description': info['description'], @@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor): self._downloader.report_warning(u'unable to extract video date') upload_date = None else: - upload_date = result.group('date').strip() + upload_date = unified_strdate(result.group('date').strip()) # Get the video uploader result = re.search(r'Submitted:(?P.*)', webpage) @@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor): if result is None: self._downloader.report_error(u'unable to extract video title') return - upload_date = result.group('date') + upload_date = unified_strdate(result.group('date')) info = {'id': video_id, 'url': video_url, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e5d756b8b..3a2f0022f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -569,7 +569,22 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_request = http_request https_response = http_response - + +def unified_strdate(date_str): + """Return a string with the date in the format YYYYMMDD""" + upload_date = None + #Replace commas + date_str = date_str.replace(',',' ') + # %z (UTC offset) is only supported in python>=3.2 + date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) + format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] + for expression in format_expressions: + try: + upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') + except: + pass + return upload_date + def date_from_str(date_str): """Return a datetime object from a string in the format YYYYMMDD""" 
return datetime.datetime.strptime(date_str, "%Y%m%d").date() -- cgit v1.2.3