about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- test/test_utils.py 7
-rwxr-xr-x youtube_dl/InfoExtractors.py 16
-rw-r--r-- youtube_dl/utils.py 17
3 files changed, 29 insertions, 11 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index f9d58268b..343409a7a 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -15,6 +15,7 @@ from youtube_dl.utils import sanitize_filename
from youtube_dl.utils import unescapeHTML
from youtube_dl.utils import orderedSet
from youtube_dl.utils import DateRange
+from youtube_dl.utils import unified_strdate
if sys.version_info < (3, 0):
_compat_str = lambda b: b.decode('unicode-escape')
@@ -104,6 +105,12 @@ class TestUtil(unittest.TestCase):
self.assertTrue("19690721" in _ac)
_firstmilenium = DateRange(end="10000101")
self.assertTrue("07110427" in _firstmilenium)
+
+    def test_unified_dates(self):
+        """Check that unified_strdate turns each sample into YYYYMMDD."""
+        # (input string, expected YYYYMMDD result)
+        samples = [
+            ('December 21, 2010', '20101221'),
+            ('8/7/2009', '20090708'),
+            ('Dec 14, 2012', '20121214'),
+            ('2012/10/11 01:56:38 +0000', '20121011'),
+        ]
+        for raw_date, wanted in samples:
+            self.assertEqual(unified_strdate(raw_date), wanted)
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 936af9cb4..88ea567f8 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
- for expression in format_expressions:
- try:
- upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
- except:
- pass
+ upload_date = unified_strdate(upload_date)
# description
video_description = get_element_by_id("eow-description", video_webpage)
@@ -2385,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor):
shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '')
officialTitle = itemEl.findall('./title')[0].text
- officialDate = itemEl.findall('./pubDate')[0].text
+ officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
@@ -2695,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):
streams = json.loads(stream_json)
mediaURL = streams['http_mp3_128_url']
+ upload_date = unified_strdate(info['created_at'])
return [{
'id': info['id'],
'url': mediaURL,
'uploader': info['user']['username'],
- 'upload_date': info['created_at'],
+ 'upload_date': upload_date,
'title': info['title'],
'ext': u'mp3',
'description': info['description'],
@@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):
self._downloader.report_warning(u'unable to extract video date')
upload_date = None
else:
- upload_date = result.group('date').strip()
+ upload_date = unified_strdate(result.group('date').strip())
# Get the video uploader
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
@@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):
if result is None:
self._downloader.report_error(u'unable to extract video title')
return
- upload_date = result.group('date')
+ upload_date = unified_strdate(result.group('date'))
info = {'id': video_id,
'url': video_url,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e5d756b8b..3a2f0022f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -569,7 +569,22 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
https_request = http_request
https_response = http_response
-
+
+def unified_strdate(date_str):
+    """Return the date found in date_str as a YYYYMMDD string, or None.
+
+    Commas are replaced by spaces and a trailing UTC offset such as
+    " +0000" is removed before parsing. The known formats are tried in
+    order and the first one that parses is used. None is returned when
+    date_str is None or when no format matches.
+    """
+    if date_str is None:
+        return None
+    # Replace commas so that 'Dec 14, 2012' parses like 'Dec 14 2012'
+    date_str = date_str.replace(',', ' ')
+    # %z (UTC offset) is only supported in python>=3.2
+    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
+    format_expressions = [
+        '%d %B %Y', '%B %d %Y', '%b %d %Y',
+        '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S',
+    ]
+    for expression in format_expressions:
+        try:
+            # strftime stays inside the try: it can also raise ValueError
+            # (e.g. years before 1900 on old interpreters)
+            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+        except ValueError:
+            # Not in this format; try the next one
+            continue
+    return None
+
def date_from_str(date_str):
"""Return a datetime.date object parsed from a string in the format YYYYMMDD"""
# .date() drops the time-of-day part of the parsed datetime
return datetime.datetime.strptime(date_str, "%Y%m%d").date()