diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-04-27 15:14:20 +0200 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-04-27 15:14:20 +0200 | 
| commit | bf50b0383e4d6728bbbf1d0ee70cf586a90efb40 (patch) | |
| tree | b6614ea9625d8f756aa174e4a03796270be1df61 | |
| parent | bd55852517a40d011b303559f4cd78773a2f3de5 (diff) | |
Fix some IEs that didn't return the upload_date in the YYYYMMDD format
Create a function unified_strdate in utils.py to fix these problems
| -rw-r--r-- | test/test_utils.py | 7 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 16 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 17 | 
3 files changed, 29 insertions, 11 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index f9d58268b..343409a7a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -15,6 +15,7 @@ from youtube_dl.utils import sanitize_filename  from youtube_dl.utils import unescapeHTML  from youtube_dl.utils import orderedSet  from youtube_dl.utils import DateRange +from youtube_dl.utils import unified_strdate  if sys.version_info < (3, 0):      _compat_str = lambda b: b.decode('unicode-escape') @@ -104,6 +105,12 @@ class TestUtil(unittest.TestCase):          self.assertTrue("19690721" in _ac)          _firstmilenium = DateRange(end="10000101")          self.assertTrue("07110427" in _firstmilenium) +         +    def test_unified_dates(self): +        self.assertEqual(unified_strdate('December 21, 2010'), '20101221') +        self.assertEqual(unified_strdate('8/7/2009'), '20090708') +        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') +        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 936af9cb4..88ea567f8 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):          mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)          if mobj is not None:              upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) -            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] -            for expression in format_expressions: -                try: -                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') -                except: -                    pass +            upload_date = unified_strdate(upload_date)          # description          video_description = get_element_by_id("eow-description", video_webpage) @@ -2385,7 +2380,7 @@ class 
ComedyCentralIE(InfoExtractor):              shortMediaId = mediaId.split(':')[-1]              showId = mediaId.split(':')[-2].replace('.com', '')              officialTitle = itemEl.findall('./title')[0].text -            officialDate = itemEl.findall('./pubDate')[0].text +            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)              configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +                          compat_urllib_parse.urlencode({'uri': mediaId})) @@ -2695,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):          streams = json.loads(stream_json)          mediaURL = streams['http_mp3_128_url'] +        upload_date = unified_strdate(info['created_at'])          return [{              'id':       info['id'],              'url':      mediaURL,              'uploader': info['user']['username'], -            'upload_date':  info['created_at'], +            'upload_date': upload_date,              'title':    info['title'],              'ext':      u'mp3',              'description': info['description'], @@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):              self._downloader.report_warning(u'unable to extract video date')              upload_date = None          else: -            upload_date = result.group('date').strip() +            upload_date = unified_strdate(result.group('date').strip())          # Get the video uploader          result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage) @@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):          if result is None:              self._downloader.report_error(u'unable to extract video title')              return -        upload_date = result.group('date') +        upload_date = unified_strdate(result.group('date'))          info = {'id': video_id,                  'url': video_url, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e5d756b8b..3a2f0022f 100644 --- 
a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -569,7 +569,22 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):      https_request = http_request      https_response = http_response -     + +def unified_strdate(date_str): +    """Return a string with the date in the format YYYYMMDD""" +    upload_date = None +    #Replace commas +    date_str = date_str.replace(',',' ') +    # %z (UTC offset) is only supported in python>=3.2 +    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) +    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] +    for expression in format_expressions: +        try: +            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') +        except: +            pass +    return upload_date +  def date_from_str(date_str):      """Return a datetime object from a string in the format YYYYMMDD"""      return datetime.datetime.strptime(date_str, "%Y%m%d").date() | 
