diff options
| -rw-r--r-- | test/test_utils.py | 16 | ||||
| -rw-r--r-- | test/test_youtube_lists.py | 7 | ||||
| -rw-r--r-- | youtube_dl/FileDownloader.py | 6 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 20 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 10 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 45 | 
6 files changed, 91 insertions, 13 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index eeaaa7fad..343409a7a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,6 +14,8 @@ from youtube_dl.utils import timeconvert  from youtube_dl.utils import sanitize_filename  from youtube_dl.utils import unescapeHTML  from youtube_dl.utils import orderedSet +from youtube_dl.utils import DateRange +from youtube_dl.utils import unified_strdate  if sys.version_info < (3, 0):      _compat_str = lambda b: b.decode('unicode-escape') @@ -95,6 +97,20 @@ class TestUtil(unittest.TestCase):      def test_unescape_html(self):          self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) +         +    def test_daterange(self): +        _20century = DateRange("19000101","20000101") +        self.assertFalse("17890714" in _20century) +        _ac = DateRange("00010101") +        self.assertTrue("19690721" in _ac) +        _firstmilenium = DateRange(end="10000101") +        self.assertTrue("07110427" in _firstmilenium) +         +    def test_unified_dates(self): +        self.assertEqual(unified_strdate('December 21, 2010'), '20101221') +        self.assertEqual(unified_strdate('8/7/2009'), '20090708') +        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') +        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')  if __name__ == '__main__':      unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index c7f00af32..b11e6ccaa 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -71,6 +71,13 @@ class TestYoutubeLists(unittest.TestCase):          ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]          self.assertFalse('pElCt5oNDuI' in ytie_results)          self.assertFalse('KdPEApIVdWM' in ytie_results) +         +    def test_youtube_playlist_empty(self): +        dl = FakeDownloader() +        ie = YoutubePlaylistIE(dl) +        result = 
ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] +        self.assertIsPlaylist(result) +        self.assertEqual(len(result['entries']), 0)      def test_youtube_course(self):          dl = FakeDownloader() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index d0378fb14..2db686d62 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -89,6 +89,7 @@ class FileDownloader(object):      keepvideo:         Keep the video file after post-processing      min_filesize:      Skip files smaller than this size      max_filesize:      Skip files larger than this size +    daterange:         A DateRange object, download only if the upload_date is in the range.      """      params = None @@ -424,6 +425,11 @@ class FileDownloader(object):          if rejecttitle:              if re.search(rejecttitle, title, re.IGNORECASE):                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' +        date = info_dict.get('upload_date', None) +        if date is not None: +            dateRange = self.params.get('daterange', DateRange()) +            if date not in dateRange: +                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)          return None      def extract_info(self, url, download = True, ie_name = None): diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 0e2c7795d..620cce189 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):          mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)          if mobj is not None:              upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) -            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] -            for expression in format_expressions: -                try: -                    
upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') -                except: -                    pass +            upload_date = unified_strdate(upload_date)          # description          video_description = get_element_by_id("eow-description", video_webpage) @@ -1723,12 +1718,11 @@ class YoutubePlaylistIE(InfoExtractor):              if 'feed' not in response:                  self._downloader.report_error(u'Got a malformed response from YouTube API')                  return +            playlist_title = response['feed']['title']['$t']              if 'entry' not in response['feed']:                  # Number of videos is a multiple of self._MAX_RESULTS                  break -            playlist_title = response['feed']['title']['$t'] -              videos += [ (entry['yt$position']['$t'], entry['content']['src'])                          for entry in response['feed']['entry']                          if 'content' in entry ] @@ -2386,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor):              shortMediaId = mediaId.split(':')[-1]              showId = mediaId.split(':')[-2].replace('.com', '')              officialTitle = itemEl.findall('./title')[0].text -            officialDate = itemEl.findall('./pubDate')[0].text +            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)              configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+                          compat_urllib_parse.urlencode({'uri': mediaId})) @@ -2696,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):          streams = json.loads(stream_json)          mediaURL = streams['http_mp3_128_url'] +        upload_date = unified_strdate(info['created_at'])          return [{              'id':       info['id'],              'url':      mediaURL,              'uploader': info['user']['username'], -            'upload_date':  info['created_at'], +            'upload_date': upload_date,              'title':    info['title'],              'ext':      u'mp3',              'description': info['description'], @@ -3561,6 +3556,7 @@ class FunnyOrDieIE(InfoExtractor):  class SteamIE(InfoExtractor):      _VALID_URL = r"""http://store.steampowered.com/ +                (agecheck/)?                  (?P<urltype>video|app)/ #If the page is only for videos or for a game                  (?P<gameID>\d+)/?                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID @@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):              self._downloader.report_warning(u'unable to extract video date')              upload_date = None          else: -            upload_date = result.group('date').strip() +            upload_date = unified_strdate(result.group('date').strip())          # Get the video uploader          result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage) @@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):          if result is None:              self._downloader.report_error(u'unable to extract video title')              return -        upload_date = result.group('date') +        upload_date = unified_strdate(result.group('date'))          info = {'id': video_id,                  'url': video_url, diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index d491402c6..ce754ffd3 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -157,6 +157,9 @@ def 
parseOpts(overrideArguments=None):      selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)      selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)      selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) +    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) +    selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) +    selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)      authentication.add_option('-u', '--username', @@ -447,6 +450,10 @@ def _real_main(argv=None):      if opts.recodevideo is not None:          if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:              parser.error(u'invalid video recode format specified') +    if opts.date is not None: +        date = DateRange.day(opts.date) +    else: +        date = DateRange(opts.dateafter, opts.datebefore)      if sys.version_info < (3,):          # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) @@ -513,7 +520,8 @@ def _real_main(argv=None):          'test': opts.test,          'keepvideo': opts.keepvideo,          'min_filesize': opts.min_filesize, -        'max_filesize': opts.max_filesize +        'max_filesize': opts.max_filesize, +        'daterange': date          })      if opts.verbose: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 017f06c42..3a2f0022f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -12,6 +12,7 @@ import traceback  import zlib  import 
import datetime


def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD.

    Commas in date_str are treated as plain separators and a trailing
    numeric UTC offset (for example ' +0000') is removed before parsing.
    Returns None if date_str matches none of the known formats.
    """
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so drop the offset
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
    ]
    for expression in format_expressions:
        try:
            # Stop at the first format that parses the string.
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Not this format (or the date cannot be formatted): try the next.
            continue
    return None


def date_from_str(date_str):
    """Return a datetime.date object from a string in the format YYYYMMDD.

    Raises ValueError if date_str is not in that format.
    """
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()


class DateRange(object):
    """Represents a time interval between two dates, both ends included"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str (YYYYMMDD).

        A bound left as None is replaced by the earliest (for start) or
        latest (for end) date that datetime can represent.
        Raises ValueError if start is later than end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        # start == end is a valid one-day range (this is what day() builds),
        # so only a reversed interval is rejected.
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range.

        date is a YYYYMMDD string; datetime.date and datetime.datetime
        objects are accepted as well.
        """
        if isinstance(date, datetime.datetime):
            # A datetime cannot be compared with a plain date; keep the day.
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
