From df8db1aa2107f204fa14c157d7a536e45ceb65c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 26 Feb 2013 23:33:58 +0100 Subject: Create extract_info method --- youtube_dl/FileDownloader.py | 85 +++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 41 deletions(-) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 192ad37d2..b26c34729 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -377,6 +377,44 @@ class FileDownloader(object): if re.search(rejecttitle, title, re.IGNORECASE): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None + + def extract_info(self, url): + ''' + Returns a list with a dictionary for each video we find. + ''' + suitable_found = False + for ie in self._ies: + # Go to next InfoExtractor if not suitable + if not ie.suitable(url): + continue + + # Warn if the _WORKING attribute is False + if not ie.working(): + self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, ' + u'and will probably not work. If you want to go on, use the -i option.') + + # Suitable InfoExtractor found + suitable_found = True + + # Extract information from URL and process it + try: + videos = ie.extract(url) + for video in videos or []: + if not 'extractor' in video: + #The extractor has already been set somewher else + video['extractor'] = ie.IE_NAME + return videos + except ExtractorError as de: # An error we somewhat expected + self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) + break + except Exception as e: + if self.params.get('ignoreerrors', False): + self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) + break + else: + raise + if not suitable_found: + self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -488,49 +526,14 @@ class FileDownloader(object): raise SameFileError(self.params['outtmpl']) for url in url_list: - suitable_found = False - for ie in self._ies: - # Go to next InfoExtractor if not suitable - if not ie.suitable(url): - continue - - # Warn if the _WORKING attribute is False - if not ie.working(): - self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, ' - u'and will probably not work. If you want to go on, use the -i option.') + videos = self.extract_info(url) - # Suitable InfoExtractor found - suitable_found = True - - # Extract information from URL and process it + for video in videos or []: try: - videos = ie.extract(url) - except ExtractorError as de: # An error we somewhat expected - self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) - break - except Exception as e: - if self.params.get('ignoreerrors', False): - self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) - break - else: - raise - - if len(videos or []) > 1 and self.fixed_template(): - raise SameFileError(self.params['outtmpl']) - - for video in videos or []: - video['extractor'] = ie.IE_NAME - try: - self.increment_downloads() - self.process_info(video) - except UnavailableVideoError: - self.trouble(u'\nERROR: unable to download video') - - # Suitable InfoExtractor had been found; go to next URL - break - - if not suitable_found: - self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) + self.increment_downloads() + self.process_info(video) + except UnavailableVideoError: + self.trouble(u'\nERROR: unable to download video') return self._download_retcode -- cgit v1.2.3 From 631f73978c0ee851950ac697dfd73f9092abd3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 4 Mar 2013 22:16:42 +0100 Subject: Add a method for extracting info from a list of urls --- youtube_dl/FileDownloader.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index b26c34729..f668b362b 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -415,6 +415,14 @@ class FileDownloader(object): raise if not suitable_found: self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) + def extract_info_iterable(self, urls): + ''' + Return the videos founded for the urls + ''' + results = [] + for url in urls: + results.extend(self.extract_info(url)) + return results def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" -- cgit v1.2.3 From 6ac7f082c469b3b2153735ae8475e1d0fc8b5439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 20:14:32 +0100 Subject: `extract_info` now expects `ie.extract` to return a list in the format proposed in issue 608. Each element should have a '_type' key specifying if it's a video, an url or a playlist. `extract_info` will process each element to get the full info --- youtube_dl/FileDownloader.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 9b630c123..68fad11bc 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -410,12 +410,9 @@ class FileDownloader(object): # Extract information from URL and process it try: - videos = ie.extract(url) - for video in videos or []: - if not 'extractor' in video: - #The extractor has already been set somewher else - video['extractor'] = ie.IE_NAME - return videos + ie_results = ie.extract(url) + results = self.process_ie_results(ie_results, ie) + return results except ExtractorError as de: # An error we somewhat expected self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) break @@ -435,6 +432,29 @@ class FileDownloader(object): for url in urls: results.extend(self.extract_info(url)) return results + + def process_ie_results(self, ie_results, ie): + """ + Take the results of the ie and return a list of videos. + For url elements it will seartch the suitable ie and get the videos + For playlist elements it will process each of the elements of the 'entries' key + """ + results = [] + for result in ie_results or []: + result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system + if result_type == 'video': + if not 'extractor' in result: + #The extractor has already been set somewhere else + result['extractor'] = ie.IE_NAME + results.append(result) + elif result_type == 'url': + #We get the videos pointed by the url + results.extend(self.extract_info(result['url'])) + elif result_type == 'playlist': + #We process each entry in the playlist + entries_result = self.process_ie_results(result['entries'], ie) + results.extend(entries_result) + return results def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" -- cgit v1.2.3 From 7eab8dc7504cf1f5f1dd03eb62e266ce24948b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 29 Mar 2013 12:32:42 +0100 Subject: Pass the playlist info_dict to process_info the playlist value can be used in the output template --- youtube_dl/FileDownloader.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 6af2acbee..d2b9be9ef 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -460,12 +460,21 @@ class FileDownloader(object): elif result_type == 'playlist': #We process each entry in the playlist entries_result = self.process_ie_results(result['entries'], ie) - results.extend(entries_result) + result['entries'] = entries_result + results.extend([result]) return results def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" + if info_dict.get('_type','video') == 'playlist': + playlist = info_dict.get('title', None) or info_dict.get('id', None) + self.to_screen(u'[download] Downloading playlist: %s' % playlist) + for video in info_dict['entries']: + video['playlist'] = playlist + self.process_info(video) + return + # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] -- cgit v1.2.3 From d39919c03e45b3e8f804c23f78fae33cb4adc7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 5 Apr 2013 13:01:59 +0200 Subject: Add progress counter for playlists Closes #276 --- youtube_dl/FileDownloader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 2237d355d..ba3277577 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -492,8 +492,10 @@ class FileDownloader(object): if info_dict.get('_type','video') == 'playlist': playlist = info_dict.get('title', None) or info_dict.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) - for video in info_dict['entries']: + n_videos = len(info_dict['entries']) + for i,video in enumerate(info_dict['entries'],1): video['playlist'] = playlist + self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) self.process_info(video) return -- cgit v1.2.3 From 146c12a2dafdb9ff0e5138aa0f9da38bddca6c8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Apr 2013 00:05:04 +0200 Subject: Change the order for extracting/downloading Now it gets a video info and directly downloads it, the it pass to the next video founded. --- youtube_dl/FileDownloader.py | 103 ++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 50 deletions(-) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ba3277577..58be5caee 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -419,9 +419,10 @@ class FileDownloader(object): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None - def extract_info(self, url): + def extract_info(self, url, download = True): ''' Returns a list with a dictionary for each video we find. + If 'download', also downloads the videos. ''' suitable_found = False for ie in self._ies: @@ -440,7 +441,12 @@ class FileDownloader(object): # Extract information from URL and process it try: ie_results = ie.extract(url) - results = self.process_ie_results(ie_results, ie) + results = [] + for ie_result in ie_results: + if not 'extractor' in ie_result: + #The extractor has already been set somewhere else + ie_result['extractor'] = ie.IE_NAME + results.append(self.process_ie_result(ie_result, download)) return results except ExtractorError as de: # An error we somewhat expected self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) @@ -453,51 +459,51 @@ class FileDownloader(object): raise if not suitable_found: self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) - def extract_info_iterable(self, urls): - ''' - Return the videos founded for the urls - ''' - results = [] - for url in urls: - results.extend(self.extract_info(url)) - return results - def process_ie_results(self, ie_results, ie): + def process_ie_result(self, ie_result, download = True): """ - Take the results of the ie and return a list of videos. - For url elements it will seartch the suitable ie and get the videos + Take the result of the ie and return a list of videos. + For url elements it will search the suitable ie and get the videos For playlist elements it will process each of the elements of the 'entries' key + + It will also download the videos if 'download'. """ - results = [] - for result in ie_results or []: - result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system - if result_type == 'video': - if not 'extractor' in result: - #The extractor has already been set somewhere else - result['extractor'] = ie.IE_NAME - results.append(result) - elif result_type == 'url': - #We get the videos pointed by the url - results.extend(self.extract_info(result['url'])) - elif result_type == 'playlist': - #We process each entry in the playlist - entries_result = self.process_ie_results(result['entries'], ie) - result['entries'] = entries_result - results.extend([result]) - return results + result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system + if result_type == 'video': + if 'playlist' not in ie_result: + #It isn't part of a playlist + ie_result['playlist'] = None + if download: + #Do the download: + self.process_info(ie_result) + return ie_result + elif result_type == 'url': + #We get the video pointed by the url + result = self.extract_info(ie_result['url'], download)[0] + return result + elif result_type == 'playlist': + #We process each entry in the playlist + playlist = ie_result.get('title', None) or ie_result.get('id', None) + self.to_screen(u'[download] Downloading playlist: %s' % playlist) + n_videos = len(ie_result['entries']) + playlist_results = [] + for i,entry in enumerate(ie_result['entries'],1): + self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) + entry_result = self.process_ie_result(entry, False) + entry_result['playlist'] = playlist + #We must do the download here to correctly set the 'playlist' key + if download: + self.process_info(entry_result) + playlist_results.append(entry_result) + result = ie_result.copy() + result['entries'] = playlist_results + return result def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - if info_dict.get('_type','video') == 'playlist': - playlist = info_dict.get('title', None) or info_dict.get('id', None) - self.to_screen(u'[download] Downloading playlist: %s' % playlist) - n_videos = len(info_dict['entries']) - for i,video in enumerate(info_dict['entries'],1): - video['playlist'] = playlist - self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) - self.process_info(video) - return + #We increment the download the download count here to match the previous behaviour. + self.increment_downloads() # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] @@ -633,17 +639,14 @@ class FileDownloader(object): raise SameFileError(self.params['outtmpl']) for url in url_list: - videos = self.extract_info(url) - - for video in videos or []: - try: - self.increment_downloads() - self.process_info(video) - except UnavailableVideoError: - self.trouble(u'\nERROR: unable to download video') - except MaxDownloadsReached: - self.to_screen(u'[info] Maximum number of downloaded files reached.') - raise + try: + #It also downloads the videos + videos = self.extract_info(url) + except UnavailableVideoError: + self.trouble(u'\nERROR: unable to download video') + except MaxDownloadsReached: + self.to_screen(u'[info] Maximum number of downloaded files reached.') + raise return self._download_retcode -- cgit v1.2.3 From bce878a7c1678ac698ecd556b2c77a1e2f2306df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Apr 2013 14:32:03 +0200 Subject: Implement the playlist/start options in FileDownloader It makes it available for all the InfoExtractors --- youtube_dl/FileDownloader.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 58be5caee..5a5141ba5 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -485,10 +485,25 @@ class FileDownloader(object): #We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) - n_videos = len(ie_result['entries']) + playlist_results = [] - for i,entry in enumerate(ie_result['entries'],1): - self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) + + n_all_entries = len(ie_result['entries']) + playliststart = self.params.get('playliststart', 1) - 1 + playlistend = self.params.get('playlistend', -1) + + if playlistend == -1: + entries = ie_result['entries'][playliststart:] + else: + entries = ie_result['entries'][playliststart:playlistend] + + n_entries = len(entries) + + self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % + (ie_result['extractor'], playlist, n_all_entries, n_entries)) + + for i,entry in enumerate(entries,1): + self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) entry_result = self.process_ie_result(entry, False) entry_result['playlist'] = playlist #We must do the download here to correctly set the 'playlist' key -- cgit v1.2.3 From d281274bf250065f876bb4f75fb6f711e1a26eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 16 Apr 2013 15:13:29 +0200 Subject: Add a playlist_index key to the info_dict, can be used in the output template --- youtube_dl/FileDownloader.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'youtube_dl/FileDownloader.py') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 5a5141ba5..4dabbb440 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -389,6 +389,8 @@ class FileDownloader(object): template_dict['epoch'] = int(time.time()) template_dict['autonumber'] = u'%05d' % self._num_downloads + if template_dict['playlist_index'] is not None: + template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] sanitize = lambda k,v: sanitize_filename( u'NA' if v is None else compat_str(v), @@ -473,6 +475,7 @@ class FileDownloader(object): if 'playlist' not in ie_result: #It isn't part of a playlist ie_result['playlist'] = None + ie_result['playlist_index'] = None if download: #Do the download: self.process_info(ie_result) @@ -506,6 +509,7 @@ class FileDownloader(object): self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) entry_result = self.process_ie_result(entry, False) entry_result['playlist'] = playlist + entry_result['playlist_index'] = i + playliststart #We must do the download here to correctly set the 'playlist' key if download: self.process_info(entry_result) -- cgit v1.2.3