diff options
| author | Filippo Valsorda <filippo.valsorda@gmail.com> | 2012-08-20 00:58:10 +0200 | 
|---|---|---|
| committer | Filippo Valsorda <filippo.valsorda@gmail.com> | 2012-08-20 00:58:10 +0200 | 
| commit | 202e76cfb01ba8011861344977884c51d90808c1 (patch) | |
| tree | 8a6e6b267414e6a2a5f4255265bce963b6502611 | |
| parent | 9bb8dc8e42da4e47008a0acd2a1d69e00193e02d (diff) | |
Made the YouTubeIE regex verbose/commented
| -rw-r--r-- | youtube_dl/InfoExtractors.py | 25 | 
1 file changed, 23 insertions, 2 deletions
| diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ddb4aa16b..3875e7fd8 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -95,7 +95,24 @@ class InfoExtractor(object):  class YoutubeIE(InfoExtractor):  	"""Information extractor for youtube.com.""" -	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' +	_VALID_URL = r"""^ +	                 ( +	                     (?:https?://)?                                       # http(s):// (optional) +	                     (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/) # the various hostnames, with wildcard subdomains +	                     (?!view_play_list|my_playlists|artist|playlist)      # ignore playlist URLs +	                     (?:                                                  # the various things that can precede the ID: +	                         (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ +	                         |(?:                                             # or the v= param in all its forms +	                             (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx) +	                             (?:\?|\#!?)                                  # the params delimiter ? or # or #! +	                             (?:.+&)?                                     # any other preceding param (like /?s=tuff&v=xxxx) +	                             v= +	                         ) +	                     )?                                                   # optional -> youtube.com/xxxx is OK +	                 )?                                                       
# all until now is optional -> you can pass the naked ID +	                 ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID +	                 (?(1).+)?                                                # if we found the ID, everything can follow +	                 $"""  	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'  	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'  	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' @@ -134,6 +151,10 @@ class YoutubeIE(InfoExtractor):  	}	  	IE_NAME = u'youtube' +	def suitable(self, url): +		"""Receives a URL and returns True if suitable for this IE.""" +		return re.match(self._VALID_URL, url, re.VERBOSE) is not None +  	def report_lang(self):  		"""Report attempt to set language."""  		self._downloader.to_screen(u'[youtube] Setting language') @@ -268,7 +289,7 @@ class YoutubeIE(InfoExtractor):  			url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')  		# Extract video id from URL -		mobj = re.match(self._VALID_URL, url) +		mobj = re.match(self._VALID_URL, url, re.VERBOSE)  		if mobj is None:  			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)  			return | 
