Merge pull request #403 from FiloSottile/re_VERBOSE

author: Filippo Valsorda <filosottile.wiki@gmail.com> 2012-09-29 17:02:38 +0200
committer: Filippo Valsorda <filosottile.wiki@gmail.com> 2012-09-29 17:05:40 +0200
commit: 5c961d89df83006aede903547ec8b270ed42b785 (patch)
tree: ac24b798620704997a95c5a241bab8dec74a54f6
parent: 3c4d6c9ebae975bafcf3ae1b8d419cb734b50581 (diff)
parent: 202e76cfb01ba8011861344977884c51d90808c1 (diff)
1 files changed, 24 insertions, 2 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 4fcff77ff..bdb2ec311 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -97,7 +97,25 @@ class InfoExtractor(object):
 class YoutubeIE(InfoExtractor):
 	"""Information extractor for youtube.com."""
 
-	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|tube\.majestyc\.net/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
+	_VALID_URL = r"""^
+	                 (
+	                     (?:https?://)?                                       # http(s):// (optional)
+	                     (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+	                     	tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
+	                     (?!view_play_list|my_playlists|artist|playlist)      # ignore playlist URLs
+	                     (?:                                                  # the various things that can precede the ID:
+	                         (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
+	                         |(?:                                             # or the v= param in all its forms
+	                             (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+	                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
+	                             (?:.+&)?                                     # any other preceding param (like /?s=tuff&v=xxxx)
+	                             v=
+	                         )
+	                     )?                                                   # optional -> youtube.com/xxxx is OK
+	                 )?                                                       # all until now is optional -> you can pass the naked ID
+	                 ([0-9A-Za-z_-]+)                                         # here it is! the YouTube video ID
+	                 (?(1).+)?                                                # if the URL prefix (group 1) matched, anything can follow the ID
+	                 $"""
 	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@@ -136,6 +154,10 @@ class YoutubeIE(InfoExtractor):
 	}	
 	IE_NAME = u'youtube'
 
+	def suitable(self, url):
+		"""Receives a URL and returns True if suitable for this IE."""
+		return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+
 	def report_lang(self):
 		"""Report attempt to set language."""
 		self._downloader.to_screen(u'[youtube] Setting language')
@@ -270,7 +292,7 @@ class YoutubeIE(InfoExtractor):
 			url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
 
 		# Extract video id from URL
-		mobj = re.match(self._VALID_URL, url)
+		mobj = re.match(self._VALID_URL, url, re.VERBOSE)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 			return
author	Filippo Valsorda <filosottile.wiki@gmail.com>	2012-09-29 17:02:38 +0200
committer	Filippo Valsorda <filosottile.wiki@gmail.com>	2012-09-29 17:05:40 +0200
commit	5c961d89df83006aede903547ec8b270ed42b785 (patch)
tree	ac24b798620704997a95c5a241bab8dec74a54f6
parent	3c4d6c9ebae975bafcf3ae1b8d419cb734b50581 (diff)
parent	202e76cfb01ba8011861344977884c51d90808c1 (diff)