aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFilippo Valsorda <filippo.valsorda@gmail.com>2012-08-20 00:58:10 +0200
committerFilippo Valsorda <filippo.valsorda@gmail.com>2012-08-20 00:58:10 +0200
commit202e76cfb01ba8011861344977884c51d90808c1 (patch)
tree8a6e6b267414e6a2a5f4255265bce963b6502611
parent9bb8dc8e42da4e47008a0acd2a1d69e00193e02d (diff)
Made the YouTubeIE regex verbose/commented
-rw-r--r--youtube_dl/InfoExtractors.py25
1 files changed, 23 insertions, 2 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index ddb4aa16b..3875e7fd8 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -95,7 +95,24 @@ class InfoExtractor(object):
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
- _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
+ _VALID_URL = r"""^
+ (
+ (?:https?://)? # http(s):// (optional)
+ (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/) # the various hostnames, with wildcard subdomains
+ (?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs
+ (?: # the various things that can precede the ID:
+ (?:(?:v|embed|e)/) # v/ or embed/ or e/
+ |(?: # or the v= param in all its forms
+ (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ (?:\?|\#!?) # the params delimiter ? or # or #!
+ (?:.+&)? # any other preceding param (like /?s=tuff&v=xxxx)
+ v=
+ )
+ )? # optional -> youtube.com/xxxx is OK
+ )? # all until now is optional -> you can pass the naked ID
+ ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
+ (?(1).+)? # if we found the ID, everything can follow
+ $"""
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@@ -134,6 +151,10 @@ class YoutubeIE(InfoExtractor):
}
IE_NAME = u'youtube'
+ def suitable(self, url):
+ """Receives a URL and returns True if suitable for this IE."""
+ return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+
def report_lang(self):
"""Report attempt to set language."""
self._downloader.to_screen(u'[youtube] Setting language')
@@ -268,7 +289,7 @@ class YoutubeIE(InfoExtractor):
url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
# Extract video id from URL
- mobj = re.match(self._VALID_URL, url)
+ mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return