diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2013-12-17 04:13:36 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2013-12-17 04:13:36 +0100 | 
| commit | 29eb5174031cfc0b5de556da3da7761ac377de4e (patch) | |
| tree | bdc0158bbf6a069777719d70c89b96939c1c651a | |
| parent | 44c471c3b873473157adb8ba8a55667ab54b2602 (diff) | |
Add webpage_url_basename info_dict field (Fixes #1938)
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | test/test_utils.py | 25 |
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 4 |
| -rw-r--r-- | youtube_dl/utils.py | 7 |
3 files changed, 27 insertions, 9 deletions
diff --git a/test/test_utils.py b/test/test_utils.py index 0fa66beec..5f4fdb771 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -13,20 +13,21 @@ import xml.etree.ElementTree  #from youtube_dl.utils import htmlentity_transform  from youtube_dl.utils import ( -    timeconvert, -    sanitize_filename, -    unescapeHTML, -    orderedSet,      DateRange, -    unified_strdate, +    encodeFilename,      find_xpath_attr,      get_meta_content, -    xpath_with_ns, -    smuggle_url, -    unsmuggle_url, +    orderedSet, +    sanitize_filename,      shell_quote, -    encodeFilename, +    smuggle_url,      str_to_int, +    timeconvert, +    unescapeHTML, +    unified_strdate, +    unsmuggle_url, +    url_basename, +    xpath_with_ns,  )  if sys.version_info < (3, 0): @@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):          self.assertEqual(str_to_int('123,456'), 123456)          self.assertEqual(str_to_int('123.456'), 123456) +    def test_url_basename(self): +        self.assertEqual(url_basename(u'http://foo.de/'), u'') +        self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz') +        self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz') +        self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz') +        self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b1f87415b..2a078adfb 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -47,6 +47,7 @@ from .utils import (      subtitles_filename,      takewhile_inclusive,      UnavailableVideoError, +    url_basename,      write_json_file,      write_string,      YoutubeDLHandler, @@ -484,6 +485,7 @@ class YoutubeDL(object):                      {                          'extractor': ie.IE_NAME,                          'webpage_url': url, +                        'webpage_url_basename': url_basename(url),             
             'extractor_key': ie.ie_key(),                      })                  if process: @@ -576,6 +578,7 @@ class YoutubeDL(object):                      'playlist_index': i + playliststart,                      'extractor': ie_result['extractor'],                      'webpage_url': ie_result['webpage_url'], +                    'webpage_url_basename': url_basename(ie_result['webpage_url']),                      'extractor_key': ie_result['extractor_key'],                  } @@ -596,6 +599,7 @@ class YoutubeDL(object):                      {                          'extractor': ie_result['extractor'],                          'webpage_url': ie_result['webpage_url'], +                        'webpage_url_basename': url_basename(ie_result['webpage_url']),                          'extractor_key': ie_result['extractor_key'],                      })                  return r diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dbfac0f43..a249c7ec1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1084,3 +1084,10 @@ def remove_start(s, start):      if s.startswith(start):          return s[len(start):]      return s + + +def url_basename(url): +    m = re.match(r'(?:https?:|)//[^/]+/(?:[^/?#]+/)?([^/?#]+)/?(?:[?#]|$)', url) +    if not m: +        return u'' +    return m.group(1)  | 
