diff options
| -rw-r--r-- | README.md | 10 | ||||
| -rw-r--r-- | test/test_playlists.py | 12 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 14 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 7 | ||||
| -rw-r--r-- | youtube_dl/downloader/http.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/cmt.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/comedycentral.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/dreisat.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/imdb.py | 29 | ||||
| -rw-r--r-- | youtube_dl/extractor/mixcloud.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/mtv.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/soundcloud.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/wistia.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 1 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 7 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
19 files changed, 109 insertions, 24 deletions
@@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.                                 empty string (--proxy "") for direct connection      --no-check-certificate     Suppress HTTPS certificate validation.      --cache-dir DIR            Location in the filesystem where youtube-dl can -                               store downloaded information permanently. By +                               store some downloaded information permanently. By                                 default $XDG_CACHE_HOME/youtube-dl or ~/.cache -                               /youtube-dl . +                               /youtube-dl . At the moment, only YouTube player +                               files (for videos with obfuscated signatures) are +                               cached, but that may change.      --no-cache-dir             Disable filesystem caching      --bidi-workaround          Work around terminals that lack bidirectional                                 text support. Requires bidiv or fribidi @@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services  ###  Is anyone going to need the feature?  Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. + +###  Is your question about youtube-dl? + +It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. diff --git a/test/test_playlists.py b/test/test_playlists.py index 1b7b4e3d8..9d522b357 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,8 @@ from youtube_dl.extractor import (      BandcampAlbumIE,      SmotriCommunityIE,      SmotriUserIE, -    IviCompilationIE +    IviCompilationIE, +    ImdbListIE,  ) @@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['id'], u'dezhurnyi_angel/season2')          self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')          self.assertTrue(len(result['entries']) >= 20) +         +    def test_imdb_list(self): +        dl = FakeYDL() +        ie = ImdbListIE(dl) +        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], u'sMjedvGDd8U') +        self.assertEqual(result['title'], u'Animated and Family Films') +        self.assertTrue(len(result['entries']) >= 48)  if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a9a3639d7..08037deda 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -148,6 +148,7 @@ class YoutubeDL(object):      socket_timeout:    Time to wait for unresponsive hosts, in seconds      bidi_workaround:   Work around buggy terminals without bidirectional text                         support, using fridibi +    debug_printtraffic:Print out sent and received HTTP traffic      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -164,6 +165,8 @@ class YoutubeDL(object):      def __init__(self, params=None):          """Create a FileDownloader object with the given options.""" +        if params is None: +            params = {}          self._ies = []          self._ies_instances = {}          self._pps = [] @@ -172,7 +175,7 @@ class YoutubeDL(object):          self._num_downloads = 0          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]          self._err_file = sys.stderr -        self.params = {} if params is None else params +        self.params = params          if params.get('bidi_workaround', False):              try: @@ -1014,7 +1017,7 @@ class YoutubeDL(object):      def list_formats(self, info_dict):          def format_note(fdict):              res = u'' -            if f.get('ext') in ['f4f', 'f4m']: +            if fdict.get('ext') in ['f4f', 'f4m']:                  res += u'(unsupported) '              if fdict.get('format_note') is not None:                  res += fdict['format_note'] + u' ' @@ -1124,10 +1127,13 @@ class YoutubeDL(object):              if 'http' in proxies and 'https' not in proxies:                  proxies['https'] = proxies['http']          proxy_handler = compat_urllib_request.ProxyHandler(proxies) + +        debuglevel = 1 if self.params.get('debug_printtraffic') else 0          https_handler = make_HTTPS_handler( -            self.params.get('nocheckcertificate', False)) +            self.params.get('nocheckcertificate', False), debuglevel=debuglevel) +        ydlh = YoutubeDLHandler(debuglevel=debuglevel)          opener = compat_urllib_request.build_opener( -            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) +            https_handler, proxy_handler, cookie_processor, ydlh)          # Delete the default user-agent header, which would otherwise apply in          # cases where our custom HTTP handler doesn't come into play          # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 657e3fd07..b29cf6758 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -186,7 +186,7 @@ def parseOpts(overrideArguments=None):      general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')      general.add_option(          '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', -        help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') +        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')      general.add_option(          '--no-cache-dir', action='store_const', const=None, dest='cachedir',          help='Disable filesystem caching') @@ -334,7 +334,9 @@ def parseOpts(overrideArguments=None):      verbosity.add_option('--youtube-print-sig-code',              action='store_true', dest='youtube_print_sig_code', default=False,              help=optparse.SUPPRESS_HELP) - +    verbosity.add_option('--print-traffic', +            dest='debug_printtraffic', action='store_true', default=False, +            help=optparse.SUPPRESS_HELP)      filesystem.add_option('-t', '--title',              action='store_true', dest='usetitle', help='use title in file name (default)', default=False) @@ -696,6 +698,7 @@ def _real_main(argv=None):          'proxy': opts.proxy,          'socket_timeout': opts.socket_timeout,          'bidi_workaround': opts.bidi_workaround, +        'debug_printtraffic': opts.debug_printtraffic,      }      with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 14b88efd3..8407727ba 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -133,7 +133,7 @@ class HttpFD(FileDownloader):                      return False              try:                  stream.write(data_block) -            except (IOError, OSError): +            except (IOError, OSError) as err:                  self.to_stderr(u"\n")                  self.report_error(u'unable to write data: %s' % str(err))                  return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b59110b15..9c1374373 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -28,6 +28,7 @@ from .channel9 import Channel9IE  from .cinemassacre import CinemassacreIE  from .clipfish import ClipfishIE  from .clipsyndicate import ClipsyndicateIE +from .cmt import CMTIE  from .cnn import CNNIE  from .collegehumor import CollegeHumorIE  from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE @@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE  from .howcast import HowcastIE  from .hypem import HypemIE  from .ign import IGNIE, OneUPIE -from .imdb import ImdbIE +from .imdb import ( +    ImdbIE, +    ImdbListIE +)  from .ina import InaIE  from .infoq import InfoQIE  from .instagram import InstagramIE diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py new file mode 100644 index 000000000..88e0e9aba --- /dev/null +++ b/youtube_dl/extractor/cmt.py @@ -0,0 +1,19 @@ +from .mtv import MTVIE + +class CMTIE(MTVIE): +    IE_NAME = u'cmt.com' +    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml' +    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + +    _TESTS = [ +        { +            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', +            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', +            u'info_dict': { +                u'id': u'989124', +                u'ext': u'mp4', +                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', +                u'description': u'Blame It All On My Roots', +            }, +        }, +    ] diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index a54ce3ee7..27bd8256e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,7 +12,9 @@ from ..utils import (  class ComedyCentralIE(MTVServicesInfoExtractor): -    _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' +    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/ +        (video-clips|episodes|cc-studios|video-collections) +        /(?P<title>.*)'''      _FEED_URL = u'http://comedycentral.com/feeds/mrss/'      _TEST = { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f34d36cb0..f7247752e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,7 +69,8 @@ class InfoExtractor(object):                                   download, lower-case.                                   "http", "https", "rtsp", "rtmp" or so.                      * preference Order number of this format. If this field is -                                 present, the formats get sorted by this field. +                                 present and not None, the formats get sorted +                                 by this field.                                   -1 for default (order by other properties),                                   -2 or smaller for less than default.      url:            Final video URL. diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 416e25156..0b11d1f10 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -10,11 +10,11 @@ from ..utils import (  class DreiSatIE(InfoExtractor):      IE_NAME = '3sat' -    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' +    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'      _TEST = {          u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", -        u'file': u'36983.webm', -        u'md5': u'57c97d0469d71cf874f6815aa2b7c944', +        u'file': u'36983.mp4', +        u'md5': u'9dcfe344732808dbfcc901537973c922',          u'info_dict': {              u"title": u"Kaffeeland Schweiz",              u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",  diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7a14c98f9..377ae91c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):              return self.url_result('http://' + url)          video_id = os.path.splitext(url.split('/')[-1])[0] +        self.to_screen(u'%s: Requesting header' % video_id) +          try:              response = self._send_head(url) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index e5332cce8..16926b4d3 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):              'description': descr,              'thumbnail': format_info['slate'],          } + +class ImdbListIE(InfoExtractor): +    IE_NAME = u'imdb:list' +    IE_DESC = u'Internet Movie Database lists' +    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' +     +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        list_id = mobj.group('id') +         +        # RSS XML is sometimes malformed +        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS') +        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title') +         +        # Export is independent of actual author_id, but returns 404 if no author_id is provided. +        # However, passing dummy author_id seems to be enough. +        csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id, +                                     list_id, u'Downloading list CSV') +         +        entries = [] +        for item in csv.split('\n')[1:]: +            cols = item.split(',') +            if len(cols) < 2: +                continue +            item_id = cols[1][1:-1] +            if item_id.startswith('vi'): +                entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')) +         +        return self.playlist_result(entries, list_id, list_title)
\ No newline at end of file diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 125d81551..7c54ea0f4 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):          info = json.loads(json_data)          preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') -        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') +        song_url = preview_url.replace('/previews/', '/c/originals/')          template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)          final_song_url = self._get_url(template_url)          if final_song_url is None: diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ed11f521a..f1cf41e2d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('videoid') -        uri = mobj.group('mgid') +        uri = mobj.groupdict().get('mgid')          if uri is None:              webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e22ff9c38..951e977bd 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):                              (?!sets/)(?P<title>[\w\d-]+)/?                              (?P<token>[^?]+?)?(?:[?].*)?$)                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) -                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*) +                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)                      )                      '''      IE_NAME = u'soundcloud' @@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):          if track_id is not None:              info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID              full_title = track_id -        elif mobj.group('widget'): +        elif mobj.group('player'):              query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)              return self.url_result(query['url'][0], ie='Soundcloud')          else: diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 584550455..bc31c2e64 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor):                  'height': a['height'],                  'filesize': a['size'],                  'ext': a['ext'], +                'preference': 1 if atype == 'original' else None,              })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b0e29c2a8..9424d5e26 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},          '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},          '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, +        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},          # Dash mp4 audio          '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 83a274043..da5143c8e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -539,7 +539,8 @@ def formatSeconds(secs):      else:          return '%d' % secs -def make_HTTPS_handler(opts_no_check_certificate): + +def make_HTTPS_handler(opts_no_check_certificate, **kwargs):      if sys.version_info < (3, 2):          import httplib @@ -560,7 +561,7 @@ def make_HTTPS_handler(opts_no_check_certificate):          class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):              def https_open(self, req):                  return self.do_open(HTTPSConnectionV3, req) -        return HTTPSHandlerV3() +        return HTTPSHandlerV3(**kwargs)      else:          context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)          context.verify_mode = (ssl.CERT_NONE @@ -571,7 +572,7 @@ def make_HTTPS_handler(opts_no_check_certificate):              context.load_default_certs()          except AttributeError:              pass  # Python < 3.4 -        return compat_urllib_request.HTTPSHandler(context=context) +        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)  class ExtractorError(Exception):      """Error during info extraction.""" diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 332913b31..bf5fc8212 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.26' +__version__ = '2014.01.03'  | 
