diff options
32 files changed, 465 insertions, 154 deletions
| @@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.                                 empty string (--proxy "") for direct connection      --no-check-certificate     Suppress HTTPS certificate validation.      --cache-dir DIR            Location in the filesystem where youtube-dl can -                               store downloaded information permanently. By +                               store some downloaded information permanently. By                                 default $XDG_CACHE_HOME/youtube-dl or ~/.cache -                               /youtube-dl . +                               /youtube-dl . At the moment, only YouTube player +                               files (for videos with obfuscated signatures) are +                               cached, but that may change.      --no-cache-dir             Disable filesystem caching      --bidi-workaround          Work around terminals that lack bidirectional                                 text support. Requires bidiv or fribidi @@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services  ###  Is anyone going to need the feature?  Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. + +###  Is your question about youtube-dl? + +It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. diff --git a/test/test_playlists.py b/test/test_playlists.py index 1b7b4e3d8..9d522b357 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,8 @@ from youtube_dl.extractor import (      BandcampAlbumIE,      SmotriCommunityIE,      SmotriUserIE, -    IviCompilationIE +    IviCompilationIE, +    ImdbListIE,  ) @@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['id'], u'dezhurnyi_angel/season2')          self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')          self.assertTrue(len(result['entries']) >= 20) +         +    def test_imdb_list(self): +        dl = FakeYDL() +        ie = ImdbListIE(dl) +        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], u'sMjedvGDd8U') +        self.assertEqual(result['title'], u'Animated and Family Films') +        self.assertTrue(len(result['entries']) >= 48)  if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index e5778cd83..bee355ee0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -18,6 +18,7 @@ from youtube_dl.utils import (      find_xpath_attr,      get_meta_content,      orderedSet, +    parse_duration,      sanitize_filename,      shell_quote,      smuggle_url, @@ -192,5 +193,12 @@ class TestUtil(unittest.TestCase):              url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),              u'trailer.mp4') +    def test_parse_duration(self): +        self.assertEqual(parse_duration(None), None) +        self.assertEqual(parse_duration('1'), 1) +        self.assertEqual(parse_duration('1337:12'), 80232) +        self.assertEqual(parse_duration('9:12:43'), 33163) +        self.assertEqual(parse_duration('x:y'), None) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 69aedf87a..097e1a9e4 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -10,6 +10,7 @@ from .utils import (      PostProcessingError,      shell_quote,      subtitles_filename, +    prepend_extension,  ) @@ -496,13 +497,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):              return True, info          filename = info['filepath'] -        ext = os.path.splitext(filename)[1][1:] -        temp_filename = filename + u'.temp' +        temp_filename = prepend_extension(filename, 'temp')          options = ['-c', 'copy']          for (name, value) in metadata.items():              options.extend(['-metadata', '%s=%s' % (name, value)]) -        options.extend(['-f', ext])          self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)          self.run_ffmpeg(filename, temp_filename, options) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a9a3639d7..08037deda 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -148,6 +148,7 @@ class YoutubeDL(object):      socket_timeout:    Time to wait for unresponsive hosts, in seconds      bidi_workaround:   Work around buggy terminals without bidirectional text                         support, using fridibi +    debug_printtraffic:Print out sent and received HTTP traffic      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -164,6 +165,8 @@ class YoutubeDL(object):      def __init__(self, params=None):          """Create a FileDownloader object with the given options.""" +        if params is None: +            params = {}          self._ies = []          self._ies_instances = {}          self._pps = [] @@ -172,7 +175,7 @@ class YoutubeDL(object):          self._num_downloads = 0          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]          self._err_file = sys.stderr -        self.params = {} if params is None else params +        self.params = params          if params.get('bidi_workaround', False):              try: @@ -1014,7 +1017,7 @@ class YoutubeDL(object):      def list_formats(self, info_dict):          def format_note(fdict):              res = u'' -            if f.get('ext') in ['f4f', 'f4m']: +            if fdict.get('ext') in ['f4f', 'f4m']:                  res += u'(unsupported) '              if fdict.get('format_note') is not None:                  res += fdict['format_note'] + u' ' @@ -1124,10 +1127,13 @@ class YoutubeDL(object):              if 'http' in proxies and 'https' not in proxies:                  proxies['https'] = proxies['http']          proxy_handler = compat_urllib_request.ProxyHandler(proxies) + +        debuglevel = 1 if self.params.get('debug_printtraffic') else 0          https_handler = make_HTTPS_handler( -            self.params.get('nocheckcertificate', False)) +            self.params.get('nocheckcertificate', False), debuglevel=debuglevel) +        ydlh = YoutubeDLHandler(debuglevel=debuglevel)          opener = compat_urllib_request.build_opener( -            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) +            https_handler, proxy_handler, cookie_processor, ydlh)          # Delete the default user-agent header, which would otherwise apply in          # cases where our custom HTTP handler doesn't come into play          # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c37d28c59..b29cf6758 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -44,6 +44,7 @@ __license__ = 'Public Domain'  import codecs  import getpass +import locale  import optparse  import os  import random @@ -185,7 +186,7 @@ def parseOpts(overrideArguments=None):      general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')      general.add_option(          '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', -        help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') +        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')      general.add_option(          '--no-cache-dir', action='store_const', const=None, dest='cachedir',          help='Disable filesystem caching') @@ -333,7 +334,9 @@ def parseOpts(overrideArguments=None):      verbosity.add_option('--youtube-print-sig-code',              action='store_true', dest='youtube_print_sig_code', default=False,              help=optparse.SUPPRESS_HELP) - +    verbosity.add_option('--print-traffic', +            dest='debug_printtraffic', action='store_true', default=False, +            help=optparse.SUPPRESS_HELP)      filesystem.add_option('-t', '--title',              action='store_true', dest='usetitle', help='use title in file name (default)', default=False) @@ -473,6 +476,8 @@ def parseOpts(overrideArguments=None):              write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')              write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')              write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') +            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' % +                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))      return parser, opts, args @@ -693,6 +698,7 @@ def _real_main(argv=None):          'proxy': opts.proxy,          'socket_timeout': opts.socket_timeout,          'bidi_workaround': opts.bidi_workaround, +        'debug_printtraffic': opts.debug_printtraffic,      }      with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 14b88efd3..8407727ba 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -133,7 +133,7 @@ class HttpFD(FileDownloader):                      return False              try:                  stream.write(data_block) -            except (IOError, OSError): +            except (IOError, OSError) as err:                  self.to_stderr(u"\n")                  self.report_error(u'unable to write data: %s' % str(err))                  return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2caa078b5..21d564dba 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -28,6 +28,7 @@ from .channel9 import Channel9IE  from .cinemassacre import CinemassacreIE  from .clipfish import ClipfishIE  from .clipsyndicate import ClipsyndicateIE +from .cmt import CMTIE  from .cnn import CNNIE  from .collegehumor import CollegeHumorIE  from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE @@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE  from .howcast import HowcastIE  from .hypem import HypemIE  from .ign import IGNIE, OneUPIE -from .imdb import ImdbIE +from .imdb import ( +    ImdbIE, +    ImdbListIE +)  from .ina import InaIE  from .infoq import InfoQIE  from .instagram import InstagramIE @@ -91,6 +95,7 @@ from .ivi import (  from .jeuxvideo import JeuxVideoIE  from .jukebox import JukeboxIE  from .justintv import JustinTVIE +from .jpopsukitv import JpopsukiIE  from .kankan import KankanIE  from .keezmovies import KeezMoviesIE  from .kickstarter import KickStarterIE @@ -101,6 +106,7 @@ from .lynda import (      LyndaIE,      LyndaCourseIE  ) +from .macgamestore import MacGameStoreIE  from .mdr import MDRIE  from .metacafe import MetacafeIE  from .metacritic import MetacriticIE diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 3a32c14c5..15aee2786 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -10,14 +10,14 @@ from ..utils import (  class BandcampIE(InfoExtractor): -    IE_NAME = u'Bandcamp'      _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'      _TESTS = [{          u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',          u'file': u'1812978515.mp3', -        u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', +        u'md5': u'c557841d5e50261777a6585648adf439',          u'info_dict': { -            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" +            u"title": u"youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", +            u"duration": 10,          },          u'skip': u'There is a limit of 200 free downloads / month for the test song'      }] @@ -30,29 +30,42 @@ class BandcampIE(InfoExtractor):          m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)          if m_download is None:              m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) -        if m_trackinfo: -            json_code = m_trackinfo.group(1) -            data = json.loads(json_code) +            if m_trackinfo: +                json_code = m_trackinfo.group(1) +                data = json.loads(json_code) +                d = data[0] + +                duration = int(round(d['duration'])) +                formats = [] +                for format_id, format_url in d['file'].items(): +                    ext, _, abr_str = format_id.partition('-') + +                    formats.append({ +                        'format_id': format_id, +                        'url': format_url, +                        'ext': format_id.partition('-')[0], +                        'vcodec': 'none', +                        'acodec': format_id.partition('-')[0], +                        'abr': int(format_id.partition('-')[2]), +                    }) + +                self._sort_formats(formats) -            for d in data: -                formats = [{ -                    'format_id': 'format_id', -                    'url': format_url, -                    'ext': format_id.partition('-')[0] -                } for format_id, format_url in sorted(d['file'].items())]                  return {                      'id': compat_str(d['id']),                      'title': d['title'],                      'formats': formats, +                    'duration': duration,                  } -        else: -            raise ExtractorError(u'No free songs found') +            else: +                raise ExtractorError(u'No free songs found')          download_link = m_download.group(1) -        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',  -                       webpage, re.MULTILINE|re.DOTALL).group('id') +        video_id = re.search( +            r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', +            webpage, re.MULTILINE | re.DOTALL).group('id') -        download_webpage = self._download_webpage(download_link, id, +        download_webpage = self._download_webpage(download_link, video_id,                                                    'Downloading free downloads page')          # We get the dictionary of the track from some javascrip code          info = re.search(r'items: (.*?),$', @@ -66,21 +79,21 @@ class BandcampIE(InfoExtractor):          m_url = re.match(re_url, initial_url)          #We build the url we will use to get the final track url          # This url is build in Bandcamp in the script download_bunde_*.js -        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts')) +        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))          final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')          # If we could correctly generate the .rand field the url would be          #in the "download_url" key          final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) -        track_info = {'id':id, -                      'title' : info[u'title'], -                      'ext' :   'mp3', -                      'url' :   final_url, -                      'thumbnail' : info[u'thumb_url'], -                      'uploader' :  info[u'artist'] -                      } - -        return [track_info] +        return { +            'id': video_id, +            'title': info[u'title'], +            'ext': 'mp3', +            'vcodec': 'none', +            'url': final_url, +            'thumbnail': info[u'thumb_url'], +            'uploader': info[u'artist'], +        }  class BandcampAlbumIE(InfoExtractor): @@ -117,7 +130,7 @@ class BandcampAlbumIE(InfoExtractor):          webpage = self._download_webpage(url, title)          tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)          if not tracks_paths: -            raise ExtractorError(u'The page doesn\'t contain any track') +            raise ExtractorError(u'The page doesn\'t contain any tracks')          entries = [              self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())              for t_path in tracks_paths] diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 144ce64cc..0229840a3 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -61,9 +61,10 @@ class BlinkxIE(InfoExtractor):              elif m['type'] in ('flv', 'mp4'):                  vcodec = remove_start(m['vcodec'], 'ff')                  acodec = remove_start(m['acodec'], 'ff') +                tbr = (int(m['vbr']) + int(m['abr'])) // 1000                  format_id = (u'%s-%sk-%s' %                               (vcodec, -                              (int(m['vbr']) + int(m['abr'])) // 1000, +                              tbr,                                m['w']))                  formats.append({                      'format_id': format_id, @@ -72,10 +73,12 @@ class BlinkxIE(InfoExtractor):                      'acodec': acodec,                      'abr': int(m['abr']) // 1000,                      'vbr': int(m['vbr']) // 1000, +                    'tbr': tbr,                      'width': int(m['w']),                      'height': int(m['h']),                  }) -        formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr'])) + +        self._sort_formats(formats)          return {              'id': display_id, diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index ae70ea229..574881b70 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -76,14 +76,18 @@ class Channel9IE(InfoExtractor):              </div>)?                                                # File size part may be missing          '''          # Extract known formats -        formats = [{'url': x.group('url'), -                 'format_id': x.group('quality'), -                 'format_note': x.group('note'), -                 'format': '%s (%s)' % (x.group('quality'), x.group('note')),  -                 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate -                 } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] -        # Sort according to known formats list -        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) +        formats = [{ +            'url': x.group('url'), +            'format_id': x.group('quality'), +            'format_note': x.group('note'), +            'format': u'%s (%s)' % (x.group('quality'), x.group('note')), +            'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate +            'preference': self._known_formats.index(x.group('quality')), +            'vcodec': 'none' if x.group('note') == 'Audio only' else None, +        } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] + +        self._sort_formats(formats) +          return formats      def _extract_title(self, html): diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py new file mode 100644 index 000000000..88e0e9aba --- /dev/null +++ b/youtube_dl/extractor/cmt.py @@ -0,0 +1,19 @@ +from .mtv import MTVIE + +class CMTIE(MTVIE): +    IE_NAME = u'cmt.com' +    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml' +    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + +    _TESTS = [ +        { +            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', +            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', +            u'info_dict': { +                u'id': u'989124', +                u'ext': u'mp4', +                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', +                u'description': u'Blame It All On My Roots', +            }, +        }, +    ] diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index a034bb2fb..ecac5e0e9 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -1,7 +1,10 @@  import re  from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import ( +    int_or_none, +    parse_duration, +)  class CNNIE(InfoExtractor): @@ -15,6 +18,8 @@ class CNNIE(InfoExtractor):          u'info_dict': {              u'title': u'Nadal wins 8th French Open title',              u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', +            u'duration': 135, +            u'upload_date': u'20130609',          },      },      { @@ -35,22 +40,58 @@ class CNNIE(InfoExtractor):          info = self._download_xml(info_url, page_title)          formats = [] +        rex = re.compile(r'''(?x) +            (?P<width>[0-9]+)x(?P<height>[0-9]+) +            (?:_(?P<bitrate>[0-9]+)k)? +        ''')          for f in info.findall('files/file'): -            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate']) -            if mf is not None: -                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text)) -        formats = sorted(formats) -        (_,_,_, video_path) = formats[-1] -        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path +            video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) +            fdct = { +                'format_id': f.attrib['bitrate'], +                'url': video_url, +            } + +            mf = rex.match(f.attrib['bitrate']) +            if mf: +                fdct['width'] = int(mf.group('width')) +                fdct['height'] = int(mf.group('height')) +                fdct['tbr'] = int_or_none(mf.group('bitrate')) +            else: +                mf = rex.search(f.text) +                if mf: +                    fdct['width'] = int(mf.group('width')) +                    fdct['height'] = int(mf.group('height')) +                    fdct['tbr'] = int_or_none(mf.group('bitrate')) +                else: +                    mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) +                    if mi: +                        if mi.group(1) == 'audio': +                            fdct['vcodec'] = 'none' +                            fdct['ext'] = 'm4a' +                        else: +                            fdct['tbr'] = int(mi.group(1)) + +            formats.append(fdct) + +        self._sort_formats(formats)          thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])          thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] -        return {'id': info.attrib['id'], -                'title': info.find('headline').text, -                'url': video_url, -                'ext': determine_ext(video_url), -                'thumbnail': thumbnails[-1][1], -                'thumbnails': thumbs_dict, -                'description': info.find('description').text, -                } +        metas_el = info.find('metas') +        upload_date = ( +            metas_el.attrib.get('version') if metas_el is not None else None) + +        duration_el = info.find('length') +        duration = parse_duration(duration_el.text) + +        return { +            'id': info.attrib['id'], +            'title': info.find('headline').text, +            'formats': formats, +            'thumbnail': thumbnails[-1][1], +            'thumbnails': thumbs_dict, +            'description': info.find('description').text, +            'duration': duration, +            'upload_date': upload_date, +        } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index a54ce3ee7..27bd8256e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,7 +12,9 @@ from ..utils import (  class ComedyCentralIE(MTVServicesInfoExtractor): -    _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' +    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/ +        (video-clips|episodes|cc-studios|video-collections) +        /(?P<title>.*)'''      _FEED_URL = u'http://comedycentral.com/feeds/mrss/'      _TEST = { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6fa60622e..f498bcf6f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -51,7 +51,8 @@ class InfoExtractor(object):                                   Calculated from the format_id, width, height.                                   and format_note fields if missing.                      * format_id  A short description of the format -                                 ("mp4_h264_opus" or "19") +                                 ("mp4_h264_opus" or "19"). +                                Technically optional, but strongly recommended.                      * format_note Additional info about the format                                   ("3D" or "DASH video")                      * width      Width of the video, if known @@ -68,7 +69,8 @@ class InfoExtractor(object):                                   download, lower-case.                                   "http", "https", "rtsp", "rtmp" or so.                      * preference Order number of this format. If this field is -                                 present, the formats get sorted by this field. +                                 present and not None, the formats get sorted +                                 by this field.                                   -1 for default (order by other properties),                                   -2 or smaller for less than default.      url:            Final video URL. @@ -376,7 +378,7 @@ class InfoExtractor(object):      @staticmethod      def _og_regexes(prop):          content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' -        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop) +        property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)          template = r'<meta[^>]+?%s[^>]+?%s'          return [              template % (property_re, content_re), diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 416e25156..0b11d1f10 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -10,11 +10,11 @@ from ..utils import (  class DreiSatIE(InfoExtractor):      IE_NAME = '3sat' -    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' +    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'      _TEST = {          u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", -        u'file': u'36983.webm', -        u'md5': u'57c97d0469d71cf874f6815aa2b7c944', +        u'file': u'36983.mp4', +        u'md5': u'9dcfe344732808dbfcc901537973c922',          u'info_dict': {              u"title": u"Kaffeeland Schweiz",              u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",  diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7a14c98f9..377ae91c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):              return self.url_result('http://' + url)          video_id = os.path.splitext(url.split('/')[-1])[0] +        self.to_screen(u'%s: Requesting header' % video_id) +          try:              response = self._send_head(url) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index e5332cce8..16926b4d3 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):              'description': descr,              'thumbnail': format_info['slate'],          } + +class ImdbListIE(InfoExtractor): +    IE_NAME = u'imdb:list' +    IE_DESC = u'Internet Movie Database lists' +    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' +     +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        list_id = mobj.group('id') +         +        # RSS XML is sometimes malformed +        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS') +        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title') +         +        # Export is independent of actual author_id, but returns 404 if no author_id is provided. +        # However, passing dummy author_id seems to be enough. +        csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id, +                                     list_id, u'Downloading list CSV') +         +        entries = [] +        for item in csv.split('\n')[1:]: +            cols = item.split(',') +            if len(cols) < 2: +                continue +            item_id = cols[1][1:-1] +            if item_id.startswith('vi'): +                entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')) +         +        return self.playlist_result(entries, list_id, list_title)
\ No newline at end of file diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 16a6f73c8..4ddda2f1b 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -5,7 +5,6 @@ from ..utils import (      compat_urlparse,      compat_urllib_parse,      xpath_with_ns, -    determine_ext,  ) @@ -63,13 +62,17 @@ class InternetVideoArchiveIE(InfoExtractor):          for content in item.findall(_bp('media:group/media:content')):              attr = content.attrib              f_url = attr['url'] +            width = int(attr['width']) +            bitrate = int(attr['bitrate']) +            format_id = '%d-%dk' % (width, bitrate)              formats.append({ +                'format_id': format_id,                  'url': f_url, -                'ext': determine_ext(f_url), -                'width': int(attr['width']), -                'bitrate': int(attr['bitrate']), +                'width': width, +                'tbr': bitrate,              }) -        formats = sorted(formats, key=lambda f: f['bitrate']) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 4bdf55f93..98d1d272a 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -84,14 +84,16 @@ class IviIE(InfoExtractor):          result = video_json[u'result'] -        formats = [{'url': x[u'url'], -                    'format_id': x[u'content_format'] -                    } for x in result[u'files'] if x[u'content_format'] in self._known_formats] -        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) - -        if len(formats) == 0: -            self._downloader.report_warning(u'No media links available for %s' % video_id) -            return +        formats = [{ +            'url': x[u'url'], +            'format_id': x[u'content_format'], +            'preference': self._known_formats.index(x[u'content_format']), +        } for x in result[u'files'] if x[u'content_format'] in self._known_formats] + +        self._sort_formats(formats) + +        if not formats: +            raise ExtractorError(u'No media links available for %s' % video_id)          duration = result[u'duration']          compilation = result[u'compilation'] diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py new file mode 100644 index 000000000..aad782578 --- /dev/null +++ b/youtube_dl/extractor/jpopsukitv.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    unified_strdate, +) + + +class JpopsukiIE(InfoExtractor): +    IE_NAME = 'jpopsuki.tv' +    _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)' + +    _TEST = { +        'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771', +        'md5': '88018c0c1a9b1387940e90ec9e7e198e', +        'file': '00be659d23b0b40508169cdee4545771.mp4', +        'info_dict': { +            'id': '00be659d23b0b40508169cdee4545771', +            'title': 'ayumi hamasaki - evolution', +            'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution', +            'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg', +            'uploader': 'plama_chan', +            'uploader_id': '404', +            'upload_date': '20121101' +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) + +        video_url = 'http://www.jpopsuki.tv' + self._html_search_regex( +            r'<source src="(.*?)" type', webpage, 'video url') + +        video_title = self._og_search_title(webpage) +        description = self._og_search_description(webpage) +        thumbnail = self._og_search_thumbnail(webpage) +        uploader = self._html_search_regex( +            r'<li>from: <a href="/user/view/user/(.*?)/uid/', +            webpage, 'video uploader', fatal=False) +        uploader_id = self._html_search_regex( +            r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)', +            webpage, 'video uploader_id', fatal=False) +        upload_date = self._html_search_regex( +            r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date', +            fatal=False) +        if upload_date is not None: +            upload_date = unified_strdate(upload_date) +        view_count_str = self._html_search_regex( +            r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count', +            fatal=False) +        comment_count_str = self._html_search_regex( +            r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count', +            fatal=False) + +        return { +            'id': video_id, +            'url': video_url, +            'title': video_title, +            'description': description, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'upload_date': upload_date, +            'view_count': int_or_none(view_count_str), +            'comment_count': int_or_none(comment_count_str), +        } diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index dd59aa3e6..592ed747a 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  import json @@ -6,17 +8,17 @@ from ..utils import ExtractorError  class LyndaIE(InfoExtractor): -    IE_NAME = u'lynda' -    IE_DESC = u'lynda.com videos' +    IE_NAME = 'lynda' +    IE_DESC = 'lynda.com videos'      _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'      _TEST = { -        u'url': u'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', -        u'file': u'114408.mp4', -        u'md5': u'ecfc6862da89489161fb9cd5f5a6fac1', +        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', +        'file': '114408.mp4', +        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',          u"info_dict": { -            u'title': u'Using the exercise files', -            u'duration': 68 +            'title': 'Using the exercise files', +            'duration': 68          }      } @@ -25,26 +27,26 @@ class LyndaIE(InfoExtractor):          video_id = mobj.group(1)          page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, -                                      video_id, u'Downloading video JSON') +                                      video_id, 'Downloading video JSON')          video_json = json.loads(page) -        if u'Status' in video_json and video_json[u'Status'] == u'NotFound': -            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) +        if 'Status' in video_json and video_json['Status'] == 'NotFound': +            raise ExtractorError('Video %s does not exist' % video_id, expected=True) -        if video_json[u'HasAccess'] is False: -            raise ExtractorError(u'Video %s is only available for members' % video_id, expected=True) +        if video_json['HasAccess'] is False: +            raise ExtractorError('Video %s is only available for members' % video_id, expected=True) -        video_id = video_json[u'ID'] -        duration = video_json[u'DurationInSeconds'] -        title = video_json[u'Title'] +        video_id = video_json['ID'] +        duration = video_json['DurationInSeconds'] +        title = video_json['Title'] -        formats = [{'url': fmt[u'Url'], -                    'ext': fmt[u'Extension'], -                    'width': fmt[u'Width'], -                    'height': fmt[u'Height'], -                    'filesize': fmt[u'FileSize'], -                    'format_id': fmt[u'Resolution'] -                    } for fmt in video_json[u'Formats']] +        formats = [{'url': fmt['Url'], +                    'ext': fmt['Extension'], +                    'width': fmt['Width'], +                    'height': fmt['Height'], +                    'filesize': fmt['FileSize'], +                    'format_id': fmt['Resolution'] +                    } for fmt in video_json['Formats']]          self._sort_formats(formats) @@ -57,8 +59,8 @@ class LyndaIE(InfoExtractor):  class LyndaCourseIE(InfoExtractor): -    IE_NAME = u'lynda:course' -    IE_DESC = u'lynda.com online courses' +    IE_NAME = 'lynda:course' +    IE_DESC = 'lynda.com online courses'      # Course link equals to welcome/introduction video link of same course      # We will recognize it as course link @@ -70,27 +72,31 @@ class LyndaCourseIE(InfoExtractor):          course_id = mobj.group('courseid')          page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, -                                      course_id, u'Downloading course JSON') +                                      course_id, 'Downloading course JSON')          course_json = json.loads(page) -        if u'Status' in course_json and course_json[u'Status'] == u'NotFound': -            raise ExtractorError(u'Course %s does not exist' % course_id, expected=True) +        if 'Status' in course_json and course_json['Status'] == 'NotFound': +            raise ExtractorError('Course %s does not exist' % course_id, expected=True)          unaccessible_videos = 0          videos = [] -        for chapter in course_json[u'Chapters']: -            for video in chapter[u'Videos']: -                if video[u'HasAccess'] is not True: +        for chapter in course_json['Chapters']: +            for video in chapter['Videos']: +                if video['HasAccess'] is not True:                      unaccessible_videos += 1                      continue -                videos.append(video[u'ID']) +                videos.append(video['ID'])          if unaccessible_videos > 0: -            self._downloader.report_warning(u'%s videos are only available for members and will not be downloaded' % unaccessible_videos) +            self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % unaccessible_videos) -        entries = [self.url_result('http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), 'Lynda') for video_id in videos] +        entries = [ +            self.url_result('http://www.lynda.com/%s/%s-4.html' % +                            (course_path, video_id), +                            'Lynda') +            for video_id in videos] -        course_title = course_json[u'Title'] +        course_title = course_json['Title'] -        return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file +        return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py new file mode 100644 index 000000000..b818cf50c --- /dev/null +++ b/youtube_dl/extractor/macgamestore.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class MacGameStoreIE(InfoExtractor): +    IE_NAME = 'macgamestore' +    IE_DESC = 'MacGameStore trailers' +    _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' + +    _TEST = { +        'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', +        'file': '2450.m4v', +        'md5': '8649b8ea684b6666b4c5be736ecddc61', +        'info_dict': { +            'title': 'Crow', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id, 'Downloading trailer page') + +        if re.search(r'>Missing Media<', webpage) is not None: +            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True) + +        video_title = self._html_search_regex( +            r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title') + +        video_url = self._html_search_regex( +            r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>', +            webpage, 'video URL') + +        return { +            'id': video_id, +            'url': video_url, +            'title': video_title +        } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 125d81551..7c54ea0f4 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):          info = json.loads(json_data)          preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') -        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') +        song_url = preview_url.replace('/previews/', '/c/originals/')          template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)          final_song_url = self._get_url(template_url)          if final_song_url is None: diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ed11f521a..f1cf41e2d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('videoid') -        uri = mobj.group('mgid') +        uri = mobj.groupdict().get('mgid')          if uri is None:              webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e22ff9c38..951e977bd 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):                              (?!sets/)(?P<title>[\w\d-]+)/?                              (?P<token>[^?]+?)?(?:[?].*)?$)                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) -                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*) +                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)                      )                      '''      IE_NAME = u'soundcloud' @@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):          if track_id is not None:              info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID              full_title = track_id -        elif mobj.group('widget'): +        elif mobj.group('player'):              query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)              return self.url_result(query['url'][0], ie='Soundcloud')          else: diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index cec65261b..23172143e 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -55,15 +55,21 @@ class ThePlatformIE(InfoExtractor):          formats = []          for f in switch.findall(_x('smil:video')):              attr = f.attrib +            width = int(attr['width']) +            height = int(attr['height']) +            vbr = int(attr['system-bitrate']) // 1000 +            format_id = '%dx%d_%dk' % (width, height, vbr)              formats.append({ +                'format_id': format_id,                  'url': base_url,                  'play_path': 'mp4:' + attr['src'],                  'ext': 'flv', -                'width': int(attr['width']), -                'height': int(attr['height']), -                'vbr': int(attr['system-bitrate']), +                'width': width, +                'height': height, +                'vbr': vbr,              }) -        formats.sort(key=lambda f: (f['height'], f['width'], f['vbr'])) + +        self._sort_formats(formats)          return {              'id': video_id, diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 584550455..bc31c2e64 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor):                  'height': a['height'],                  'filesize': a['size'],                  'ext': a['ext'], +                'preference': 1 if atype == 'original' else None,              })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index bd0f2cae0..77ad423c4 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -1,5 +1,4 @@  import json -import os  import re  import sys @@ -16,6 +15,7 @@ from ..aes import (      aes_decrypt_text  ) +  class YouPornIE(InfoExtractor):      _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'      _TEST = { @@ -23,9 +23,9 @@ class YouPornIE(InfoExtractor):          u'file': u'505835.mp4',          u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',          u'info_dict': { -            u"upload_date": u"20101221",  -            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",  -            u"uploader": u"Ask Dan And Jennifer",  +            u"upload_date": u"20101221", +            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", +            u"uploader": u"Ask Dan And Jennifer",              u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",              u"age_limit": 18,          } @@ -71,38 +71,36 @@ class YouPornIE(InfoExtractor):              link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')              links.append(link) -        if not links: -            raise ExtractorError(u'ERROR: no known formats available for video') -          formats = []          for link in links: -              # A link looks like this:              # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0              # A path looks like this:              # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4              video_url = unescapeHTML(link)              path = compat_urllib_parse_urlparse(video_url).path -            extension = os.path.splitext(path)[1][1:] -            format = path.split('/')[4].split('_')[:2] +            format_parts = path.split('/')[4].split('_')[:2] -            # size = format[0] -            # bitrate = format[1] -            format = "-".join(format) -            # title = u'%s-%s-%s' % (video_title, size, bitrate) +            dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0] + +            resolution = format_parts[0] +            height = int(resolution[:-len('p')]) +            bitrate = int(format_parts[1][:-len('k')]) +            format = u'-'.join(format_parts) + u'-' + dn              formats.append({                  'url': video_url, -                'ext': extension,                  'format': format,                  'format_id': format, +                'height': height, +                'tbr': bitrate, +                'resolution': resolution,              }) -        # Sort and remove doubles -        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) -        for i in range(len(formats)-1,0,-1): -            if formats[i]['format_id'] == formats[i-1]['format_id']: -                del formats[i] +        self._sort_formats(formats) + +        if not formats: +            raise ExtractorError(u'ERROR: no known formats available for video')          return {              'id': video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b0e29c2a8..9424d5e26 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},          '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},          '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, +        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},          # Dash mp4 audio          '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4c8bdbb0c..fc10fba63 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -539,7 +539,8 @@ def formatSeconds(secs):      else:          return '%d' % secs -def make_HTTPS_handler(opts_no_check_certificate): + +def make_HTTPS_handler(opts_no_check_certificate, **kwargs):      if sys.version_info < (3, 2):          import httplib @@ -560,7 +561,7 @@ def make_HTTPS_handler(opts_no_check_certificate):          class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):              def https_open(self, req):                  return self.do_open(HTTPSConnectionV3, req) -        return HTTPSHandlerV3() +        return HTTPSHandlerV3(**kwargs)      else:          context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)          context.verify_mode = (ssl.CERT_NONE @@ -571,7 +572,7 @@ def make_HTTPS_handler(opts_no_check_certificate):              context.load_default_certs()          except AttributeError:              pass  # Python < 3.4 -        return compat_urllib_request.HTTPSHandler(context=context) +        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)  class ExtractorError(Exception):      """Error during info extraction.""" @@ -1102,3 +1103,24 @@ class HEADRequest(compat_urllib_request.Request):  def int_or_none(v):      return v if v is None else int(v) + + +def parse_duration(s): +    if s is None: +        return None + +    m = re.match( +        r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s) +    if not m: +        return None +    res = int(m.group('secs')) +    if m.group('mins'): +        res += int(m.group('mins')) * 60 +        if m.group('hours'): +            res += int(m.group('hours')) * 60 * 60 +    return res + + +def prepend_extension(filename, ext): +    name, real_ext = os.path.splitext(filename)  +    return u'{0}.{1}{2}'.format(name, ext, real_ext) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b3d015634..bf5fc8212 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.23.4' +__version__ = '2014.01.03' | 
