diff options
49 files changed, 1482 insertions, 443 deletions
| @@ -89,3 +89,6 @@ Oskar Jauch  Matthew Rayfield  t0mm0  Tithen-Firion +Zack Fernandes +cryptonaut +Adrian Kretz @@ -1,7 +1,7 @@  all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish  clean: -	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part +	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json  cleanall: clean  	rm -f youtube-dl youtube-dl.exe @@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like.                                       this is not possible instead of searching.      --ignore-config                  Do not read configuration files. When given                                       in the global configuration file /etc -                                     /youtube-dl.conf: do not read the user -                                     configuration in ~/.config/youtube-dl.conf -                                     (%APPDATA%/youtube-dl/config.txt on -                                     Windows) +                                     /youtube-dl.conf: Do not read the user +                                     configuration in ~/.config/youtube- +                                     dl/config (%APPDATA%/youtube-dl/config.txt +                                     on Windows)      --flat-playlist                  Do not extract the videos of a playlist,                                       only list them. diff --git a/test/test_utils.py b/test/test_utils.py index baa3a2156..d42df6d96 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -48,6 +48,7 @@ from youtube_dl.utils import (      intlist_to_bytes,      args_to_str,      parse_filesize, +    version_tuple,  ) @@ -143,6 +144,9 @@ class TestUtil(unittest.TestCase):          self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')          self.assertEqual(unified_strdate('1968-12-10'), '19681210')          self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') +        self.assertEqual( +            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), +            '20141126')      def test_find_xpath_attr(self):          testxml = '''<root> @@ -220,6 +224,9 @@ class TestUtil(unittest.TestCase):          self.assertEqual(parse_duration('0s'), 0)          self.assertEqual(parse_duration('01:02:03.05'), 3723.05)          self.assertEqual(parse_duration('T30M38S'), 1838) +        self.assertEqual(parse_duration('5 s'), 5) +        self.assertEqual(parse_duration('3 min'), 180) +        self.assertEqual(parse_duration('2.5 hours'), 9000)      def test_fix_xml_ampersands(self):          self.assertEqual( @@ -376,6 +383,12 @@ class TestUtil(unittest.TestCase):          self.assertEqual(parse_filesize('2 MiB'), 2097152)          self.assertEqual(parse_filesize('5 GB'), 5000000000)          self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) +        self.assertEqual(parse_filesize('1,24 KB'), 1240) + +    def test_version_tuple(self): +        self.assertEqual(version_tuple('1'), (1,)) +        self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) +        self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f89ac4e1d..31531855e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -7,6 +7,7 @@ import collections  import datetime  import errno  import io +import itertools  import json  import locale  import os @@ -621,23 +622,17 @@ class YoutubeDL(object):                  ie_result['url'], ie_key=ie_result.get('ie_key'),                  extra_info=extra_info, download=False, process=False) -            def make_result(embedded_info): -                new_result = ie_result.copy() -                for f in ('_type', 'url', 'ext', 'player_url', 'formats', -                          'entries', 'ie_key', 'duration', -                          'subtitles', 'annotations', 'format', -                          'thumbnail', 'thumbnails'): -                    if f in new_result: -                        del new_result[f] -                    if f in embedded_info: -                        new_result[f] = embedded_info[f] -                return new_result -            new_result = make_result(info) +            new_result = ie_result.copy() +            for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats', +                      'entries', 'ie_key', 'duration', +                      'subtitles', 'annotations', 'format', +                      'thumbnail', 'thumbnails'): +                if f in new_result: +                    del new_result[f] +                if f in info: +                    new_result[f] = info[f]              assert new_result.get('_type') != 'url_transparent' -            if new_result.get('_type') == 'compat_list': -                new_result['entries'] = [ -                    make_result(e) for e in new_result['entries']]              return self.process_ie_result(                  new_result, download=download, extra_info=extra_info) @@ -654,21 +649,28 @@ class YoutubeDL(object):              if playlistend == -1:                  playlistend = None -            if isinstance(ie_result['entries'], list): -                n_all_entries = len(ie_result['entries']) -                entries = ie_result['entries'][playliststart:playlistend] +            ie_entries = ie_result['entries'] +            if isinstance(ie_entries, list): +                n_all_entries = len(ie_entries) +                entries = ie_entries[playliststart:playlistend]                  n_entries = len(entries)                  self.to_screen(                      "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %                      (ie_result['extractor'], playlist, n_all_entries, n_entries)) -            else: -                assert isinstance(ie_result['entries'], PagedList) -                entries = ie_result['entries'].getslice( +            elif isinstance(ie_entries, PagedList): +                entries = ie_entries.getslice(                      playliststart, playlistend)                  n_entries = len(entries)                  self.to_screen(                      "[%s] playlist %s: Downloading %d videos" %                      (ie_result['extractor'], playlist, n_entries)) +            else:  # iterable +                entries = list(itertools.islice( +                    ie_entries, playliststart, playlistend)) +                n_entries = len(entries) +                self.to_screen( +                    "[%s] playlist %s: Downloading %d videos" % +                    (ie_result['extractor'], playlist, n_entries))              for i, entry in enumerate(entries, 1):                  self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) @@ -934,8 +936,12 @@ class YoutubeDL(object):          if self.params.get('forceid', False):              self.to_stdout(info_dict['id'])          if self.params.get('forceurl', False): -            # For RTMP URLs, also include the playpath -            self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) +            if info_dict.get('requested_formats') is not None: +                for f in info_dict['requested_formats']: +                    self.to_stdout(f['url'] + f.get('play_path', '')) +            else: +                # For RTMP URLs, also include the playpath +                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))          if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:              self.to_stdout(info_dict['thumbnail'])          if self.params.get('forcedescription', False) and info_dict.get('description') is not None: diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 27596687d..cd46693b3 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,8 +1,10 @@  from __future__ import unicode_literals +import ctypes  import getpass  import optparse  import os +import platform  import re  import subprocess  import sys @@ -247,7 +249,7 @@ else:                  userhome = compat_getenv('HOME')              elif 'USERPROFILE' in os.environ:                  userhome = compat_getenv('USERPROFILE') -            elif not 'HOMEPATH' in os.environ: +            elif 'HOMEPATH' not in os.environ:                  return path              else:                  try: @@ -326,6 +328,22 @@ def workaround_optparse_bug9161():          optparse.OptionGroup.add_option = _compat_add_option +if platform.python_implementation() == 'PyPy': +    # PyPy expects byte strings as Windows function names +    # https://github.com/rg3/youtube-dl/pull/4392 +    def compat_WINFUNCTYPE(*args, **kwargs): +        real = ctypes.WINFUNCTYPE(*args, **kwargs) + +        def resf(tpl, *args, **kwargs): +            funcname, dll = tpl +            return real((str(funcname), dll), *args, **kwargs) + +        return resf +else: +    def compat_WINFUNCTYPE(*args, **kwargs): +        return ctypes.WINFUNCTYPE(*args, **kwargs) + +  __all__ = [      'compat_HTTPError',      'compat_chr', @@ -349,6 +367,7 @@ __all__ = [      'compat_urllib_request',      'compat_urlparse',      'compat_urlretrieve', +    'compat_WINFUNCTYPE',      'compat_xml_parse_error',      'shlex_quote',      'subprocess_check_output', diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 954beffd5..ad26cfa40 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -4,6 +4,7 @@ import os  import re  import subprocess +from ..postprocessor.ffmpeg import FFmpegPostProcessor  from .common import FileDownloader  from ..utils import (      compat_urlparse, @@ -32,6 +33,9 @@ class HlsFD(FileDownloader):              return False          cmd = [program] + args +        ffpp = FFmpegPostProcessor(downloader=self) +        ffpp.check_version() +          retval = subprocess.call(cmd)          if retval == 0:              fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8b513ffd1..746ee69e4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -24,11 +24,13 @@ from .arte import (  )  from .audiomack import AudiomackIE  from .auengine import AUEngineIE +from .azubu import AzubuIE  from .bambuser import BambuserIE, BambuserChannelIE  from .bandcamp import BandcampIE, BandcampAlbumIE  from .bbccouk import BBCCoUkIE  from .beeg import BeegIE  from .behindkink import BehindKinkIE +from .bet import BetIE  from .bild import BildIE  from .bilibili import BiliBiliIE  from .blinkx import BlinkxIE @@ -49,7 +51,7 @@ from .cbsnews import CBSNewsIE  from .ceskatelevize import CeskaTelevizeIE  from .channel9 import Channel9IE  from .chilloutzone import ChilloutzoneIE -from .cinemassacre import CinemassacreIE +from .cinchcast import CinchcastIE  from .clipfish import ClipfishIE  from .cliphunter import CliphunterIE  from .clipsyndicate import ClipsyndicateIE @@ -121,6 +123,8 @@ from .fktv import (  from .flickr import FlickrIE  from .folketinget import FolketingetIE  from .fourtube import FourTubeIE +from .foxgay import FoxgayIE +from .foxnews import FoxNewsIE  from .franceculture import FranceCultureIE  from .franceinter import FranceInterIE  from .francetv import ( @@ -216,6 +220,7 @@ from .mdr import MDRIE  from .metacafe import MetacafeIE  from .metacritic import MetacriticIE  from .mgoon import MgoonIE +from .minhateca import MinhatecaIE  from .ministrygrid import MinistryGridIE  from .mit import TechTVMITIE, MITIE, OCWMITIE  from .mitele import MiTeleIE @@ -245,6 +250,7 @@ from .muzu import MuzuTVIE  from .myspace import MySpaceIE, MySpaceAlbumIE  from .myspass import MySpassIE  from .myvideo import MyVideoIE +from .myvidster import MyVidsterIE  from .naver import NaverIE  from .nba import NBAIE  from .nbc import ( @@ -302,6 +308,7 @@ from .promptfile import PromptFileIE  from .prosiebensat1 import ProSiebenSat1IE  from .pyvideo import PyvideoIE  from .quickvid import QuickVidIE +from .radiode import RadioDeIE  from .radiofrance import RadioFranceIE  from .rai import RaiIE  from .rbmaradio import RBMARadioIE @@ -329,6 +336,7 @@ from .savefrom import SaveFromIE  from .sbs import SBSIE  from .scivee import SciVeeIE  from .screencast import ScreencastIE +from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE  from .servingsys import ServingSysIE  from .sexu import SexuIE  from .sexykarma import SexyKarmaIE @@ -519,7 +527,7 @@ from .youtube import (      YoutubeUserIE,      YoutubeWatchLaterIE,  ) -from .zdf import ZDFIE +from .zdf import ZDFIE, ZDFChannelIE  from .zingmp3 import (      ZingMp3SongIE,      ZingMp3AlbumIE, diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 0d05cbb4b..39e4ca296 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -2,123 +2,147 @@  from __future__ import unicode_literals  import re +import json  from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +)  class AdultSwimIE(InfoExtractor): -    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' -    _TEST = { -        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', +    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' + +    _TESTS = [{ +        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',          'playlist': [              { -                'md5': '4da359ec73b58df4575cd01a610ba5dc', -                'info_dict': { -                    'id': '8a250ba1450996e901453d7f02ca02f5', -                    'ext': 'flv', -                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1', -                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', -                    'uploader': 'Rick and Morty', -                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' -                } -            }, -            { -                'md5': 'ffbdf55af9331c509d95350bd0cc1819', +                'md5': '247572debc75c7652f253c8daa51a14d',                  'info_dict': { -                    'id': '8a250ba1450996e901453d7f4bd102f6', +                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',                      'ext': 'flv', -                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2', -                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', -                    'uploader': 'Rick and Morty', -                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' -                } +                    'title': 'Rick and Morty - Pilot Part 1', +                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " +                },              },              { -                'md5': 'b92409635540304280b4b6c36bd14a0a', +                'md5': '77b0e037a4b20ec6b98671c4c379f48d',                  'info_dict': { -                    'id': '8a250ba1450996e901453d7fa73c02f7', +                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',                      'ext': 'flv', -                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3', -                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', -                    'uploader': 'Rick and Morty', -                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' -                } +                    'title': 'Rick and Morty - Pilot Part 4', +                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " +                },              }, +        ], +        'info_dict': { +            'title': 'Rick and Morty - Pilot', +            'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " +        } +    }, { +        'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/', +        'playlist': [              { -                'md5': 'e8818891d60e47b29cd89d7b0278156d', +                'md5': '2eb5c06d0f9a1539da3718d897f13ec5',                  'info_dict': { -                    'id': '8a250ba1450996e901453d7fc8ba02f8', +                    'id': '-t8CamQlQ2aYZ49ItZCFog-0',                      'ext': 'flv', -                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4', -                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', -                    'uploader': 'Rick and Morty', -                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' -                } +                    'title': 'American Dad - Putting Francine Out of Business', +                    'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' +                },              } -        ] -    } - -    _video_extensions = { -        '3500': 'flv', -        '640': 'mp4', -        '150': 'mp4', -        'ipad': 'm3u8', -        'iphone': 'm3u8' -    } -    _video_dimensions = { -        '3500': (1280, 720), -        '640': (480, 270), -        '150': (320, 180) -    } +        ], +        'info_dict': { +            'title': 'American Dad - Putting Francine Out of Business', +            'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' +        }, +    }] + +    @staticmethod +    def find_video_info(collection, slug): +        for video in collection.get('videos'): +            if video.get('slug') == slug: +                return video + +    @staticmethod +    def find_collection_by_linkURL(collections, linkURL): +        for collection in collections: +            if collection.get('linkURL') == linkURL: +                return collection + +    @staticmethod +    def find_collection_containing_video(collections, slug): +        for collection in collections: +            for video in collection.get('videos'): +                if video.get('slug') == slug: +                    return collection, video      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_path = mobj.group('path') - -        webpage = self._download_webpage(url, video_path) -        episode_id = self._html_search_regex( -            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', -            webpage, 'episode_id') -        title = self._og_search_title(webpage) - -        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id -        idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index') - -        episode_el = idoc.find('.//episode') -        show_title = episode_el.attrib.get('collectionTitle') -        episode_title = episode_el.attrib.get('title') -        thumbnail = episode_el.attrib.get('thumbnailUrl') -        description = episode_el.find('./description').text.strip() +        show_path = mobj.group('show_path') +        episode_path = mobj.group('episode_path') +        is_playlist = True if mobj.group('is_playlist') else False + +        webpage = self._download_webpage(url, episode_path) + +        # Extract the value of `bootstrappedData` from the Javascript in the page. +        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path) + +        try: +            bootstrappedData = json.loads(bootstrappedDataJS) +        except ValueError as ve: +            errmsg = '%s: Failed to parse JSON ' % episode_path +            raise ExtractorError(errmsg, cause=ve) + +        # Downloading videos from a /videos/playlist/ URL needs to be handled differently. +        # NOTE: We are only downloading one video (the current one) not the playlist +        if is_playlist: +            collections = bootstrappedData['playlists']['collections'] +            collection = self.find_collection_by_linkURL(collections, show_path) +            video_info = self.find_video_info(collection, episode_path) + +            show_title = video_info['showTitle'] +            segment_ids = [video_info['videoPlaybackID']] +        else: +            collections = bootstrappedData['show']['collections'] +            collection, video_info = self.find_collection_containing_video(collections, episode_path) + +            show = bootstrappedData['show'] +            show_title = show['title'] +            segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] + +        episode_id = video_info['id'] +        episode_title = video_info['title'] +        episode_description = video_info['description'] +        episode_duration = video_info.get('duration')          entries = [] -        segment_els = episode_el.findall('./segments/segment') +        for part_num, segment_id in enumerate(segment_ids): +            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id -        for part_num, segment_el in enumerate(segment_els): -            segment_id = segment_el.attrib.get('id') -            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1) -            thumbnail = segment_el.attrib.get('thumbnailUrl') -            duration = segment_el.attrib.get('duration') +            segment_title = '%s - %s' % (show_title, episode_title) +            if len(segment_ids) > 1: +                segment_title += ' Part %d' % (part_num + 1) -            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id              idoc = self._download_xml(                  segment_url, segment_title,                  'Downloading segment information', 'Unable to download segment information') +            segment_duration = idoc.find('.//trt').text.strip() +              formats = []              file_els = idoc.findall('.//files/file')              for file_el in file_els:                  bitrate = file_el.attrib.get('bitrate') -                type = file_el.attrib.get('type') -                width, height = self._video_dimensions.get(bitrate, (None, None)) +                ftype = file_el.attrib.get('type') +                  formats.append({ -                    'format_id': '%s-%s' % (bitrate, type), -                    'url': file_el.text, -                    'ext': self._video_extensions.get(bitrate, 'mp4'), +                    'format_id': '%s_%s' % (bitrate, ftype), +                    'url': file_el.text.strip(),                      # The bitrate may not be a number (for example: 'iphone')                      'tbr': int(bitrate) if bitrate.isdigit() else None, -                    'height': height, -                    'width': width +                    'quality': 1 if ftype == 'hd' else -1                  })              self._sort_formats(formats) @@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor):                  'id': segment_id,                  'title': segment_title,                  'formats': formats, -                'uploader': show_title, -                'thumbnail': thumbnail, -                'duration': duration, -                'description': description +                'duration': segment_duration, +                'description': episode_description              })          return {              '_type': 'playlist',              'id': episode_id, -            'display_id': video_path, +            'display_id': episode_path,              'entries': entries, -            'title': '%s %s' % (show_title, episode_title), -            'description': description, -            'thumbnail': thumbnail +            'title': '%s - %s' % (show_title, episode_title), +            'description': episode_description, +            'duration': episode_duration          } diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 04386f7f7..622b20989 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -26,13 +26,13 @@ class AudiomackIE(InfoExtractor):          {              'add_ie': ['Soundcloud'],              'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', -            'file': '172419696.mp3', -            'info_dict': -            { +            'info_dict': { +                'id': '172419696', +                'ext': 'mp3',                  'description': 'md5:1fc3272ed7a635cce5be1568c2822997',                  'title': 'Young Thug ft Lil Wayne - Take Kare', -                'uploader':'Young Thug World', -                'upload_date':'20141016', +                'uploader': 'Young Thug World', +                'upload_date': '20141016',              }          },      ] diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py new file mode 100644 index 000000000..0961d339f --- /dev/null +++ b/youtube_dl/extractor/azubu.py @@ -0,0 +1,93 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import float_or_none + + +class AzubuIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)' +    _TESTS = [ +        { +            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1', +            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4', +            'info_dict': { +                'id': '15575', +                'ext': 'mp4', +                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1', +                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01', +                'thumbnail': 're:^https?://.*\.jpe?g', +                'timestamp': 1417523507.334, +                'upload_date': '20141202', +                'duration': 9988.7, +                'uploader': 'GSL', +                'uploader_id': 414310, +                'view_count': int, +            }, +        }, +        { +            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-', +            'md5': 'b72a871fe1d9f70bd7673769cdb3b925', +            'info_dict': { +                'id': '9344', +                'ext': 'mp4', +                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"', +                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af', +                'thumbnail': 're:^https?://.*\.jpe?g', +                'timestamp': 1410530893.320, +                'upload_date': '20140912', +                'duration': 172.385, +                'uploader': 'FnaticTV', +                'uploader_id': 272749, +                'view_count': int, +            }, +        }, +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        data = self._download_json( +            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data'] + +        title = data['title'].strip() +        description = data['description'] +        thumbnail = data['thumbnail'] +        view_count = data['view_count'] +        uploader = data['user']['username'] +        uploader_id = data['user']['id'] + +        stream_params = json.loads(data['stream_params']) + +        timestamp = float_or_none(stream_params['creationDate'], 1000) +        duration = float_or_none(stream_params['length'], 1000) + +        renditions = stream_params.get('renditions') or [] +        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength') +        if video: +            renditions.append(video) + +        formats = [{ +            'url': fmt['url'], +            'width': fmt['frameWidth'], +            'height': fmt['frameHeight'], +            'vbr': float_or_none(fmt['encodingRate'], 1000), +            'filesize': fmt['size'], +            'vcodec': fmt['videoCodec'], +            'container': fmt['videoContainer'], +        } for fmt in renditions if fmt['url']] +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'timestamp': timestamp, +            'duration': duration, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'view_count': view_count, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index beb6cfc8a..01c02d360 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -1,6 +1,5 @@  from __future__ import unicode_literals -import re  import xml.etree.ElementTree  from .subtitles import SubtitlesInfoExtractor diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index 31fdc0dcc..1bdc25812 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -10,15 +10,15 @@ from ..utils import url_basename  class BehindKinkIE(InfoExtractor):      _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'      _TEST = { -        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', -        'md5': '41ad01222b8442089a55528fec43ec01', +        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', +        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',          'info_dict': { -            'id': '36370', +            'id': '37127',              'ext': 'mp4', -            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!', -            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...', -            'upload_date': '20140814', -            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg', +            'title': 'What are you passionate about – Marley Blaze', +            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', +            'upload_date': '20141205', +            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',              'age_limit': 18,          }      } @@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          display_id = mobj.group('id') -        year = mobj.group('year') -        month = mobj.group('month') -        day = mobj.group('day') -        upload_date = year + month + day          webpage = self._download_webpage(url, display_id)          video_url = self._search_regex( -            r"'file':\s*'([^']+)'", -            webpage, 'URL base') - -        video_id = url_basename(video_url) -        video_id = video_id.split('_')[0] +            r'<source src="([^"]+)"', webpage, 'video URL') +        video_id = url_basename(video_url).split('_')[0] +        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')          return {              'id': video_id, +            'display_id': display_id,              'url': video_url, -            'ext': 'mp4',              'title': self._og_search_title(webpage), -            'display_id': display_id,              'thumbnail': self._og_search_thumbnail(webpage),              'description': self._og_search_description(webpage),              'upload_date': upload_date, diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py new file mode 100644 index 000000000..c1fc433f7 --- /dev/null +++ b/youtube_dl/extractor/bet.py @@ -0,0 +1,108 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse, +    xpath_text, +    xpath_with_ns, +    int_or_none, +    parse_iso8601, +) + + +class BetIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' +    _TESTS = [ +        { +            'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', +            'info_dict': { +                'id': '417cd61c-c793-4e8e-b006-e445ecc45add', +                'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', +                'ext': 'flv', +                'title': 'BET News Presents: A Conversation With President Obama', +                'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6', +                'duration': 1534, +                'timestamp': 1418075340, +                'upload_date': '20141208', +                'uploader': 'admin', +                'thumbnail': 're:(?i)^https?://.*\.jpg$', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, +        { +            'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', +            'info_dict': { +                'id': '4160e53b-ad41-43b1-980f-8d85f63121f4', +                'display_id': 'justice-for-ferguson-a-community-reacts', +                'ext': 'flv', +                'title': 'Justice for Ferguson: A Community Reacts', +                'description': 'A BET News special.', +                'duration': 1696, +                'timestamp': 1416942360, +                'upload_date': '20141125', +                'uploader': 'admin', +                'thumbnail': 're:(?i)^https?://.*\.jpg$', +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        } +    ] + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        media_url = compat_urllib_parse.unquote(self._search_regex( +            [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], +            webpage, 'media URL')) + +        mrss = self._download_xml(media_url, display_id) + +        item = mrss.find('./channel/item') + +        NS_MAP = { +            'dc': 'http://purl.org/dc/elements/1.1/', +            'media': 'http://search.yahoo.com/mrss/', +            'ka': 'http://kickapps.com/karss', +        } + +        title = xpath_text(item, './title', 'title') +        description = xpath_text( +            item, './description', 'description', fatal=False) + +        video_id = xpath_text(item, './guid', 'video id', fatal=False) + +        timestamp = parse_iso8601(xpath_text( +            item, xpath_with_ns('./dc:date', NS_MAP), +            'upload date', fatal=False)) +        uploader = xpath_text( +            item, xpath_with_ns('./dc:creator', NS_MAP), +            'uploader', fatal=False) + +        media_content = item.find( +            xpath_with_ns('./media:content', NS_MAP)) +        duration = int_or_none(media_content.get('duration')) +        smil_url = media_content.get('url') + +        thumbnail = media_content.find( +            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') + +        formats = self._extract_smil_formats(smil_url, display_id) + +        return { +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'timestamp': timestamp, +            'uploader': uploader, +            'duration': duration, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index da47f27bd..14b814120 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -4,13 +4,17 @@ import re  from .common import InfoExtractor  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( + +from ..compat import ( +    compat_str,      compat_urllib_request, -    unescapeHTML, -    parse_iso8601,      compat_urlparse, +) +from ..utils import (      clean_html, -    compat_str, +    int_or_none, +    parse_iso8601, +    unescapeHTML,  ) @@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):                  'uploader': 'NostalgiaCritic',                  'uploader_id': '246467',              } -        } +        }, +        { +            # https://github.com/rg3/youtube-dl/pull/4404 +            'note': 'Audio only', +            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982', +            'md5': '76c0a56f24e769ceaab21fbb6416a351', +            'info_dict': { +                'id': '7103299', +                'ext': 'flv', +                'title': 'Weekly Manga Recap: Kingdom', +                'description': 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?', +                'timestamp': 1417660321, +                'upload_date': '20141204', +                'uploader': 'The Rollo T', +                'uploader_id': '407429', +                'duration': 7251, +                'vcodec': 'none', +            } +        },      ]      def _real_extract(self, url): @@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):                      'url': real_url,                      'format_id': role,                      'format_note': media_type, -                    'vcodec': media_content.get(blip('vcodec')), +                    'vcodec': media_content.get(blip('vcodec')) or 'none',                      'acodec': media_content.get(blip('acodec')),                      'filesize': media_content.get('filesize'), -                    'width': int(media_content.get('width')), -                    'height': int(media_content.get('height')), +                    'width': int_or_none(media_content.get('width')), +                    'height': int_or_none(media_content.get('height')),                  })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py new file mode 100644 index 000000000..0c9a24bef --- /dev/null +++ b/youtube_dl/extractor/cinchcast.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    unified_strdate, +    xpath_text, +) + + +class CinchcastIE(InfoExtractor): +    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)' +    _TEST = { +        # Actual test is run in generic, look for undergroundwellness +        'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', +        'only_matching': True, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        doc = self._download_xml( +            'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, +            video_id) + +        item = doc.find('.//item') +        title = xpath_text(item, './title', fatal=True) +        date_str = xpath_text( +            item, './{http://developer.longtailvideo.com/trac/}date') +        upload_date = unified_strdate(date_str, day_first=False) +        # duration is present but wrong +        formats = [] +        formats.append({ +            'format_id': 'main', +            'url': item.find( +                './{http://search.yahoo.com/mrss/}content').attrib['url'], +        }) +        backup_url = xpath_text( +            item, './{http://developer.longtailvideo.com/trac/}backupContent') +        if backup_url: +            formats.append({ +                'preference': 2,  # seems to be more reliable +                'format_id': 'backup', +                'url': backup_url, +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'upload_date': upload_date, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7cbd846f6..d302fe45f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -118,6 +118,7 @@ class InfoExtractor(object):      The following fields are optional: +    alt_title:      A secondary title of the video.      display_id      An alternative identifier for the video, not necessarily                      unique, but available before title. Typically, id is                      something like "4234987", title "Dancing naked mole rats", @@ -129,7 +130,7 @@ class InfoExtractor(object):                          * "resolution" (optional, string "{width}x{height"},                                          deprecated)      thumbnail:      Full URL to a video thumbnail image. -    description:    One-line video description. +    description:    Full video description.      uploader:       Full name of the video uploader.      timestamp:      UNIX timestamp of the moment the video became available.      upload_date:    Video upload date (YYYYMMDD). @@ -158,8 +159,8 @@ class InfoExtractor(object):      _type "playlist" indicates multiple videos. -    There must be a key "entries", which is a list or a PagedList object, each -    element of which is a valid dictionary under this specfication. +    There must be a key "entries", which is a list, an iterable, or a PagedList +    object, each element of which is a valid dictionary by this specification.      Additionally, playlists can have "title" and "id" attributes with the same      semantics as videos (see above). @@ -174,9 +175,10 @@ class InfoExtractor(object):      _type "url" indicates that the video must be extracted from another      location, possibly by a different extractor. Its only required key is:      "url" - the next URL to extract. - -    Additionally, it may have properties believed to be identical to the -    resolved entity, for example "title" if the title of the referred video is +    The key "ie_key" can be set to the class name (minus the trailing "IE", +    e.g. "Youtube") if the extractor class is known in advance. +    Additionally, the dictionary may have any properties of the resolved entity +    known in advance, for example "title" if the title of the referred video is      known ahead of time. @@ -443,7 +445,7 @@ class InfoExtractor(object):          return video_info      @staticmethod -    def playlist_result(entries, playlist_id=None, playlist_title=None): +    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):          """Returns a playlist"""          video_info = {'_type': 'playlist',                        'entries': entries} @@ -451,6 +453,8 @@ class InfoExtractor(object):              video_info['id'] = playlist_id          if playlist_title:              video_info['title'] = playlist_title +        if playlist_description: +            video_info['description'] = playlist_description          return video_info      def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): @@ -794,6 +798,49 @@ class InfoExtractor(object):          self._sort_formats(formats)          return formats +    # TODO: improve extraction +    def _extract_smil_formats(self, smil_url, video_id): +        smil = self._download_xml( +            smil_url, video_id, 'Downloading SMIL file', +            'Unable to download SMIL file') + +        base = smil.find('./head/meta').get('base') + +        formats = [] +        rtmp_count = 0 +        for video in smil.findall('./body/switch/video'): +            src = video.get('src') +            if not src: +                continue +            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) +            width = int_or_none(video.get('width')) +            height = int_or_none(video.get('height')) +            proto = video.get('proto') +            if not proto: +                if base: +                    if base.startswith('rtmp'): +                        proto = 'rtmp' +                    elif base.startswith('http'): +                        proto = 'http' +            ext = video.get('ext') +            if proto == 'm3u8': +                formats.extend(self._extract_m3u8_formats(src, video_id, ext)) +            elif proto == 'rtmp': +                rtmp_count += 1 +                streamer = video.get('streamer') or base +                formats.append({ +                    'url': streamer, +                    'play_path': src, +                    'ext': 'flv', +                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), +                    'tbr': bitrate, +                    'width': width, +                    'height': height, +                }) +        self._sort_formats(formats) + +        return formats +      def _live_title(self, name):          """ Generate the title for a live video """          now = datetime.datetime.now() @@ -823,7 +870,8 @@ class InfoExtractor(object):          return res      def _set_cookie(self, domain, name, value, expire_time=None): -        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None, +        cookie = compat_cookiejar.Cookie( +            0, name, value, None, None, domain, None,              None, '/', True, False, expire_time, '', None, None, None)          self._downloader.cookiejar.set_cookie(cookie) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 2139f68aa..1ad4e77a8 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -13,9 +13,10 @@ from ..compat import (      compat_urllib_request,  )  from ..utils import ( -    urlencode_postdata,      ExtractorError, +    int_or_none,      limit_length, +    urlencode_postdata,  ) @@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):          'info_dict': {              'id': '637842556329505',              'ext': 'mp4', -            'duration': 38,              'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',          }      }, { @@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):          self._login()      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          url = 'https://www.facebook.com/video/video.php?v=%s' % video_id          webpage = self._download_webpage(url, video_id) @@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):              'id': video_id,              'title': video_title,              'url': video_url, -            'duration': int(video_data['video_duration']), -            'thumbnail': video_data['thumbnail_src'], +            'duration': int_or_none(video_data.get('video_duration')), +            'thumbnail': video_data.get('thumbnail_src'),          } diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py new file mode 100644 index 000000000..08b8ea362 --- /dev/null +++ b/youtube_dl/extractor/foxgay.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class FoxgayIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' +    _TEST = { +        'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', +        'md5': '80d72beab5d04e1655a56ad37afe6841', +        'info_dict': { +            'id': '2582', +            'ext': 'mp4', +            'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a', +            'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf', +            'age_limit': 18, +            'thumbnail': 're:https?://.*\.jpg$', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_regex( +            r'<title>(?P<title>.*?)</title>', +            webpage, 'title', fatal=False) +        description = self._html_search_regex( +            r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>', +            webpage, 'description', fatal=False) + +        # Find the URL for the iFrame which contains the actual video. +        iframe = self._download_webpage( +            self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'), +            video_id) +        video_url = self._html_search_regex( +            r"v_path = '(?P<vid>http://.*?)'", iframe, 'url') +        thumb_url = self._html_search_regex( +            r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False) + +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +            'description': description, +            'thumbnail': thumb_url, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py new file mode 100644 index 000000000..917f76b1e --- /dev/null +++ b/youtube_dl/extractor/foxnews.py @@ -0,0 +1,94 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    parse_iso8601, +    int_or_none, +) + + +class FoxNewsIE(InfoExtractor): +    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' +    _TESTS = [ +        { +            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', +            'md5': '32aaded6ba3ef0d1c04e238d01031e5e', +            'info_dict': { +                'id': '3937480', +                'ext': 'flv', +                'title': 'Frozen in Time', +                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', +                'duration': 265, +                'timestamp': 1304411491, +                'upload_date': '20110503', +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +        }, +        { +            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips', +            'md5': '5846c64a1ea05ec78175421b8323e2df', +            'info_dict': { +                'id': '3922535568001', +                'ext': 'mp4', +                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", +                'description': "Congressman discusses the president's executive action", +                'duration': 292, +                'timestamp': 1417662047, +                'upload_date': '20141204', +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +        }, +        { +            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', +            'only_matching': True, +        }, +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        video = self._download_json( +            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id) + +        item = video['channel']['item'] +        title = item['title'] +        description = item['description'] +        timestamp = parse_iso8601(item['dc-date']) + +        media_group = item['media-group'] +        duration = None +        formats = [] +        for media in media_group['media-content']: +            attributes = media['@attributes'] +            video_url = attributes['url'] +            if video_url.endswith('.f4m'): +                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id)) +            elif video_url.endswith('.m3u8'): +                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv')) +            elif not video_url.endswith('.smil'): +                duration = int_or_none(attributes.get('duration')) +                formats.append({ +                    'url': video_url, +                    'format_id': media['media-category']['@attributes']['label'], +                    'preference': 1, +                    'vbr': int_or_none(attributes.get('bitrate')), +                    'filesize': int_or_none(attributes.get('fileSize')) +                }) +        self._sort_formats(formats) + +        media_thumbnail = media_group['media-thumbnail']['@attributes'] +        thumbnails = [{ +            'url': media_thumbnail['url'], +            'width': int_or_none(media_thumbnail.get('width')), +            'height': int_or_none(media_thumbnail.get('height')), +        }] if media_thumbnail else [] + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'duration': duration, +            'timestamp': timestamp, +            'formats': formats, +            'thumbnails': thumbnails, +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 328301de3..2b4d8c62f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -467,8 +467,17 @@ class GenericIE(InfoExtractor):              'expected_warnings': [                  'URL could be a direct video link, returning it as such.'              ] -        } - +        }, +        # Cinchcast embed +        { +            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', +            'info_dict': { +                'id': '7141703', +                'ext': 'mp3', +                'upload_date': '20141126', +                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', +            } +        },      ]      def report_following_redirect(self, new_url): @@ -962,6 +971,13 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'SBS') +        # Look for embedded Cinchcast player +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1', +            webpage) +        if mobj is not None: +            return self.url_result(mobj.group('url'), 'Cinchcast') +          mobj = re.search(              r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',              webpage) diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py new file mode 100644 index 000000000..14934b7ec --- /dev/null +++ b/youtube_dl/extractor/minhateca.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( +    compat_urllib_parse, +    compat_urllib_request, +) +from ..utils import ( +    int_or_none, +    parse_duration, +    parse_filesize, +) + + +class MinhatecaIE(InfoExtractor): +    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.' +    _TEST = { +        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)', +        'info_dict': { +            'id': '125848331', +            'ext': 'mp4', +            'title': 'youtube-dl test video', +            'thumbnail': 're:^https?://.*\.jpg$', +            'filesize_approx': 1530000, +            'duration': 9, +            'view_count': int, +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        token = self._html_search_regex( +            r'<input name="__RequestVerificationToken".*?value="([^"]+)"', +            webpage, 'request token') +        token_data = [ +            ('fileId', video_id), +            ('__RequestVerificationToken', token), +        ] +        req = compat_urllib_request.Request( +            'http://minhateca.com.br/action/License/Download', +            data=compat_urllib_parse.urlencode(token_data)) +        req.add_header('Content-Type', 'application/x-www-form-urlencoded') +        data = self._download_json( +            req, video_id, note='Downloading metadata') + +        video_url = data['redirectUrl'] +        title_str = self._html_search_regex( +            r'<h1.*?>(.*?)</h1>', webpage, 'title') +        title, _, ext = title_str.rpartition('.') +        filesize_approx = parse_filesize(self._html_search_regex( +            r'<p class="fileSize">(.*?)</p>', +            webpage, 'file size approximation', fatal=False)) +        duration = parse_duration(self._html_search_regex( +            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<', +            webpage, 'duration', fatal=False)) +        view_count = int_or_none(self._html_search_regex( +            r'<p class="downloadsCounter">([0-9]+)</p>', +            webpage, 'view count', fatal=False)) + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'ext': ext, +            'filesize_approx': filesize_approx, +            'duration': duration, +            'view_count': view_count, +            'thumbnail': self._og_search_thumbnail(webpage), +        } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bb8937c4d..55cc33a3e 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor):              raise ExtractorError('Unable to extract track url')          PREFIX = ( -            r'<div class="cloudcast-play-button-container[^"]*?"' +            r'<span class="play-button[^"]*?"'              r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')          title = self._html_search_regex(              PREFIX + r'm-title="([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index e62614670..83414a232 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -88,6 +88,7 @@ class MySpaceIE(InfoExtractor):                  self.report_warning(                      '%s: No downloadable song on this page' % video_id)                  return +              def search_data(name):                  return self._search_regex(                      r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py new file mode 100644 index 000000000..a94ab8358 --- /dev/null +++ b/youtube_dl/extractor/myvidster.py @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MyVidsterIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' + +    _TEST = { +        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making', +        'md5': '95296d0231c1363222c3441af62dc4ca', +        'info_dict': { +            'id': '3685814', +            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749', +            'upload_date': '20141027', +            'uploader_id': 'utkualp', +            'ext': 'mp4', +            'age_limit': 18, +        }, +        'add_ie': ['XHamster'], +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        return self.url_result(self._html_search_regex( +            r'rel="videolink" href="(?P<real_url>.*)">', +            webpage, 'real video url')) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index f69fe0925..862b706bf 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -1,7 +1,5 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      remove_end, @@ -10,8 +8,8 @@ from ..utils import (  class NBAIE(InfoExtractor): -    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$' -    _TEST = { +    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$' +    _TESTS = [{          'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',          'md5': 'c0edcfc37607344e2ff8f13c378c88a4',          'info_dict': { @@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):              'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',              'duration': 181,          }, -    } +    }, { +        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', +        'only_matching': True, +    }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' @@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):          description = self._og_search_description(webpage)          duration = parse_duration( -            self._html_search_meta('duration', webpage, 'duration', fatal=False)) +            self._html_search_meta('duration', webpage, 'duration'))          return {              'id': shortened_video_id, diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 0244368e9..b2f40344f 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals  import re  import json +import os  from .common import InfoExtractor  from ..compat import ( @@ -26,7 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):          initial_video_url = info['publishPoint']          if info['formats'] == '1':              parsed_url = compat_urllib_parse_urlparse(initial_video_url) -            path = parsed_url.path.replace('.', '_sd.', 1) +            filename, ext = os.path.splitext(parsed_url.path) +            path = '%s_sd%s' % (filename, ext)              data = compat_urllib_parse.urlencode({                  'type': 'fvod',                  'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]) diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py index 13c8d79cd..ee740cd9c 100644 --- a/youtube_dl/extractor/ntv.py +++ b/youtube_dl/extractor/ntv.py @@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):                  'rtmp_conn': 'B:1',                  'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',                  'page_url': 'http://www.ntv.ru', -                'flash_ver': 'LNX 11,2,202,341', +                'flash_version': 'LNX 11,2,202,341',                  'rtmp_live': True,                  'ext': 'flv',                  'filesize': int(size.text), diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index bac484c67..954dfccb7 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -8,7 +8,6 @@ from ..utils import (      int_or_none,      js_to_json,      qualities, -    determine_ext,  ) @@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):          thumbnail = self._search_regex(              r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) -        quality = qualities(['SD', 'HD']) -        formats = [{ -            'url': source['file'], -            'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])), -            'quality': quality(source['label']), -        } for source in json.loads(js_to_json(self._search_regex( -            r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))] +        quality = qualities(['sd', 'hd']) +        sources = json.loads(js_to_json(self._search_regex( +            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) +        formats = [] +        for container, s in sources.items(): +            for qname, video_url in s.items(): +                formats.append({ +                    'url': video_url, +                    'container': container, +                    'format_id': '%s-%s' % (container, qname), +                    'quality': quality(qname), +                })          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 32d747ede..1262793c8 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Im Interview: Kai Wiesinger',                  'description': 'md5:e4e5370652ec63b95023e914190b4eb9', -                'upload_date': '20140225', +                'upload_date': '20140203',                  'duration': 522.56,              },              'params': { @@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',                  'description': 'md5:2669cde3febe9bce13904f701e774eb6', -                'upload_date': '20140225', +                'upload_date': '20141014',                  'duration': 2410.44,              },              'params': { @@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):                  'skip_download': True,              },          }, +        { +            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', +            'info_dict': { +                'id': '439664', +                'title': 'Episode 8 - Ganze Folge - Playlist', +                'description': 'md5:63b8963e71f481782aeea877658dec84', +            }, +            'playlist_count': 2, +        },      ]      _CLIPID_REGEXES = [          r'"clip_id"\s*:\s+"(\d+)"',          r'clipid: "(\d+)"',          r'clip[iI]d=(\d+)', +        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",      ]      _TITLE_REGEXES = [          r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', @@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):          r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',          r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',      ] +    _PAGE_TYPE_REGEXES = [ +        r'<meta name="page_type" content="([^"]+)">', +        r"'itemType'\s*:\s*'([^']*)'", +    ] +    _PLAYLIST_ID_REGEXES = [ +        r'content[iI]d=(\d+)', +        r"'itemId'\s*:\s*'([^']*)'", +    ] +    _PLAYLIST_CLIP_REGEXES = [ +        r'(?s)data-qvt=.+?<a href="([^"]+)"', +    ] -    def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - +    def _extract_clip(self, url, webpage):          clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')          access_token = 'testclient' @@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):              'duration': duration,              'formats': formats,          } + +    def _extract_playlist(self, url, webpage): +        playlist_id = self._html_search_regex( +            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') +        for regex in self._PLAYLIST_CLIP_REGEXES: +            playlist_clips = re.findall(regex, webpage) +            if playlist_clips: +                title = self._html_search_regex( +                    self._TITLE_REGEXES, webpage, 'title') +                description = self._html_search_regex( +                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) +                entries = [ +                    self.url_result( +                        re.match('(.+?//.+?)/', url).group(1) + clip_path, +                        'ProSiebenSat1') +                    for clip_path in playlist_clips] +                return self.playlist_result(entries, playlist_id, title, description) + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        page_type = self._search_regex( +            self._PAGE_TYPE_REGEXES, webpage, +            'page type', default='clip').lower() +        if page_type == 'clip': +            return self._extract_clip(url, webpage) +        elif page_type == 'playlist': +            return self._extract_playlist(url, webpage) diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py new file mode 100644 index 000000000..f95bc9454 --- /dev/null +++ b/youtube_dl/extractor/radiode.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor + + +class RadioDeIE(InfoExtractor): +    IE_NAME = 'radio.de' +    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' +    _TEST = { +        'url': 'http://ndr2.radio.de/', +        'md5': '3b4cdd011bc59174596b6145cda474a4', +        'info_dict': { +            'id': 'ndr2', +            'ext': 'mp3', +            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +            'description': 'md5:591c49c702db1a33751625ebfb67f273', +            'thumbnail': 're:^https?://.*\.png', +        }, +        'params': { +            'skip_download': True, +        } +    } + +    def _real_extract(self, url): +        radio_id = self._match_id(url) + +        webpage = self._download_webpage(url, radio_id) + +        broadcast = json.loads(self._search_regex( +            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', +            webpage, 'broadcast')) + +        title = self._live_title(broadcast['name']) +        description = broadcast.get('description') or broadcast.get('shortDescription') +        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') + +        formats = [{ +            'url': stream['streamUrl'], +            'ext': stream['streamContentFormat'].lower(), +            'acodec': stream['streamContentFormat'], +            'abr': stream['bitRate'], +            'asr': stream['sampleRate'] +        } for stream in broadcast['streamUrls']] +        self._sort_formats(formats) + +        return { +            'id': radio_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'is_live': True, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/screenwavemedia.py index b7fa73c3b..6c9fdb7c1 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -5,61 +5,27 @@ import re  from .common import InfoExtractor  from ..utils import ( -    ExtractorError,      int_or_none, +    unified_strdate,  ) -class CinemassacreIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' -    _TESTS = [ -        { -            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', -            'md5': 'fde81fbafaee331785f58cd6c0d46190', -            'info_dict': { -                'id': '19911', -                'ext': 'mp4', -                'upload_date': '20121110', -                'title': '“Angry Video Game Nerd: The Movie” – Trailer', -                'description': 'md5:fb87405fcb42a331742a0dce2708560b', -            }, -        }, -        { -            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', -            'md5': 'd72f10cd39eac4215048f62ab477a511', -            'info_dict': { -                'id': '521be8ef82b16', -                'ext': 'mp4', -                'upload_date': '20131002', -                'title': 'The Mummy’s Hand (1940)', -            }, -        } -    ] - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        display_id = mobj.group('display_id') - -        webpage = self._download_webpage(url, display_id) -        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') -        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage) -        if not mobj: -            raise ExtractorError('Can\'t extract embed url and video id') -        playerdata_url = mobj.group('embed_url') -        video_id = mobj.group('video_id') -        full_video_id = mobj.group('full_video_id') +class ScreenwaveMediaIE(InfoExtractor): +    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)' -        video_title = self._html_search_regex( -            r'<title>(?P<title>.+?)\|', webpage, 'title') -        video_description = self._html_search_regex( -            r'<div class="entry-content">(?P<description>.+?)</div>', -            webpage, 'description', flags=re.DOTALL, fatal=False) -        video_thumbnail = self._og_search_thumbnail(webpage) +    _TESTS = [{ +        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', +        'only_matching': True, +    }] -        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') +    def _real_extract(self, url): +        video_id = self._match_id(url) +        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage') +        vidtitle = self._search_regex( +            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')          vidurl = self._search_regex( -            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') +            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')          videolist_url = None @@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):          if mobj:              videoserver = mobj.group('videoserver')              mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) -            vidid = mobj.group('vidid') if mobj else full_video_id +            vidid = mobj.group('vidid') if mobj else video_id              videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)          else:              mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) @@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):                  file_ = src.partition(':')[-1]                  width = int_or_none(video.get('width'))                  height = int_or_none(video.get('height')) -                bitrate = int_or_none(video.get('system-bitrate')) +                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)                  format = {                      'url': baseurl + file_,                      'format_id': src.rpartition('.')[0].rpartition('_')[-1],                  }                  if width or height:                      format.update({ -                        'tbr': bitrate // 1000 if bitrate else None, +                        'tbr': bitrate,                          'width': width,                          'height': height,                      })                  else:                      format.update({ -                        'abr': bitrate // 1000 if bitrate else None, +                        'abr': bitrate,                          'vcodec': 'none',                      })                  formats.append(format) -            self._sort_formats(formats)          else:              formats = [{                  'url': vidurl,              }] +        self._sort_formats(formats)          return {              'id': video_id, -            'title': video_title, +            'title': vidtitle,              'formats': formats, +        } + + +class CinemassacreIE(InfoExtractor): +    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' +    _TESTS = [ +        { +            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', +            'md5': 'fde81fbafaee331785f58cd6c0d46190', +            'info_dict': { +                'id': 'Cinemassacre-19911', +                'ext': 'mp4', +                'upload_date': '20121110', +                'title': '“Angry Video Game Nerd: The Movie” – Trailer', +                'description': 'md5:fb87405fcb42a331742a0dce2708560b', +            }, +        }, +        { +            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', +            'md5': 'd72f10cd39eac4215048f62ab477a511', +            'info_dict': { +                'id': 'Cinemassacre-521be8ef82b16', +                'ext': 'mp4', +                'upload_date': '20131002', +                'title': 'The Mummy’s Hand (1940)', +            }, +        } +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        display_id = mobj.group('display_id') +        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') + +        webpage = self._download_webpage(url, display_id) + +        playerdata_url = self._search_regex( +            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', +            webpage, 'player data URL') +        video_title = self._html_search_regex( +            r'<title>(?P<title>.+?)\|', webpage, 'title') +        video_description = self._html_search_regex( +            r'<div class="entry-content">(?P<description>.+?)</div>', +            webpage, 'description', flags=re.DOTALL, fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage) + +        return { +            '_type': 'url_transparent', +            'display_id': display_id, +            'title': video_title, +            'description': video_description, +            'upload_date': video_date, +            'thumbnail': video_thumbnail, +            'url': playerdata_url, +        } + + +class TeamFourIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' +    _TEST = { +        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', +        'info_dict': { +            'id': 'TeamFourStar-5292a02f20bfa', +            'ext': 'mp4', +            'upload_date': '20130401', +            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', +            'title': 'A Moment With TFS Episode 4', +        } +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) + +        playerdata_url = self._search_regex( +            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', +            webpage, 'player data URL') + +        video_title = self._html_search_regex( +            r'<div class="heroheadingtitle">(?P<title>.+?)</div>', +            webpage, 'title') +        video_date = unified_strdate(self._html_search_regex( +            r'<div class="heroheadingdate">(?P<date>.+?)</div>', +            webpage, 'date', fatal=False)) +        video_description = self._html_search_regex( +            r'(?s)<div class="postcontent">(?P<description>.+?)</div>', +            webpage, 'description', fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage) + +        return { +            '_type': 'url_transparent', +            'display_id': display_id, +            'title': video_title,              'description': video_description,              'upload_date': video_date,              'thumbnail': video_thumbnail, +            'url': playerdata_url,          } diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 0751efc61..646af3cc9 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):          broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')          if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: -            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) +            raise ExtractorError( +                'Broadcast %s does not exist' % broadcast_id, expected=True)          # Adult content          if re.search('EroConfirmText">', broadcast_page) is not None:              (username, password) = self._get_login_info()              if username is None: -                raise ExtractorError('Erotic broadcasts allowed only for registered users, ' -                                     'use --username and --password options to provide account credentials.', expected=True) +                raise ExtractorError( +                    'Erotic broadcasts allowed only for registered users, ' +                    'use --username and --password options to provide account credentials.', +                    expected=True)              login_form = {                  'login-hint53': '1', @@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):                  'password': password,              } -            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) +            request = compat_urllib_request.Request( +                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))              request.add_header('Content-Type', 'application/x-www-form-urlencoded') -            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') +            broadcast_page = self._download_webpage( +                request, broadcast_id, 'Logging in and confirming age')              if re.search('>Неверный логин или пароль<', broadcast_page) is not None:                  raise ExtractorError('Unable to log in: bad username or password', expected=True) @@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):              adult_content = False          ticket = self._html_search_regex( -            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', +            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",              broadcast_page, 'broadcast ticket')          url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket @@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):          if broadcast_password:              url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() -        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') +        broadcast_json_page = self._download_webpage( +            url, broadcast_id, 'Downloading broadcast JSON')          try:              broadcast_json = json.loads(broadcast_json_page)              protected_broadcast = broadcast_json['_pass_protected'] == 1              if protected_broadcast and not broadcast_password: -                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) +                raise ExtractorError( +                    'This broadcast is protected by a password, use the --video-password option', +                    expected=True)              broadcast_offline = broadcast_json['is_play'] == 0              if broadcast_offline:                  raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)              rtmp_url = broadcast_json['_server'] -            if not rtmp_url.startswith('rtmp://'): +            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url) +            if not mobj:                  raise ExtractorError('Unexpected broadcast rtmp URL')              broadcast_playpath = broadcast_json['_streamName'] +            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])              broadcast_thumbnail = broadcast_json['_imgURL'] -            broadcast_title = broadcast_json['title'] +            broadcast_title = self._live_title(broadcast_json['title'])              broadcast_description = broadcast_json['description']              broadcaster_nick = broadcast_json['nick']              broadcaster_login = broadcast_json['login'] @@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):              'age_limit': 18 if adult_content else 0,              'ext': 'flv',              'play_path': broadcast_playpath, +            'player_url': 'http://pics.smotri.com/broadcast_play.swf', +            'app': broadcast_app,              'rtmp_live': True, -            'rtmp_conn': rtmp_conn +            'rtmp_conn': rtmp_conn, +            'is_live': True,          } diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index b87047451..bfe07b024 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -4,10 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..utils import parse_filesize  class TagesschauIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html' +    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'      _TESTS = [{          'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html', @@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor):              'description': 'md5:69da3c61275b426426d711bde96463ab',              'thumbnail': 're:^http:.*\.jpg$',          }, +    }, { +        'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', +        'md5': '3c54c1f6243d279b706bde660ceec633', +        'info_dict': { +            'id': '5727', +            'ext': 'mp4', +            'description': 'md5:695c01bfd98b7e313c501386327aea59', +            'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', +            'thumbnail': 're:^http:.*\.jpg$', +        }      }]      _FORMATS = { @@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        if video_id.startswith('-'): -            display_id = video_id.strip('-') -        else: -            display_id = video_id - +        video_id = self._match_id(url) +        display_id = video_id.lstrip('-')          webpage = self._download_webpage(url, display_id) -        playerpage = self._download_webpage( -            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, -            display_id, 'Downloading player page') - -        medias = re.findall( -            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', -            playerpage) +        player_url = self._html_search_meta( +            'twitter:player', webpage, 'player URL', default=None) +        if player_url: +            playerpage = self._download_webpage( +                player_url, display_id, 'Downloading player page') -        formats = [] -        for url, ext, res in medias: -            f = { -                'format_id': res + '_' + ext, -                'url': url, -                'ext': ext, -            } -            f.update(self._FORMATS.get(res, {})) -            formats.append(f) +            medias = re.findall( +                r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', +                playerpage) +            formats = [] +            for url, ext, res in medias: +                f = { +                    'format_id': res + '_' + ext, +                    'url': url, +                    'ext': ext, +                } +                f.update(self._FORMATS.get(res, {})) +                formats.append(f) +            thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1] +            title = self._og_search_title(webpage).strip() +            description = self._og_search_description(webpage).strip() +        else: +            download_text = self._search_regex( +                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>', +                webpage, 'download links') +            links = re.finditer( +                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', +                download_text) +            formats = [] +            for l in links: +                format_id = self._search_regex( +                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID') +                format = { +                    'format_id': format_id, +                    'url': l.group('url'), +                    'format_name': l.group('name'), +                } +                m = re.match( +                    r'''(?x) +                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10; +                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10; +                        (?P<vbr>[0-9]+)kbps&\#10; +                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10; +                        Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''', +                    l.group('title')) +                if m: +                    format.update({ +                        'format_note': m.group('audio_desc'), +                        'vcodec': m.group('vcodec'), +                        'width': int(m.group('width')), +                        'height': int(m.group('height')), +                        'abr': int(m.group('abr')), +                        'vbr': int(m.group('vbr')), +                        'filesize_approx': parse_filesize(m.group('filesize_approx')), +                    }) +                formats.append(format) +            thumbnail_fn = self._search_regex( +                r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"', +                webpage, 'thumbnail', fatal=False) +            description = self._html_search_regex( +                r'(?s)<p class="teasertext">(.*?)</p>', +                webpage, 'description', fatal=False) +            title = self._html_search_regex( +                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')          self._sort_formats(formats) - -        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1] +        thumbnail = 'http://www.tagesschau.de' + thumbnail_fn          return {              'id': display_id, -            'title': self._og_search_title(webpage).strip(), -            'thumbnail': 'http://www.tagesschau.de' + thumbnail, +            'title': title, +            'thumbnail': thumbnail,              'formats': formats, -            'description': self._og_search_description(webpage).strip(), +            'description': description,          } diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index d81d1d1a6..ba65996dc 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,32 +1,30 @@  # encoding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      float_or_none, -    str_to_int, +    parse_age_limit,  )  class TvigleIE(InfoExtractor):      IE_NAME = 'tvigle'      IE_DESC = 'Интернет-телевидение Tvigle.ru' -    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$' +    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'      _TESTS = [          { -            'url': 'http://www.tvigle.ru/video/brat/', -            'md5': 'ff4344a4894b0524441fb6f8218dc716', +            'url': 'http://www.tvigle.ru/video/sokrat/', +            'md5': '36514aed3657d4f70b4b2cef8eb520cd',              'info_dict': { -                'id': '5118490', -                'display_id': 'brat', -                'ext': 'mp4', -                'title': 'Брат', -                'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb', -                'duration': 5722.6, -                'age_limit': 16, +                'id': '1848932', +                'display_id': 'sokrat', +                'ext': 'flv', +                'title': 'Сократ', +                'description': 'md5:a05bd01be310074d5833efc6743be95e', +                'duration': 6586, +                'age_limit': 0,              },          },          { @@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        display_id = mobj.group('display_id') +        display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) @@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):          title = item['title']          description = item['description']          thumbnail = item['thumbnail'] -        duration = float_or_none(item['durationMilliseconds'], 1000) -        age_limit = str_to_int(item['ageRestrictions']) +        duration = float_or_none(item.get('durationMilliseconds'), 1000) +        age_limit = parse_age_limit(item.get('ageRestrictions'))          formats = []          for vcodec, fmts in item['videos'].items(): diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index eb9473754..9a53a3c74 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -6,7 +6,6 @@ import re  from .common import InfoExtractor  from ..compat import compat_str  from ..utils import ( -    ExtractorError,      parse_iso8601,      qualities,  ) @@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor):              'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')          if video['is_geo_blocked']: -            raise ExtractorError( -                'This content is not available in your country due to copyright reasons', expected=True) +            self.report_warning( +                'This content might not be available in your country due to copyright reasons')          streams = self._download_json(              'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 36aa1ad6e..397d167e8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -1,3 +1,4 @@ +# coding: utf-8  from __future__ import unicode_literals  import itertools @@ -5,6 +6,8 @@ import re  from .common import InfoExtractor  from ..utils import ( +    compat_urllib_parse, +    compat_urllib_request,      ExtractorError,      parse_iso8601,  ) @@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor):          """      _PAGE_LIMIT = 100      _API_BASE = 'https://api.twitch.tv' +    _LOGIN_URL = 'https://secure.twitch.tv/user/login'      _TESTS = [{          'url': 'http://www.twitch.tv/riotgames/b/577357806',          'info_dict': { @@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor):              'view_count': info['views'],          } +    def _real_initialize(self): +        self._login() + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return + +        login_page = self._download_webpage( +            self._LOGIN_URL, None, 'Downloading login page') + +        authenticity_token = self._search_regex( +            r'<input name="authenticity_token" type="hidden" value="([^"]+)"', +            login_page, 'authenticity token') + +        login_form = { +            'utf8': '✓'.encode('utf-8'), +            'authenticity_token': authenticity_token, +            'redirect_on_login': '', +            'embed_form': 'false', +            'mp_source_action': '', +            'follow': '', +            'user[login]': username, +            'user[password]': password, +        } + +        request = compat_urllib_request.Request( +            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) +        request.add_header('Referer', self._LOGIN_URL) +        response = self._download_webpage( +            request, None, 'Logging in as %s' % username) + +        m = re.search( +            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response) +        if m: +            raise ExtractorError( +                'Unable to login: %s' % m.group('msg').strip(), expected=True) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          if mobj.group('chapterid'): diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 0e4d386a8..5271611ac 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -97,11 +97,8 @@ class UdemyIE(InfoExtractor):          if 'returnUrl' not in response:              raise ExtractorError('Unable to log in') - -      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        lecture_id = mobj.group('id') +        lecture_id = self._match_id(url)          lecture = self._download_json(              'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 42995226e..0b58fe0fe 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -17,6 +17,7 @@ class VineIE(InfoExtractor):              'id': 'b9KOOWX7HUx',              'ext': 'mp4',              'title': 'Chicken.', +            'alt_title': 'Vine by Jack Dorsey',              'description': 'Chicken.',              'upload_date': '20130519',              'uploader': 'Jack Dorsey', @@ -25,30 +26,26 @@ class VineIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)          data = json.loads(self._html_search_regex(              r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) -        formats = [ -            { -                'url': data['videoLowURL'], -                'ext': 'mp4', -                'format_id': 'low', -            }, -            { -                'url': data['videoUrl'], -                'ext': 'mp4', -                'format_id': 'standard', -            } -        ] +        formats = [{ +            'url': data['videoLowURL'], +            'ext': 'mp4', +            'format_id': 'low', +        }, { +            'url': data['videoUrl'], +            'ext': 'mp4', +            'format_id': 'standard', +        }]          return {              'id': video_id,              'title': self._og_search_title(webpage), +            'alt_title': self._og_search_description(webpage),              'description': data['description'],              'thumbnail': data['thumbnailUrl'],              'upload_date': unified_strdate(data['created']), @@ -63,29 +60,36 @@ class VineIE(InfoExtractor):  class VineUserIE(InfoExtractor):      IE_NAME = 'vine:user' -    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$' +    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'      _VINE_BASE_URL = "https://vine.co/" -    _TEST = { -        'url': 'https://vine.co/Visa', -        'info_dict': { -            'id': 'Visa', +    _TESTS = [ +        { +            'url': 'https://vine.co/Visa', +            'info_dict': { +                'id': 'Visa', +            }, +            'playlist_mincount': 46,          }, -        'playlist_mincount': 46, -    } +        { +            'url': 'https://vine.co/u/941705360593584128', +            'only_matching': True, +        }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          user = mobj.group('user') +        u = mobj.group('u') -        profile_url = "%sapi/users/profiles/vanity/%s" % ( -            self._VINE_BASE_URL, user) +        profile_url = "%sapi/users/profiles/%s%s" % ( +            self._VINE_BASE_URL, 'vanity/' if not u else '', user)          profile_data = self._download_json(              profile_url, user, note='Downloading user profile data')          user_id = profile_data['data']['userId']          timeline_data = []          for pagenum in itertools.count(1): -            timeline_url = "%sapi/timelines/users/%s?page=%s" % ( +            timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (                  self._VINE_BASE_URL, user_id, pagenum)              timeline_page = self._download_json(                  timeline_url, user, note='Downloading page %d' % pagenum) diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py index f7e2e8ac9..8c6241aed 100644 --- a/youtube_dl/extractor/xminus.py +++ b/youtube_dl/extractor/xminus.py @@ -1,6 +1,8 @@  # coding: utf-8  from __future__ import unicode_literals +import re +  from .common import InfoExtractor  from ..compat import (      compat_chr, @@ -25,6 +27,7 @@ class XMinusIE(InfoExtractor):              'tbr': 320,              'filesize_approx': 5900000,              'view_count': int, +            'description': 'md5:03238c5b663810bc79cf42ef3c03e371',          }      } @@ -48,6 +51,11 @@ class XMinusIE(InfoExtractor):          view_count = int_or_none(self._html_search_regex(              r'<div class="quality.*?► ([0-9]+)',              webpage, 'view count', fatal=False)) +        description = self._html_search_regex( +            r'(?s)<div id="song_texts">(.*?)</div><br', +            webpage, 'song lyrics', fatal=False) +        if description: +            description = re.sub(' *\r *', '\n', description)          enc_token = self._html_search_regex(              r'minus_track\.tkn="(.+?)"', webpage, 'enc_token') @@ -64,4 +72,5 @@ class XMinusIE(InfoExtractor):              'filesize_approx': filesize_approx,              'tbr': tbr,              'view_count': view_count, +            'description': description,          } diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 8123928be..d9c06a2ee 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -45,7 +45,9 @@ class YouPornIE(InfoExtractor):          age_limit = self._rta_search(webpage)          # Get JSON parameters -        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters') +        json_params = self._search_regex( +            r'var currentVideo = new Video\((.*)\)[,;]', +            webpage, 'JSON parameters')          try:              params = json.loads(json_params)          except: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1cba40387..7b6179a2a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor  from .subtitles import SubtitlesInfoExtractor  from ..jsinterp import JSInterpreter  from ..swfinterp import SWFInterpreter -from ..utils import ( +from ..compat import (      compat_chr,      compat_parse_qs,      compat_urllib_parse,      compat_urllib_request,      compat_urlparse,      compat_str, - +) +from ..utils import (      clean_html, -    get_element_by_id, -    get_element_by_attribute,      ExtractorError, +    get_element_by_attribute, +    get_element_by_id,      int_or_none,      OnDemandPagedList, +    orderedSet,      unescapeHTML,      unified_strdate, -    orderedSet,      uppercase_escape,  ) @@ -44,9 +45,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      _LOGIN_REQUIRED = False      def _set_language(self): -        self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en', +        self._set_cookie( +            '.youtube.com', 'PREF', 'f1=50000000&hl=en',              # YouTube sets the expire time to about two months -            expire_time=time.time() + 60*24*3600) +            expire_time=time.time() + 2 * 30 * 24 * 3600)      def _login(self):          """ @@ -416,6 +418,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'upload_date': '20140605',              },          }, +        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) +        { +            'url': '__2ABJjxzNo', +            'info_dict': { +                'id': '__2ABJjxzNo', +                'ext': 'mp4', +                'upload_date': '20100430', +                'uploader_id': 'deadmau5', +                'description': 'md5:12c56784b8032162bb936a5f76d55360', +                'uploader': 'deadmau5', +                'title': 'Deadmau5 - Some Chords (HD)', +            }, +            'expected_warnings': [ +                'DASH manifest missing', +            ] +        }, +        # Olympics (https://github.com/rg3/youtube-dl/issues/4431) +        { +            'url': 'lqQg6PlCWgI', +            'info_dict': { +                'id': 'lqQg6PlCWgI', +                'ext': 'mp4', +                'upload_date': '20120731', +                'uploader_id': 'olympic', +                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', +                'uploader': 'Olympics', +                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games', +            }, +            'params': { +                'skip_download': 'requires avconv', +            } +        },      ]      def __init__(self, *args, **kwargs): @@ -665,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id          return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') +    def _parse_dash_manifest( +            self, video_id, dash_manifest_url, player_url, age_gate): +        def decrypt_sig(mobj): +            s = mobj.group(1) +            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) +            return '/signature/%s' % dec_s +        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) +        dash_doc = self._download_xml( +            dash_manifest_url, video_id, +            note='Downloading DASH manifest', +            errnote='Could not download DASH manifest') + +        formats = [] +        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): +            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') +            if url_el is None: +                continue +            format_id = r.attrib['id'] +            video_url = url_el.text +            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) +            f = { +                'format_id': format_id, +                'url': video_url, +                'width': int_or_none(r.attrib.get('width')), +                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), +                'asr': int_or_none(r.attrib.get('audioSamplingRate')), +                'filesize': filesize, +                'fps': int_or_none(r.attrib.get('frameRate')), +            } +            try: +                existing_format = next( +                    fo for fo in formats +                    if fo['format_id'] == format_id) +            except StopIteration: +                f.update(self._formats.get(format_id, {})) +                formats.append(f) +            else: +                existing_format.update(f) +        return formats +      def _real_extract(self, url):          proto = (              'http' if self._downloader.params.get('prefer_insecure', False) @@ -722,9 +796,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  # We fallback to the get_video_info pages (used by the embed page)                  self.report_video_info_webpage_download(video_id)                  for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: -                    video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' -                        % (video_id, el_type)) -                    video_info_webpage = self._download_webpage(video_info_url, +                    video_info_url = ( +                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' +                        % (proto, video_id, el_type)) +                    video_info_webpage = self._download_webpage( +                        video_info_url,                          video_id, note=False,                          errnote='unable to download video info webpage')                      video_info = compat_parse_qs(video_info_webpage) @@ -797,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          m_cat_container = self._search_regex(              r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', -            video_webpage, 'categories', fatal=False) +            video_webpage, 'categories', default=None)          if m_cat_container:              category = self._html_search_regex(                  r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', @@ -875,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'url': video_info['conn'][0],                  'player_url': player_url,              }] -        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: +        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:              encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]              if 'rtmpe%3Dyes' in encoded_url_map:                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) @@ -940,51 +1016,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          # Look for the DASH manifest          if self._downloader.params.get('youtube_include_dash_manifest', True): -            try: -                # The DASH manifest used needs to be the one from the original video_webpage. -                # The one found in get_video_info seems to be using different signatures. -                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. -                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the -                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. -                dash_manifest_url = video_info.get('dashmpd')[0] - -                def decrypt_sig(mobj): -                    s = mobj.group(1) -                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) -                    return '/signature/%s' % dec_s -                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) -                dash_doc = self._download_xml( -                    dash_manifest_url, video_id, -                    note='Downloading DASH manifest', -                    errnote='Could not download DASH manifest') -                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): -                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') -                    if url_el is None: -                        continue -                    format_id = r.attrib['id'] -                    video_url = url_el.text -                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) -                    f = { -                        'format_id': format_id, -                        'url': video_url, -                        'width': int_or_none(r.attrib.get('width')), -                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), -                        'asr': int_or_none(r.attrib.get('audioSamplingRate')), -                        'filesize': filesize, -                        'fps': int_or_none(r.attrib.get('frameRate')), -                    } -                    try: -                        existing_format = next( -                            fo for fo in formats -                            if fo['format_id'] == format_id) -                    except StopIteration: -                        f.update(self._formats.get(format_id, {})) -                        formats.append(f) -                    else: -                        existing_format.update(f) - -            except (ExtractorError, KeyError) as e: -                self.report_warning('Skipping DASH manifest: %r' % e, video_id) +            dash_mpd = video_info.get('dashmpd') +            if dash_mpd: +                dash_manifest_url = dash_mpd[0] +                try: +                    dash_formats = self._parse_dash_manifest( +                        video_id, dash_manifest_url, player_url, age_gate) +                except (ExtractorError, KeyError) as e: +                    self.report_warning( +                        'Skipping DASH manifest: %r' % e, video_id) +                else: +                    formats.extend(dash_formats)          self._sort_formats(formats) @@ -1226,7 +1268,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):  class YoutubeChannelIE(InfoExtractor):      IE_DESC = 'YouTube.com channels' -    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" +    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'      _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'      IE_NAME = 'youtube:channel' @@ -1244,13 +1286,8 @@ class YoutubeChannelIE(InfoExtractor):          return ids_in_page      def _real_extract(self, url): -        # Extract channel id -        mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError('Invalid URL: %s' % url) +        channel_id = self._match_id(url) -        # Download channel page -        channel_id = mobj.group(1)          video_ids = []          url = 'https://www.youtube.com/channel/%s/videos' % channel_id          channel_page = self._download_webpage(url, channel_id) @@ -1264,8 +1301,12 @@ class YoutubeChannelIE(InfoExtractor):              # The videos are contained in a single page              # the ajax pages can't be used, they are empty              video_ids = self.extract_videos_from_page(channel_page) -        else: -            # Download all channel pages using the json-based channel_ajax query +            entries = [ +                self.url_result(video_id, 'Youtube', video_id=video_id) +                for video_id in video_ids] +            return self.playlist_result(entries, channel_id) + +        def _entries():              for pagenum in itertools.count(1):                  url = self._MORE_PAGES_URL % (pagenum, channel_id)                  page = self._download_json( @@ -1273,21 +1314,19 @@ class YoutubeChannelIE(InfoExtractor):                      transform_source=uppercase_escape)                  ids_in_page = self.extract_videos_from_page(page['content_html']) -                video_ids.extend(ids_in_page) +                for video_id in ids_in_page: +                    yield self.url_result( +                        video_id, 'Youtube', video_id=video_id)                  if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:                      break -        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) - -        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) -                       for video_id in video_ids] -        return self.playlist_result(url_entries, channel_id) +        return self.playlist_result(_entries(), channel_id)  class YoutubeUserIE(InfoExtractor):      IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' -    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' +    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'      _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'      _GDATA_PAGE_SIZE = 50      _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' @@ -1315,12 +1354,7 @@ class YoutubeUserIE(InfoExtractor):              return super(YoutubeUserIE, cls).suitable(url)      def _real_extract(self, url): -        # Extract username -        mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError('Invalid URL: %s' % url) - -        username = mobj.group(1) +        username = self._match_id(url)          # Download video ids using YouTube Data API. Result size per          # query is limited (currently to 50 videos) so we need to query diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 9ff00e26c..74c76a9a0 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -1,12 +1,14 @@  # coding: utf-8  from __future__ import unicode_literals +import functools  import re  from .common import InfoExtractor  from ..utils import (      int_or_none,      unified_strdate, +    OnDemandPagedList,  ) @@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):  class ZDFIE(InfoExtractor): -    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' +    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'      _TEST = {          'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt', @@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) -          xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id          return extract_from_xml_url(self, video_id, xml_url) + + +class ZDFChannelIE(InfoExtractor): +    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', +        'info_dict': { +            'id': '1586442', +        }, +        'playlist_count': 4, +    } +    _PAGE_SIZE = 50 + +    def _fetch_page(self, channel_id, page): +        offset = page * self._PAGE_SIZE +        xml_url = ( +            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s' +            % (offset, self._PAGE_SIZE, channel_id)) +        doc = self._download_xml( +            xml_url, channel_id, +            note='Downloading channel info', +            errnote='Failed to download channel info') + +        title = doc.find('.//information/title').text +        description = doc.find('.//information/detail').text +        for asset in doc.findall('.//teasers/teaser'): +            a_type = asset.find('./type').text +            a_id = asset.find('./details/assetId').text +            if a_type not in ('video', 'topic'): +                continue +            yield { +                '_type': 'url', +                'playlist_title': title, +                'playlist_description': description, +                'url': 'zdf:%s:%s' % (a_type, a_id), +            } + +    def _real_extract(self, url): +        channel_id = self._match_id(url) +        entries = OnDemandPagedList( +            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE) + +        return { +            '_type': 'playlist', +            'id': channel_id, +            'entries': entries, +        } diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2e8c71508..041ca83d8 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -163,7 +163,10 @@ def parseOpts(overrideArguments=None):      general.add_option(          '--ignore-config',          action='store_true', -        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') +        help='Do not read configuration files. ' +        'When given in the global configuration file /etc/youtube-dl.conf: ' +        'Do not read the user configuration in ~/.config/youtube-dl/config ' +        '(%APPDATA%/youtube-dl/config.txt on Windows)')      general.add_option(          '--flat-playlist',          action='store_const', dest='extract_flat', const='in_playlist', diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 09db43611..75c0f7bbe 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):      def run(self, information):          cmd = self.exec_cmd -        if not '{}' in cmd: +        if '{}' not in cmd:              cmd += ' {}'          cmd = cmd.replace('{}', shlex_quote(information['filepath'])) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 9303b8378..965ded4c1 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -37,11 +37,11 @@ class FFmpegPostProcessor(PostProcessor):          if not self._executable:              raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') -        REQUIRED_VERSION = '1.0' +        required_version = '10-0' if self._uses_avconv() else '1.0'          if is_outdated_version( -                self._versions[self._executable], REQUIRED_VERSION): +                self._versions[self._executable], required_version):              warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( -                self._executable, self._executable, REQUIRED_VERSION) +                self._executable, self._executable, required_version)              if self._downloader:                  self._downloader.report_warning(warning) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 4c07a558e..2d2703368 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -79,7 +79,7 @@ def update_self(to_screen, verbose):              to_screen(compat_str(traceback.format_exc()))          to_screen('ERROR: can\'t obtain versions info. Please try again later.')          return -    if not 'signature' in versions_info: +    if 'signature' not in versions_info:          to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')          return      signature = versions_info['signature'] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4d3cbac74..bbe554a65 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -41,6 +41,7 @@ from .compat import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urlparse, +    compat_WINFUNCTYPE,      shlex_quote,  ) @@ -166,7 +167,7 @@ def xpath_text(node, xpath, name=None, fatal=False):          xpath = xpath.encode('ascii')      n = node.find(xpath) -    if n is None: +    if n is None or n.text is None:          if fatal:              name = xpath if name is None else name              raise ExtractorError('Could not find XML element %s' % name) @@ -644,17 +645,19 @@ def parse_iso8601(date_str, delimiter='T'):      return calendar.timegm(dt.timetuple()) -def unified_strdate(date_str): +def unified_strdate(date_str, day_first=True):      """Return a string with the date in the format YYYYMMDD"""      if date_str is None:          return None -      upload_date = None      # Replace commas      date_str = date_str.replace(',', ' ')      # %z (UTC offset) is only supported in python>=3.2      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) +    # Remove AM/PM + timezone +    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) +      format_expressions = [          '%d %B %Y',          '%d %b %Y', @@ -669,7 +672,6 @@ def unified_strdate(date_str):          '%d/%m/%Y',          '%d/%m/%y',          '%Y/%m/%d %H:%M:%S', -        '%d/%m/%Y %H:%M:%S',          '%Y-%m-%d %H:%M:%S',          '%Y-%m-%d %H:%M:%S.%f',          '%d.%m.%Y %H:%M', @@ -681,6 +683,14 @@ def unified_strdate(date_str):          '%Y-%m-%dT%H:%M:%S.%f',          '%Y-%m-%dT%H:%M',      ] +    if day_first: +        format_expressions.extend([ +            '%d/%m/%Y %H:%M:%S', +        ]) +    else: +        format_expressions.extend([ +            '%m/%d/%Y %H:%M:%S', +        ])      for expression in format_expressions:          try:              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -712,8 +722,10 @@ def date_from_str(date_str):      Return a datetime object from a string in the format YYYYMMDD or      (now|today)[+-][0-9](day|week|month|year)(s)?"""      today = datetime.date.today() -    if date_str == 'now'or date_str == 'today': +    if date_str in ('now', 'today'):          return today +    if date_str == 'yesterday': +        return today - datetime.timedelta(days=1)      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)      if match is not None:          sign = match.group('sign') @@ -806,21 +818,21 @@ def _windows_write_string(s, out):      if fileno not in WIN_OUTPUT_IDS:          return False -    GetStdHandle = ctypes.WINFUNCTYPE( +    GetStdHandle = compat_WINFUNCTYPE(          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(          ("GetStdHandle", ctypes.windll.kernel32))      h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) -    WriteConsoleW = ctypes.WINFUNCTYPE( +    WriteConsoleW = compat_WINFUNCTYPE(          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),          ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))      written = ctypes.wintypes.DWORD(0) -    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32)) +    GetFileType = compat_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))      FILE_TYPE_CHAR = 0x0002      FILE_TYPE_REMOTE = 0x8000 -    GetConsoleMode = ctypes.WINFUNCTYPE( +    GetConsoleMode = compat_WINFUNCTYPE(          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,          ctypes.POINTER(ctypes.wintypes.DWORD))(          ("GetConsoleMode", ctypes.windll.kernel32)) @@ -1024,7 +1036,7 @@ def smuggle_url(url, data):  def unsmuggle_url(smug_url, default=None): -    if not '#__youtubedl_smuggle' in smug_url: +    if '#__youtubedl_smuggle' not in smug_url:          return smug_url, default      url, _, sdata = smug_url.rpartition('#')      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0] @@ -1090,11 +1102,14 @@ def parse_filesize(s):      }      units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE) -    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s) +    m = re.match( +        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)      if not m:          return None -    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) +    num_str = m.group('num').replace(',', '.') +    mult = _UNIT_TABLE[m.group('unit')] +    return int(float(num_str) * mult)  def get_term_width(): @@ -1203,18 +1218,29 @@ def parse_duration(s):      m = re.match(          r'''(?ix)T? +        (?: +            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*| +            (?P<only_hours>[0-9.]+)\s*(?:hours?)| +              (?:                  (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?                  (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*              )? -            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s) +            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)? +        )$''', s)      if not m:          return None -    res = int(m.group('secs')) +    res = 0 +    if m.group('only_mins'): +        return float_or_none(m.group('only_mins'), invscale=60) +    if m.group('only_hours'): +        return float_or_none(m.group('only_hours'), invscale=60 * 60) +    if m.group('secs'): +        res += int(m.group('secs'))      if m.group('mins'):          res += int(m.group('mins')) * 60 -        if m.group('hours'): -            res += int(m.group('hours')) * 60 * 60 +    if m.group('hours'): +        res += int(m.group('hours')) * 60 * 60      if m.group('ms'):          res += float(m.group('ms'))      return res @@ -1488,7 +1514,7 @@ def limit_length(s, length):  def version_tuple(v): -    return [int(e) for e in v.split('.')] +    return tuple(int(e) for e in re.split(r'[-.]', v))  def is_outdated_version(version, limit, assume_new=True): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 61902a8cc..7289ea4d2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2014.12.03' +__version__ = '2014.12.12.1' | 
