diff options
-rw-r--r-- | .github/ISSUE_TEMPLATE.md | 6 | ||||
-rw-r--r-- | CONTRIBUTING.md | 4 | ||||
-rw-r--r-- | docs/supportedsites.md | 3 | ||||
-rw-r--r-- | youtube_dl/downloader/rtsp.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/aol.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/arte.py | 20 | ||||
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/karrierevideos.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/laola1tv.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/lecture2go.py | 17 | ||||
-rw-r--r-- | youtube_dl/extractor/metacritic.py | 14 | ||||
-rw-r--r-- | youtube_dl/extractor/ministrygrid.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/mixcloud.py | 254 | ||||
-rw-r--r-- | youtube_dl/extractor/tdslifeway.py | 33 | ||||
-rw-r--r-- | youtube_dl/extractor/ubu.py | 57 | ||||
-rw-r--r-- | youtube_dl/extractor/wayofthemaster.py | 52 | ||||
-rw-r--r-- | youtube_dl/extractor/xboxclips.py | 2 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
19 files changed, 359 insertions, 173 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bf9494646..caed64e38 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.04.06 +[debug] youtube-dl version 2016.04.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0df6193fb..c83b8655a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -140,14 +140,14 @@ After you have ensured this site is distributing it's content legally, you can f # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. 
Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. 
For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: - $ git add youtube_dl/extractor/__init__.py + $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d6ee8476b..51a6b5609 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -115,6 +115,7 @@ - **Cinemassacre** - **Clipfish** - **cliphunter** + - **ClipRs** - **Clipsyndicate** - **cloudtime**: CloudTime - **Cloudy** @@ -286,7 +287,6 @@ - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV - **Izlesene** - - **JadoreCettePub** - **JeuxVideo** - **Jove** - **jpopsuki.tv** @@ -484,6 +484,7 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PressTV** - **PrimeShareTV** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py index 3eb29526c..939358b2a 100644 --- a/youtube_dl/downloader/rtsp.py +++ b/youtube_dl/downloader/rtsp.py @@ -27,6 +27,8 @@ class RtspFD(FileDownloader): self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. 
Please install any.') return False + self._debug_cmd(args) + retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index d4801a25b..24df8fe93 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -12,9 +12,10 @@ from ..utils import ( class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[^/?-]+)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)' _TESTS = [{ + # video with 5min ID 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { @@ -31,6 +32,7 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { + # video with vidible ID 'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183', 'info_dict': { 'id': '5707d6b8e4b090497b04f706', @@ -45,6 +47,12 @@ class AolIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'only_matching': True, + }, { + 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index f042d9163..a9e3266dc 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor): # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:creative' - _VALID_URL = 
r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', @@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE): 'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', 'upload_date': '20140805', } + }, { + 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', + 'only_matching': True, }] +class ArteTVInfoIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:info' + _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', + 'info_dict': { + 'id': '067528-000-A', + 'ext': 'mp4', + 'title': 'Service civique, un cache misère ?', + 'upload_date': '20160403', + }, + } + + class ArteTVFutureIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c718cf385..f0781fc27 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor): ext = 'flv' if ext is None: ext = determine_ext(url) - tbr = int_or_none(rend.get('encodingRate'), 1000), + tbr = int_or_none(rend.get('encodingRate'), 1000) a_format = { 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), 'url': url, diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 04c6508f1..d00445b3c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -46,6 +46,7 @@ from .arte import ( ArteTVPlus7IE, ArteTVCreativeIE, ArteTVConcertIE, + ArteTVInfoIE, ArteTVFutureIE, ArteTVCinemaIE, ArteTVDDCIE, @@ -411,7 
+412,12 @@ from .minoto import MinotoIE from .miomio import MioMioIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE -from .mixcloud import MixcloudIE +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, + MixcloudStreamIE, +) from .mlb import MLBIE from .mnet import MnetIE from .mpora import MporaIE @@ -729,6 +735,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .tapely import TapelyIE from .tass import TassIE +from .tdslifeway import TDSLifewayIE from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, @@ -831,7 +838,6 @@ from .twitter import ( TwitterIE, TwitterAmplifyIE, ) -from .ubu import UbuIE from .udemy import ( UdemyIE, UdemyCourseIE @@ -916,7 +922,6 @@ from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import WashingtonPostIE from .wat import WatIE -from .wayofthemaster import WayOfTheMasterIE from .wdr import ( WDRIE, WDRMobileIE, diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index 2cb04e533..c05263e61 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor): video_id = self._search_regex( r'/config/video/(.+?)\.xml', webpage, 'video id') + # Server returns malformed headers + # Force Accept-Encoding: * to prevent gzipped results playlist = self._download_xml( 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id, - video_id, transform_source=fix_xml_ampersands) + video_id, transform_source=fix_xml_ampersands, + headers={'Accept-Encoding': '*'}) NS_MAP = { 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d4fbafece..2fab38079 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -63,6 +63,7 @@ class Laola1TvIE(InfoExtractor): 'params': { 
'skip_download': True, }, + 'skip': 'This live stream has already finished.', }] def _real_extract(self, url): @@ -74,6 +75,9 @@ class Laola1TvIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + if 'Dieser Livestream ist bereits beendet.' in webpage: + raise ExtractorError('This live stream has already finished.', expected=True) + iframe_url = self._search_regex( r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"', webpage, 'iframe url') diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dl/extractor/lecture2go.py index 40a3d2346..81b5d41be 100644 --- a/youtube_dl/extractor/lecture2go.py +++ b/youtube_dl/extractor/lecture2go.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + determine_protocol, parse_duration, int_or_none, ) @@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor): 'md5': 'ac02b570883020d208d405d5a3fd2f7f', 'info_dict': { 'id': '17473', - 'ext': 'flv', + 'ext': 'mp4', 'title': '2 - Endliche Automaten und reguläre Sprachen', 'creator': 'Frank Heitmann', 'duration': 5220, + }, + 'params': { + # m3u8 download + 'skip_download': True, } } @@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor): title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title') formats = [] - for url in set(re.findall(r'"src","([^"]+)"', webpage)): + for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)): ext = determine_ext(url) + protocol = determine_protocol({'url': url}) if ext == 'f4m': - formats.extend(self._extract_f4m_formats(url, video_id)) + formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds')) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(url, video_id)) + formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls')) else: + if protocol == 'rtmp': + continue # XXX: currently broken formats.append({ + 'format_id': protocol, 'url': url, }) diff --git a/youtube_dl/extractor/metacritic.py 
b/youtube_dl/extractor/metacritic.py index e30320569..444ec0310 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -11,7 +11,7 @@ from ..utils import ( class MetacriticIE(InfoExtractor): _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', 'info_dict': { 'id': '3698222', @@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor): 'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.', 'duration': 221, }, - } + 'skip': 'Not providing trailers anymore', + }, { + 'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315', + 'info_dict': { + 'id': '5740315', + 'ext': 'mp4', + 'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler', + 'description': 'In the final episode of the season, all hell breaks loose. 
Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).', + 'duration': 114, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index 949ad11db..e48eba3fa 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -1,8 +1,5 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor): 'id': '3453494717001', 'ext': 'mp4', 'title': 'The Gospel by Numbers', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20140410', 'description': 'Coming soon from T4G 2014!', - 'uploader': 'LifeWay Christian Resources (MG)', + 'uploader_id': '2034960640001', + 'timestamp': 1397145591, + }, + 'params': { + # m3u8 download + 'skip_download': True, }, + 'add_ie': ['TDSLifeway'], } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - portlets_json = self._search_regex( - r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') - portlets = json.loads(portlets_json) + portlets = self._parse_json(self._search_regex( + r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'), + video_id) pl_id = self._search_regex( - r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id') + r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id') for i, portlet in enumerate(portlets): portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet) @@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor): r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe', default=None) if video_iframe_url: - surl = smuggle_url( - video_iframe_url, {'force_videoid': video_id}) 
- return { - '_type': 'url', - 'id': video_id, - 'url': surl, - } + return self.url_result( + smuggle_url(video_iframe_url, {'force_videoid': video_id}), + video_id=video_id) raise ExtractorError('Could not find video iframe in any portlets') diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 9638cc9e6..483f6925f 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,27 +1,35 @@ from __future__ import unicode_literals +import base64 +import functools +import itertools import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_chr, + compat_ord, + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import ( + clean_html, ExtractorError, - HEADRequest, - NO_DEFAULT, + OnDemandPagedList, parse_count, str_to_int, ) class MixcloudIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' IE_NAME = 'mixcloud' _TESTS = [{ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 'info_dict': { 'id': 'dholbach-cryptkeeper', - 'ext': 'mp3', + 'ext': 'm4a', 'title': 'Cryptkeeper', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'uploader': 'Daniel Holbach', @@ -39,22 +47,22 @@ class MixcloudIE(InfoExtractor): 'description': 'md5:2b8aec6adce69f9d41724647c65875e8', 'uploader': 'Gilles Peterson Worldwide', 'uploader_id': 'gillespeterson', - 'thumbnail': 're:https?://.*/images/', + 'thumbnail': 're:https?://.*', 'view_count': int, 'like_count': int, }, }] - def _check_url(self, url, track_id, ext): - try: - # We only want to know if the request succeed - # don't download the whole file - self._request_webpage( - HEADRequest(url), track_id, - 'Trying %s URL' % ext) - return True - except 
ExtractorError: - return False + # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js + @staticmethod + def _decrypt_play_info(play_info): + KEY = 'pleasedontdownloadourmusictheartistswontgetpaid' + + play_info = base64.b64decode(play_info.encode('ascii')) + + return ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)])) + for idx, ch in enumerate(play_info)]) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -68,19 +76,15 @@ class MixcloudIE(InfoExtractor): r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', default=None) - preview_url = self._search_regex( - r'\s(?:data-preview-url|m-preview)="([^"]+)"', - webpage, 'preview url', default=None if message else NO_DEFAULT) + encrypted_play_info = self._search_regex( + r'm-play-info="([^"]+)"', webpage, 'play info') + play_info = self._parse_json( + self._decrypt_play_info(encrypted_play_info), track_id) - if message: + if message and 'stream_url' not in play_info: raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url) - song_url = song_url.replace('/previews/', '/c/originals/') - if not self._check_url(song_url, track_id, 'mp3'): - song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') - if not self._check_url(song_url, track_id, 'm4a'): - raise ExtractorError('Unable to extract track url') + song_url = play_info['stream_url'] PREFIX = ( r'm-play-on-spacebar[^>]+' @@ -115,3 +119,201 @@ class MixcloudIE(InfoExtractor): 'view_count': view_count, 'like_count': like_count, } + + +class MixcloudPlaylistBaseIE(InfoExtractor): + _PAGE_SIZE = 24 + + def _find_urls_in_page(self, page): + for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page): + yield self.url_result( + compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)), + MixcloudIE.ie_key()) + + def 
_fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None): + real_page_number = real_page_number or current_page + 1 + return self._download_webpage( + 'https://www.mixcloud.com/%s/' % path, video_id, + note='Download %s (page %d)' % (page_name, current_page + 1), + errnote='Unable to download %s' % page_name, + query={'page': real_page_number, 'list': 'main', '_ajax': '1'}, + headers={'X-Requested-With': 'XMLHttpRequest'}) + + def _tracks_page_func(self, page, video_id, page_name, current_page): + resp = self._fetch_tracks_page(page, video_id, page_name, current_page) + + for item in self._find_urls_in_page(resp): + yield item + + def _get_user_description(self, page_content): + return self._html_search_regex( + r'<div[^>]+class="description-text"[^>]*>(.+?)</div>', + page_content, 'user description', fatal=False) + + +class MixcloudUserIE(MixcloudPlaylistBaseIE): + _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$' + IE_NAME = 'mixcloud:user' + + _TESTS = [{ + 'url': 'http://www.mixcloud.com/dholbach/', + 'info_dict': { + 'id': 'dholbach_uploads', + 'title': 'Daniel Holbach (uploads)', + 'description': 'md5:327af72d1efeb404a8216c27240d1370', + }, + 'playlist_mincount': 11, + }, { + 'url': 'http://www.mixcloud.com/dholbach/uploads/', + 'info_dict': { + 'id': 'dholbach_uploads', + 'title': 'Daniel Holbach (uploads)', + 'description': 'md5:327af72d1efeb404a8216c27240d1370', + }, + 'playlist_mincount': 11, + }, { + 'url': 'http://www.mixcloud.com/dholbach/favorites/', + 'info_dict': { + 'id': 'dholbach_favorites', + 'title': 'Daniel Holbach (favorites)', + 'description': 'md5:327af72d1efeb404a8216c27240d1370', + }, + 'params': { + 'playlist_items': '1-100', + }, + 'playlist_mincount': 100, + }, { + 'url': 'http://www.mixcloud.com/dholbach/listens/', + 'info_dict': { + 'id': 'dholbach_listens', + 'title': 'Daniel Holbach (listens)', + 'description': 
'md5:327af72d1efeb404a8216c27240d1370', + }, + 'params': { + 'playlist_items': '1-100', + }, + 'playlist_mincount': 100, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('user') + list_type = mobj.group('type') + + # if only a profile URL was supplied, default to download all uploads + if list_type is None: + list_type = 'uploads' + + video_id = '%s_%s' % (user_id, list_type) + + profile = self._download_webpage( + 'https://www.mixcloud.com/%s/' % user_id, video_id, + note='Downloading user profile', + errnote='Unable to download user profile') + + username = self._og_search_title(profile) + description = self._get_user_description(profile) + + entries = OnDemandPagedList( + functools.partial( + self._tracks_page_func, + '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type), + self._PAGE_SIZE, use_cache=True) + + return self.playlist_result( + entries, video_id, '%s (%s)' % (username, list_type), description) + + +class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): + _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$' + IE_NAME = 'mixcloud:playlist' + + _TESTS = [{ + 'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/', + 'info_dict': { + 'id': 'RedBullThre3style_tokyo-finalists-2015', + 'title': 'National Champions 2015', + 'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3', + }, + 'playlist_mincount': 16, + }, { + 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/', + 'info_dict': { + 'id': 'maxvibes_jazzcat-on-ness-radio', + 'title': 'Jazzcat on Ness Radio', + 'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263', + }, + 'playlist_mincount': 23 + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('user') + playlist_id = mobj.group('playlist') + video_id = '%s_%s' % (user_id, playlist_id) + + profile = self._download_webpage( + url, user_id, + 
note='Downloading playlist page', + errnote='Unable to download playlist page') + + description = self._get_user_description(profile) + playlist_title = self._html_search_regex( + r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>', + profile, 'playlist title') + + entries = OnDemandPagedList( + functools.partial( + self._tracks_page_func, + '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'), + self._PAGE_SIZE) + + return self.playlist_result(entries, video_id, playlist_title, description) + + +class MixcloudStreamIE(MixcloudPlaylistBaseIE): + _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$' + IE_NAME = 'mixcloud:stream' + + _TEST = { + 'url': 'https://www.mixcloud.com/FirstEar/stream/', + 'info_dict': { + 'id': 'FirstEar', + 'title': 'First Ear', + 'description': 'Curators of good music\nfirstearmusic.com', + }, + 'playlist_mincount': 192, + } + + def _real_extract(self, url): + user_id = self._match_id(url) + + webpage = self._download_webpage(url, user_id) + + entries = [] + prev_page_url = None + + def _handle_page(page): + entries.extend(self._find_urls_in_page(page)) + return self._search_regex( + r'm-next-page-url="([^"]+)"', page, + 'next page URL', default=None) + + next_page_url = _handle_page(webpage) + + for idx in itertools.count(0): + if not next_page_url or prev_page_url == next_page_url: + break + + prev_page_url = next_page_url + current_page = int(self._search_regex( + r'\?page=(\d+)', next_page_url, 'next page number')) + + next_page_url = _handle_page(self._fetch_tracks_page( + '%s/stream' % user_id, user_id, 'stream', idx, + real_page_number=current_page)) + + username = self._og_search_title(webpage) + description = self._get_user_description(webpage) + + return self.playlist_result(entries, user_id, username, description) diff --git a/youtube_dl/extractor/tdslifeway.py b/youtube_dl/extractor/tdslifeway.py new file mode 100644 index 000000000..4d1f5c801 --- /dev/null +++ 
b/youtube_dl/extractor/tdslifeway.py @@ -0,0 +1,33 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TDSLifewayIE(InfoExtractor): + _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html' + + _TEST = { + # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers + 'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F', + 'info_dict': { + 'id': '3453494717001', + 'ext': 'mp4', + 'title': 'The Gospel by Numbers', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20140410', + 'description': 'Coming soon from T4G 2014!', + 'uploader_id': '2034960640001', + 'timestamp': 1397145591, + }, +
'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], + } + + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + brightcove_id = self._match_id(url) + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py deleted file mode 100644 index 1d52cbc98..000000000 --- a/youtube_dl/extractor/ubu.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - qualities, -) - - -class UbuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' - _TEST = { - 'url': 'http://ubu.com/film/her_noise.html', - 'md5': '138d5652618bf0f03878978db9bef1ee', - 'info_dict': { - 'id': 'her_noise', - 'ext': 'm4v', - 'title': 'Her Noise - The Making Of (2007)', - 'duration': 3600, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title') - - duration = int_or_none(self._html_search_regex( - r'Duration: (\d+) minutes', webpage, 'duration', fatal=False), - invscale=60) - - formats = [] - FORMAT_REGEXES = [ - ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"), - ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'), - ] - preference = qualities([fid for fid, _ in FORMAT_REGEXES]) - for format_id, format_regex in FORMAT_REGEXES: - m = re.search(format_regex, webpage) - if m: - formats.append({ - 'url': m.group(1), - 'format_id': format_id, - 'preference': preference(format_id), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'duration': duration, - 'formats': formats, - } diff --git 
a/youtube_dl/extractor/wayofthemaster.py b/youtube_dl/extractor/wayofthemaster.py deleted file mode 100644 index af7bb8b49..000000000 --- a/youtube_dl/extractor/wayofthemaster.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class WayOfTheMasterIE(InfoExtractor): - _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])' - - _TEST = { - 'url': 'http://www.wayofthemaster.com/hbks.shtml', - 'md5': '5316b57487ada8480606a93cb3d18d24', - 'info_dict': { - 'id': 'hbks', - 'ext': 'mp4', - 'title': 'Intelligent Design vs. Evolution', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - title = self._search_regex( - r'<img src="images/title_[^"]+".*?alt="([^"]+)"', - webpage, 'title', default=None) - if title is None: - title = self._html_search_regex( - r'<title>(.*?)</title>', webpage, 'page title') - - url_base = self._search_regex( - r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"', - webpage, 'URL base') - formats = [{ - 'format_id': 'low', - 'quality': 1, - 'url': url_base + '_low.mp4', - }, { - 'format_id': 'high', - 'quality': 2, - 'url': url_base + '_high.mp4', - }] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - } diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py index 236ff403b..b113ab1c4 100644 --- a/youtube_dl/extractor/xboxclips.py +++ b/youtube_dl/extractor/xboxclips.py @@ -12,7 +12,7 @@ from ..utils import ( class XboxClipsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})' _TEST = { - 'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', + 'url': 
'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', 'md5': 'fbe1ec805e920aeb8eced3c3e657df5d', 'info_dict': { 'id': '074a69a9-5faf-46aa-b93b-9909c1720325', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 167b16e24..0618d9a4f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.04.06' +__version__ = '2016.04.13' |