diff options
author | Kai Sommerfeld <kai.sommerfeld@gmx.com> | 2022-05-10 12:01:24 +0200 |
---|---|---|
committer | Kai Sommerfeld <kai.sommerfeld@gmx.com> | 2022-05-12 18:40:57 +0200 |
commit | 5d4d46887e010a86f7c857c42d5a0b6495a1d6bc (patch) | |
tree | 3f154ea9e9ce754d2a3dd1e5c11ad9c11bc1f9ab | |
parent | 6f69d363d0dee6f608e0a650b9c821bfb286424b (diff) |
[macOS][iOS] Add speech recognition service implementation.
-rw-r--r-- | cmake/scripts/darwin_embedded/ArchSetup.cmake | 5 | ||||
-rw-r--r-- | cmake/scripts/osx/ArchSetup.cmake | 3 | ||||
-rw-r--r-- | cmake/treedata/darwin_embedded/ios/ios.txt | 1 | ||||
-rw-r--r-- | cmake/treedata/darwin_embedded/subdirs.txt | 1 | ||||
-rw-r--r-- | cmake/treedata/darwin_embedded/tvos/tvos.txt | 1 | ||||
-rw-r--r-- | cmake/treedata/osx/subdirs.txt | 2 | ||||
-rw-r--r-- | xbmc/platform/darwin/speech/CMakeLists.txt | 5 | ||||
-rw-r--r-- | xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h | 31 | ||||
-rw-r--r-- | xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm | 329 |
9 files changed, 375 insertions, 3 deletions
diff --git a/cmake/scripts/darwin_embedded/ArchSetup.cmake b/cmake/scripts/darwin_embedded/ArchSetup.cmake index 8368413b12..0730c4567a 100644 --- a/cmake/scripts/darwin_embedded/ArchSetup.cmake +++ b/cmake/scripts/darwin_embedded/ArchSetup.cmake @@ -44,6 +44,11 @@ list(APPEND DEPLIBS "-framework CoreFoundation" "-framework CoreVideo" "-framework VideoToolbox" "-lresolv" "-ObjC" "-framework AVKit" "-framework GameController") +# Speech not available on tvOS +if(NOT CORE_PLATFORM_NAME_LC STREQUAL tvos) + list(APPEND DEPLIBS "-framework Speech") +endif() + set(ENABLE_OPTICAL OFF CACHE BOOL "" FORCE) set(CMAKE_XCODE_ATTRIBUTE_INLINES_ARE_PRIVATE_EXTERN OFF) set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN OFF) diff --git a/cmake/scripts/osx/ArchSetup.cmake b/cmake/scripts/osx/ArchSetup.cmake index 4e89214f40..4f43c1e1aa 100644 --- a/cmake/scripts/osx/ArchSetup.cmake +++ b/cmake/scripts/osx/ArchSetup.cmake @@ -46,7 +46,8 @@ list(APPEND DEPLIBS "-framework DiskArbitration" "-framework IOKit" "-framework CoreAudio" "-framework AudioToolbox" "-framework CoreGraphics" "-framework CoreMedia" "-framework VideoToolbox" "-framework Security" - "-framework GameController") + "-framework GameController" "-framework Speech" + "-framework AVFoundation") if(ARCH STREQUAL aarch64) set(CMAKE_OSX_DEPLOYMENT_TARGET 11.0) diff --git a/cmake/treedata/darwin_embedded/ios/ios.txt b/cmake/treedata/darwin_embedded/ios/ios.txt index f5d13e3a4e..b5a5dcf1f9 100644 --- a/cmake/treedata/darwin_embedded/ios/ios.txt +++ b/cmake/treedata/darwin_embedded/ios/ios.txt @@ -1,2 +1,3 @@ xbmc/platform/darwin/ios platform/ios +xbmc/platform/darwin/speech platform/darwin/speech xbmc/windowing/ios windowing/ios diff --git a/cmake/treedata/darwin_embedded/subdirs.txt b/cmake/treedata/darwin_embedded/subdirs.txt index 31cd9b9c09..52892f47c2 100644 --- a/cmake/treedata/darwin_embedded/subdirs.txt +++ b/cmake/treedata/darwin_embedded/subdirs.txt @@ -2,7 +2,6 @@ xbmc/cores/RetroPlayer/process/ios cores/RetroPlayer/process/ios xbmc/cores/VideoPlayer/Process/ios cores/VideoPlayer/Process/ios xbmc/input/touch input/touch xbmc/input/touch/generic input/touch/generic -xbmc/platform/common/speech platform/common/speech xbmc/platform/darwin platform/darwin xbmc/platform/darwin/ios-common platform/ios-common xbmc/platform/darwin/ios-common/network platform/ios-common/network diff --git a/cmake/treedata/darwin_embedded/tvos/tvos.txt b/cmake/treedata/darwin_embedded/tvos/tvos.txt index 1335e6694f..da2e107e71 100644 --- a/cmake/treedata/darwin_embedded/tvos/tvos.txt +++ b/cmake/treedata/darwin_embedded/tvos/tvos.txt @@ -1,3 +1,4 @@ +xbmc/platform/common/speech platform/common/speech xbmc/platform/darwin/tvos platform/tvos xbmc/platform/darwin/tvos/filesystem platform/darwin/tvos/filesystem xbmc/platform/darwin/tvos/input platform/darwin/tvos/input diff --git a/cmake/treedata/osx/subdirs.txt b/cmake/treedata/osx/subdirs.txt index 0c1ba717eb..b56f268cae 100644 --- a/cmake/treedata/osx/subdirs.txt +++ b/cmake/treedata/osx/subdirs.txt @@ -1,8 +1,8 @@ xbmc/cores/RetroPlayer/process/osx cores/RetroPlayer/process/osx xbmc/cores/VideoPlayer/Process/osx cores/VideoPlayer/Process/osx -xbmc/platform/common/speech platform/common/speech xbmc/platform/darwin platform/darwin xbmc/platform/darwin/network platform/darwin/network +xbmc/platform/darwin/speech platform/darwin/speech xbmc/platform/darwin/osx platform/osx xbmc/platform/darwin/osx/network platform/darwin/osx/network xbmc/platform/darwin/osx/peripherals platform/osx/peripherals diff --git a/xbmc/platform/darwin/speech/CMakeLists.txt b/xbmc/platform/darwin/speech/CMakeLists.txt new file mode 100644 index 0000000000..03eb9e12d1 --- /dev/null +++ b/xbmc/platform/darwin/speech/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SOURCES SpeechRecognitionDarwin.mm) + +set(HEADERS SpeechRecognitionDarwin.h) + +core_add_library(platform_darwin_speech) diff --git a/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h new file mode 100644 index 0000000000..48a03a4b4b --- /dev/null +++ b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2012-2022 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#pragma once + +#include "speech/ISpeechRecognition.h" + +#include <memory> + +struct SpeechRecognitionDarwinImpl; + +class CSpeechRecognitionDarwin : public speech::ISpeechRecognition +{ +public: + CSpeechRecognitionDarwin(); + ~CSpeechRecognitionDarwin() override; + + // ISpeechRecognition implementation + void StartSpeechRecognition( + const std::shared_ptr<speech::ISpeechRecognitionListener>& listener) override; + + void OnRecognitionDone(speech::ISpeechRecognitionListener* listener); + +private: + std::unique_ptr<SpeechRecognitionDarwinImpl> m_impl; +}; diff --git a/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm new file mode 100644 index 0000000000..85051be6a5 --- /dev/null +++ b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2012-2022 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "SpeechRecognitionDarwin.h" + +#include "LangInfo.h" +#include "speech/ISpeechRecognitionListener.h" +#include "speech/SpeechRecognitionErrors.h" +#include "threads/CriticalSection.h" +#include "utils/log.h" + +#include <algorithm> +#include <mutex> +#include <vector> + +#import <AVFoundation/AVFoundation.h> +#import <Speech/Speech.h> + +API_AVAILABLE(macos(10.15), ios(10.0)) +API_UNAVAILABLE(tvos) @interface SpeechRecognitionImpl : NSObject<SFSpeechRecognizerDelegate> + +@property(nonatomic, strong) SFSpeechRecognizer* speechRecognizer; +@property(nonatomic, strong) SFSpeechAudioBufferRecognitionRequest* recognitionRequest; +@property(nonatomic, strong) SFSpeechRecognitionTask* recognitionTask; +@property(nonatomic, strong) AVAudioEngine* audioEngine; +@property(nonatomic, strong) NSTimer* talkTimeoutTimer; +@property(nonatomic, copy) NSString* text; + +// C++ members +@property(nonatomic) speech::ISpeechRecognitionListener* listener; +@property(nonatomic) CSpeechRecognitionDarwin* owner; + +@end + +@implementation SpeechRecognitionImpl + +- (void)startSpeechRecognition:(speech::ISpeechRecognitionListener*)listener + owner:(CSpeechRecognitionDarwin*)owner +{ + self.listener = listener; + self.owner = owner; + + // Get current Kodi GUI locale and use it for speech recognition. + std::string kodiLocale = g_langInfo.GetLocale().ToShortString(); + std::replace(kodiLocale.begin(), kodiLocale.end(), '_', '-'); + NSString* locale = @(kodiLocale.c_str()); + + self.speechRecognizer = + [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:locale]]; + if (self.speechRecognizer == nil) + { + CLog::LogF(LOGWARNING, + "Speech recognizer not available for user's current locale. Trying en-US"); + self.speechRecognizer = + [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:@"en-US"]]; + } + if (self.speechRecognizer == nil) + { + [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE + logMessage:@"Unable to create an SFSpeechRecognizer instance"]; + return; + } + + [self.speechRecognizer setDelegate:self]; + + self.audioEngine = [[AVAudioEngine alloc] init]; + if (self.audioEngine == nil) + { + [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE + logMessage:@"Unable to create an AVAudioEngine instance"]; + return; + } + + [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus authStatus) { + switch (authStatus) + { + case SFSpeechRecognizerAuthorizationStatusAuthorized: // User gave access to speech recognition + break; + + case SFSpeechRecognizerAuthorizationStatusDenied: // User denied access to speech recognition + case SFSpeechRecognizerAuthorizationStatusRestricted: // Speech recognition restricted on this device + case SFSpeechRecognizerAuthorizationStatusNotDetermined: // Speech recognition not yet authorized + default: + [self onError:speech::RecognitionError::INSUFFICIENT_PERMISSIONS + logMessage:@"Insufficient permissions"]; + break; + } + }]; + + listener->OnReadyForSpeech(); + + self.recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init]; + if (self.recognitionRequest == nil) + { + [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE + logMessage:@"Unable to create an SFSpeechAudioBufferRecognitionRequest instance"]; + return; + } + + self.recognitionRequest.shouldReportPartialResults = YES; + + AVAudioNode* inputNode = [self.audioEngine inputNode]; + if (inputNode == nil) + { + [self onError:speech::RecognitionError::AUDIO + logMessage:@"Audio engine instance has no input node"]; + return; + } + + [self.recognitionTask cancel]; + self.recognitionTask = nil; + + // stop recognition after 10 secs if the user did not start talking + self.talkTimeoutTimer = [NSTimer scheduledTimerWithTimeInterval:10.0 + target:self + selector:@selector(onTalkTimeout:) + userInfo:nil + repeats:NO]; + + __typeof__(self) __weak welf = self; + self.recognitionTask = [self.speechRecognizer + recognitionTaskWithRequest:self.recognitionRequest + resultHandler:^(SFSpeechRecognitionResult* _Nullable result, + NSError* _Nullable error) { + __typeof__(self) sself = welf; + if (!sself) // the object (self) is dead; it makes no sense to continue + return; + + BOOL isFinal = NO; + + // reset talk timeout timer to fire 3 secs after user stopped talking + [sself.talkTimeoutTimer invalidate]; + sself.talkTimeoutTimer = + [NSTimer scheduledTimerWithTimeInterval:3.0 + target:sself + selector:@selector(onTalkTimeout:) + userInfo:nil + repeats:NO]; + if (result != nil) + { + isFinal = result.isFinal; + sself.text = result.bestTranscription.formattedString; + listener->OnResults({[sself.text UTF8String]}); + } + + if (error == nil && !isFinal) + return; + + [sself.audioEngine stop]; + [inputNode removeTapOnBus:0]; + + if (error != nil && !isFinal) + { + int recognitionError = speech::RecognitionError::UNKNOWN; + + if (sself.text == nil) + { + recognitionError = speech::RecognitionError::NO_MATCH; + } + else if ([error.domain isEqualToString:@"kLSRErrorDomain"]) + { + switch (error.code) + { + case 102: // Assets are not installed + case 201: // Siri or Dictation is disabled + case 300: // Failed to initialize recognizer + recognitionError = speech::RecognitionError::SERVICE_NOT_AVAILABLE; + break; + + case 301: // Request was canceled + break; + } + } + else if ([error.domain isEqualToString:@"kAFAssistantErrorDomain"]) + { + switch (error.code) + { + case 1100: // Trying to start recognition while an earlier instance is still active + recognitionError = speech::RecognitionError::RECOGNIZER_BUSY; + break; + + case 1110: // Failed to recognize any speech + recognitionError = speech::RecognitionError::NO_MATCH; + break; + + case 1700: // Request is not authorized + recognitionError = speech::RecognitionError::INSUFFICIENT_PERMISSIONS; + break; + + case 203: // Failure occurred during speech recognition + case 1101: // Connection to speech process was invalidated + case 1107: // Connection to speech process was interrupted + break; + } + } + + NSString* logMsg = + [NSString stringWithFormat:@"code='%ld' description='%@'", + (long)error.code, error.localizedDescription]; + [sself onError:recognitionError logMessage:logMsg]; + } + else + { + owner->OnRecognitionDone(listener); + } + + sself.recognitionRequest = nil; + sself.recognitionTask = nil; + sself.text = nil; + [sself.talkTimeoutTimer invalidate]; + sself.talkTimeoutTimer = nil; + }]; + + [inputNode installTapOnBus:0 + bufferSize:4096 + format:[inputNode outputFormatForBus:0] + block:^(AVAudioPCMBuffer* buffer, AVAudioTime* when) { + [self.recognitionRequest appendAudioPCMBuffer:buffer]; + }]; + + [self.audioEngine prepare]; + + NSError* outError; + [self.audioEngine startAndReturnError:&outError]; + + if (outError != nil) + { + [self onError:speech::RecognitionError::AUDIO + logMessage:[NSString stringWithFormat: + @"Audio engine couldn't start because of an error. code='%ld'", + outError.code]]; + } +} + +- (void)speechRecognizer:(SFSpeechRecognizer*)speechRecognizer availabilityDidChange:(BOOL)available +{ + if (available) + return; + + [self.recognitionTask cancel]; + self.recognitionTask = nil; + + if (self.listener && self.owner) + { + [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE + logMessage:@"Service currently not available. Try again later"]; + } +} + +- (void)onTalkTimeout:(NSTimer*)timer +{ + [self.recognitionRequest endAudio]; +} + +- (void)onError:(int)recognitionError logMessage:(NSString*)logMessage +{ + CLog::Log(LOGERROR, "Speech recognition error: {}", [logMessage UTF8String]); + self.listener->OnError(recognitionError); + self.owner->OnRecognitionDone(self.listener); +} + +@end + +std::shared_ptr<speech::ISpeechRecognition> speech::ISpeechRecognition::CreateInstance() +{ + return std::make_shared<CSpeechRecognitionDarwin>(); +} + +struct API_AVAILABLE(macos(10.15), ios(10.0)) API_UNAVAILABLE(tvos) SpeechRecognitionDarwinImpl +{ + CCriticalSection m_listenersMutex; + std::vector<std::shared_ptr<speech::ISpeechRecognitionListener>> m_listeners; + SpeechRecognitionImpl* m_recognizer{nil}; +}; + +CSpeechRecognitionDarwin::CSpeechRecognitionDarwin() : m_impl(new SpeechRecognitionDarwinImpl) +{ +} + +void CSpeechRecognitionDarwin::StartSpeechRecognition( + const std::shared_ptr<speech::ISpeechRecognitionListener>& listener) +{ + // Speech: macOS 10.15+ iOS 10.0+. Currently not available on tvOS! + if (@available(macOS 10.15, iOS 10.0, *)) + { + if (!m_impl->m_recognizer) + m_impl->m_recognizer = [[SpeechRecognitionImpl alloc] init]; + + if (m_impl->m_recognizer == nil) + { + CLog::LogF(LOGERROR, "Unable to create a SpeechRecognitionImpl instance"); + return; + } + + std::unique_lock<CCriticalSection> lock(m_impl->m_listenersMutex); + m_impl->m_listeners.emplace_back( + listener); // we need to ensure the listener lives as long as we do + lock.unlock(); + + [m_impl->m_recognizer startSpeechRecognition:listener.get() owner:this]; + } + else + { + CLog::LogF(LOGERROR, "Operating system does not match the minimum required version"); + listener->OnError(speech::RecognitionError::SERVICE_NOT_AVAILABLE); + } +} + +CSpeechRecognitionDarwin::~CSpeechRecognitionDarwin() +{ +} + +void CSpeechRecognitionDarwin::OnRecognitionDone(speech::ISpeechRecognitionListener* listener) +{ + std::unique_lock<CCriticalSection> lock(m_impl->m_listenersMutex); + for (auto it = m_impl->m_listeners.begin(); it != m_impl->m_listeners.end(); ++it) + { + if ((*it).get() == listener) + { + m_impl->m_listeners.erase(it); + break; + } + } +} |