diff options
author Kai Sommerfeld <kai.sommerfeld@gmx.com> 2022-05-10 12:01:24 +0200
committer Kai Sommerfeld <kai.sommerfeld@gmx.com> 2022-05-12 18:40:57 +0200
commit 5d4d46887e010a86f7c857c42d5a0b6495a1d6bc (patch)
parent 6f69d363d0dee6f608e0a650b9c821bfb286424b (diff)
[macOS][iOS] Add speech recognition service implementation.
9 files changed, 375 insertions, 3 deletions
diff --git a/cmake/scripts/darwin_embedded/ArchSetup.cmake b/cmake/scripts/darwin_embedded/ArchSetup.cmake
index 8368413b12..0730c4567a 100644
--- a/cmake/scripts/darwin_embedded/ArchSetup.cmake
+++ b/cmake/scripts/darwin_embedded/ArchSetup.cmake
@@ -44,6 +44,11 @@ list(APPEND DEPLIBS "-framework CoreFoundation" "-framework CoreVideo"
"-framework VideoToolbox" "-lresolv" "-ObjC"
"-framework AVKit" "-framework GameController")
+# Speech not available on tvOS, so only link the framework for the other
+# embedded Darwin platforms.
+if(NOT CORE_PLATFORM_NAME_LC STREQUAL tvos)
+  list(APPEND DEPLIBS "-framework Speech")
+endif()
diff --git a/cmake/scripts/osx/ArchSetup.cmake b/cmake/scripts/osx/ArchSetup.cmake
index 4e89214f40..4f43c1e1aa 100644
--- a/cmake/scripts/osx/ArchSetup.cmake
+++ b/cmake/scripts/osx/ArchSetup.cmake
@@ -46,7 +46,8 @@ list(APPEND DEPLIBS "-framework DiskArbitration" "-framework IOKit"
"-framework CoreAudio" "-framework AudioToolbox"
"-framework CoreGraphics" "-framework CoreMedia"
"-framework VideoToolbox" "-framework Security"
- "-framework GameController")
+ "-framework GameController" "-framework Speech"
+ "-framework AVFoundation")
if(ARCH STREQUAL aarch64)
diff --git a/cmake/treedata/darwin_embedded/ios/ios.txt b/cmake/treedata/darwin_embedded/ios/ios.txt
index f5d13e3a4e..b5a5dcf1f9 100644
--- a/cmake/treedata/darwin_embedded/ios/ios.txt
+++ b/cmake/treedata/darwin_embedded/ios/ios.txt
@@ -1,2 +1,3 @@
xbmc/platform/darwin/ios platform/ios
+xbmc/platform/darwin/speech platform/darwin/speech
xbmc/windowing/ios windowing/ios
diff --git a/cmake/treedata/darwin_embedded/subdirs.txt b/cmake/treedata/darwin_embedded/subdirs.txt
index 31cd9b9c09..52892f47c2 100644
--- a/cmake/treedata/darwin_embedded/subdirs.txt
+++ b/cmake/treedata/darwin_embedded/subdirs.txt
@@ -2,7 +2,6 @@ xbmc/cores/RetroPlayer/process/ios cores/RetroPlayer/process/ios
xbmc/cores/VideoPlayer/Process/ios cores/VideoPlayer/Process/ios
xbmc/input/touch input/touch
xbmc/input/touch/generic input/touch/generic
-xbmc/platform/common/speech platform/common/speech
xbmc/platform/darwin platform/darwin
xbmc/platform/darwin/ios-common platform/ios-common
xbmc/platform/darwin/ios-common/network platform/ios-common/network
diff --git a/cmake/treedata/darwin_embedded/tvos/tvos.txt b/cmake/treedata/darwin_embedded/tvos/tvos.txt
index 1335e6694f..da2e107e71 100644
--- a/cmake/treedata/darwin_embedded/tvos/tvos.txt
+++ b/cmake/treedata/darwin_embedded/tvos/tvos.txt
@@ -1,3 +1,4 @@
+xbmc/platform/common/speech platform/common/speech
xbmc/platform/darwin/tvos platform/tvos
xbmc/platform/darwin/tvos/filesystem platform/darwin/tvos/filesystem
xbmc/platform/darwin/tvos/input platform/darwin/tvos/input
diff --git a/cmake/treedata/osx/subdirs.txt b/cmake/treedata/osx/subdirs.txt
index 0c1ba717eb..b56f268cae 100644
--- a/cmake/treedata/osx/subdirs.txt
+++ b/cmake/treedata/osx/subdirs.txt
@@ -1,8 +1,8 @@
xbmc/cores/RetroPlayer/process/osx cores/RetroPlayer/process/osx
xbmc/cores/VideoPlayer/Process/osx cores/VideoPlayer/Process/osx
-xbmc/platform/common/speech platform/common/speech
xbmc/platform/darwin platform/darwin
xbmc/platform/darwin/network platform/darwin/network
+xbmc/platform/darwin/speech platform/darwin/speech
xbmc/platform/darwin/osx platform/osx
xbmc/platform/darwin/osx/network platform/darwin/osx/network
xbmc/platform/darwin/osx/peripherals platform/osx/peripherals
diff --git a/xbmc/platform/darwin/speech/CMakeLists.txt b/xbmc/platform/darwin/speech/CMakeLists.txt
new file mode 100644
index 0000000000..03eb9e12d1
--- /dev/null
+++ b/xbmc/platform/darwin/speech/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(SOURCES SpeechRecognitionDarwin.mm) # Objective-C++ implementation of the Darwin speech recognition service
+set(HEADERS SpeechRecognitionDarwin.h) # C++ interface of the service
diff --git a/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h
new file mode 100644
index 0000000000..48a03a4b4b
--- /dev/null
+++ b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2012-2022 Team Kodi
+ * This file is part of Kodi - https://kodi.tv
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * See LICENSES/README.md for more information.
+ */
+#pragma once
+#include "speech/ISpeechRecognition.h"
+#include <memory>
+struct SpeechRecognitionDarwinImpl; // defined in SpeechRecognitionDarwin.mm; keeps Objective-C types out of this header
+class CSpeechRecognitionDarwin : public speech::ISpeechRecognition // speech recognition service for macOS and iOS
+ CSpeechRecognitionDarwin();
+ ~CSpeechRecognitionDarwin() override;
+ // ISpeechRecognition implementation
+ void StartSpeechRecognition(
+ const std::shared_ptr<speech::ISpeechRecognitionListener>& listener) override; // keeps the listener alive and starts a recognition session for it
+ void OnRecognitionDone(speech::ISpeechRecognitionListener* listener); // called when a session ended; drops the kept-alive reference to the given listener
+ std::unique_ptr<SpeechRecognitionDarwinImpl> m_impl; // listener list, its mutex and the Objective-C recognizer object
diff --git a/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm
new file mode 100644
index 0000000000..85051be6a5
--- /dev/null
+++ b/xbmc/platform/darwin/speech/SpeechRecognitionDarwin.mm
@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2012-2022 Team Kodi
+ * This file is part of Kodi - https://kodi.tv
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * See LICENSES/README.md for more information.
+ */
+#include "SpeechRecognitionDarwin.h"
+#include "LangInfo.h"
+#include "speech/ISpeechRecognitionListener.h"
+#include "speech/SpeechRecognitionErrors.h"
+#include "threads/CriticalSection.h"
+#include "utils/log.h"
+#include <algorithm>
+#include <mutex>
+#include <vector>
+#import <AVFoundation/AVFoundation.h>
+#import <Speech/Speech.h>
+API_AVAILABLE(macos(10.15), ios(10.0))
+API_UNAVAILABLE(tvos) @interface SpeechRecognitionImpl : NSObject<SFSpeechRecognizerDelegate> // drives one recognition session; also acts as the recognizer's delegate
+@property(nonatomic, strong) SFSpeechRecognizer* speechRecognizer; // created per session for the Kodi GUI locale, with en-US as fallback
+@property(nonatomic, strong) SFSpeechAudioBufferRecognitionRequest* recognitionRequest; // fed with microphone buffers by the input node tap
+@property(nonatomic, strong) SFSpeechRecognitionTask* recognitionTask; // running task; cancelled when the service becomes unavailable
+@property(nonatomic, strong) AVAudioEngine* audioEngine; // provides the audio input node
+@property(nonatomic, strong) NSTimer* talkTimeoutTimer; // 10 s before the first result, re-armed to 3 s on every result callback
+@property(nonatomic, copy) NSString* text; // most recent best transcription of the running session
+// C++ members (raw pointers, not retained by this object)
+@property(nonatomic) speech::ISpeechRecognitionListener* listener; // receives OnReadyForSpeech/OnResults/OnError
+@property(nonatomic) CSpeechRecognitionDarwin* owner; // told via OnRecognitionDone when the session ended
+@implementation SpeechRecognitionImpl
+// Starts one speech recognition session.
+//
+// Creates the recognizer for the current Kodi GUI locale (falling back to
+// en-US), requests authorization, installs a tap on the audio input node that
+// feeds microphone buffers into the recognition request and starts the audio
+// engine. Partial and final transcriptions are reported via
+// listener->OnResults(); failures are reported via -onError:logMessage:.
+//
+// listener: receiver of the recognition callbacks (raw pointer, not retained).
+// owner:    notified via OnRecognitionDone() once the session has ended.
+- (void)startSpeechRecognition:(speech::ISpeechRecognitionListener*)listener
+                         owner:(CSpeechRecognitionDarwin*)owner
+{
+  self.listener = listener;
+  self.owner = owner;
+  // Get current Kodi GUI locale and use it for speech recognition.
+  std::string kodiLocale = g_langInfo.GetLocale().ToShortString();
+  std::replace(kodiLocale.begin(), kodiLocale.end(), '_', '-');
+  NSString* locale = @(kodiLocale.c_str());
+  self.speechRecognizer =
+      [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:locale]];
+  if (self.speechRecognizer == nil)
+  {
+    CLog::LogF(LOGWARNING,
+               "Speech recognizer not available for user's current locale. Trying en-US");
+    self.speechRecognizer =
+        [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:@"en-US"]];
+  }
+  if (self.speechRecognizer == nil)
+  {
+    [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE
+        logMessage:@"Unable to create an SFSpeechRecognizer instance"];
+    return;
+  }
+  [self.speechRecognizer setDelegate:self];
+  self.audioEngine = [[AVAudioEngine alloc] init];
+  if (self.audioEngine == nil)
+  {
+    [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE
+        logMessage:@"Unable to create an AVAudioEngine instance"];
+    return;
+  }
+  // NOTE(review): the authorization handler is invoked asynchronously, so the
+  // code below does not wait for its outcome and sets up the session anyway.
+  // A denial is reported through -onError:logMessage: - confirm whether the
+  // setup should be deferred until authorization was granted.
+  [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus authStatus) {
+    switch (authStatus)
+    {
+      case SFSpeechRecognizerAuthorizationStatusAuthorized: // User gave access to speech recognition
+        break;
+      case SFSpeechRecognizerAuthorizationStatusDenied: // User denied access to speech recognition
+      case SFSpeechRecognizerAuthorizationStatusRestricted: // Speech recognition restricted on this device
+      case SFSpeechRecognizerAuthorizationStatusNotDetermined: // Speech recognition not yet authorized
+      default:
+        [self onError:speech::RecognitionError::INSUFFICIENT_PERMISSIONS
+            logMessage:@"Insufficient permissions"];
+        break;
+    }
+  }];
+  listener->OnReadyForSpeech();
+  self.recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
+  if (self.recognitionRequest == nil)
+  {
+    [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE
+        logMessage:@"Unable to create an SFSpeechAudioBufferRecognitionRequest instance"];
+    return;
+  }
+  self.recognitionRequest.shouldReportPartialResults = YES;
+  AVAudioNode* inputNode = [self.audioEngine inputNode];
+  if (inputNode == nil)
+  {
+    [self onError:speech::RecognitionError::AUDIO
+        logMessage:@"Audio engine instance has no input node"];
+    return;
+  }
+  // Drop a task that may be left over from a previous session.
+  [self.recognitionTask cancel];
+  self.recognitionTask = nil;
+  // stop recognition after 10 secs if the user did not start talking
+  self.talkTimeoutTimer = [NSTimer scheduledTimerWithTimeInterval:10.0
+                                                           target:self
+                                                         selector:@selector(onTalkTimeout:)
+                                                         userInfo:nil
+                                                          repeats:NO];
+  __typeof__(self) __weak welf = self;
+  self.recognitionTask = [self.speechRecognizer
+      recognitionTaskWithRequest:self.recognitionRequest
+                   resultHandler:^(SFSpeechRecognitionResult* _Nullable result,
+                                   NSError* _Nullable error) {
+                     __typeof__(self) sself = welf;
+                     if (!sself) // the object (self) is dead; it makes no sense to continue
+                       return;
+                     BOOL isFinal = NO;
+                     // reset talk timeout timer to fire 3 secs after user stopped talking
+                     [sself.talkTimeoutTimer invalidate];
+                     sself.talkTimeoutTimer =
+                         [NSTimer scheduledTimerWithTimeInterval:3.0
+                                                          target:sself
+                                                        selector:@selector(onTalkTimeout:)
+                                                        userInfo:nil
+                                                         repeats:NO];
+                     if (result != nil)
+                     {
+                       isFinal = result.isFinal;
+                       sself.text = result.bestTranscription.formattedString;
+                       listener->OnResults({[sself.text UTF8String]});
+                     }
+                     // Partial result without error: keep listening.
+                     if (error == nil && !isFinal)
+                       return;
+                     // Final result or error: the session is over.
+                     [sself.audioEngine stop];
+                     [inputNode removeTapOnBus:0];
+                     if (error != nil && !isFinal)
+                     {
+                       int recognitionError = speech::RecognitionError::UNKNOWN;
+                       if (sself.text == nil)
+                       {
+                         // Nothing was transcribed before the error occurred.
+                         recognitionError = speech::RecognitionError::NO_MATCH;
+                       }
+                       else if ([error.domain isEqualToString:@"kLSRErrorDomain"])
+                       {
+                         switch (error.code)
+                         {
+                           case 102: // Assets are not installed
+                           case 201: // Siri or Dictation is disabled
+                           case 300: // Failed to initialize recognizer
+                             recognitionError = speech::RecognitionError::SERVICE_NOT_AVAILABLE;
+                             break;
+                           case 301: // Request was canceled
+                             break;
+                         }
+                       }
+                       else if ([error.domain isEqualToString:@"kAFAssistantErrorDomain"])
+                       {
+                         switch (error.code)
+                         {
+                           case 1100: // Trying to start recognition while an earlier instance is still active
+                             recognitionError = speech::RecognitionError::RECOGNIZER_BUSY;
+                             break;
+                           case 1110: // Failed to recognize any speech
+                             recognitionError = speech::RecognitionError::NO_MATCH;
+                             break;
+                           case 1700: // Request is not authorized
+                             recognitionError = speech::RecognitionError::INSUFFICIENT_PERMISSIONS;
+                             break;
+                           case 203: // Failure occurred during speech recognition
+                           case 1101: // Connection to speech process was invalidated
+                           case 1107: // Connection to speech process was interrupted
+                             break;
+                         }
+                       }
+                       NSString* logMsg =
+                           [NSString stringWithFormat:@"code='%ld' description='%@'",
+                                                      (long)error.code, error.localizedDescription];
+                       [sself onError:recognitionError logMessage:logMsg];
+                     }
+                     else
+                     {
+                       owner->OnRecognitionDone(listener);
+                     }
+                     sself.recognitionRequest = nil;
+                     sself.recognitionTask = nil;
+                     sself.text = nil;
+                     [sself.talkTimeoutTimer invalidate];
+                     sself.talkTimeoutTimer = nil;
+                   }];
+  // The tap block is retained by the input node, which is reached from self
+  // through the audio engine. Capture self weakly to avoid a retain cycle that
+  // would persist for as long as the tap is installed.
+  [inputNode installTapOnBus:0
+                  bufferSize:4096
+                      format:[inputNode outputFormatForBus:0]
+                       block:^(AVAudioPCMBuffer* buffer, AVAudioTime* when) {
+                         [welf.recognitionRequest appendAudioPCMBuffer:buffer];
+                       }];
+  [self.audioEngine prepare];
+  // Judge success by the return value; the error object is only meaningful
+  // when the call reported a failure.
+  NSError* outError = nil;
+  if (![self.audioEngine startAndReturnError:&outError])
+  {
+    [self onError:speech::RecognitionError::AUDIO
+        logMessage:[NSString stringWithFormat:
+                                 @"Audio engine couldn't start because of an error. code='%ld'",
+                                 (long)outError.code]];
+  }
+}
+// SFSpeechRecognizerDelegate callback. When the recognizer reports that it is
+// no longer available, the running task is cancelled and - if a listener and
+// an owner are set - the failure is reported as SERVICE_NOT_AVAILABLE.
+- (void)speechRecognizer:(SFSpeechRecognizer*)speechRecognizer availabilityDidChange:(BOOL)available
+{
+  if (!available)
+  {
+    [self.recognitionTask cancel];
+    self.recognitionTask = nil;
+    const bool canReport = self.listener && self.owner;
+    if (canReport)
+    {
+      [self onError:speech::RecognitionError::SERVICE_NOT_AVAILABLE
+          logMessage:@"Service currently not available. Try again later"];
+    }
+  }
+}
+- (void)onTalkTimeout:(NSTimer*)timer // fired by talkTimeoutTimer (10 s initially, 3 s after each result callback)
+ [self.recognitionRequest endAudio]; // tell the request that no more audio will be appended
+// Logs the given message, reports recognitionError to the listener and tells
+// the owner that the session is done.
+//
+// The stored listener/owner pointers are cleared before the callbacks run:
+// OnRecognitionDone() removes the owner's shared_ptr to the listener from its
+// list, so the raw pointer must not be used again by a later error report or
+// by -speechRecognizer:availabilityDidChange:. A call without a listener set
+// only logs.
+- (void)onError:(int)recognitionError logMessage:(NSString*)logMessage
+{
+  CLog::Log(LOGERROR, "Speech recognition error: {}", [logMessage UTF8String]);
+  speech::ISpeechRecognitionListener* listener = self.listener;
+  CSpeechRecognitionDarwin* owner = self.owner;
+  self.listener = nullptr;
+  self.owner = nullptr;
+  if (listener == nullptr)
+    return;
+  listener->OnError(recognitionError);
+  if (owner != nullptr)
+    owner->OnRecognitionDone(listener);
+}
+std::shared_ptr<speech::ISpeechRecognition> speech::ISpeechRecognition::CreateInstance() // factory of the platform-independent interface
+ return std::make_shared<CSpeechRecognitionDarwin>(); // the Darwin implementation backs the interface in this build
+struct API_AVAILABLE(macos(10.15), ios(10.0)) API_UNAVAILABLE(tvos) SpeechRecognitionDarwinImpl // private state of CSpeechRecognitionDarwin
+ CCriticalSection m_listenersMutex; // guards m_listeners
+ std::vector<std::shared_ptr<speech::ISpeechRecognitionListener>> m_listeners; // listeners kept alive until OnRecognitionDone() removes them
+ SpeechRecognitionImpl* m_recognizer{nil}; // created on the first StartSpeechRecognition() call and reused afterwards
+CSpeechRecognitionDarwin::CSpeechRecognitionDarwin() : m_impl(new SpeechRecognitionDarwinImpl) // allocates the implementation state; the recognizer itself is created on first use
+// Starts a speech recognition session for the given listener.
+//
+// The listener is stored until OnRecognitionDone() is called for it, so that
+// the raw pointer handed to the Objective-C recognizer stays valid. Every
+// failure that prevents the session from starting is reported to the listener
+// as SERVICE_NOT_AVAILABLE. A null listener is rejected with a log message.
+void CSpeechRecognitionDarwin::StartSpeechRecognition(
+    const std::shared_ptr<speech::ISpeechRecognitionListener>& listener)
+{
+  if (!listener)
+  {
+    CLog::LogF(LOGERROR, "No listener given");
+    return;
+  }
+  // Speech: macOS 10.15+ iOS 10.0+. Currently not available on tvOS!
+  if (@available(macOS 10.15, iOS 10.0, *))
+  {
+    if (!m_impl->m_recognizer)
+      m_impl->m_recognizer = [[SpeechRecognitionImpl alloc] init];
+    if (m_impl->m_recognizer == nil)
+    {
+      CLog::LogF(LOGERROR, "Unable to create a SpeechRecognitionImpl instance");
+      // Do not leave the caller waiting for a callback that never comes.
+      listener->OnError(speech::RecognitionError::SERVICE_NOT_AVAILABLE);
+      return;
+    }
+    {
+      // we need to ensure the listener lives as long as we do
+      std::unique_lock<CCriticalSection> lock(m_impl->m_listenersMutex);
+      m_impl->m_listeners.emplace_back(listener);
+    }
+    // Called without the lock held: the recognizer may call back into
+    // OnRecognitionDone(), which takes the same mutex.
+    [m_impl->m_recognizer startSpeechRecognition:listener.get() owner:this];
+  }
+  else
+  {
+    CLog::LogF(LOGERROR, "Operating system does not match the minimum required version");
+    listener->OnError(speech::RecognitionError::SERVICE_NOT_AVAILABLE);
+  }
+}
+// Removes the first stored entry that refers to the given listener, releasing
+// the reference taken in StartSpeechRecognition(). Unknown listeners are
+// ignored.
+void CSpeechRecognitionDarwin::OnRecognitionDone(speech::ISpeechRecognitionListener* listener)
+{
+  std::unique_lock<CCriticalSection> guard(m_impl->m_listenersMutex);
+  auto& registered = m_impl->m_listeners;
+  const auto match = std::find_if(
+      registered.begin(), registered.end(),
+      [listener](const std::shared_ptr<speech::ISpeechRecognitionListener>& entry)
+      { return entry.get() == listener; });
+  if (match != registered.end())
+    registered.erase(match);
+}