diff --git a/ArchieCore/Commands/Command.py b/ArchieCore/Commands/Command.py index c7c6316..c0c4906 100644 --- a/ArchieCore/Commands/Command.py +++ b/ArchieCore/Commands/Command.py @@ -9,9 +9,9 @@ class Command(ABC): patterns: list[Pattern] start: Callable - def __init__(self, name, patterns = [], primary = True): + def __init__(self, name: str, patterns: list[str] = [], primary: bool = True): self._name = name - self._patterns = patterns + self._patterns = [Pattern(pattern) for pattern in patterns] self.primary = primary from .CommandsManager import CommandsManager diff --git a/ArchieCore/Commands/CommandsManager.py b/ArchieCore/Commands/CommandsManager.py index 57f5dfa..f3a006f 100644 --- a/ArchieCore/Commands/CommandsManager.py +++ b/ArchieCore/Commands/CommandsManager.py @@ -13,7 +13,7 @@ class SearchResult: class CommandsManager: allCommands: list[Command] = [] - QA: Command + QA: Command = None def __new__(cls): # Singleton if not hasattr(cls, 'instance'): @@ -21,15 +21,18 @@ class CommandsManager: return cls.instance def search(self, string: str, commands: list[Command]) -> list[SearchResult]: - string = string.lower() results: list[SearchResult] = [] - acstring = ACString(string) + acstring = ACString(string.lower()) # find command obj by pattern for command in commands: for pattern in command.patterns: - if groupdict := pattern.match(string): - parameters: dict[str: ACObject] = {'string': acstring,} + groupdict = pattern.match(acstring) + + if groupdict != None: + + parameters: dict[str: ACObject] = {'string': acstring} + for key, value in groupdict.items(): name, typeName = key.split(':') ACType: Type[ACObject] = CommandsManager.classFromString(typeName) @@ -40,7 +43,9 @@ class CommandsManager: results.append(SearchResult(command, parameters)) if results: return results - else: return [SearchResult(self.QA, {'string': acstring,}),] + elif qa := self.QA: return [SearchResult(qa, {'string': acstring,}),] + + return [] def append(self, command): if hasattr(self, command.name): diff --git a/ArchieCore/Pattern/Pattern.py b/ArchieCore/Pattern/Pattern.py index d098b5a..b8f1aca 100644 --- a/ArchieCore/Pattern/Pattern.py +++ b/ArchieCore/Pattern/Pattern.py @@ -21,7 +21,9 @@ class Pattern: # find and transform arguments like $name:Type argumentRegex = re.compile(r'\$[:word:]:[:word:]') - while match := re.search(argumentRegex, pattern)[0]: + reMatch = re.search(argumentRegex, pattern) + while reMatch: + match = reMatch.pop(0) arg: str = match[1:] argName, argTypeName = arg.split(':') argType: Type[ACObject] = classFromString(argTypeName) @@ -30,7 +32,7 @@ class Pattern: return re.compile(pattern) - def match(self, string: str) -> Optional[dict[str, str]]: - if match := re.search(self.compiled, string): + def match(self, string: ACString) -> Optional[dict[str, str]]: + if match := re.search(self.compiled, string.value): return match.groupdict() return None diff --git a/Controls/Control.py b/Controls/Control.py index b9881a1..7699a72 100644 --- a/Controls/Control.py +++ b/Controls/Control.py @@ -10,3 +10,7 @@ class Control(Singleton): @abstractmethod def start(self): pass + + @abstractmethod + def stop(self): + pass diff --git a/Controls/VoiceAssistant/VoiceAssistant.py b/Controls/VoiceAssistant/VoiceAssistant.py index c20174a..ed31a03 100644 --- a/Controls/VoiceAssistant/VoiceAssistant.py +++ b/Controls/VoiceAssistant/VoiceAssistant.py @@ -1,99 +1,96 @@ #!/usr/local/bin/python3.8 +from typing import Optional + import os -from ..Control import Control -from General import SpeechRecognition, Text2Speech -from ArchieCore import CommandsManager + import config +from ..Control import Control +from General import SpeechRecognizer, Text2Speech +from ArchieCore import CommandsManager, Command, Response, ResponseAction, ThreadData + +''' +TODO: async +self.check_threads() +self.report() +''' class VoiceAssistant(Control): commandsManager = CommandsManager() - listener = SpeechRecognition.SpeechToText() - voice = Text2Speech.Engine() - threads = [] - reports = [] - memory = [] - voids = 0 + speechRecognizer = SpeechRecognizer() + voice = Text2Speech.Engine() - lastClapTime = 0 - doubleClap = False + commandsContext: list[list[Command]] = [] + threads: list[ThreadData] = [] + reports: list[Response] = [] + memory: list[Response] = [] + + voids: int = 0 + lastClapTime: float = 0 + doubleClap: bool = False def __init__(self): pass def start(self): - self.listener.listen_noise() - os.system('clear') + self.commandsContext = [self.commandsManager.allCommands,] + self.speechRecognizer.didReceivePartialResult = lambda string: self.speechRecognizerReceivePartialResult(string) + self.speechRecognizer.didReceiveFinalResult = lambda string: self.speechRecognizerReceiveFinalResult(string) + self.speechRecognizer.startListening() - while True: - if self.voids >= 3: - self.voids = 0 - if config.double_clap_activation: - print('\nSleep (-_-)zzZZ\n') - sleep() + def stop(self): + self.speechRecognizer.stopListening() - print('\nYou: ', end='') - speech = self.listener.listen() - print(speech.get('text') or '', end='') + def speechRecognizerReceivePartialResult(self, result: str): + print(f'\rYou: \x1B[3m{result}\x1B[0m', end = '') - while True: - if speech['status'] == 'error': - break - if speech['status'] == 'void': - self.voids += 1 - break - text = speech['text'] + def speechRecognizerReceiveFinalResult(self, result: str): + print(f'\rYou: {result}') - for result in self.commandsManager.search(text, self.commandsManager.allCommands): - try: response = result.command.start(result.parameters) - except: break + currentContext = self.commandsContext[0] if self.commandsContext else None - self.reply(response) - self.check_threads() - self.report() + while self.commandsContext: + if searchResults := self.commandsManager.search(string = result, commands = currentContext): + for searchResult in searchResults: + commandResponse = searchResult.command.start(params = searchResult.parameters) + self.parse(commandResponse) - if response.callback: - speech = recognize(response.callback, {}) - else: - break + match commandResponse.action: + case ResponseAction.popContext: + self.commandsContext.pop(0) + case ResponseAction.popToRootContext: + self.commandsContext = [self.commandsManager.allCommands,] + break + case ResponseAction.sleep: + self.stopListening() + case ResponseAction.repeatLastAnswer: + if self.memory: + previousResponse = self.memory[-1] + self.reply(previousResponse) + break + else: + currentContext = self.commandsContext.pop(0) + else: + self.commandsContext.append(self.commandsManager.allCommands) - def recognize(self, callback, params): - print('\nYou: ', end='') - speech = self.listener.listen() - if speech['status'] in ['error', 'void']: - return speech - text = speech['text'] - print(text, end='') + def parse(self, response): + self.reply(response) + if response.thread: # add background thread to list + self.threads.append(response.thread) + if response.context: # insert context if exist + self.commandsContext.insert(0, response.context) + self.memory.append(response) - while True: - self.check_threads() - if not callback: break - - self.memory.insert(0, { - 'text': text, - 'cmd': cmd, - 'response': response, - }) - - speech = recognize(response.callback, params) - if callback.once: break - - return speech + def reply(self, response): + if response.text: # print answer + print('\nArchie: '+response.text) + if response.voice: # say answer + self.voice.generate(response.voice).speak() def report(self): for response in self.reports: - if response.voice: - self.voice.generate(response.voice).speak() - time.sleep(2) + self.reply(response) self.reports = [] - def reply(self, response): - if response.text: # print answer - print('\nArchie: '+response.text) - if response.voice: # say answer - self.voice.generate(response.voice).speak() - if response.thread: # add background thread to stack - self.threads.append(response.thread) - def check_threads(self): for thread in self.threads: if not thread['finish_event'].is_set(): continue diff --git a/Features/QA/QA.py b/Features/QA/QA.py index 585f44c..b8942ad 100644 --- a/Features/QA/QA.py +++ b/Features/QA/QA.py @@ -74,4 +74,5 @@ def qa_start(params): voice = text = search or random.choice(['Не совсем понимаю, о чём вы.', 'Вот эта последняя фраза мне не ясна.', 'А вот это не совсем понятно.', 'Можете сказать то же самое другими словами?', 'Вот сейчас я совсем вас не понимаю.', 'Попробуйте выразить свою мысль по-другому',]) return Response(text = text, voice = voice) -CommandsManager.QA = qa_start +CommandsManager().QA = qa_start +print(CommandsManager().QA, 'CommandsManager Sets QA') diff --git a/General/SpeechRecognition/SpeechRecognition.py b/General/SpeechRecognition/SpeechRecognition.py index e015972..542ef5d 100644 --- a/General/SpeechRecognition/SpeechRecognition.py +++ b/General/SpeechRecognition/SpeechRecognition.py @@ -1,46 +1,55 @@ -import speech_recognition as sr +from typing import Callable +import os, sys +import json +import queue + +import sounddevice +import vosk + import config -#r = sr.Recognizer() -#m = sr.Microphone(device_index=config.device_index) +vosk.SetLogLevel(-1) -class SpeechToText: - def __init__(self, device = config.device_index, language = config.language_code): - self.device = device - self.language = language - self.m = sr.Microphone(device_index = self.device) - self.r = sr.Recognizer() - self.r.pause_threshold = config.pause_threshold - self.r.energy_threshold = config.energy_threshold - self.r.dynamic_energy_threshold = config.dynamic_energy_threshold - self.r.non_speaking_duration = config.non_speaking_duration +class SpeechRecognizer: + didReceivePartialResult: Callable[[str], None] = lambda self, _: None + didReceiveFinalResult: Callable[[str], None] = lambda self, _: None - def listen(self): - try: - with self.m as source: - audio = self.r.listen(source) - except: - return '' - try: - responce = {'text': self.r.recognize_google(audio, language = self.language).lower(), 'status': 'ok'} - except sr.UnknownValueError: - responce = {'text': None, 'status': 'void'} - except sr.RequestError: - responce = {'text': None, 'status': 'error'} - return responce + _isListening = False - def recognize(self, speech): - with sr.AudioFile(speech.getPath()) as source: - audio = r.record(source) - try: - return r.recognize_google(audio) - except: - return '' + audioQueue = queue.Queue() + model = vosk.Model(config.vosk_model) - def listen_noise(self): - with self.m as source: - self.r.adjust_for_ambient_noise(source) + samplerate = int(sounddevice.query_devices(kind = 'input')['default_samplerate']) + blocksize = 8000 + dtype = 'int16' + channels = 1 + kaldiRecognizer = vosk.KaldiRecognizer(model, samplerate) - def set_device(self, index): - self.device = 1 - self.m = sr.Microphone(device_index = self.device) + def audioInputCallback(self, indata, frames, time, status): + self.audioQueue.put(bytes(indata)) + + def stopListening(self): + self._isListening = False + + def startListening(self): + self._isListening = True + + callback = lambda indata, frames, time, status: self.audioInputCallback(indata, frames, time, status) + kwargs = { + 'samplerate': self.samplerate, + 'blocksize': self.blocksize, + 'dtype': self.dtype, + 'channels': self.channels, + 'callback': callback + } + + with sounddevice.RawInputStream(**kwargs): + while self._isListening: + data = self.audioQueue.get() + + if self.kaldiRecognizer.AcceptWaveform(data): + result = json.loads(self.kaldiRecognizer.Result()) + self.didReceiveFinalResult(result['text']) + else: + result = json.loads(self.kaldiRecognizer.PartialResult()) + self.didReceivePartialResult(result['partial']) diff --git a/General/SpeechRecognition/__init__.py b/General/SpeechRecognition/__init__.py index 11ff965..d919a98 100644 --- a/General/SpeechRecognition/__init__.py +++ b/General/SpeechRecognition/__init__.py @@ -1 +1 @@ -from .SpeechRecognition import * +from .SpeechRecognition import SpeechRecognizer diff --git a/config.example.py b/config.example.py index 22963c0..da350aa 100644 --- a/config.example.py +++ b/config.example.py @@ -7,14 +7,7 @@ goole_tts_json_key = path+'google-cloud-text-to-speech-private-key.json' db_name = 'archie.db' -language_code = 'ru-RU' -device_index = 1 -voice_volume = 1 - -energy_threshold = 2000 -dynamic_energy_threshold = True -pause_threshold = 1 -non_speaking_duration = 1 +vosk_model = 'model-small-rus' # from alphacephei.com/vosk/models double_clap_activation = False diff --git a/dependences.txt b/dependences.txt index 31c92e8..5cc4ce0 100644 --- a/dependences.txt +++ b/dependences.txt @@ -1,11 +1,17 @@ Python 3.10 -pip install SpeechRecognition + +pip install sounddevice +pip install vosk +# download model from https://alphacephei.com/vosk/models + +pip install pyaudio # if instalation fails, try install from .whl (https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio) pip install google-cloud-texttospeech -pip install pygame + +pip install PyTelegramBotApi + pip install bs4 pip install wikipedia + pip install xlrd pip install xlwt pip install xlutils -pip install pyaudio # if instalation fails, try install from .whl (https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio) -pip install pip install PyTelegramBotApi diff --git a/model-small-rus/README b/model-small-rus/README new file mode 100644 index 0000000..f5d35ea --- /dev/null +++ b/model-small-rus/README @@ -0,0 +1,8 @@ +Small Russian model for Vosk (Android, RPi, other small devices) + +%WER 22.71 [ 9092 / 40042, 1124 ins, 1536 del, 6432 sub ] exp/chain_a/tdnn/decode_test_audiobooks_look_fast/wer_10_0.0 +%WER 11.79 [ 5940 / 50394, 894 ins, 832 del, 4214 sub ] exp/chain_a/tdnn/decode_test_golos_crowd_look_fast/wer_11_0.0 +%WER 21.34 [ 1789 / 8382, 173 ins, 440 del, 1176 sub ] exp/chain_a/tdnn/decode_test_golos_farfield_look_fast/wer_10_0.0 +%WER 29.89 [ 5579 / 18666, 476 ins, 1550 del, 3553 sub ] exp/chain_a/tdnn/decode_test_sova_devices_look_fast/wer_10_0.0 +%WER 31.97 [ 13588 / 42496, 1013 ins, 3640 del, 8935 sub ] exp/chain_a/tdnn/decode_test_youtube_look_fast/wer_9_0.0 + diff --git a/model-small-rus/am/final.mdl b/model-small-rus/am/final.mdl new file mode 100644 index 0000000..8978dba Binary files /dev/null and b/model-small-rus/am/final.mdl differ diff --git a/model-small-rus/conf/mfcc.conf b/model-small-rus/conf/mfcc.conf new file mode 100644 index 0000000..eaa40c5 --- /dev/null +++ b/model-small-rus/conf/mfcc.conf @@ -0,0 +1,7 @@ +--sample-frequency=16000 +--use-energy=false +--num-mel-bins=40 +--num-ceps=40 +--low-freq=20 +--high-freq=7600 +--allow-downsample=true diff --git a/model-small-rus/conf/model.conf b/model-small-rus/conf/model.conf new file mode 100644 index 0000000..64bc89e --- /dev/null +++ b/model-small-rus/conf/model.conf @@ -0,0 +1,10 @@ +--min-active=200 +--max-active=3000 +--beam=10.0 +--lattice-beam=2.0 +--acoustic-scale=1.0 +--frame-subsampling-factor=3 +--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10 +--endpoint.rule2.min-trailing-silence=0.5 +--endpoint.rule3.min-trailing-silence=1.0 +--endpoint.rule4.min-trailing-silence=2.0 diff --git a/model-small-rus/graph/Gr.fst b/model-small-rus/graph/Gr.fst new file mode 100644 index 0000000..3952cc6 Binary files /dev/null and b/model-small-rus/graph/Gr.fst differ diff --git a/model-small-rus/graph/HCLr.fst b/model-small-rus/graph/HCLr.fst new file mode 100644 index 0000000..373f12f Binary files /dev/null and b/model-small-rus/graph/HCLr.fst differ diff --git a/model-small-rus/graph/disambig_tid.int b/model-small-rus/graph/disambig_tid.int new file mode 100644 index 0000000..8294520 --- /dev/null +++ b/model-small-rus/graph/disambig_tid.int @@ -0,0 +1,5 @@ +9855 +9856 +9857 +9858 +9859 diff --git a/model-small-rus/graph/phones/word_boundary.int b/model-small-rus/graph/phones/word_boundary.int new file mode 100644 index 0000000..f4a3008 --- /dev/null +++ b/model-small-rus/graph/phones/word_boundary.int @@ -0,0 +1,202 @@ +1 nonword +2 begin +3 end +4 internal +5 singleton +6 nonword +7 begin +8 end +9 internal +10 singleton +11 begin +12 end +13 internal +14 singleton +15 begin +16 end +17 internal +18 singleton +19 begin +20 end +21 internal +22 singleton +23 begin +24 end +25 internal +26 singleton +27 begin +28 end +29 internal +30 singleton +31 begin +32 end +33 internal +34 singleton +35 begin +36 end +37 internal +38 singleton +39 begin +40 end +41 internal +42 singleton +43 begin +44 end +45 internal +46 singleton +47 begin +48 end +49 internal +50 singleton +51 begin +52 end +53 internal +54 singleton +55 begin +56 end +57 internal +58 singleton +59 begin +60 end +61 internal +62 singleton +63 begin +64 end +65 internal +66 singleton +67 begin +68 end +69 internal +70 singleton +71 begin +72 end +73 internal +74 singleton +75 begin +76 end +77 internal +78 singleton +79 begin +80 end +81 internal +82 singleton +83 begin +84 end +85 internal +86 singleton +87 begin +88 end +89 internal +90 singleton +91 begin +92 end +93 internal +94 singleton +95 begin +96 end +97 internal +98 singleton +99 begin +100 end +101 internal +102 singleton +103 begin +104 end +105 internal +106 singleton +107 begin +108 end +109 internal +110 singleton +111 begin +112 end +113 internal +114 singleton +115 begin +116 end +117 internal +118 singleton +119 begin +120 end +121 internal +122 singleton +123 begin +124 end +125 internal +126 singleton +127 begin +128 end +129 internal +130 singleton +131 begin +132 end +133 internal +134 singleton +135 begin +136 end +137 internal +138 singleton +139 begin +140 end +141 internal +142 singleton +143 begin +144 end +145 internal +146 singleton +147 begin +148 end +149 internal +150 singleton +151 begin +152 end +153 internal +154 singleton +155 begin +156 end +157 internal +158 singleton +159 begin +160 end +161 internal +162 singleton +163 begin +164 end +165 internal +166 singleton +167 begin +168 end +169 internal +170 singleton +171 begin +172 end +173 internal +174 singleton +175 begin +176 end +177 internal +178 singleton +179 begin +180 end +181 internal +182 singleton +183 begin +184 end +185 internal +186 singleton +187 begin +188 end +189 internal +190 singleton +191 begin +192 end +193 internal +194 singleton +195 begin +196 end +197 internal +198 singleton +199 begin +200 end +201 internal +202 singleton diff --git a/model-small-rus/ivector/final.dubm b/model-small-rus/ivector/final.dubm new file mode 100644 index 0000000..4166b20 Binary files /dev/null and b/model-small-rus/ivector/final.dubm differ diff --git a/model-small-rus/ivector/final.ie b/model-small-rus/ivector/final.ie new file mode 100644 index 0000000..f256f7a Binary files /dev/null and b/model-small-rus/ivector/final.ie differ diff --git a/model-small-rus/ivector/final.mat b/model-small-rus/ivector/final.mat new file mode 100644 index 0000000..a6dbb02 Binary files /dev/null and b/model-small-rus/ivector/final.mat differ diff --git a/model-small-rus/ivector/global_cmvn.stats b/model-small-rus/ivector/global_cmvn.stats new file mode 100644 index 0000000..4be171c --- /dev/null +++ b/model-small-rus/ivector/global_cmvn.stats @@ -0,0 +1,3 @@ + [ + 8.330133e+10 -4.600894e+09 -2.394861e+09 2.127165e+09 -9.355799e+09 -9.378007e+09 -1.302309e+10 -9.460417e+09 -9.260028e+09 -4.58608e+09 -5.287111e+09 -1.972033e+09 -6.090821e+09 -1.336419e+09 -5.214569e+09 -2.321841e+09 -3.889789e+09 -1.060202e+09 -2.065653e+09 -2.684904e+08 -7.4007e+08 -4587485 -1.315853e+08 -8597548 2.599227e+08 7.408538e+07 5.505751e+08 -1.161846e+07 5.138103e+08 -1.828159e+08 4.251498e+08 -2.901496e+07 6.469246e+08 2.489644e+08 6.289868e+08 2.490337e+08 3.38884e+08 -1.788837e+08 -2.536016e+08 -1.591728e+08 8.388078e+08 + 8.660994e+12 4.637783e+11 3.366465e+11 4.467952e+11 5.094759e+11 5.179353e+11 6.145244e+11 4.970492e+11 5.014889e+11 4.027981e+11 3.937422e+11 3.602942e+11 3.162307e+11 2.40687e+11 2.267307e+11 1.563018e+11 1.341105e+11 8.535779e+10 6.12398e+10 3.207774e+10 1.737325e+10 5.704115e+09 7.980573e+08 2.168777e+08 2.763352e+09 6.859176e+09 1.214891e+10 1.604714e+10 2.005353e+10 2.240119e+10 2.366007e+10 2.300222e+10 2.406182e+10 2.354406e+10 2.098983e+10 1.619869e+10 1.491578e+10 1.224871e+10 9.502735e+09 6.517532e+09 0 ] diff --git a/model-small-rus/ivector/online_cmvn.conf b/model-small-rus/ivector/online_cmvn.conf new file mode 100644 index 0000000..7748a4a --- /dev/null +++ b/model-small-rus/ivector/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/model-small-rus/ivector/splice.conf b/model-small-rus/ivector/splice.conf new file mode 100644 index 0000000..960cd2e --- /dev/null +++ b/model-small-rus/ivector/splice.conf @@ -0,0 +1,2 @@ +--left-context=3 +--right-context=3 diff --git a/start.py b/start.py index 96fa72b..0a95ad7 100644 --- a/start.py +++ b/start.py @@ -7,9 +7,9 @@ import Controls def main(): controls = [ Controls.VoiceAssistant(), - Controls.TelegramBot(), - Controls.RemoteControl(), - Controls.Django(), + #Controls.TelegramBot(), + #Controls.RemoteControl(), + #Controls.Django(), ] processes = []