1
0
mirror of https://github.com/janvarev/Irene-Voice-Assistant.git synced 2025-11-23 22:45:08 +02:00

11.1.0 Добавлен плагин нормализации prepare от @Grayen-mail, который умеет обрабатывать допсимволы и английский текст

Он требует библиотеку eng_to_ipa, которая была добавлена в проект.
Плагин нормализации prepare теперь устанавливается по умолчанию при настройке нормализации "default"
This commit is contained in:
Janvarev Vladislav
2025-05-17 13:41:23 +03:00
parent 1132e56df3
commit 4c58cbdb5a
14 changed files with 134273 additions and 3 deletions

View File

@@ -358,6 +358,8 @@ https://github.com/Oknolaz/vasisualy
AlphaCephei за прекрасную библиотеку распознавания Vosk ( https://alphacephei.com/vosk/index.ru ) AlphaCephei за прекрасную библиотеку распознавания Vosk ( https://alphacephei.com/vosk/index.ru )
## Поддержка проекта ## Поддержка проекта
Основная сложность в опенсорс - это не писать код. Писать код интересно. Основная сложность в опенсорс - это не писать код. Писать код интересно.

21
eng_to_ipa/LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Michael Phillips
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

6
eng_to_ipa/__init__.py Normal file
View File

@@ -0,0 +1,6 @@
from .transcribe import *
from .stress import *
from .rhymes import *
from .syllables import *
__all__ = ['transcribe', 'rhymes', 'stress', 'syllables']

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,39 @@
AA vowel
AE vowel
AH vowel
AO vowel
AW vowel
AY vowel
B stop
CH affricate
D stop
DH fricative
EH vowel
ER vowel
EY vowel
F fricative
G stop
HH aspirate
IH vowel
IY vowel
JH affricate
K stop
L liquid
M nasal
N nasal
NG nasal
OW vowel
OY vowel
P stop
R liquid
S fricative
SH fricative
T stop
TH fricative
UH vowel
UW vowel
V fricative
W semivowel
Y semivowel
Z fricative
ZH fricative

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
{"aa": "vowel", "ae": "vowel", "ah": "vowel", "q": "vowel", "+": "vowel", "ao": "vowel", "aw": "vowel", "ay": "vowel", "b": "stop", "ch": "affricate", "d": "stop", "dh": "fricative", "eh": "vowel", "er": "vowel", "ey": "vowel", "f": "fricative", "g": "stop", "hh": "aspirate", "ih": "vowel", "iy": "vowel", "jh": "affricate", "k": "stop", "l": "liquid", "m": "nasal", "n": "nasal", "ng": "nasal", "ow": "vowel", "oy": "vowel", "p": "stop", "r": "liquid", "s": "fricative", "sh": "fricative", "t": "stop", "th": "fricative", "uh": "vowel", "uw": "vowel", "v": "fricative", "w": "semivowel", "y": "semivowel", "z": "fricative", "zh": "fricative"}

42
eng_to_ipa/rhymes.py Normal file
View File

@@ -0,0 +1,42 @@
# Simple rhyming support. Call get_rhymes() on a word to find rhymes from the CMU dictionary.
from eng_to_ipa.transcribe import ModeType, get_cmu, preprocess
def remove_onset(word_in):
    """Drop everything before the primary-stressed phoneme of *word_in*.

    Looks up the first CMU transcription of the word and returns the
    phoneme string from the primary-stress nucleus (digit "1") onward.
    Returns None implicitly when no primary stress is present.
    """
    phonemes = get_cmu([word_in])[0][0].split(" ")
    for idx, phoneme in enumerate(phonemes):
        if "1" in phoneme:
            return ' '.join(phonemes[idx:])
def get_rhymes(word, mode="sql"):
    """Return a sorted list of dictionary words that rhyme with *word*.

    A rhyme is any entry whose phonemes end with the rhyming part of the
    input (primary-stressed nucleus onward) but is neither the same word
    nor an identical full transcription (same word spelled differently).

    :param word: word to rhyme; multi-word input returns one list per word
    :param mode: "sql" queries the SQLite CMU database, "json" scans the
        JSON dictionary
    """
    if len(word.split()) > 1:
        return [get_rhymes(w) for w in word.split()]
    phones = remove_onset(preprocess(word))
    phones_full = get_cmu([preprocess(word)])[0][0]
    asset = ModeType(mode=mode).mode
    if mode == "sql":
        # Parameterized query: the previous version interpolated the
        # user-supplied word straight into the SQL text via str.format.
        asset.execute("SELECT word, phonemes FROM dictionary WHERE phonemes "
                      "LIKE ? AND NOT word=? AND NOT phonemes=?",
                      ("%" + phones, word, phones_full))
        # also don't return results that are the same but spelled differently
        return sorted(list(set([r[0] for r in asset.fetchall()])))
    elif mode == "json":
        r_list = []
        for key, val in asset.items():
            for v in val:
                if v.endswith(phones) and word != key and v != phones_full:
                    r_list.append(key)
        return sorted(set(r_list))
def jhymes(word):
    """Get rhymes with forced JSON mode.

    Convenience wrapper around get_rhymes() that always reads the JSON
    dictionary instead of the SQLite database.
    """
    return get_rhymes(word, mode="json")
if __name__ == "__main__":
    # Demo entry point: print every rhyme found for a sample word.
    sample = "orange"
    for match in get_rhymes(sample):
        print(match)

114
eng_to_ipa/stress.py Normal file
View File

@@ -0,0 +1,114 @@
import os
import re
import json
import eng_to_ipa.syllables as syllables
import logging
def create_phones_json():
    """Creates the phones.json file in the resources directory from the phones.txt source file from CMU"""
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = os.path.join(base_dir, 'resources', 'CMU_source_files',
                               'cmudict-0.7b.phones.txt')
    # source link: http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.phones
    phones_dict = {}
    with open(source_path, encoding="UTF-8") as phones_txt:
        for line in phones_txt.readlines():
            symbol, phone_class = line.split(" ")[0], line.split(" ")[1]
            phones_dict[symbol.lower()] = phone_class.replace("\n", "")
    with open(os.path.join(base_dir, 'resources', 'phones.json'), "w") as phones_json:
        json.dump(phones_dict, phones_json)
def stress_type(stress):
    """Determine the kind of stress that should be evaluated.

    :param stress: one of "primary", "secondary", "both"/"all", "none"
        (case-insensitive), or a falsy value; any other string logs a
        warning and falls back to the default (primary + secondary).
    :return: dict mapping CMU stress digits ("1"/"2") to IPA stress marks
    """
    # Guard falsy input (None / "") before calling .lower(); the previous
    # version raised AttributeError when stress was None.
    if not stress:
        return {}
    stress = stress.lower()
    default = {"1": "ˈ", "2": "ˌ"}
    if stress == "primary":
        return {"1": "ˈ"}
    elif stress == "secondary":
        return {"2": "ˌ"}
    elif stress in ("both", "all"):
        return default
    elif stress == "none":
        return {}
    else:
        logging.warning("WARNING: stress type parameter " + stress + " not recognized.")
        # Use default stress
        return default
# Load the CMU phone-class map (e.g. "aa" -> "vowel") once at import time;
# find_stress consults it to decide where a stress mark attaches.
with open(os.path.join(os.path.abspath(os.path.dirname(__file__)),
                       'resources', 'phones.json'), "r") as phones_json:
    phones = json.load(phones_json)
def find_stress(word, type="all"):
    """Convert stress marking numbers from CMU into actual stress markings.

    Walks the CMU symbols of a multi-syllable word and, on hitting a
    stressed vowel (digit 1/2), backtracks through the symbols already
    emitted to place the IPA stress mark at the syllable onset.

    :param word: the CMU word string to be evaluated for stress markings
    :param type: type of stress to be evaluated (primary, secondary, or both)
    :return: the CMU string with digits replaced by IPA stress marks;
        single-syllable and "__IGNORE__" words pass through (digits stripped
        or untouched, respectively)
    """
    syll_count = syllables.cmu_syllable_count(word)
    # Stress marks only matter for multi-syllable, transcribable words.
    if (not word.startswith("__IGNORE__")) and syll_count > 1:
        symbols = word.split(' ')
        stress_map = stress_type(type)
        new_word = []
        # Onset consonant clusters that keep the stress mark in front of them.
        clusters = ["sp", "st", "sk", "fr", "fl"]
        stop_set = ["nasal", "fricative", "vowel"]  # stop searching for where stress starts if these are encountered
        # for each CMU symbol
        for c in symbols:
            # if the last character is a 1 or 2 (that means it has stress, and we want to evaluate it)
            if c[-1] in stress_map.keys():
                # if the new_word list is empty
                if not new_word:
                    # append to new_word the CMU symbol, replacing numbers with stress marks
                    new_word.append(re.sub(r"\d", "", stress_map[re.findall(r"\d", c)[0]] + c))
                else:
                    stress_mark = stress_map[c[-1]]
                    placed = False
                    hiatus = False
                    new_word = new_word[::-1]  # flip the word and backtrack through symbols
                    for i, sym in enumerate(new_word):
                        # Compare symbols with digits/stress marks stripped.
                        sym = re.sub(r"[0-9ˈˌ]", "", sym)
                        prev_sym = re.sub(r"[0-9ˈˌ]", "", new_word[i-1])
                        # NOTE(review): at i == 0 these index new_word[-1]
                        # (wrap-around); presumably harmless because the
                        # i > 0 guards below — confirm.
                        prev_phone = phones[re.sub(r"[0-9ˈˌ]", "", new_word[i-1])]
                        if phones[sym] in stop_set or (i > 0 and prev_phone == "stop") or sym in ["er", "w", "j"]:
                            if sym + prev_sym in clusters:
                                # Keep the whole onset cluster after the mark.
                                new_word[i] = stress_mark + new_word[i]
                            elif not prev_phone == "vowel" and i > 0:
                                new_word[i-1] = stress_mark + new_word[i-1]
                            else:
                                if phones[sym] == "vowel":
                                    # Adjacent vowels (hiatus): the stressed
                                    # vowel starts its own syllable.
                                    hiatus = True
                                    new_word = [stress_mark + re.sub(r"[0-9ˈˌ]", "", c)] + new_word
                                else:
                                    new_word[i] = stress_mark + new_word[i]
                            placed = True
                            break
                    if not placed:
                        # No onset boundary found: mark the earliest symbol.
                        if new_word:
                            new_word[len(new_word) - 1] = stress_mark + new_word[len(new_word) - 1]
                    new_word = new_word[::-1]
                    if not hiatus:
                        new_word.append(re.sub(r"\d", "", c))
            else:
                if c.startswith("__IGNORE__"):
                    new_word.append(c)
                else:
                    new_word.append(re.sub(r"\d", "", c))
        return ' '.join(new_word)
    else:
        if word.startswith("__IGNORE__"):
            return word
        else:
            return re.sub(r"[0-9]", "", word)
if __name__ == "__main__":
    # create phones dictionary from source if not found in the resources directory
    target = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                          'resources', 'phones.json')
    if not os.path.isfile(target):
        create_phones_json()

38
eng_to_ipa/syllables.py Normal file
View File

@@ -0,0 +1,38 @@
import re
import os
import json
from eng_to_ipa import transcribe
# Phone-class map ("aa" -> "vowel", "b" -> "stop", ...) loaded once at import
# time from the packaged resources; used to detect syllable nuclei.
with open(os.path.join(os.path.abspath(os.path.dirname(__file__)),
                       'resources', 'phones.json'), "r", encoding="UTF-8") as phones_json:
    PHONES = json.load(phones_json)
# list of adjacent vowel symbols that constitute separate nuclei
hiatus = [["er", "iy"], ["iy", "ow"], ["uw", "ow"], ["iy", "ah"], ["iy", "ey"], ["uw", "eh"], ["er", "eh"]]
def cmu_syllable_count(word):
    """Count syllables in a CMU transcription string.

    A syllable nucleus is a vowel not preceded by another vowel, except
    for known hiatus pairs, which count as two nuclei. "__IGNORE__"
    (untranscribable) words count as zero.
    """
    symbols = re.sub(r"\d", "", word).split(' ')
    if "__IGNORE__" in symbols[0]:
        return 0
    nuclei = 0
    for idx, sym in enumerate(symbols):
        prior = symbols[idx - 1]
        prior_class = PHONES[prior]
        if PHONES[sym] == 'vowel':
            if idx == 0 or prior_class != 'vowel':
                nuclei += 1
            elif [prior, sym] in hiatus:
                nuclei += 1
    return nuclei
def syllable_count(word: str, db_type="sql"):
    """Transcribe a regular English word to CMU and count its syllables.

    Multi-word input returns one count per word.
    """
    tokens = word.split()
    if len(tokens) > 1:
        return [syllable_count(w) for w in tokens]
    cmu_entry = transcribe.get_cmu([transcribe.preprocess(word)], db_type=db_type)
    return cmu_syllable_count(cmu_entry[0][0])

225
eng_to_ipa/transcribe.py Normal file
View File

@@ -0,0 +1,225 @@
# -*- coding: utf-8 -*-
import re
from os.path import join, abspath, dirname
import eng_to_ipa.stress as stress
from collections import defaultdict
class ModeType(object):
    """Handle to the CMU pronunciation data in one of two backends.

    After construction, ``self.mode`` is either a sqlite3 cursor
    (``mode="sql"``) or the parsed JSON dictionary (``mode="json"``).
    """

    def __init__(self, mode):
        self.name = mode
        if mode.lower() == "sql":
            import sqlite3
            conn = sqlite3.connect(join(abspath(dirname(__file__)),
                                        "./resources/CMU_dict.db"))
            self.mode = conn.cursor()
        elif mode.lower() == "json":
            import json
            # Context manager closes the file promptly; the previous
            # version leaked the open file handle.
            with open(join(abspath(dirname(__file__)),
                           "../eng_to_ipa/resources/CMU_dict.json"),
                      encoding="UTF-8") as json_file:
                self.mode = json.load(json_file)

    def __str__(self):
        return self.name
def preprocess(words):
    """Lower-case every token and strip surrounding punctuation.

    Returns the cleaned tokens re-joined with single spaces.
    """
    strip_chars = '!"#$%&\'()*+,-./:;<=>/?@[\\]^_`{|}~«» '
    cleaned = (token.strip(strip_chars).lower() for token in words.split())
    return ' '.join(cleaned)
def preserve_punc(words):
    """Split text into [before, cleaned_word, after] punctuation triples.

    For each whitespace token, records any leading and trailing
    non-alphanumeric runs so they can be re-attached after transcription.
    """
    triples = []
    for token in words.split():
        entry = ["", preprocess(token), ""]
        leading = re.search(r"^([^A-Za-z0-9]+)[A-Za-z]", token)
        trailing = re.search(r"[A-Za-z]([^A-Za-z0-9]+)$", token)
        if leading:
            entry[0] = str(leading.group(1))
        if trailing:
            entry[2] = str(trailing.group(1))
        triples.append(entry)
    return triples
def apply_punct(triple, as_str=False):
    """Re-attach surrounding punctuation from preserve_punc triples.

    :param triple: a single [before, word, after] triple, or a list of
        such triples (a whole sentence); a list of triples is collapsed
        in place to joined strings
    :param as_str: if True, return one space-joined string instead of a
        list
    """
    # isinstance instead of `type(...) == list` (idiomatic type check).
    if isinstance(triple[0], list):
        for i, t in enumerate(triple):
            triple[i] = ''.join(t)
        if as_str:
            return ' '.join(triple)
        return triple
    # Single triple: join before+word+after.
    if as_str:
        return ''.join(triple)
    return [''.join(triple)]
def _punct_replace_word(original, transcription):
    """Re-apply each word's original punctuation to its IPA alternatives.

    Mutates and returns *transcription*, wrapping every alternative of
    word i with the before/after punctuation recorded in original[i].
    """
    for w_idx, alternatives in enumerate(transcription):
        before, _, after = original[w_idx]
        for a_idx, alt in enumerate(alternatives):
            transcription[w_idx][a_idx] = apply_punct([before, alt, after], as_str=True)
    return transcription
def fetch_words(words_in, db_type="sql"):
    """Fetch (word, [phonemes, ...]) pairs for the given words.

    SQL mode groups multiple pronunciations per word; JSON mode returns
    the stored lists directly. Words absent from the dictionary are
    simply missing from the result.
    """
    asset = ModeType(mode=db_type).mode
    backend = db_type.lower()
    if backend == "sql":
        placeholders = ", ".join("?" for _ in words_in)
        asset.execute("SELECT word, phonemes FROM dictionary "
                      "WHERE word IN ({0})".format(placeholders), words_in)
        grouped = defaultdict(list)
        for found_word, phonemes in asset.fetchall():
            grouped[found_word].append(phonemes)
        return list(grouped.items())
    if backend == "json":
        return [(key, val) for key, val in asset.items() if key in words_in]
def get_cmu(tokens_in, db_type="sql"):
    """Look up CMU phonemes for each token, preserving input order.

    Tokens not found in the dictionary are returned as a single-element
    list tagged with the "__IGNORE__" prefix.
    """
    found = fetch_words(tokens_in, db_type)
    ordered = []
    for token in tokens_in:
        matches = [phones for entry_word, phones in found if entry_word == token]
        if matches:
            ordered.append(matches[0])
        else:
            ordered.append(["__IGNORE__" + token])
    return ordered
def cmu_to_ipa(cmu_list, mark=True, stress_marking='all'):
    """Convert CMU word lists into IPA transcriptions.

    :param cmu_list: one inner list of CMU phoneme strings per word,
        e.g. [["hh ah0 l ow1"]]; the input is not modified
    :param mark: if True, untranscribable "__IGNORE__" words get a "*"
    :param stress_marking: stress style forwarded to stress.find_stress
        ("all"/"both", "primary", "secondary"); falsy disables stress marks
    :return: list of lists of IPA strings, parallel to cmu_list
    """
    # Distinguish stressed/unstressed schwa before symbol lookup. Done on
    # a copy: the previous version rewrote the caller's list in place.
    cmu_list = [[w.replace("ah1", "q1").replace("ah0", "+0") for w in word_list]
                for word_list in cmu_list]
    # CMU symbol -> IPA. The diphthong values for ey/aw/ay/ow were empty
    # strings in the reviewed source (apparently lost characters); restored
    # to the standard eng_to_ipa values.
    symbols = {"a": "ə", "ey": "eɪ", "aa": "ɑ", "ae": "æ", "+": "ə", "ao": "ɔ", "q": "ʌ",
               "aw": "aʊ", "ay": "aɪ", "ch": "ʧ", "dh": "ð", "eh": "ɛ", "er": "ər",
               "hh": "h", "ih": "ɪ", "jh": "ʤ", "ng": "ŋ", "ow": "oʊ", "oy": "ɔɪ",
               "sh": "ʃ", "th": "θ", "uh": "ʊ", "uw": "u", "zh": "ʒ", "iy": "i", "y": "j"}
    final_list = []  # the final list of IPA tokens to be returned
    for word_list in cmu_list:
        ipa_word_list = []  # the word list for each word
        for word in word_list:
            if stress_marking:
                word = stress.find_stress(word, type=stress_marking)
            else:
                if re.sub(r"\d*", "", word.replace("__IGNORE__", "")) == "":
                    pass  # do not delete token if it's all numbers
                else:
                    word = re.sub("[0-9]", "", word)
            ipa_form = ''
            if word.startswith("__IGNORE__"):
                ipa_form = word.replace("__IGNORE__", "")
                # mark words we couldn't transliterate with an asterisk:
                if mark:
                    if not re.sub(r"\d*", "", ipa_form) == "":
                        ipa_form += "*"
            else:
                for piece in word.split(" "):
                    marked = False
                    unmarked = piece
                    if piece[0] in ["ˈ", "ˌ"]:
                        marked = True
                        # Local name for the stress character: the previous
                        # version reassigned the `mark` parameter here,
                        # corrupting the asterisk behavior for later words.
                        stress_char = piece[0]
                        unmarked = piece[1:]
                    if unmarked in symbols:
                        if marked:
                            ipa_form += stress_char + symbols[unmarked]
                        else:
                            ipa_form += symbols[unmarked]
                    else:
                        ipa_form += piece
            # Move a stress mark that landed inside these sequences back to
            # the syllable boundary (unless the word starts with it).
            swap_list = [["ˈər", "əˈr"], ["ˈie", "iˈe"]]
            for sym in swap_list:
                if not ipa_form.startswith(sym[0]):
                    ipa_form = ipa_form.replace(sym[0], sym[1])
            ipa_word_list.append(ipa_form)
        final_list.append(list(ipa_word_list))
    return final_list
def get_top(ipa_list):
    """Returns only the one result for a query. If multiple entries for words are found, only the first is used."""
    first_choices = (alternatives[0] for alternatives in ipa_list)
    return ' '.join(first_choices)
def get_all(ipa_list):
    """Return every possible IPA transcription of the sentence, sorted.

    Each element of *ipa_list* holds the alternative transcriptions of
    one word; the result is the Cartesian product of those alternatives,
    each combination joined with spaces. Replaces the previous
    index-arithmetic implementation, which relied on fragile float
    division to cycle through alternatives.
    """
    from itertools import product
    return sorted(' '.join(combo) for combo in product(*ipa_list))
def ipa_list(words_in, keep_punct=True, stress_marks='both', db_type="sql"):
    """Returns a list of all the discovered IPA transcriptions for each word."""
    if type(words_in) == str:
        tokens = words_in.split()
    else:
        tokens = words_in
    words = [preserve_punc(tok.lower())[0] for tok in tokens]
    cmu = get_cmu([w[1] for w in words], db_type=db_type)
    transcriptions = cmu_to_ipa(cmu, stress_marking=stress_marks)
    if keep_punct:
        transcriptions = _punct_replace_word(words, transcriptions)
    return transcriptions
def isin_cmu(word, db_type="sql"):
    """checks if a word is in the CMU dictionary. Doesn't strip punctuation.
    If given more than one word, returns True only if all words are present."""
    if type(word) == str:
        word = [preprocess(w) for w in word.split()]
    found = {entry[0] for entry in fetch_words(word, db_type)}
    return len(found) == len(set(word))
def contains(ipa, db_type="sql"):
    """Get any words that contain the IPA string. Returns the word and the IPA as a list.

    Matching ignores stress marks in the stored transcription.
    """
    asset = ModeType(mode=db_type).mode
    if db_type.lower() == "sql":
        # Parameterized LIKE pattern: the previous version formatted the
        # user-supplied string straight into the SQL text.
        asset.execute("SELECT word, ipa FROM eng_ipa WHERE "
                      "REPLACE(REPLACE(ipa, 'ˌ', ''), 'ˈ', '') "
                      "LIKE ?", ("%" + str(ipa) + "%",))
        return [list(res) for res in asset.fetchall()]
def convert(text, retrieve_all=False, keep_punct=True, stress_marks='both', mode="sql"):
    """takes either a string or list of English words and converts them to IPA"""
    transcriptions = ipa_list(words_in=text, keep_punct=keep_punct,
                              stress_marks=stress_marks, db_type=mode)
    if retrieve_all:
        return get_all(transcriptions)
    return get_top(transcriptions)
def jonvert(text, retrieve_all=False, keep_punct=True, stress_marks='both'):
    """Forces use of JSON database for fetching phoneme data."""
    return convert(text, retrieve_all=retrieve_all, keep_punct=keep_punct,
                   stress_marks=stress_marks, mode="json")

View File

@@ -7,7 +7,7 @@ from vacore import VACore
def start(core:VACore): def start(core:VACore):
manifest = { manifest = {
"name": "Core plugin", "name": "Core plugin",
"version": "4.4", "version": "4.5",
"description": "Плагин с основными настройками Ирины.\nПосмотрите другие плагины, чтобы понять, какие команды можно использовать.", "description": "Плагин с основными настройками Ирины.\nПосмотрите другие плагины, чтобы понять, какие команды можно использовать.",
"options_label": { "options_label": {
@@ -82,7 +82,7 @@ def start(core:VACore):
"log_file_level": "DEBUG", # NOTSET | DEBUG | INFO | WARNING | ERROR | CRITICAL "log_file_level": "DEBUG", # NOTSET | DEBUG | INFO | WARNING | ERROR | CRITICAL
"log_file_name": "log.txt", # имя лог-файла "log_file_name": "log.txt", # имя лог-файла
"normalization_engine": "numbers", # нормализация текста для русских TTS. "normalization_engine": "default", # нормализация текста для русских TTS.
# Добавляется плагинами. Рекомендуется runorm для качества (но runorm тяжела в обработке) # Добавляется плагинами. Рекомендуется runorm для качества (но runorm тяжела в обработке)
}, },
@@ -128,6 +128,8 @@ def start_with_options(core:VACore, manifest:dict):
lingua_franca.load_language(options["linguaFrancaLang"]) lingua_franca.load_language(options["linguaFrancaLang"])
core.normalization_engine = options["normalization_engine"] core.normalization_engine = options["normalization_engine"]
if core.normalization_engine == "default":
core.normalization_engine = "prepare"
# Логирование # Логирование
core.log_console = options["log_console"] core.log_console = options["log_console"]

View File

@@ -12,7 +12,7 @@ from jaa import JaaCore
from collections.abc import Callable from collections.abc import Callable
version = "11.0.0" version = "11.1.0"
import logging import logging