From 2aab85f073e19ab12306acc30c2a3754441b5084 Mon Sep 17 00:00:00 2001 From: Laurent Cozic Date: Mon, 3 Jul 2023 12:25:50 +0100 Subject: [PATCH] Android: Allow configuring voice typing model URL --- .../components/screens/ConfigScreen.tsx | 13 +++++---- .../services/voiceTyping/vosk.android.ts | 27 ++++++++++++++----- readme/privacy.md | 1 + readme/spec/voice_typing.md | 17 ++++++++++++ readme/welcome/5_privacy.md | 1 + 5 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 readme/spec/voice_typing.md diff --git a/packages/app-mobile/components/screens/ConfigScreen.tsx b/packages/app-mobile/components/screens/ConfigScreen.tsx index 0ba494e55..0108a61c4 100644 --- a/packages/app-mobile/components/screens/ConfigScreen.tsx +++ b/packages/app-mobile/components/screens/ConfigScreen.tsx @@ -600,11 +600,14 @@ class ConfigScreenComponent extends BaseScreenComponent { ); } return ( - - - {md.label()} - - void updateSettingValue(key, value)} secureTextEntry={!!md.secure} /> + + + + {md.label()} + + void updateSettingValue(key, value)} secureTextEntry={!!md.secure} /> + + {descriptionComp} ); } else { diff --git a/packages/app-mobile/services/voiceTyping/vosk.android.ts b/packages/app-mobile/services/voiceTyping/vosk.android.ts index a0b911c16..32b284f68 100644 --- a/packages/app-mobile/services/voiceTyping/vosk.android.ts +++ b/packages/app-mobile/services/voiceTyping/vosk.android.ts @@ -1,5 +1,7 @@ import { languageCodeOnly } from '@joplin/lib/locale'; import Logger from '@joplin/lib/Logger'; +import Setting from '@joplin/lib/models/Setting'; +import { rtrimSlashes } from '@joplin/lib/path-utils'; import shim from '@joplin/lib/shim'; import Vosk from 'react-native-vosk'; import { unzip } from 'react-native-zip-archive'; @@ -31,9 +33,9 @@ export interface Recorder { cleanup: ()=> void; } -const supportedLanguages = { +const defaultSupportedLanguages = { 'en': 'https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip', - 'cn': 
'https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip', + 'zh': 'https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip', 'ru': 'https://alphacephei.com/vosk/models/vosk-model-small-ru-0.22.zip', 'fr': 'https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip', 'de': 'https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip', @@ -54,7 +56,7 @@ const supportedLanguages = { export const isSupportedLanguage = (locale: string) => { const l = languageCodeOnly(locale).toLowerCase(); - return Object.keys(supportedLanguages).includes(l); + return Object.keys(defaultSupportedLanguages).includes(l); }; // Where all the models files for all the languages are @@ -73,9 +75,18 @@ const getModelDir = (locale: string) => { }; const languageModelUrl = (locale: string) => { - const l = languageCodeOnly(locale).toLowerCase(); - if (!(l in supportedLanguages)) throw new Error(`No language file for: ${locale}`); - return (supportedLanguages as any)[l]; + const lang = languageCodeOnly(locale).toLowerCase(); + if (!(lang in defaultSupportedLanguages)) throw new Error(`No language file for: ${locale}`); + + const urlTemplate = rtrimSlashes(Setting.value('voiceTypingBaseUrl').trim()); + + if (urlTemplate) { + let url = rtrimSlashes(urlTemplate); + if (!url.includes('{lang}')) url += '/{lang}.zip'; + return url.replace(/\{lang\}/g, lang); + } else { + return (defaultSupportedLanguages as any)[lang]; + } }; export const modelIsDownloaded = async (locale: string) => { @@ -114,10 +125,12 @@ const downloadModel = async (locale: string) => { logger.info(`Downloading model from: ${modelUrl}`); - await shim.fetchBlob(languageModelUrl(locale), { + const response = await shim.fetchBlob(modelUrl, { path: zipFilePath, }); + if (!response.ok || response.status >= 400) throw new Error(`Could not download from ${modelUrl}: Error ${response.status}`); + logger.info(`Unzipping ${zipFilePath} => ${unzipDir}`); await unzip(zipFilePath, unzipDir); diff --git 
a/readme/privacy.md b/readme/privacy.md index 93edf3e97..862659640 100644 --- a/readme/privacy.md +++ b/readme/privacy.md @@ -14,6 +14,7 @@ In order to provide certain features, Joplin may need to connect to third-party | Wifi connection check | On mobile, Joplin checks for Wifi connectivity to give the option to synchronise data only when Wifi is enabled. | Enabled | No (1) | | Spellchecker dictionary | On Linux and Windows, the desktop application downloads the spellchecker dictionary from `redirector.gvt1.com`. | Enabled | Yes (2) | | Plugin repository | The desktop application downloads the list of available plugins from the [official GitHub repository](https://github.com/joplin/plugins). If this repository is not accessible (eg. in China) the app will try to get the plugin list from [various mirrors](https://github.com/laurent22/joplin/blob/8ac6017c02017b6efd59f5fcab7e0b07f8d44164/packages/lib/services/plugins/RepositoryApi.ts#L22), in which case the plugin screen [works slightly differently](https://github.com/laurent22/joplin/issues/5161#issuecomment-925226975). | Enabled | No +| Voice typing | If you use the voice typing feature on Android, the application will download the language files from https://alphacephei.com/vosk/models | Disabled | Yes (1) https://github.com/laurent22/joplin/issues/5705
(2) If the spellchecker is disabled, [it will not download the dictionary](https://discourse.joplinapp.org/t/new-version-of-joplin-contacting-google-servers-on-startup/23000/40?u=laurent). diff --git a/readme/spec/voice_typing.md b/readme/spec/voice_typing.md new file mode 100644 index 000000000..79fd3bf6b --- /dev/null +++ b/readme/spec/voice_typing.md @@ -0,0 +1,17 @@ +# Voice typing + +The Android mobile application supports built-in, offline voice typing via the [Vosk library](https://alphacephei.com/vosk/). Vosk is a speech recognition toolkit that can work on lightweight devices, such as mobile phones. + +## Language models + +Vosk uses pre-trained language models that can be used for automatic speech recognition tasks. These models are trained on large amounts of speech data to convert spoken language into written text. Multiple language models are available per language - lightweight ones, which are suitable for mobile (about 50 MB per model), and large ones, which are designed for server-side speech recognition (2 GB+ per model). + +## Downloading the language models + +By default Joplin will automatically download the [lightweight models](https://alphacephei.com/vosk/models) from the official Vosk website. Each language model only needs to be downloaded once, the first time the voice typing feature is used with that language. + +You can also configure the application to download the models from your own server. To do so, set the **Voice typing language files (URL)** setting in the [Configuration screen](https://github.com/laurent22/joplin/blob/dev/readme/config_screen.md). You have two options: + +* **Provide the base URL**, e.g. `https://example.com/models`. Joplin will then automatically append the filename to that URL. For example, it will download the French files from `https://example.com/models/fr.zip`. + +* **Provide a URL template**. In that case, include a `{lang}` variable, which will be expanded to the language code. 
For example, if the URL is set to `https://example.com/models/vosk-model-{lang}.zip`, the app will download the French file from `https://example.com/models/vosk-model-fr.zip`. This option gives you more flexibility over where the app gets the file from. For example, you can also use query parameters, as in `https://example.com/models/vosk-models.php?lang={lang}&download=true`. \ No newline at end of file diff --git a/readme/welcome/5_privacy.md b/readme/welcome/5_privacy.md index 93edf3e97..862659640 100644 --- a/readme/welcome/5_privacy.md +++ b/readme/welcome/5_privacy.md @@ -14,6 +14,7 @@ In order to provide certain features, Joplin may need to connect to third-party | Wifi connection check | On mobile, Joplin checks for Wifi connectivity to give the option to synchronise data only when Wifi is enabled. | Enabled | No (1) | | Spellchecker dictionary | On Linux and Windows, the desktop application downloads the spellchecker dictionary from `redirector.gvt1.com`. | Enabled | Yes (2) | | Plugin repository | The desktop application downloads the list of available plugins from the [official GitHub repository](https://github.com/joplin/plugins). If this repository is not accessible (eg. in China) the app will try to get the plugin list from [various mirrors](https://github.com/laurent22/joplin/blob/8ac6017c02017b6efd59f5fcab7e0b07f8d44164/packages/lib/services/plugins/RepositoryApi.ts#L22), in which case the plugin screen [works slightly differently](https://github.com/laurent22/joplin/issues/5161#issuecomment-925226975). | Enabled | No +| Voice typing | If you use the voice typing feature on Android, the application will download the language files from https://alphacephei.com/vosk/models | Disabled | Yes (1) https://github.com/laurent22/joplin/issues/5705
(2) If the spellchecker is disabled, [it will not download the dictionary](https://discourse.joplinapp.org/t/new-version-of-joplin-contacting-google-servers-on-startup/23000/40?u=laurent).