You've already forked joplin
							
							
				mirror of
				https://github.com/laurent22/joplin.git
				synced 2025-10-31 00:07:48 +02:00 
			
		
		
		
	Android: Switch default library used for Whisper voice typing (#11881)
This commit is contained in:
		| @@ -811,12 +811,11 @@ packages/app-mobile/services/e2ee/crypto.js | ||||
| packages/app-mobile/services/plugins/PlatformImplementation.js | ||||
| packages/app-mobile/services/profiles/index.js | ||||
| packages/app-mobile/services/voiceTyping/VoiceTyping.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.test.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.android.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.android.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.test.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.js | ||||
| packages/app-mobile/setupQuickActions.js | ||||
| packages/app-mobile/tools/buildInjectedJs/BundledFile.js | ||||
|   | ||||
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -786,12 +786,11 @@ packages/app-mobile/services/e2ee/crypto.js | ||||
| packages/app-mobile/services/plugins/PlatformImplementation.js | ||||
| packages/app-mobile/services/profiles/index.js | ||||
| packages/app-mobile/services/voiceTyping/VoiceTyping.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.test.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.android.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.android.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.test.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.js | ||||
| packages/app-mobile/setupQuickActions.js | ||||
| packages/app-mobile/tools/buildInjectedJs/BundledFile.js | ||||
|   | ||||
| @@ -33,6 +33,7 @@ | ||||
| 		"/packages/app-desktop/build/", | ||||
| 		"/packages/app-desktop/utils/checkForUpdatesUtilsTestData.ts", | ||||
| 		"/packages/app-desktop/vendor/", | ||||
| 		"/packages/app-mobile/android/vendor/", | ||||
| 		"/packages/app-mobile/ios/Pods/", | ||||
| 		"/packages/app-mobile/lib/rnInjectedJs", | ||||
| 		"/packages/app-mobile/pluginAssets", | ||||
|   | ||||
| @@ -70,6 +70,13 @@ def enableProguardInReleaseBuilds = false | ||||
| def jscFlavor = 'org.webkit:android-jsc:+' | ||||
|  | ||||
| android { | ||||
|  | ||||
|     externalNativeBuild { | ||||
|         cmake { | ||||
|             path file('src/main/cpp/CMakeLists.txt') | ||||
|             version '3.22.1' | ||||
|         } | ||||
|     } | ||||
|     ndkVersion rootProject.ext.ndkVersion | ||||
|     buildToolsVersion rootProject.ext.buildToolsVersion | ||||
|     compileSdk rootProject.ext.compileSdkVersion | ||||
| @@ -81,12 +88,17 @@ android { | ||||
|         targetSdkVersion rootProject.ext.targetSdkVersion | ||||
| 		versionCode 2097764 | ||||
| 		versionName "3.3.1" | ||||
| 		ndk { | ||||
| 			abiFilters "armeabi-v7a", "x86", "arm64-v8a", "x86_64" | ||||
| 		} | ||||
|         ndk { | ||||
|             abiFilters "armeabi-v7a", "x86", "arm64-v8a", "x86_64" | ||||
|         } | ||||
|  | ||||
|         // Needed to fix: The number of method references in a .dex file cannot exceed 64K | ||||
|         multiDexEnabled true | ||||
|         externalNativeBuild { | ||||
|             cmake { | ||||
|                 cppFlags '-DCMAKE_BUILD_TYPE=Release' | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     signingConfigs { | ||||
|         debug { | ||||
| @@ -95,14 +107,14 @@ android { | ||||
|             keyAlias 'androiddebugkey' | ||||
|             keyPassword 'android' | ||||
|         } | ||||
| 		release { | ||||
| 			if (project.hasProperty('JOPLIN_RELEASE_STORE_FILE')) { | ||||
| 				storeFile file(JOPLIN_RELEASE_STORE_FILE) | ||||
| 				storePassword JOPLIN_RELEASE_STORE_PASSWORD | ||||
| 				keyAlias JOPLIN_RELEASE_KEY_ALIAS | ||||
| 				keyPassword JOPLIN_RELEASE_KEY_PASSWORD | ||||
| 			} | ||||
| 		} | ||||
|         release { | ||||
|             if (project.hasProperty('JOPLIN_RELEASE_STORE_FILE')) { | ||||
|                 storeFile file(JOPLIN_RELEASE_STORE_FILE) | ||||
|                 storePassword JOPLIN_RELEASE_STORE_PASSWORD | ||||
|                 keyAlias JOPLIN_RELEASE_KEY_ALIAS | ||||
|                 keyPassword JOPLIN_RELEASE_KEY_PASSWORD | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     buildTypes { | ||||
|         debug { | ||||
| @@ -127,10 +139,6 @@ dependencies { | ||||
|     } else { | ||||
|         implementation jscFlavor | ||||
|     } | ||||
|  | ||||
|     // Needed for Whisper speech-to-text | ||||
|     implementation 'com.microsoft.onnxruntime:onnxruntime-android:latest.release' | ||||
|     implementation 'com.microsoft.onnxruntime:onnxruntime-extensions-android:latest.release' | ||||
| } | ||||
|  | ||||
| apply from: file("../../node_modules/@react-native-community/cli-platform-android/native_modules.gradle"); applyNativeModulesAppBuildGradle(project) | ||||
|   | ||||
							
								
								
									
										64
									
								
								packages/app-mobile/android/app/src/main/cpp/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								packages/app-mobile/android/app/src/main/cpp/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
|  | ||||
# CMake build for the native Whisper voice-typing library.
# See https://d.android.com/studio/projects/add-native-code.html and
# https://github.com/android/ndk-samples for background on CMake + Android Studio.

cmake_minimum_required(VERSION 3.22.1)

# The project name is available as ${CMAKE_PROJECT_NAME} in this top-level
# script; it is also the name passed to System.loadLibrary() on the Java/Kotlin
# side (and, for NativeActivity apps, in AndroidManifest.xml).
project("joplin")

# Shared library packaged into the APK by Gradle.
add_library(${CMAKE_PROJECT_NAME} SHARED
	# C/C++ sources, relative to this CMakeLists.txt.
	whisperWrapper.cpp
	utils/WhisperSession.cpp
	utils/findLongestSilence.cpp
	# NOTE(review): test code is compiled into the production library —
	# confirm this is intentional.
	utils/findLongestSilence_test.cpp
)

# Vendored whisper.cpp checkout.
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../vendor/whisper.cpp)

# Optimization/visibility flags based on the whisper.cpp Android example:
set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -O3 ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections")

# Build the out-of-tree whisper.cpp sources (explicit binary dir required;
# see https://stackoverflow.com/a/76290722):
add_subdirectory(${WHISPER_LIB_DIR} ./whisper)

# Header search paths.
# NOTE(review): ${PROJECT_BASE_DIR} is not defined in this file — verify the
# including environment sets it.
target_include_directories(
	${CMAKE_PROJECT_NAME}
	PUBLIC
	${PROJECT_BASE_DIR}/shared
	${WHISPER_LIB_DIR}/include
)

# Libraries linked into the target: whisper.cpp plus the Android system
# libraries needed for logging.
target_link_libraries(${CMAKE_PROJECT_NAME}
	whisper
	android
	log
)
| @@ -0,0 +1,154 @@ | ||||
| #include "WhisperSession.h" | ||||
|  | ||||
| #include <utility> | ||||
| #include <sstream> | ||||
| #include <algorithm> | ||||
| #include "whisper.h" | ||||
| #include "findLongestSilence.h" | ||||
| #include "androidUtil.h" | ||||
|  | ||||
| WhisperSession::WhisperSession(const std::string& modelPath, std::string lang, std::string prompt) | ||||
| 	: lang_ {std::move(lang)}, prompt_ {std::move(prompt)} { | ||||
| 	whisper_context_params contextParams = whisper_context_default_params(); | ||||
|  | ||||
| 	// Lifetime(pModelPath): Whisper.cpp creates a copy of pModelPath and stores it in a std::string. | ||||
| 	// whisper_init_from_file_with_params doesn't seem to otherwise save pModelPath. As such, it's | ||||
| 	// safe to pass a pointer to a std::string's representation: | ||||
| 	const char *pModelPath = modelPath.c_str(); | ||||
| 	pContext_ = whisper_init_from_file_with_params(pModelPath, contextParams); | ||||
|  | ||||
| 	if (pContext_ == nullptr) { | ||||
| 		throw std::runtime_error("Unable to initialize the Whisper context."); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| WhisperSession::~WhisperSession() { | ||||
| 	if (pContext_ != nullptr) { | ||||
| 		whisper_free(pContext_); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| whisper_full_params | ||||
| WhisperSession::buildWhisperParams_() { | ||||
| 	whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); | ||||
| 	// WHISPER_SAMPLING_BEAM_SEARCH is an alternative to greedy: | ||||
| 	// params.beam_search = { .beam_size = 2 }; | ||||
| 	params.print_realtime = false; | ||||
|     // Disable timestamps: They make creating custom Whisper models more difficult: | ||||
| 	params.print_timestamps = false; | ||||
|     params.no_timestamps = true; | ||||
|  | ||||
| 	params.print_progress = false; | ||||
| 	params.translate = false; | ||||
| 	params.offset_ms = 0; | ||||
| 	params.single_segment = true; | ||||
| 	// Avoid non-speech tokens (e.g. "(crackle)"). For now, this is disabled because it seems to | ||||
| 	// cause increased hallucinations (e.g. repeated "Thank you"s). | ||||
| 	// params.suppress_nst = true; | ||||
| 	params.temperature = 0; // Initial randomness | ||||
| 	// There's also a temperature_inc variable, which is used when decoding fails (Whisper increases | ||||
| 	// the temperature by temperature_inc and retries). | ||||
|  | ||||
| 	// Following the whisper streaming example in setting prompt_tokens to nullptr | ||||
| 	// when using VAD (Voice Activity Detection) | ||||
| 	params.initial_prompt = prompt_.c_str(); | ||||
| 	params.prompt_tokens = nullptr; | ||||
| 	params.prompt_n_tokens = 0; | ||||
|  | ||||
| 	// Lifetime: lifetime(params) < lifetime(lang_) = lifetime(this). | ||||
| 	params.language = lang_.c_str(); | ||||
|  | ||||
| 	return params; | ||||
| } | ||||
|  | ||||
| std::string | ||||
| WhisperSession::transcribe_(const std::vector<float>& audio, size_t transcribeCount) { | ||||
| 	int minTranscribeLength = WHISPER_SAMPLE_RATE / 2; // 0.5s | ||||
| 	if (transcribeCount < minTranscribeLength) { | ||||
| 		return ""; | ||||
| 	} | ||||
|  | ||||
| 	whisper_full_params params = buildWhisperParams_(); | ||||
| 	whisper_reset_timings(pContext_); | ||||
|  | ||||
| 	transcribeCount = std::min(audio.size(), transcribeCount); | ||||
|  | ||||
| 	if (whisper_full(pContext_, params, audio.data(), transcribeCount) != 0) { | ||||
| 		throw std::runtime_error("Failed to run Whisper (non-zero exit status)."); | ||||
| 	} else { | ||||
| 		whisper_print_timings(pContext_); | ||||
| 	} | ||||
|  | ||||
| 	// Tokens to be used as a prompt for the next run of Whisper | ||||
| 	unsigned int segmentCount = whisper_full_n_segments(pContext_); | ||||
|  | ||||
| 	// Build the results | ||||
| 	std::stringstream results; | ||||
| 	for (int i = 0; i < segmentCount; i++) { | ||||
| 		results << " " << whisper_full_get_segment_text(pContext_, i); | ||||
| 	} | ||||
|  | ||||
| 	std::string result = results.str(); | ||||
| 	LOGD("Transcribed: %s (audio len %.2f)", result.c_str(), audio.size() / (float) WHISPER_SAMPLE_RATE); | ||||
|  | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| std::string | ||||
| WhisperSession::splitAndTranscribeBefore_(int transcribeUpTo, int trimTo) { | ||||
| 	std::string result = transcribe_(audioBuffer_, transcribeUpTo); | ||||
|  | ||||
| 	// Trim | ||||
| 	LOGI("Trim to %.2f s, transcribe to %.2f s", (float) trimTo / WHISPER_SAMPLE_RATE, (float) transcribeUpTo / WHISPER_SAMPLE_RATE); | ||||
| 	audioBuffer_ = std::vector(audioBuffer_.begin() + trimTo, audioBuffer_.end()); | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| std::string | ||||
| WhisperSession::transcribeNextChunk(const float *pAudio, int sizeAudio) { | ||||
| 	std::string finalizedContent; | ||||
|  | ||||
| 	// Update the local audio buffer | ||||
| 	for (int i = 0; i < sizeAudio; i++) { | ||||
| 		audioBuffer_.push_back(pAudio[i]); | ||||
| 	} | ||||
|  | ||||
| 	// Does the audio buffer need to be split somewhere? | ||||
| 	int maximumSamples = WHISPER_SAMPLE_RATE * 25; | ||||
| 	if (audioBuffer_.size() >= maximumSamples) { | ||||
| 		float minSilenceSeconds = 0.3f; | ||||
| 		auto silenceRange = findLongestSilence( | ||||
| 			audioBuffer_, WHISPER_SAMPLE_RATE, minSilenceSeconds, maximumSamples | ||||
| 		); | ||||
|  | ||||
| 		// In this case, the audio is long enough that it needs to be split somewhere. If there's | ||||
| 		// no suitable pause available, default to splitting in the middle. | ||||
| 		int halfBufferSize = audioBuffer_.size() / 2; | ||||
| 		int transcribeTo = silenceRange.isValid ? silenceRange.start : halfBufferSize; | ||||
| 		int trimTo = silenceRange.isValid ? silenceRange.end : halfBufferSize; | ||||
|  | ||||
| 		finalizedContent = splitAndTranscribeBefore_(transcribeTo, trimTo); | ||||
| 	} else if (audioBuffer_.size() > WHISPER_SAMPLE_RATE * 3) { | ||||
| 		// Allow brief pauses to create new paragraphs: | ||||
| 		float minSilenceSeconds = 2.0f; | ||||
| 		auto splitPoint = findLongestSilence( | ||||
| 			audioBuffer_, WHISPER_SAMPLE_RATE, minSilenceSeconds, maximumSamples | ||||
| 		); | ||||
| 		if (splitPoint.isValid) { | ||||
| 			int tolerance = WHISPER_SAMPLE_RATE / 20; // 0.05s | ||||
| 			bool isCompletelySilent = splitPoint.start < tolerance && splitPoint.end > audioBuffer_.size() - tolerance; | ||||
| 			if (isCompletelySilent) { | ||||
| 				audioBuffer_.clear(); | ||||
| 			} else { | ||||
| 				finalizedContent = splitAndTranscribeBefore_(splitPoint.start, splitPoint.end); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	previewText_ = transcribe_(audioBuffer_, audioBuffer_.size()); | ||||
| 	return finalizedContent; | ||||
| } | ||||
|  | ||||
| std::string WhisperSession::getPreview() { | ||||
| 	return previewText_; | ||||
| } | ||||
| @@ -0,0 +1,27 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <string> | ||||
| #include "whisper.h" | ||||
|  | ||||
| class WhisperSession { | ||||
| public: | ||||
| 	WhisperSession(const std::string& modelPath, std::string lang, std::string prompt); | ||||
| 	~WhisperSession(); | ||||
| 	std::string transcribeNextChunk(const float *pAudio, int sizeAudio); | ||||
| 	std::string getPreview(); | ||||
|  | ||||
| private: | ||||
| 	// Current preview state | ||||
| 	std::string previewText_; | ||||
|  | ||||
| 	whisper_full_params buildWhisperParams_(); | ||||
| 	std::string transcribe_(const std::vector<float>& audio, size_t samplesToTranscribe); | ||||
| 	std::string splitAndTranscribeBefore_(int transcribeUpTo, int trimTo); | ||||
|  | ||||
| 	whisper_context *pContext_; | ||||
| 	const std::string lang_; | ||||
| 	const std::string prompt_; | ||||
|  | ||||
| 	std::vector<float> audioBuffer_; | ||||
| }; | ||||
|  | ||||
| @@ -0,0 +1,10 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <android/log.h> | ||||
|  | ||||
| // Use macros for these rather than functions. Functions generate a "may be unsafe" | ||||
| // warning because the compiler can't check that the first argument is a string | ||||
| // literal. | ||||
| #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, "Whisper::JNI", __VA_ARGS__); | ||||
| #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "Whisper::JNI", __VA_ARGS__); | ||||
| #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "Whisper::JNI", __VA_ARGS__); | ||||
| @@ -0,0 +1,111 @@ | ||||
| #include "findLongestSilence.h" | ||||
| #include "androidUtil.h" | ||||
|  | ||||
// First-order high-pass filter, in place: y[i] = α·y[i-1] + α·(x[i] - x[i-1]).
// See https://en.wikipedia.org/wiki/High-pass_filter and the example in
// whisper.cpp/streaming.
static void highpass(std::vector<float>& data, int sampleRate) {
	const float cutoffHz = 60.0f;
	const float RC = 1.0f / (2 * 3.1416f * cutoffHz);
	const float timePerSample = 1.0f / sampleRate;
	const float alpha = RC / (RC + timePerSample);

	float previousInput = data[0];
	for (size_t i = 1; i < data.size(); i++) {
		const float input = data[i];
		// data[i - 1] already holds the filtered value y[i-1] at this point.
		data[i] = alpha * data[i - 1] + alpha * (input - previousInput);
		previousInput = input;
	}
}
|  | ||||
| SilenceRange findLongestSilence( | ||||
| 	const std::vector<float>& audioData, | ||||
| 	int sampleRate, | ||||
| 	float minSilenceLengthSeconds, | ||||
| 	int maxSilencePosition | ||||
| ) { | ||||
| 	int bestCandidateLength = 0; | ||||
| 	int bestCandidateStart = -1; | ||||
| 	int bestCandidateEnd = -1; | ||||
|  | ||||
| 	int currentCandidateStart = -1; | ||||
|  | ||||
| 	std::vector<float> processedAudio { audioData }; | ||||
| 	highpass(processedAudio, sampleRate); | ||||
|  | ||||
| 	// Break into windows of size `windowSize`: | ||||
| 	int windowSize = 256; | ||||
| 	int windowsPerSecond = sampleRate / windowSize; | ||||
| 	int quietWindows = 0; | ||||
|  | ||||
| 	// Finishes the current candidate for longest silence | ||||
| 	auto finalizeCandidate = [&] (int currentOffset) { | ||||
| 		bool hasCandidate = currentCandidateStart >= 0; | ||||
| 		if (!hasCandidate) { | ||||
| 			return; | ||||
| 		} | ||||
|  | ||||
| 		int currentCandidateLength = currentOffset - currentCandidateStart; | ||||
| 		if (currentCandidateLength > bestCandidateLength && currentCandidateStart <= maxSilencePosition) { | ||||
| 			bestCandidateLength = currentCandidateLength; | ||||
| 			bestCandidateStart = currentCandidateStart; | ||||
| 			bestCandidateEnd = currentOffset; | ||||
| 			LOGD("New best candidate with length %d", currentCandidateLength); | ||||
| 		} | ||||
|  | ||||
| 		currentCandidateStart = -1; | ||||
| 	}; | ||||
|  | ||||
| 	int windowOffset; | ||||
| 	for (windowOffset = 0; windowOffset < processedAudio.size() && windowOffset <= maxSilencePosition; windowOffset += windowSize) { | ||||
| 		int rollingAverageSize = 24; | ||||
| 		float threshold = static_cast<float>(rollingAverageSize) / 80.0f; | ||||
|  | ||||
| 		// Count the number of samples that (when averaged with the nearby samples) | ||||
| 		// are below some threshold value. | ||||
| 		float absSum = 0; | ||||
| 		int silentSamples = 0; | ||||
| 		for (int i = windowOffset; i < windowOffset + windowSize && i < processedAudio.size(); i++) { | ||||
| 			absSum += abs(processedAudio[i]); | ||||
|  | ||||
| 			bool isSumComplete = i - rollingAverageSize >= windowOffset; | ||||
| 			if (isSumComplete) { | ||||
| 				absSum -= abs(processedAudio[i - rollingAverageSize]); | ||||
|  | ||||
| 				if (absSum < threshold) { | ||||
| 					silentSamples++; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// The window should be considered "quiet" if enough samples were below the threshold. | ||||
| 		// Don't require all of them to be to allow clicks and pops. | ||||
| 		if (silentSamples >= windowSize * 3 / 4) { | ||||
| 			quietWindows ++; | ||||
| 		} else { | ||||
| 			quietWindows = 0; | ||||
| 		} | ||||
|  | ||||
| 		int minQuietWindows = static_cast<int>(windowsPerSecond * minSilenceLengthSeconds); | ||||
| 		if (quietWindows >= minQuietWindows && currentCandidateStart == -1) { | ||||
| 			// Found a candidate. Start it. | ||||
| 			currentCandidateStart = windowOffset; | ||||
| 		} else if (quietWindows == 0) { | ||||
| 			// Ended a candidate. Is it better than the best? | ||||
| 			finalizeCandidate(windowOffset); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	finalizeCandidate(windowOffset); | ||||
|  | ||||
| 	// Return the best candidate. | ||||
| 	if (bestCandidateLength == 0) { | ||||
| 		return { .isValid = false, .start = 0, .end = 0 }; | ||||
| 	} else { | ||||
| 		return { | ||||
| 			.isValid=true, | ||||
| 			.start=bestCandidateStart, | ||||
| 			.end=bestCandidateEnd | ||||
| 		}; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -0,0 +1,24 @@ | ||||
#pragma once

#include <vector>
// NOTE(review): <optional> and <tuple> appear unused in this header — confirm
// no includer relies on them before removing.
#include <optional>
#include <tuple>

// Result of findLongestSilence: start/end are sample offsets into the input.
// When no qualifying silence exists, isValid is false and start/end are 0.
struct SilenceRange {
	bool isValid;
	int start;
	int end;
};

// Locates the longest quiet region in mono float audio.
SilenceRange findLongestSilence(
	const std::vector<float>& audioData,
	int sampleRate,

	// Minimum length of silence in seconds
	float minSilenceLengthSeconds,

	// Doesn't check for silence at a position greater than maxSilencePosition.
	// (Renamed from maximumSilenceStart to match the definition's parameter name.)
	int maxSilencePosition
);
|  | ||||
|  | ||||
| @@ -0,0 +1,169 @@ | ||||
| #include "findLongestSilence_test.h" | ||||
| #include "findLongestSilence.h" | ||||
| #include "androidUtil.h" | ||||
|  | ||||
| #include <string> | ||||
| #include <vector> | ||||
| #include <sstream> | ||||
| #include <cmath> | ||||
| #include <random> | ||||
|  | ||||
| static void testTones(); | ||||
| static void testToneWithPause(); | ||||
| static void testSilence(); | ||||
| static void testNoise(); | ||||
|  | ||||
| static void fail(const std::string& message); | ||||
|  | ||||
| struct GeneratedAudio { | ||||
| 	std::vector<float> data; | ||||
| 	int sampleRate; | ||||
| 	int sampleCount; | ||||
| }; | ||||
|  | ||||
| using AudioGenerator = std::function<const float(float)>; | ||||
| static GeneratedAudio makeAudio(const AudioGenerator& generator, int sampleRate, float duration); | ||||
| static void expectNoSilence(const GeneratedAudio& audio, const std::string& testLabel); | ||||
| static void expectSilenceBetween(const GeneratedAudio& audio, float startTimeSeconds, float stopTimeSeconds, const std::string& testLabel); | ||||
|  | ||||
|  | ||||
| void findLongestSilence_test() { | ||||
| 	testTones(); | ||||
| 	testToneWithPause(); | ||||
| 	testSilence(); | ||||
| 	testNoise(); | ||||
| } | ||||
|  | ||||
|  | ||||
| static void testTones() { | ||||
| 	for (int frequency = 440; frequency < 1600; frequency += 300) { | ||||
| 		std::stringstream messageBuilder; | ||||
| 		messageBuilder << "Should not find silence in tone with frequency " << frequency << " HZ."; | ||||
|  | ||||
| 		auto audioTone = makeAudio([frequency](float t) { | ||||
| 			// Also set the amplitude to 0.2f (to more closely match mic input). | ||||
| 			return std::sin(t * static_cast<float>(frequency)) * 0.2f; | ||||
| 		}, 15000, 10.0f); | ||||
|  | ||||
| 		expectNoSilence(audioTone, messageBuilder.str()); | ||||
| 	} | ||||
|  | ||||
| 	auto lowFrequencyTone = makeAudio([](float t) { | ||||
| 		return std::sin(t * 8) * 0.3f; | ||||
| 	}, 15000, 10.0f); | ||||
| 	expectSilenceBetween(lowFrequencyTone, 0.0f, 10.0f, "Should find silence in a very low-frequency tone"); | ||||
| } | ||||
|  | ||||
| static void testToneWithPause() { | ||||
| 	auto audioToneWithPause = makeAudio([](float t) { | ||||
| 		if (t < 5.0f || t > 6.0f) { | ||||
| 			return std::sin(t * 880); | ||||
| 		} else { | ||||
| 			return 0.0f; | ||||
| 		} | ||||
| 	}, 15000, 11.0f); | ||||
| 	expectSilenceBetween(audioToneWithPause, 5.0f, 6.0f, "Should find silence when completely silent in a region"); | ||||
|  | ||||
| 	auto audioToneWithTwoPauses = makeAudio([](float t) { | ||||
| 		if (t < 1.0f || (t > 8.0f && t < 10.0f)) { | ||||
| 			return 0.0f; | ||||
| 		} else { | ||||
| 			return std::sin(t * 880); | ||||
| 		} | ||||
| 	}, 15000, 20.0f); | ||||
| 	expectSilenceBetween(audioToneWithPause, 5.0f, 6.0f, "Should find silence when completely silent in a region"); | ||||
| } | ||||
|  | ||||
| static void testSilence() { | ||||
| 	auto silence = makeAudio([](float t) { | ||||
| 		return 0.0f; | ||||
| 	}, 16000, 10.0f); | ||||
| 	expectSilenceBetween(silence, 0.0f, 10.0f, "Should find silence in a completely silent signal"); | ||||
| } | ||||
|  | ||||
| static void testNoise() { | ||||
| 	std::minstd_rand randomness {2}; | ||||
| 	std::uniform_real_distribution noiseGenerator {-1.0, 1.0}; | ||||
| 	auto quietNoise = makeAudio([&](float t) { | ||||
| 		return noiseGenerator(randomness) * 0.02f; | ||||
| 	}, 16000, 5.0f); | ||||
| 	expectSilenceBetween(quietNoise, 0.0f, 5.0f, "Should find silence in a tone with low-amplitude noise"); | ||||
| } | ||||
|  | ||||
|  | ||||
// Aborts the current test by throwing; message describes the failure.
static void fail(const std::string& message) {
	throw std::runtime_error {message};
}
|  | ||||
| static GeneratedAudio makeAudio(const AudioGenerator& generator, int sampleRate, float duration) { | ||||
| 	std::vector<float> result { }; | ||||
|  | ||||
| 	int numSamples = static_cast<int>(static_cast<float>(sampleRate) * duration); | ||||
| 	for (int i = 0; i < numSamples; i++) { | ||||
| 		float time = static_cast<float>(i) / static_cast<float>(sampleRate); | ||||
| 		result.push_back(generator(time)); | ||||
| 	} | ||||
|  | ||||
| 	return { | ||||
| 		.data=result, | ||||
| 		.sampleRate=sampleRate, | ||||
| 		.sampleCount=numSamples, | ||||
| 	}; | ||||
| } | ||||
|  | ||||
| static void logTestPass(const std::string& message) { | ||||
| 	LOGI("Test PASS: %s", message.c_str()); | ||||
| } | ||||
|  | ||||
// Converts a sample offset to seconds at the given sample rate.
static float samplesToSeconds(int samples, int sampleRate) {
	return (float) samples / (float) sampleRate;
}
|  | ||||
| static void expectNoSilence(const GeneratedAudio& audio, const std::string& testLabel) { | ||||
| 	auto silence = findLongestSilence( | ||||
| 			audio.data, | ||||
| 			audio.sampleRate, | ||||
| 			0.02f, | ||||
| 			audio.sampleCount | ||||
| 	); | ||||
| 	if (silence.isValid) { | ||||
| 		std::stringstream errorBuilder; | ||||
| 		float startSeconds = samplesToSeconds(silence.start, audio.sampleRate); | ||||
| 		float stopSeconds = samplesToSeconds(silence.end, audio.sampleRate); | ||||
| 		errorBuilder << "Error: Found silence between " << startSeconds << "s and " << stopSeconds << "s"; | ||||
| 		errorBuilder << ": " << testLabel; | ||||
| 		fail(errorBuilder.str()); | ||||
| 	} | ||||
|  | ||||
| 	logTestPass(testLabel); | ||||
| } | ||||
|  | ||||
| static void expectSilenceBetween(const GeneratedAudio& audio, float startTimeSeconds, float stopTimeSeconds, const std::string& testLabel) { | ||||
| 	auto silenceResult = findLongestSilence( | ||||
| 			audio.data, | ||||
| 			audio.sampleRate, | ||||
| 			0.02f, | ||||
| 			audio.sampleCount | ||||
| 	); | ||||
|  | ||||
| 	if (!silenceResult.isValid) { | ||||
| 		fail("Error: No silence found: " + testLabel); | ||||
| 	} | ||||
|  | ||||
| 	auto checkEndpoint = [&] (int actualValueSamples, float expectedValueSeconds, const std::string& description) { | ||||
| 		float actualValueSeconds = samplesToSeconds(actualValueSamples, audio.sampleRate); | ||||
| 		float tolerance = 0.1f; // 100ms | ||||
| 		if (std::abs(expectedValueSeconds - actualValueSeconds) > tolerance) { | ||||
| 			std::stringstream messageBuilder; | ||||
| 			messageBuilder << "Error: Silence " << description << " mismatch: "; | ||||
| 			messageBuilder << "got " << actualValueSeconds << "s expected " << expectedValueSeconds << "s. "; | ||||
| 			messageBuilder << testLabel; | ||||
| 			fail(messageBuilder.str()); | ||||
| 		} | ||||
| 	}; | ||||
|  | ||||
| 	checkEndpoint(silenceResult.start, startTimeSeconds, "start time"); | ||||
| 	checkEndpoint(silenceResult.end, stopTimeSeconds, "stop time"); | ||||
|  | ||||
| 	logTestPass(testLabel); | ||||
| } | ||||
| @@ -0,0 +1,3 @@ | ||||
| #pragma once | ||||
|  | ||||
| void findLongestSilence_test(); | ||||
							
								
								
									
										125
									
								
								packages/app-mobile/android/app/src/main/cpp/whisperWrapper.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								packages/app-mobile/android/app/src/main/cpp/whisperWrapper.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,125 @@ | ||||
| // Write C++ code here. | ||||
| // | ||||
| // Do not forget to dynamically load the C++ library into your application. | ||||
| // | ||||
| // For instance, | ||||
| // | ||||
| // In MainActivity.java: | ||||
| //    static { | ||||
| //       System.loadLibrary("joplin"); | ||||
| //    } | ||||
| // | ||||
| // Or, in MainActivity.kt: | ||||
| //    companion object { | ||||
| //      init { | ||||
| //         System.loadLibrary("joplin") | ||||
| //      } | ||||
| //    } | ||||
| #include <jni.h> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <sstream> | ||||
| #include <android/log.h> | ||||
| #include "whisper.h" | ||||
| #include "utils/WhisperSession.h" | ||||
| #include "utils/androidUtil.h" | ||||
| #include "utils/findLongestSilence_test.h" | ||||
|  | ||||
| void log_android(enum ggml_log_level level, const char* message, void* user_data) { | ||||
| 	android_LogPriority priority = level == 4 ? ANDROID_LOG_ERROR : ANDROID_LOG_INFO; | ||||
| 	__android_log_print(priority, "Whisper::JNI::cpp", "%s", message); | ||||
| } | ||||
|  | ||||
| jstring stringToJava(JNIEnv *env, const std::string& source) { | ||||
| 	return env->NewStringUTF(source.c_str()); | ||||
| } | ||||
|  | ||||
| std::string stringToCXX(JNIEnv *env, jstring jString) { | ||||
| 	const char *jStringChars = env->GetStringUTFChars(jString, nullptr); | ||||
| 	std::string result { jStringChars }; | ||||
| 	env->ReleaseStringUTFChars(jString, jStringChars); | ||||
|  | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| void throwException(JNIEnv *env, const std::string& message) { | ||||
| 	jclass errorClass = env->FindClass("java/lang/Exception"); | ||||
| 	env->ThrowNew(errorClass, message.c_str()); | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT jlong JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_init( | ||||
| 		JNIEnv *env, | ||||
| 		jobject thiz, | ||||
| 		jstring modelPath, | ||||
| 		jstring language, | ||||
| 		jstring prompt | ||||
| ) { | ||||
| 	whisper_log_set(log_android, nullptr); | ||||
|  | ||||
| 	try { | ||||
| 		auto *pSession = new WhisperSession( | ||||
| 				stringToCXX(env, modelPath), stringToCXX(env, language), stringToCXX(env, prompt) | ||||
| 		); | ||||
| 		return (jlong) pSession; | ||||
| 	} catch (const std::exception& exception) { | ||||
| 		LOGW("Failed to init whisper: %s", exception.what()); | ||||
| 		throwException(env, exception.what()); | ||||
| 		return 0; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT void JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_free(JNIEnv *env, jobject thiz, | ||||
| 																 jlong pointer) { | ||||
| 	std::free(reinterpret_cast<WhisperSession *>(pointer)); | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT jstring JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_fullTranscribe(JNIEnv *env, | ||||
| 																		   jobject thiz, | ||||
| 																		   jlong pointer, | ||||
| 																		   jfloatArray audio_data) { | ||||
| 	auto *pSession = reinterpret_cast<WhisperSession *> (pointer); | ||||
| 	jfloat *pAudioData = env->GetFloatArrayElements(audio_data, nullptr); | ||||
| 	jsize lenAudioData = env->GetArrayLength(audio_data); | ||||
| 	std::string result; | ||||
|  | ||||
| 	try { | ||||
| 		LOGD("Starting Whisper, transcribe %d", lenAudioData); | ||||
| 		result = pSession->transcribeNextChunk(pAudioData, lenAudioData); | ||||
| 		auto preview = pSession->getPreview(); | ||||
| 		LOGD("Ran Whisper. Got %s (preview %s)", result.c_str(), preview.c_str()); | ||||
| 	} catch (const std::exception& exception) { | ||||
| 		LOGW("Failed to run whisper: %s", exception.what()); | ||||
| 		throwException(env, exception.what()); | ||||
| 	} | ||||
|  | ||||
| 	// JNI_ABORT: "free the buffer without copying back the possible changes", pass 0 to copy | ||||
| 	// changes (there should be no changes) | ||||
| 	env->ReleaseFloatArrayElements(audio_data, pAudioData, JNI_ABORT); | ||||
|  | ||||
| 	return stringToJava(env, result); | ||||
| } | ||||
| extern "C" | ||||
| JNIEXPORT jstring JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_getPreview( | ||||
| 		JNIEnv *env, jobject thiz, jlong pointer | ||||
| ) { | ||||
| 	auto *pSession = reinterpret_cast<WhisperSession *> (pointer); | ||||
| 	return stringToJava(env, pSession->getPreview()); | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT void JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_runTests(JNIEnv *env, jobject thiz) { | ||||
| 	try { | ||||
| 		findLongestSilence_test(); | ||||
| 	} catch (const std::exception& exception) { | ||||
| 		LOGW("Failed to run tests: %s", exception.what()); | ||||
| 		throwException(env, exception.what()); | ||||
| 	} | ||||
| } | ||||
| @@ -21,7 +21,7 @@ class AudioRecorder(context: Context) : Closeable { | ||||
| 	private var bufferWriteOffset = 0 | ||||
|  | ||||
| 	// Accessor must not modify result | ||||
| 	val bufferedData: FloatArray get() = buffer.sliceArray(0 until bufferWriteOffset) | ||||
| 	private val bufferedData: FloatArray get() = buffer.sliceArray(0 until bufferWriteOffset) | ||||
| 	val bufferLengthSeconds: Double get() = bufferWriteOffset.toDouble() / sampleRate | ||||
|  | ||||
| 	init { | ||||
| @@ -74,11 +74,16 @@ class AudioRecorder(context: Context) : Closeable { | ||||
| 	} | ||||
|  | ||||
| 	// Pulls all available data from the audio recorder's buffer | ||||
| 	fun pullAvailable() { | ||||
| 		return read(maxBufferSize, AudioRecord.READ_NON_BLOCKING) | ||||
| 	fun pullAvailable(): FloatArray { | ||||
| 		read(maxBufferSize, AudioRecord.READ_NON_BLOCKING) | ||||
|  | ||||
| 		val result = bufferedData | ||||
| 		buffer.fill(0.0f, 0, maxBufferSize); | ||||
| 		bufferWriteOffset = 0 | ||||
| 		return result | ||||
| 	} | ||||
|  | ||||
| 	fun pullNextSeconds(seconds: Double) { | ||||
| 	fun pullNextSeconds(seconds: Double):FloatArray { | ||||
| 		val remainingSize = maxBufferSize - bufferWriteOffset | ||||
| 		val requestedSize = (seconds * sampleRate).toInt() | ||||
|  | ||||
| @@ -87,7 +92,8 @@ class AudioRecorder(context: Context) : Closeable { | ||||
| 			advanceStartBySamples(maxBufferSize / 3) | ||||
| 		} | ||||
|  | ||||
| 		return read(requestedSize, AudioRecord.READ_BLOCKING) | ||||
| 		read(requestedSize, AudioRecord.READ_BLOCKING) | ||||
| 		return pullAvailable() | ||||
| 	} | ||||
|  | ||||
| 	override fun close() { | ||||
|   | ||||
| @@ -0,0 +1,54 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import java.io.Closeable | ||||
|  | ||||
| class NativeWhisperLib( | ||||
| 	modelPath: String, | ||||
| 	languageCode: String, | ||||
| 	prompt: String, | ||||
| ) : Closeable { | ||||
| 	companion object { | ||||
| 		init { | ||||
| 			System.loadLibrary("joplin") | ||||
| 		} | ||||
|  | ||||
| 		external fun runTests(): Unit; | ||||
|  | ||||
| 		// TODO: The example whisper.cpp project transfers pointers as Longs to the Kotlin code. | ||||
| 		// This seems unsafe. Try changing how this is managed. | ||||
| 		private external fun init(modelPath: String, languageCode: String, prompt: String): Long; | ||||
| 		private external fun free(pointer: Long): Unit; | ||||
|  | ||||
| 		private external fun fullTranscribe(pointer: Long, audioData: FloatArray): String; | ||||
| 		private external fun getPreview(pointer: Long): String; | ||||
| 	} | ||||
|  | ||||
| 	private var closed = false | ||||
| 	private val pointer: Long = init(modelPath, languageCode, prompt) | ||||
|  | ||||
| 	fun transcribe(audioData: FloatArray): String { | ||||
| 		if (closed) { | ||||
| 			throw Exception("Cannot transcribe using a closed session") | ||||
| 		} | ||||
|  | ||||
| 		return fullTranscribe(pointer, audioData) | ||||
| 	} | ||||
|  | ||||
| 	fun getPreview(): String { | ||||
| 		if (closed) { | ||||
| 			throw Exception("Cannot get preview from a closed session") | ||||
| 		} | ||||
|  | ||||
| 		return getPreview(pointer) | ||||
| 	} | ||||
|  | ||||
| 	override fun close() { | ||||
| 		if (closed) { | ||||
| 			throw Exception("Cannot close a whisper session twice") | ||||
| 		} | ||||
|  | ||||
| 		closed = true | ||||
| 		free(pointer) | ||||
| 	} | ||||
|  | ||||
| } | ||||
| @@ -1,110 +1,33 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import ai.onnxruntime.OnnxTensor | ||||
| import ai.onnxruntime.OrtEnvironment | ||||
| import ai.onnxruntime.OrtSession | ||||
| import ai.onnxruntime.extensions.OrtxPackage | ||||
| import android.annotation.SuppressLint | ||||
| import android.content.Context | ||||
| import android.util.Log | ||||
| import java.io.Closeable | ||||
| import java.nio.FloatBuffer | ||||
| import java.nio.IntBuffer | ||||
| import kotlin.time.DurationUnit | ||||
| import kotlin.time.measureTimedValue | ||||
|  | ||||
| class SpeechToTextConverter( | ||||
| 	modelPath: String, | ||||
| 	locale: String, | ||||
| 	prompt: String, | ||||
| 	recorderFactory: AudioRecorderFactory, | ||||
| 	private val environment: OrtEnvironment, | ||||
| 	context: Context, | ||||
| ) : Closeable { | ||||
| 	private val recorder = recorderFactory(context) | ||||
| 	private val session: OrtSession = environment.createSession( | ||||
| 		modelPath, | ||||
| 		OrtSession.SessionOptions().apply { | ||||
| 			// Needed for audio decoding | ||||
| 			registerCustomOpLibrary(OrtxPackage.getLibraryPath()) | ||||
| 		}, | ||||
| 	) | ||||
| 	private val languageCode = Regex("_.*").replace(locale, "") | ||||
| 	private val decoderInputIds = when (languageCode) { | ||||
| 		// Add 50363 to the end to omit timestamps | ||||
| 		"en" -> intArrayOf(50258, 50259, 50359) | ||||
| 		"fr" -> intArrayOf(50258, 50265, 50359) | ||||
| 		"es" -> intArrayOf(50258, 50262, 50359) | ||||
| 		"de" -> intArrayOf(50258, 50261, 50359) | ||||
| 		"it" -> intArrayOf(50258, 50274, 50359) | ||||
| 		"nl" -> intArrayOf(50258, 50271, 50359) | ||||
| 		"ko" -> intArrayOf(50258, 50264, 50359) | ||||
| 		"th" -> intArrayOf(50258, 50289, 50359) | ||||
| 		"ru" -> intArrayOf(50258, 50263, 50359) | ||||
| 		"pt" -> intArrayOf(50258, 50267, 50359) | ||||
| 		"pl" -> intArrayOf(50258, 50269, 50359) | ||||
| 		"id" -> intArrayOf(50258, 50275, 50359) | ||||
| 		"hi" -> intArrayOf(50258, 50276, 50359) | ||||
| 		// Let Whisper guess the language | ||||
| 		else -> intArrayOf(50258) | ||||
| 	} | ||||
| 	private var whisper = NativeWhisperLib( | ||||
| 		modelPath, | ||||
| 		languageCode, | ||||
| 		prompt, | ||||
| 	) | ||||
|  | ||||
| 	fun start() { | ||||
| 		recorder.start() | ||||
| 	} | ||||
|  | ||||
| 	private fun getInputs(data: FloatArray): MutableMap<String, OnnxTensor> { | ||||
| 		fun intTensor(value: Int) = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			IntBuffer.wrap(intArrayOf(value)), | ||||
| 			longArrayOf(1), | ||||
| 		) | ||||
| 		fun floatTensor(value: Float) = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			FloatBuffer.wrap(floatArrayOf(value)), | ||||
| 			longArrayOf(1), | ||||
| 		) | ||||
| 		val audioPcmTensor = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			FloatBuffer.wrap(data), | ||||
| 			longArrayOf(1, data.size.toLong()), | ||||
| 		) | ||||
| 		val decoderInputIdsTensor = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			IntBuffer.wrap(decoderInputIds), | ||||
| 			longArrayOf(1, decoderInputIds.size.toLong()) | ||||
| 		) | ||||
|  | ||||
| 		return mutableMapOf( | ||||
| 			"audio_pcm" to audioPcmTensor, | ||||
| 			"max_length" to intTensor(412), | ||||
| 			"min_length" to intTensor(0), | ||||
| 			"num_return_sequences" to intTensor(1), | ||||
| 			"num_beams" to intTensor(1), | ||||
| 			"length_penalty" to floatTensor(1.1f), | ||||
| 			"repetition_penalty" to floatTensor(3f), | ||||
| 			"decoder_input_ids" to decoderInputIdsTensor, | ||||
|  | ||||
| 			// Required for timestamps | ||||
| 			"logits_processor" to intTensor(1) | ||||
| 		) | ||||
| 	} | ||||
|  | ||||
| 	// TODO .get() fails on older Android versions | ||||
| 	@SuppressLint("NewApi") | ||||
| 	private fun convert(data: FloatArray): String { | ||||
| 		val (inputs, convertInputsTime) = measureTimedValue { | ||||
| 			getInputs(data) | ||||
| 		} | ||||
| 		val (outputs, getOutputsTime) = measureTimedValue { | ||||
| 			session.run(inputs, setOf("str")) | ||||
| 		} | ||||
| 		val mainOutput = outputs.get("str").get().value as Array<Array<String>> | ||||
| 		outputs.close() | ||||
|  | ||||
| 		Log.i("Whisper", "Converted ${data.size / 16000}s of data in ${ | ||||
| 			getOutputsTime.toString(DurationUnit.SECONDS, 2) | ||||
| 		} converted inputs in ${convertInputsTime.inWholeMilliseconds}ms") | ||||
| 		return mainOutput[0][0] | ||||
| 		Log.d("Whisper", "Pre-transcribe data of size ${data.size}") | ||||
| 		val result = whisper.transcribe(data) | ||||
| 		Log.d("Whisper", "Post transcribe. Got $result") | ||||
| 		return result; | ||||
| 	} | ||||
|  | ||||
| 	fun dropFirstSeconds(seconds: Double) { | ||||
| @@ -114,23 +37,26 @@ class SpeechToTextConverter( | ||||
|  | ||||
| 	val bufferLengthSeconds: Double get() = recorder.bufferLengthSeconds | ||||
|  | ||||
| 	fun expandBufferAndConvert(seconds: Double): String { | ||||
| 		recorder.pullNextSeconds(seconds) | ||||
| 		// Also pull any extra available data, in case the speech-to-text converter | ||||
| 		// is lagging behind the audio recorder. | ||||
| 		recorder.pullAvailable() | ||||
|  | ||||
| 		return convert(recorder.bufferedData) | ||||
| 	fun convertNext(seconds: Double): String { | ||||
| 		val buffer = recorder.pullNextSeconds(seconds) | ||||
| 		val result = convert(buffer) | ||||
| 		dropFirstSeconds(seconds) | ||||
| 		return result | ||||
| 	} | ||||
|  | ||||
| 	// Converts as many seconds of buffered data as possible, without waiting | ||||
| 	fun expandBufferAndConvert(): String { | ||||
| 		recorder.pullAvailable() | ||||
| 		return convert(recorder.bufferedData) | ||||
| 	fun convertRemaining(): String { | ||||
| 		val buffer = recorder.pullAvailable() | ||||
| 		return convert(buffer) | ||||
| 	} | ||||
|  | ||||
| 	fun getPreview(): String { | ||||
| 		return whisper.getPreview() | ||||
| 	} | ||||
|  | ||||
| 	override fun close() { | ||||
| 		Log.d("Whisper", "Close") | ||||
| 		recorder.close() | ||||
| 		session.close() | ||||
| 		whisper.close() | ||||
| 	} | ||||
| } | ||||
| @@ -1,6 +1,5 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import ai.onnxruntime.OrtEnvironment | ||||
| import com.facebook.react.ReactPackage | ||||
| import com.facebook.react.bridge.LifecycleEventListener | ||||
| import com.facebook.react.bridge.NativeModule | ||||
| @@ -24,7 +23,6 @@ class SpeechToTextPackage : ReactPackage { | ||||
| 	class SpeechToTextModule( | ||||
| 		private var context: ReactApplicationContext, | ||||
| 	) : ReactContextBaseJavaModule(context), LifecycleEventListener { | ||||
| 		private var environment: OrtEnvironment? = null | ||||
| 		private val executorService: ExecutorService = Executors.newFixedThreadPool(1) | ||||
| 		private val sessionManager = SpeechToTextSessionManager(executorService) | ||||
|  | ||||
| @@ -32,21 +30,24 @@ class SpeechToTextPackage : ReactPackage { | ||||
|  | ||||
| 		override fun onHostResume() { } | ||||
| 		override fun onHostPause() { } | ||||
| 		override fun onHostDestroy() { | ||||
| 			environment?.close() | ||||
| 		override fun onHostDestroy() { } | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun runTests(promise: Promise) { | ||||
| 			try { | ||||
| 				NativeWhisperLib.runTests() | ||||
| 				promise.resolve(true) | ||||
| 			} catch (exception: Throwable) { | ||||
| 				promise.reject(exception) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun openSession(modelPath: String, locale: String, promise: Promise) { | ||||
| 		fun openSession(modelPath: String, locale: String, prompt: String, promise: Promise) { | ||||
| 			val appContext = context.applicationContext | ||||
| 			// Initialize environment as late as possible: | ||||
| 			val ortEnvironment = environment ?: OrtEnvironment.getEnvironment() | ||||
| 			if (environment != null) { | ||||
| 				environment = ortEnvironment | ||||
| 			} | ||||
|  | ||||
| 			try { | ||||
| 				val sessionId = sessionManager.openSession(modelPath, locale, ortEnvironment, appContext) | ||||
| 				val sessionId = sessionManager.openSession(modelPath, locale, prompt, appContext) | ||||
| 				promise.resolve(sessionId) | ||||
| 			} catch (exception: Throwable) { | ||||
| 				promise.reject(exception) | ||||
| @@ -69,8 +70,8 @@ class SpeechToTextPackage : ReactPackage { | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun expandBufferAndConvert(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 			sessionManager.expandBufferAndConvert(sessionId, duration, promise) | ||||
| 		fun convertNext(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 			sessionManager.convertNext(sessionId, duration, promise) | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| @@ -78,6 +79,11 @@ class SpeechToTextPackage : ReactPackage { | ||||
| 			sessionManager.convertAvailable(sessionId, promise) | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun getPreview(sessionId: Int, promise: Promise) { | ||||
| 			sessionManager.getPreview(sessionId, promise) | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun closeSession(sessionId: Int, promise: Promise) { | ||||
| 			sessionManager.closeSession(sessionId, promise) | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import ai.onnxruntime.OrtEnvironment | ||||
| import android.content.Context | ||||
| import com.facebook.react.bridge.Promise | ||||
| import java.util.concurrent.Executor | ||||
| @@ -21,13 +20,13 @@ class SpeechToTextSessionManager( | ||||
| 	fun openSession( | ||||
| 		modelPath: String, | ||||
| 		locale: String, | ||||
| 		environment: OrtEnvironment, | ||||
| 		prompt: String, | ||||
| 		context: Context, | ||||
| 	): Int { | ||||
| 		val sessionId = nextSessionId++ | ||||
| 		sessions[sessionId] = SpeechToTextSession( | ||||
| 			SpeechToTextConverter( | ||||
| 				modelPath, locale, recorderFactory = AudioRecorder.factory, environment, context, | ||||
| 				modelPath, locale, prompt, recorderFactory = AudioRecorder.factory, context, | ||||
| 			) | ||||
| 		) | ||||
| 		return sessionId | ||||
| @@ -87,9 +86,9 @@ class SpeechToTextSessionManager( | ||||
| 	} | ||||
|  | ||||
| 	// Waits for the next [duration] seconds to become available, then converts | ||||
| 	fun expandBufferAndConvert(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 	fun convertNext(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 		this.concurrentWithSession(sessionId, promise::reject) { session -> | ||||
| 			val result = session.converter.expandBufferAndConvert(duration) | ||||
| 			val result = session.converter.convertNext(duration) | ||||
| 			promise.resolve(result) | ||||
| 		} | ||||
| 	} | ||||
| @@ -97,7 +96,14 @@ class SpeechToTextSessionManager( | ||||
| 	// Converts all available recorded data | ||||
| 	fun convertAvailable(sessionId: Int, promise: Promise) { | ||||
| 		this.concurrentWithSession(sessionId, promise::reject) { session -> | ||||
| 			val result = session.converter.expandBufferAndConvert() | ||||
| 			val result = session.converter.convertRemaining() | ||||
| 			promise.resolve(result) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	fun getPreview(sessionId: Int, promise: Promise) { | ||||
| 		this.concurrentWithSession(sessionId, promise::reject) { session -> | ||||
| 			val result = session.converter.getPreview() | ||||
| 			promise.resolve(result) | ||||
| 		} | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										9
									
								
								packages/app-mobile/android/vendor/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								packages/app-mobile/android/vendor/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| whisper.cpp/.gitmodules | ||||
| whisper.cpp/scripts/ | ||||
| whisper.cpp/samples/ | ||||
| whisper.cpp/tests/ | ||||
| whisper.cpp/models/ | ||||
| whisper.cpp/examples/ | ||||
| whisper.cpp/.*/ | ||||
| whisper.cpp/bindings/ | ||||
| whisper.cpp/**/*.Dockerfile | ||||
							
								
								
									
										7
									
								
								packages/app-mobile/android/vendor/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								packages/app-mobile/android/vendor/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| # Vendored Android packages | ||||
|  | ||||
| This directory contains upstream packages that can't be added as direct dependencies (e.g. through `npm`). | ||||
|  | ||||
| ## whisper.cpp | ||||
|  | ||||
| `whisper.cpp` provides voice typing capabilities. It can be updated by replacing the contents of the `whisper.cpp` directory with the latest content from https://github.com/ggerganov/whisper.cpp. To decrease the size of the `whisper.cpp` directory, some files are ignored by the `.gitignore`. | ||||
							
								
								
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| *.o | ||||
| *.a | ||||
| *.d | ||||
| .cache/ | ||||
| .coreml/ | ||||
| .test/ | ||||
| .venv/ | ||||
| .vs/ | ||||
| .vscode/ | ||||
| .DS_Store | ||||
| .vimspector.json | ||||
| /CMakeSettings.json | ||||
| /talk-llama.dSYM/ | ||||
|  | ||||
| build/ | ||||
| build-*/ | ||||
|  | ||||
| # SPM | ||||
| .build/ | ||||
| .swiftpm | ||||
| *.metallib | ||||
|  | ||||
| ggml-metal-embed.metal | ||||
| ggml-metal-embed.metal.tmp | ||||
|  | ||||
| /main | ||||
| /stream | ||||
| /command | ||||
| /talk | ||||
| /talk-llama | ||||
| /bench | ||||
| /quantize | ||||
| /server | ||||
| /lsp | ||||
|  | ||||
| arm_neon.h | ||||
| sync.sh | ||||
| libwhisper.a | ||||
| libwhisper.so | ||||
| compile_commands.json | ||||
|  | ||||
| examples/arm_neon.h | ||||
| examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata | ||||
| examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/ | ||||
| examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata | ||||
|  | ||||
| extra/bench-gg.txt | ||||
|  | ||||
| models/*.mlmodel | ||||
| models/*.mlmodelc | ||||
| models/*.mlpackage | ||||
| bindings/java/.gradle/ | ||||
| bindings/java/.idea/ | ||||
| .idea/ | ||||
|  | ||||
| benchmark_results.csv | ||||
| cmake-build-debug/ | ||||
| .cxx/ | ||||
| .gradle/ | ||||
| local.properties | ||||
							
								
								
									
										510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/AUTHORS
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/AUTHORS
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,510 @@ | ||||
| # date: Tue Feb  4 13:03:35 EET 2025 | ||||
| # this file is auto-generated by scripts/gen-authors.sh | ||||
|  | ||||
| 0/0 <zero@imaskeleton.me> | ||||
| 0cc4m <picard12@live.de> | ||||
| 0xsourcecode <134374803+0xsourcecode@users.noreply.github.com> | ||||
| 65a <10104049+65a@users.noreply.github.com> | ||||
| AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com> | ||||
| AT <manyoso@users.noreply.github.com> | ||||
| Aarni Koskela <akx@iki.fi> | ||||
| Aaron Pham <29749331+aarnphm@users.noreply.github.com> | ||||
| Aaron Taylor <aaron@exphat.com> | ||||
| Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> | ||||
| Abitofevrything <54505189+abitofevrything@users.noreply.github.com> | ||||
| Adam Jones <domdomegg+git@gmail.com> | ||||
| Adrien Gallouët <adrien@gallouet.fr> | ||||
| Adrien Gallouët <angt@huggingface.co> | ||||
| AfryMask <AfryMask@163.com> | ||||
| Ahmad Bilal <ahmad.bilal@empglabs.com> | ||||
| Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> | ||||
| AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> | ||||
| AidanBeltonS <aidan.belton@codeplay.com> | ||||
| Akarshan Biswas <akarshan.biswas@gmail.com> | ||||
| Akarshan Biswas <akarshanbiswas@fedoraproject.org> | ||||
| Akash Mahajan <akash7190@gmail.com> | ||||
| Akash Mahajan <akashmjn@stanford.edu> | ||||
| Al Hoang <3811822-hoanga@users.noreply.gitlab.com> | ||||
| Alan <unknown> | ||||
| Albert Jin <albert.jin@gmail.com> | ||||
| Alberto Cabrera Pérez <alberto.cabrera@codeplay.com> | ||||
| Alberto Cabrera Pérez <alberto.cabrera@intel.com> | ||||
| Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com> | ||||
| Alex Azarov <alex@azarov.by> | ||||
| Alex Bacart <13940752+alex-bacart@users.noreply.github.com> | ||||
| Alex Evgrashin <aevgrashin@yandex.ru> | ||||
| Alex O'Connell <35843486+acon96@users.noreply.github.com> | ||||
| Alexandr Graschenkov <alexandr.graschenkov91@gmail.com> | ||||
| Alexandru Mariuti <alex@mariuti.com> | ||||
| Alexey Kharlamov <alexey@kharlamov.biz> | ||||
| Alfredo Montesinos <alfredo.montesinos@g.austincc.edu> | ||||
| Ali Alameh <ali.alameh@isae.edu.lb> | ||||
| Alter <0x7c48@gmail.com> | ||||
| Ananta Bastola <anantarajbastola@gmail.com> | ||||
| Andreas Kieslinger <47689530+aendk@users.noreply.github.com> | ||||
| Andreas Lubbe <git@lubbe.org> | ||||
| Andreu Huguet <andreuhuguet@gmail.com> | ||||
| Andrew Huynh <a5thuynh@gmail.com> | ||||
| Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> | ||||
| Andrew S <andrews54757@gmail.com> | ||||
| Andy Maloney <asmaloney@gmail.com> | ||||
| Anton Kostin <masguit42@users.noreply.github.com> | ||||
| Artyom Mezin <psycho.fading@gmail.com> | ||||
| Asad Memon <asad.lionpk@gmail.com> | ||||
| Ashraful Islam <ashraful.meche@gmail.com> | ||||
| AsukaMinato <asukaminato@nyan.eu.org> | ||||
| AustinMroz <austinmroz@utexas.edu> | ||||
| Avik Sengupta <avik@sengupta.net> | ||||
| Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com> | ||||
| Baffin Lee <baffinlee@gmail.com> | ||||
| Ben Ashbaugh <ben.ashbaugh@intel.com> | ||||
| Ben Nortier <bjnortier@gmail.com> | ||||
| Benjamin Heiniger <benjamin.heiniger@bluewin.ch> | ||||
| Bernhard M. Wiedemann <githubbmwprimary@lsmod.de> | ||||
| Binozo <70137898+Binozo@users.noreply.github.com> | ||||
| Bo-Yi Wu <appleboy.tw@gmail.com> | ||||
| Boris Bliznioukov <blib@mail.com> | ||||
| Borislav Stanimirov <b.stanimirov@abv.bg> | ||||
| Brad Murray <59848399+bradmurray-dt@users.noreply.github.com> | ||||
| Brian Murray <brian@bmurray.ca> | ||||
| CRD716 <crd716@gmail.com> | ||||
| Canis Lupus <Canis-UK@users.noreply.github.com> | ||||
| Carlos Zoido <mrgalleta@gmail.com> | ||||
| Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> | ||||
| CarterLi999 <664681047@qq.com> | ||||
| ChangSeok Oh <shivamidow@users.noreply.github.com> | ||||
| Changyeon Kim <cyzero.kim@samsung.com> | ||||
| Chaoqun <27287694+OpenWaygate@users.noreply.github.com> | ||||
| Charles Xu <63788048+chaxu01@users.noreply.github.com> | ||||
| Charles Xu <charles.xu@arm.com> | ||||
| Chen Xi <xi2.chen@intel.com> | ||||
| Chen Xi <xixichen08@foxmail.com> | ||||
| Chenguang Li <87689256+noemotiovon@users.noreply.github.com> | ||||
| Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com> | ||||
| Chidi Williams <williamschidi1@gmail.com> | ||||
| Chris Elrod <elrodc@gmail.com> | ||||
| Christian <12550267+iceychris@users.noreply.github.com> | ||||
| Christian Kastner <ckk@kvr.at> | ||||
| Clifford Heath <clifford.heath@gmail.com> | ||||
| Clint Herron <hanclinto@gmail.com> | ||||
| Colin <github@whoisc.cc> | ||||
| Conrad Kramer <conrad@conradkramer.com> | ||||
| Corey Earwood <iamcgn+github@gmail.com> | ||||
| CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> | ||||
| DAN™ <dranger003@gmail.com> | ||||
| DGdev91 <DGdev91@users.noreply.github.com> | ||||
| Damian Czaja <trojan295@protonmail.com> | ||||
| Dan Johansson <164997844+eddnjjn@users.noreply.github.com> | ||||
| Dan Johansson <dan.johansson@arm.com> | ||||
| Daniel Bevenius <daniel.bevenius@gmail.com> | ||||
| Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com> | ||||
| Daniel Ziegenberg <daniel@ziegenberg.at> | ||||
| Daniele <57776841+daniandtheweb@users.noreply.github.com> | ||||
| Dave <dave-fl@users.noreply.github.com> | ||||
| Dave Airlie <airlied@gmail.com> | ||||
| Dave Airlie <airlied@redhat.com> | ||||
| Daven Sanassy <daven@vochlea.co.uk> | ||||
| David <dnhkng@gmail.com> | ||||
| David Thorpe <djt@mutablelogic.com> | ||||
| DavidKorczynski <david@adalogics.com> | ||||
| Davidson Francis <davidsondfgl@gmail.com> | ||||
| Dener Stassun <denerstassun@gmail.com> | ||||
| Dibakar Gope <dibakar.gope@arm.com> | ||||
| Didzis Gosko <didzis@users.noreply.github.com> | ||||
| Diego Devesa <slarengh@gmail.com> | ||||
| Digipom <admin@digipom.com> | ||||
| Dimo <dimo@ieee.org> | ||||
| Djip007 <3705339+Djip007@users.noreply.github.com> | ||||
| Djip007 <djip.perois@free.fr> | ||||
| Dody Suria Wijaya <dodysw@gmail.com> | ||||
| Dou Xinpeng <15529241576@163.com> | ||||
| Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com> | ||||
| Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> | ||||
| Duncan McConnell <ddmcconnell4@gmail.com> | ||||
| Egor Egorov <me@egorfine.com> | ||||
| Elkana Bardugo <ttv200@gmail.com> | ||||
| Emmanuel Schmidbauer <eschmidbauer@gmail.com> | ||||
| Engininja2 <139037756+Engininja2@users.noreply.github.com> | ||||
| Eric Curtin <ericcurtin17@gmail.com> | ||||
| Eric Swanson <eswanson@alloscomp.com> | ||||
| Eric Tendian <erictendian@gmail.com> | ||||
| Eric Zhang <34133756+EZForever@users.noreply.github.com> | ||||
| Erik Scholz <Green-Sky@users.noreply.github.com> | ||||
| Evan Jones <evan.q.jones@gmail.com> | ||||
| Evan Martin <evan.martin@gmail.com> | ||||
| Eve <139727413+netrunnereve@users.noreply.github.com> | ||||
| Evgeny Kuznetsov <evgeny@kuznetsov.md> | ||||
| F1L1P <78918286+F1L1Pv2@users.noreply.github.com> | ||||
| Faisal Zaghloul <quic_fzaghlou@quicinc.com> | ||||
| Fangjun Kuang <csukuangfj@gmail.com> | ||||
| Felix <stenbackfelix@gmail.com> | ||||
| Finn Voorhees <finnvoorhees@gmail.com> | ||||
| FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> | ||||
| FlippFuzz <41221030+FlippFuzz@users.noreply.github.com> | ||||
| Frankie Robertson <frankier@users.noreply.github.com> | ||||
| Gang Chen <goncha@gmail.com> | ||||
| Gavin Cai <gavin1818@hotmail.com> | ||||
| George Hindle <george@georgehindle.com> | ||||
| Georgi Gerganov <ggerganov@gmail.com> | ||||
| Gilad S <7817232+giladgd@users.noreply.github.com> | ||||
| Gilad S <giladgd@users.noreply.github.com> | ||||
| Gilad S. <7817232+giladgd@users.noreply.github.com> | ||||
| GitAritron <103900385+GitAritron@users.noreply.github.com> | ||||
| GiviMAD <GiviMAD@users.noreply.github.com> | ||||
| Gleicon Moraes <gleicon@gmail.com> | ||||
| Gregor Jasny <gjasny@googlemail.com> | ||||
| Guillaume Wenzek <gwenzek@users.noreply.github.com> | ||||
| HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com> | ||||
| Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> | ||||
| Hang <bebound@gmail.com> | ||||
| Haus1 <haus.xda@gmail.com> | ||||
| Herman Semenov <GermanAizek@yandex.ru> | ||||
| HimariO <dsfhe49854@gmail.com> | ||||
| Hong Bo PENG <penghb@cn.ibm.com> | ||||
| Hrishikesh Barman <geekodour@users.noreply.github.com> | ||||
| Hugo <hugo@whynothugo.nl> | ||||
| Ian Bicking <ian@ianbicking.org> | ||||
| Ian Bull <irbull@eclipsesource.com> | ||||
| Ihar Hrachyshka <ihrachys@redhat.com> | ||||
| Ikko Ashimine <eltociear@gmail.com> | ||||
| Ikko Eltociear Ashimine <eltociear@gmail.com> | ||||
| InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com> | ||||
| Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com> | ||||
| Ivan <nekotekina@gmail.com> | ||||
| Ivan Filipov <159561759+vanaka11@users.noreply.github.com> | ||||
| Ivan Gorin <ivangorin21@gmail.com> | ||||
| Ivo von Putzer Reibegg <ivo.putzer@gmail.com> | ||||
| JJ <103335846+computerscienceiscool@users.noreply.github.com> | ||||
| Jack Mousseau <jmousseau@users.noreply.github.com> | ||||
| JacobLinCool <jacoblincool@gmail.com> | ||||
| Jakub Ráček <blizzcz@gmail.com> | ||||
| Jared Van Bortel <jared@nomic.ai> | ||||
| Jay Binks <jaybinks@gmail.com> | ||||
| Jayant <jayantyadav202@gmail.com> | ||||
| Jeff Bolz <jbolz@nvidia.com> | ||||
| Jeroen Mostert <jeroen.mostert@cm.com> | ||||
| Jhen-Jie Hong <developer@jhen.me> | ||||
| Jhen-Jie Hong <iainst0409@gmail.com> | ||||
| JidongZhang-THU <1119708529@qq.com> | ||||
| Jo Liss <joliss42@gmail.com> | ||||
| Joe Todd <joe.todd@codeplay.com> | ||||
| Johan <jr.raffin@gmail.com> | ||||
| Johannes Gäßler <johannesg@5d6.de> | ||||
| John Balis <phobossystems@gmail.com> | ||||
| JohnnyB <jboero@users.noreply.github.com> | ||||
| Jonathan Soo <jcsoo@agora.com> | ||||
| Jonno <1160532+razodactyl@users.noreply.github.com> | ||||
| Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi> | ||||
| Jose <34888496+Jerry-Master@users.noreply.github.com> | ||||
| Josh Bleecher Snyder <josharian@gmail.com> | ||||
| Josscii <jossciiweiyi@gmail.com> | ||||
| Judd <foldl@users.noreply.github.com> | ||||
| Jumper775 <78500318+jumpers775@users.noreply.github.com> | ||||
| Jun Hee Yoo <contact.jhyoo@gmail.com> | ||||
| Junil Kim <logyourself@gmail.com> | ||||
| Justina Cho <justcho5@gmail.com> | ||||
| Justine Tunney <jtunney@gmail.com> | ||||
| Justine Tunney <jtunney@mozilla.com> | ||||
| KITAITI Makoto <KitaitiMakoto@gmail.com> | ||||
| KP Kaiser <kirk@zothcorp.com> | ||||
| Kamilake <exjang0@gmail.com> | ||||
| Karol Kontny <82021046+kkontny@users.noreply.github.com> | ||||
| Karthick <j.karthic2004@gmail.com> | ||||
| Kartik Saranathan <278928+Kartiku@users.noreply.github.com> | ||||
| Kasumi <90275229+kasumi-1@users.noreply.github.com> | ||||
| Kawrakow <48489457+ikawrakow@users.noreply.github.com> | ||||
| Kendrick Taylor <kendrick@circuitsix.com> | ||||
| Kevin Brothaler <admin@digipom.com> | ||||
| Kevin Gibbons <bakkot@gmail.com> | ||||
| Konosuke Sakai <konosuke@konosuke.work> | ||||
| Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com> | ||||
| Kreijstal <rainb@tfwno.gf> | ||||
| Kylin <56434533+KyL0N@users.noreply.github.com> | ||||
| LBlue <153975653+lbluep@users.noreply.github.com> | ||||
| Larry Battle <larry.battle.tech@gmail.com> | ||||
| Laytan Laats <laytanlaats@hotmail.com> | ||||
| Leo Moll <leo.moll@yeasoft.com> | ||||
| Lexevolution <31176843+Lexevolution@users.noreply.github.com> | ||||
| LittleLoli <26589867+WhichWho@users.noreply.github.com> | ||||
| Lucas Zanek <57494138+LucasZNK@users.noreply.github.com> | ||||
| Luis Herrera <herrera-luis@users.noreply.github.com> | ||||
| Lukas Rist <glaslos@gmail.com> | ||||
| M. A. Ali <73258591+MightyStud@users.noreply.github.com> | ||||
| M. Eren Akbiyik <erenakbiyik@gmail.com> | ||||
| Ma Mingfei <mingfei.ma@intel.com> | ||||
| Maciek <maciek.mab122@gmail.com> | ||||
| Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> | ||||
| Marcin Mielniczuk <marmistrz.dev@zoho.eu> | ||||
| Mark Karpelès <MagicalTux@users.noreply.github.com> | ||||
| Mark Zhuang <zhuangqiubin@gmail.com> | ||||
| Markus Tavenrath <mtavenrath@users.noreply.github.com> | ||||
| Martin Delille <martin@delille.org> | ||||
| Martin Warnaar <martinwarnaar@gmail.com> | ||||
| Masaya, Kato <62578291+msy-kato@users.noreply.github.com> | ||||
| Matheus de Sousa <23645013+keyehzy@users.noreply.github.com> | ||||
| Mathieu Baudier <mbaudier@argeo.org> | ||||
| Mathijs de Bruin <mathijs@mathijsfietst.nl> | ||||
| Matija Pevec <mightymatth@users.noreply.github.com> | ||||
| Matt Stephenson <mstephenson6@users.noreply.github.com> | ||||
| Max Krasnyansky <max.krasnyansky@gmail.com> | ||||
| Max Krasnyansky <quic_maxk@quicinc.com> | ||||
| Maximiliano Levi <8160966+maxilevi@users.noreply.github.com> | ||||
| Meng, Hengyu <hengyu.meng@intel.com> | ||||
| Mengqing Cao <cmq0113@163.com> | ||||
| Michael Podvitskiy <podvitskiymichael@gmail.com> | ||||
| Michael Rienstra <mrienstra@gmail.com> | ||||
| Mikhail Grigorev <sleuthhound@gmail.com> | ||||
| Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com> | ||||
| Mohit Agarwal <mohit@sdf.org> | ||||
| Molly Sophia <mollysophia379@gmail.com> | ||||
| Murilo Santana <mvrilo@gmail.com> | ||||
| NETZkultur GmbH <mulholland@netzkultur.de> | ||||
| Natsu <chino@hotococoa.moe> | ||||
| Neil Chudleigh <nchudleigh@users.noreply.github.com> | ||||
| Neo Zhang <14088817+arthw@users.noreply.github.com> | ||||
| Neo Zhang Jianyu <jianyu.zhang@intel.com> | ||||
| Neuman Vong <neuman.vong@gmail.com> | ||||
| Nicholai Tukanov <nicholaitukanov@gmail.com> | ||||
| Nicholas Albion <nalbion@yahoo.com> | ||||
| Nico Bosshard <nico@bosshome.ch> | ||||
| Nicolò Scipione <nicolo.scipione@codeplay.com> | ||||
| Niels Mayer <Niels.Mayer@gmail.com> | ||||
| Nikita Sarychev <42014488+sARY77@users.noreply.github.com> | ||||
| Nikolaj Olsson <nikse.dk@gmail.com> | ||||
| Okabintaro <103938900+Okabintaro@users.noreply.github.com> | ||||
| Oleg Sidorov <me@whitebox.io> | ||||
| Oleg Sidorov <oleg@sidorov.nl> | ||||
| Olivier Chafik <ochafik@users.noreply.github.com> | ||||
| Ondrej Kokes <ondrej.kokes@gmail.com> | ||||
| Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com> | ||||
| PAB <pierreantoine.bannier@gmail.com> | ||||
| Paul Tsochantaris <ptsochantaris@icloud.com> | ||||
| Pedro Probst <pprobst@insiberia.net> | ||||
| Peng <hzp1024@qq.com> | ||||
| Peter <peter277@users.noreply.github.com> | ||||
| Philipp Zabel <philipp.zabel@gmail.com> | ||||
| Philippe Normand <phil@base-art.net> | ||||
| Philippe Normand <philn@igalia.com> | ||||
| Plamen Minev <pacominev@gmail.com> | ||||
| Prashant Vithule <119530321+Vithulep@users.noreply.github.com> | ||||
| Przemysław Pawełczyk <przemoc@gmail.com> | ||||
| Qianhe Chen <54462604+chenqianhe@users.noreply.github.com> | ||||
| R0CKSTAR <xiaodong.ye@mthreads.com> | ||||
| R0CKSTAR <yeahdongcn@gmail.com> | ||||
| Radoslav Gerganov <rgerganov@gmail.com> | ||||
| Radosław Gryta <radek.gryta@gmail.com> | ||||
| Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com> | ||||
| Raiya Araki <83504221+rai62@users.noreply.github.com> | ||||
| Reinforce-II <fate@eastal.com> | ||||
| Reinis Muiznieks <muiznieks.reinis@gmail.com> | ||||
| RelatedTitle <r3latedtitle@gmail.com> | ||||
| Rémy Oudompheng <oudomphe@phare.normalesup.org> | ||||
| RhinoDevel <RhinoDevel@users.noreply.github.com> | ||||
| Rich Jones <miserlou@gmail.com> | ||||
| Robert Ormandi <52251610+ormandi@users.noreply.github.com> | ||||
| Robin <robin.xw@hotmail.com> | ||||
| Roddur Dasgupta <roddurd@gmail.com> | ||||
| Roland Rabien <figbug@gmail.com> | ||||
| Romain Biessy <romain.biessy@codeplay.com> | ||||
| Ronsor <ronsor@ronsor.pw> | ||||
| Rotem Dan <rotemdan@gmail.com> | ||||
| Ryan Hitchman <hitchmanr@gmail.com> | ||||
| Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com> | ||||
| RyanChang <ftes90015@gmail.com> | ||||
| SRHMorris <69468379+SRHMorris@users.noreply.github.com> | ||||
| SXX <sxx1136965276@gmail.com> | ||||
| Sacha Arbonel <sacha.arbonel@hotmail.fr> | ||||
| Salman Faroz <stsfaroz@gmail.com> | ||||
| Salvatore Mesoraca <s.mesoraca16@gmail.com> | ||||
| Sam <49637763+Onlyartist9@users.noreply.github.com> | ||||
| Sam Pullara <spullara@gmail.com> | ||||
| Samuel Durante <44513615+samueldurantes@users.noreply.github.com> | ||||
| Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> | ||||
| Sandro Hanea <40202887+sandrohanea@users.noreply.github.com> | ||||
| Sergio López <slp@redhat.com> | ||||
| Sergio López <slp@sinrega.org> | ||||
| Shanshan Shen <467638484@qq.com> | ||||
| Shijie <821898965@qq.com> | ||||
| Shupei Fan <dymarkfan@outlook.com> | ||||
| Siddharth Ramakrishnan <srr2141@columbia.edu> | ||||
| Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> | ||||
| Simon Moisselin <simon.moisstoll@gmail.com> | ||||
| Sindre Sorhus <sindresorhus@gmail.com> | ||||
| Slava Primenko <primenko.s@gmail.com> | ||||
| Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> | ||||
| Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com> | ||||
| Stefan Sydow <s.sydow@heinlein-video.de> | ||||
| Stefan Sydow <stefan@sydow.email> | ||||
| Syahmi Azhar <prsyahmi@gmail.com> | ||||
| Syed Jafri <syedjafri97@gmail.com> | ||||
| Sơn Phan Trung <phantrungson17@gmail.com> | ||||
| Taisei Mima <bhbstar.me@gmail.com> | ||||
| Takeshi Inoue <inoue.takeshi@gmail.com> | ||||
| Tamotsu Takahashi <ttakah+github@gmail.com> | ||||
| Taras Glek <taras@thegp.com> | ||||
| Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com> | ||||
| Thamster <Thamster@users.noreply.github.com> | ||||
| Thijs Raymakers <thijs@raymakers.nl> | ||||
| Thomas Fitzsimmons <fitzsim@fitzsim.org> | ||||
| Tiago Fassoni <tiagofassoni@users.noreply.github.com> | ||||
| Tienshiao Ma <tienshiao@tienshiao.org> | ||||
| Tim Miller <drasticactions@users.noreply.github.com> | ||||
| Timothy Cronin <40186632+4imothy@users.noreply.github.com> | ||||
| Tobrun <tobrun.van.nuland@gmail.com> | ||||
| Todd <taf2@users.noreply.github.com> | ||||
| Toliver <teejae@gmail.com> | ||||
| Tong Li <31761981+litongjava@users.noreply.github.com> | ||||
| Tony Wasserka <4840017+neobrain@users.noreply.github.com> | ||||
| Topping1 <78745143+Topping1@users.noreply.github.com> | ||||
| Travis Cline <travis.cline@gmail.com> | ||||
| UEXTM.com <84163508+uextm@users.noreply.github.com> | ||||
| UsernamesLame <156965854+UsernamesLame@users.noreply.github.com> | ||||
| Vadim Peretokin <vperetokin@hey.com> | ||||
| Valentin Gosu <1454649+valenting@users.noreply.github.com> | ||||
| Vin Misra <vinith@alum.mit.edu> | ||||
| Vulcan <93451215+trholding@users.noreply.github.com> | ||||
| WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com> | ||||
| William Tambellini <william.tambellini@gmail.com> | ||||
| William Tambellini <wtambellini@sdl.com> | ||||
| Wilson Silva <wilson.dsigns@gmail.com> | ||||
| Xiang (Kevin) Li <kevinli020508@gmail.com> | ||||
| Xiao-Yong Jin <jinxiaoyong@gmail.com> | ||||
| XiaotaoChen <chenxiaotao1234@gmail.com> | ||||
| Xingchen Song(宋星辰) <xingchensong1996@163.com> | ||||
| Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> | ||||
| Xuan Son Nguyen <thichthat@gmail.com> | ||||
| Yajing Tang <phillis@google.com> | ||||
| Yang Shen <aplshenyang@gmail.com> | ||||
| Yunès <jean.baptiste.yunes@free.fr> | ||||
| Yuri Khrustalev <ykhrustalev@users.noreply.github.com> | ||||
| Yusuf Redžić <48274562+redzic@users.noreply.github.com> | ||||
| ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com> | ||||
| Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> | ||||
| Zhiyuan Li <lizhiyuan@uniartisan.com> | ||||
| Zhiyuan Li <uniartisan2017@gmail.com> | ||||
| Zigfrid Zvezdin <ziggerZZ@gmail.com> | ||||
| Zollner <24618122+Zolliner@users.noreply.github.com> | ||||
| a3sh <38979186+A3shTnT@users.noreply.github.com> | ||||
| ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> | ||||
| agray3 <agray3@users.noreply.github.com> | ||||
| ai-at-home <149282006+ai-at-home@users.noreply.github.com> | ||||
| aldorof <aldorof@users.noreply.github.com> | ||||
| alonfaraj <alonfaraj@gmail.com> | ||||
| amd-dwang <dong.wang@amd.com> | ||||
| amritahs-ibm <amritahs@linux.vnet.ibm.com> | ||||
| andypayne <apayne@gmail.com> | ||||
| ardfork <134447697+ardfork@users.noreply.github.com> | ||||
| arizhih <40765267+arizhih@users.noreply.github.com> | ||||
| automaticcat <daogiatuank54@gmail.com> | ||||
| bandoti <141645996+bandoti@users.noreply.github.com> | ||||
| be-next <jerome.ramette@gmail.com> | ||||
| bert hubert <bert@hubertnet.nl> | ||||
| billyct <billy_allen@126.com> | ||||
| bmwl <brian.marshall@tolko.com> | ||||
| bobqianic <129547291+bobqianic@users.noreply.github.com> | ||||
| bocytko <bocytko+github@gmail.com> | ||||
| boolemancer <48014766+boolemancer@users.noreply.github.com> | ||||
| boolemancer <boolemancer@gmail.com> | ||||
| bradmit <151883577+bradmit@users.noreply.github.com> | ||||
| brunofaustino <b.fa.amorim@gmail.com> | ||||
| bssrdf <merlintiger@hotmail.com> | ||||
| byte-6174 <88070277+byte-6174@users.noreply.github.com> | ||||
| cdosoftei <ciprian.dosoftei@gmail.com> | ||||
| clach04 <Chris.Clark@actian.com> | ||||
| compilade <113953597+compilade@users.noreply.github.com> | ||||
| compilade <git@compilade.net> | ||||
| conradg <conradjgodfrey@gmail.com> | ||||
| crummyh <elijah@crums.us> | ||||
| ddpasa <112642920+ddpasa@users.noreply.github.com> | ||||
| denersc <denerstassun@gmail.com> | ||||
| dscripka <dscripka@users.noreply.github.com> | ||||
| duthils <duthils@duthils.net> | ||||
| ecneladis <ecneladis@users.noreply.github.com> | ||||
| faker <nspyia2002@gmail.com> | ||||
| fitzsim <fitzsim@fitzsim.org> | ||||
| fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> | ||||
| fraxy-v <65565042+fraxy-v@users.noreply.github.com> | ||||
| genevera (she/her) <genevera@users.noreply.github.com> | ||||
| geniusnut <geniusnut@gmail.com> | ||||
| gilbertgong <gilbert.gong@gmail.com> | ||||
| gn64 <yukikaze.jp@gmail.com> | ||||
| goldwaving <77494627+goldwaving@users.noreply.github.com> | ||||
| greeshmay <greeshmay@gmail.com> | ||||
| haopeng <657407891@qq.com> | ||||
| hipudding <huafengchun@gmail.com> | ||||
| hsinhoyeh <yhh92u@gmail.com> | ||||
| hydai <z54981220@gmail.com> | ||||
| iamthad <thadeus.j.fleming@gmail.com> | ||||
| issixx <46835150+issixx@users.noreply.github.com> | ||||
| james wolf <contractorwolf@hotmail.com> | ||||
| jdomke <28772296+jdomke@users.noreply.github.com> | ||||
| jettoblack <jettoblack@gmail.com> | ||||
| jiez <373447296@qq.com> | ||||
| joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> | ||||
| jorismertz <35079666+jorismertz@users.noreply.github.com> | ||||
| junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> | ||||
| junkfood <69683722+JunkFood02@users.noreply.github.com> | ||||
| jwijffels <jwijffels@bnosac.be> | ||||
| k.h.lai <adrian.k.h.lai@outlook.com> | ||||
| kamranjon <kamranjon@gmail.com> | ||||
| katsu560 <katsu560oo-@docomo.ne.jp> | ||||
| kennethge <57784063+kenneth-ge@users.noreply.github.com> | ||||
| keyehzy <msamuel@aluno.puc-rio.br> | ||||
| kunnis <kunnis@users.noreply.github.com> | ||||
| l3utterfly <gc.pthzfoldr@gmail.com> | ||||
| leejet <leejet714@gmail.com> | ||||
| leo-pony <nengjunma@outlook.com> | ||||
| lhez <quic_lih@quicinc.com> | ||||
| litong <31761981+litongjava@users.noreply.github.com> | ||||
| liuwei-git <14815172+liuwei-git@users.noreply.github.com> | ||||
| lnyan <lkwq007@gmail.com> | ||||
| luoyu-intel <yu.luo@intel.com> | ||||
| m.bell <m.bell@techsmith.com> | ||||
| mahorozte <41834471+mahorozte@users.noreply.github.com> | ||||
| mashizora <30516315+mashizora@users.noreply.github.com> | ||||
| matt23654 <matthew.webber@protonmail.com> | ||||
| matteo <matteogeniaccio@yahoo.it> | ||||
| mgrachten <maarten@grachten.eu> | ||||
| mkiol <mkiol@users.noreply.github.com> | ||||
| mky_coder <47767389+mkycoder@users.noreply.github.com> | ||||
| novag <7754358+novag@users.noreply.github.com> | ||||
| pajowu <pajowu@pajowu.de> | ||||
| pengxin99 <pengxin.yuan@intel.com> | ||||
| petterreinholdtsen <pere-github@hungry.com> | ||||
| polarmoon <90010972+polarmoon@users.noreply.github.com> | ||||
| rlapray <lapray.romain@gmail.com> | ||||
| sandrohanea <40202887+sandrohanea@users.noreply.github.com> | ||||
| semiformal-net <84111142+semiformal-net@users.noreply.github.com> | ||||
| shibukazu <61775791+shibukazu@users.noreply.github.com> | ||||
| shikokuchuo <53399081+shikokuchuo@users.noreply.github.com> | ||||
| slaren <slarengh@gmail.com> | ||||
| slashlib <slashlib@users.noreply.github.com> | ||||
| snadampal <87143774+snadampal@users.noreply.github.com> | ||||
| someone13574 <81528246+someone13574@users.noreply.github.com> | ||||
| st-gr <38470677+st-gr@users.noreply.github.com> | ||||
| stduhpf <stephduh@live.fr> | ||||
| stormofice <58337328+stormofice@users.noreply.github.com> | ||||
| texmex76 <40733439+texmex76@users.noreply.github.com> | ||||
| thefinaldegree <thefinaldegree@gmail.com> | ||||
| thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> | ||||
| toboil-features <160222185+toboil-features@users.noreply.github.com> | ||||
| trixirt <trix@redhat.com> | ||||
| ulatekh <ulatekh@yahoo.com> | ||||
| undef <undefdev@gmail.com> | ||||
| uvos <devnull@uvos.xyz> | ||||
| uvos <philipp@uvos.xyz> | ||||
| valVk <valVk@users.noreply.github.com> | ||||
| venkr <venkateshrameshkumar+1@gmail.com> | ||||
| vicalloy <zbirder@gmail.com> | ||||
| wangshuai09 <391746016@qq.com> | ||||
| woachk <24752637+woachk@users.noreply.github.com> | ||||
| xctan <axunlei@gmail.com> | ||||
| xdrudis <xavierdrudis@yahoo.es> | ||||
| yuri@FreeBSD <yuri@FreeBSD> | ||||
| zhangjixiong <code.zjx@gmail.com> | ||||
| zhentaoyu <zhentao.yu@intel.com> | ||||
| zhouwg <6889919+zhouwg@users.noreply.github.com> | ||||
| zhouwg <zhouwg2000@gmail.com> | ||||
| 谢乃闻 <sienaiwun@users.noreply.github.com> | ||||
| 布客飞龙 <562826179@qq.com> | ||||
| Артём Земляк <azemlyak@smart-consulting.ru> | ||||
							
								
								
									
										185
									
								
								packages/app-mobile/android/vendor/whisper.cpp/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										185
									
								
								packages/app-mobile/android/vendor/whisper.cpp/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,185 @@ | ||||
| cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories. | ||||
| project("whisper.cpp" C CXX) | ||||
| project("whisper.cpp" VERSION 1.7.4) | ||||
| include(CheckIncludeFileCXX) | ||||
|  | ||||
| set(SOVERSION 1) | ||||
|  | ||||
| #set(CMAKE_WARN_DEPRECATED YES) | ||||
| set(CMAKE_WARN_UNUSED_CLI YES) | ||||
|  | ||||
| set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||||
|  | ||||
| if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) | ||||
|     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||||
|     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") | ||||
| endif() | ||||
|  | ||||
| # Add path to modules | ||||
| list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") | ||||
|  | ||||
| set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) | ||||
|  | ||||
| if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||||
|     set(WHISPER_STANDALONE ON) | ||||
|  | ||||
|     include(git-vars) | ||||
|  | ||||
|     # configure project version | ||||
|     configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY) | ||||
| else() | ||||
|     set(WHISPER_STANDALONE OFF) | ||||
| endif() | ||||
|  | ||||
| if (EMSCRIPTEN) | ||||
|     set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|  | ||||
|     option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON) | ||||
|  | ||||
|     # TODO: without these, we get the following error: | ||||
|     #       wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features. | ||||
|     set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -s TOTAL_STACK=5242880") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880") | ||||
| else() | ||||
|     if (MINGW) | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|     else() | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT ON) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) | ||||
|  | ||||
| # | ||||
| # option list | ||||
| # | ||||
|  | ||||
| # general | ||||
| option(WHISPER_CCACHE "whisper: use ccache if available" ON) | ||||
|  | ||||
| # debug | ||||
| option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings"                   ON) | ||||
| option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF) | ||||
|  | ||||
| # build | ||||
| option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF) | ||||
|  | ||||
| # sanitizers | ||||
| option(WHISPER_SANITIZE_THREAD    "whisper: enable thread sanitizer"    OFF) | ||||
| option(WHISPER_SANITIZE_ADDRESS   "whisper: enable address sanitizer"   OFF) | ||||
| option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF) | ||||
|  | ||||
| # extra artifacts | ||||
| option(WHISPER_BUILD_TESTS    "whisper: build tests"          ${WHISPER_STANDALONE}) | ||||
| option(WHISPER_BUILD_EXAMPLES "whisper: build examples"       ${WHISPER_STANDALONE}) | ||||
| option(WHISPER_BUILD_SERVER   "whisper: build server example" ${WHISPER_STANDALONE}) | ||||
|  | ||||
| # 3rd party libs | ||||
| option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF) | ||||
| option(WHISPER_SDL2 "whisper: support for libSDL2" OFF) | ||||
|  | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|     option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF) | ||||
| endif() | ||||
|  | ||||
| option(WHISPER_COREML                "whisper: enable Core ML framework"  OFF) | ||||
| option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF) | ||||
| option(WHISPER_OPENVINO              "whisper: support for OpenVINO"      OFF) | ||||
|  | ||||
| # Required for relocatable CMake package | ||||
| include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) | ||||
|  | ||||
| # override ggml options | ||||
| set(GGML_CCACHE             ${WHISPER_CCACHE}) | ||||
| set(GGML_SANITIZE_THREAD    ${WHISPER_SANITIZE_THREAD}) | ||||
| set(GGML_SANITIZE_ADDRESS   ${WHISPER_SANITIZE_ADDRESS}) | ||||
| set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED}) | ||||
| set(GGML_ALL_WARNINGS       ${WHISPER_ALL_WARNINGS}) | ||||
| set(GGML_FATAL_WARNINGS     ${WHISPER_FATAL_WARNINGS}) | ||||
|  | ||||
| # transition helpers | ||||
| function (whisper_option_depr TYPE OLD NEW) | ||||
|     if (${OLD}) | ||||
|         message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n") | ||||
|         set(${NEW} ON) | ||||
|     endif() | ||||
| endfunction() | ||||
|  | ||||
| whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS              GGML_CUDA) | ||||
| whisper_option_depr(WARNING     WHISPER_CUDA                GGML_CUDA) | ||||
| whisper_option_depr(WARNING     WHISPER_KOMPUTE             GGML_KOMPUTE) | ||||
| whisper_option_depr(WARNING     WHISPER_METAL               GGML_METAL) | ||||
| whisper_option_depr(WARNING     WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY) | ||||
| whisper_option_depr(WARNING     WHISPER_NATIVE              GGML_NATIVE) | ||||
| whisper_option_depr(WARNING     WHISPER_OPENMP              GGML_OPENMP) | ||||
| whisper_option_depr(WARNING     WHISPER_RPC                 GGML_RPC) | ||||
| whisper_option_depr(WARNING     WHISPER_SYCL                GGML_SYCL) | ||||
| whisper_option_depr(WARNING     WHISPER_SYCL_F16            GGML_SYCL_F16) | ||||
|  | ||||
| # | ||||
| # build the library | ||||
| # | ||||
|  | ||||
| if (NOT TARGET ggml) | ||||
|     add_subdirectory(ggml) | ||||
|     # ... otherwise assume ggml is added by a parent CMakeLists.txt | ||||
| endif() | ||||
| add_subdirectory(src) | ||||
|  | ||||
| # | ||||
| # install | ||||
| # | ||||
|  | ||||
| include(GNUInstallDirs) | ||||
| include(CMakePackageConfigHelpers) | ||||
|  | ||||
| set(WHISPER_BUILD_NUMBER        ${BUILD_NUMBER}) | ||||
| set(WHISPER_BUILD_COMMIT        ${BUILD_COMMIT}) | ||||
| set(WHISPER_INSTALL_VERSION     ${CMAKE_PROJECT_VERSION}) | ||||
|  | ||||
| set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header  files") | ||||
| set(WHISPER_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files") | ||||
| set(WHISPER_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files") | ||||
|  | ||||
| get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS) | ||||
|  | ||||
| set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h) | ||||
| install(TARGETS whisper LIBRARY PUBLIC_HEADER) | ||||
|  | ||||
| configure_package_config_file( | ||||
|         ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake | ||||
|     INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper | ||||
|     PATH_VARS | ||||
|     WHISPER_INCLUDE_INSTALL_DIR | ||||
|     WHISPER_LIB_INSTALL_DIR | ||||
|     WHISPER_BIN_INSTALL_DIR ) | ||||
|  | ||||
| write_basic_package_version_file( | ||||
|     ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake | ||||
|     VERSION ${WHISPER_INSTALL_VERSION} | ||||
|     COMPATIBILITY SameMajorVersion) | ||||
|  | ||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake | ||||
|               ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake | ||||
|         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper) | ||||
|  | ||||
| configure_file(cmake/whisper.pc.in | ||||
|         "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc" | ||||
|         @ONLY) | ||||
|  | ||||
| install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc" | ||||
|         DESTINATION lib/pkgconfig) | ||||
|  | ||||
| # | ||||
| # programs, examples and tests | ||||
| # | ||||
|  | ||||
| if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION) | ||||
|     #include(CTest) | ||||
|     #add_subdirectory(tests) | ||||
| endif () | ||||
|  | ||||
| if (WHISPER_BUILD_EXAMPLES) | ||||
|     add_subdirectory(examples) | ||||
| endif() | ||||
							
								
								
									
										21
									
								
								packages/app-mobile/android/vendor/whisper.cpp/LICENSE
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								packages/app-mobile/android/vendor/whisper.cpp/LICENSE
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| MIT License | ||||
|  | ||||
| Copyright (c) 2023-2024 The ggml authors | ||||
|  | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
|  | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
|  | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
							
								
								
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Package.swift
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Package.swift
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| // swift-tools-version:5.5 | ||||
|  | ||||
| import PackageDescription | ||||
|  | ||||
| let package = Package( | ||||
|     name: "whisper", | ||||
|     platforms: [ | ||||
|         .macOS(.v12), | ||||
|         .iOS(.v14), | ||||
|         .watchOS(.v4), | ||||
|         .tvOS(.v14) | ||||
|     ], | ||||
|     products: [ | ||||
|         .library(name: "whisper", targets: ["whisper"]), | ||||
|     ], | ||||
|     targets: [ | ||||
|         .systemLibrary(name: "whisper", pkgConfig: "whisper"), | ||||
|     ] | ||||
| ) | ||||
							
								
								
									
										679
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										679
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,679 @@ | ||||
| # whisper.cpp | ||||
|  | ||||
|  | ||||
|  | ||||
| [](https://github.com/ggerganov/whisper.cpp/actions) | ||||
| [](https://opensource.org/licenses/MIT) | ||||
| [](https://conan.io/center/whisper-cpp) | ||||
| [](https://www.npmjs.com/package/whisper.cpp/) | ||||
|  | ||||
| > [!NOTE] | ||||
| > New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788 | ||||
|  | ||||
| Stable: [v1.7.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126) | ||||
|  | ||||
| High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model: | ||||
|  | ||||
| - Plain C/C++ implementation without dependencies | ||||
| - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support) | ||||
| - AVX intrinsics support for x86 architectures | ||||
| - [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics) | ||||
| - Mixed F16 / F32 precision | ||||
| - [Integer quantization support](#quantization) | ||||
| - Zero memory allocations at runtime | ||||
| - [Vulkan support](#vulkan-gpu-support) | ||||
| - Support for CPU-only inference | ||||
| - [Efficient GPU support for NVIDIA](#nvidia-gpu-support) | ||||
| - [OpenVINO Support](#openvino-support) | ||||
| - [Ascend NPU Support](#ascend-npu-support) | ||||
| - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h) | ||||
|  | ||||
| Supported platforms: | ||||
|  | ||||
| - [x] Mac OS (Intel and Arm) | ||||
| - [x] [iOS](examples/whisper.objc) | ||||
| - [x] [Android](examples/whisper.android) | ||||
| - [x] [Java](bindings/java/README.md) | ||||
| - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264) | ||||
| - [x] [WebAssembly](examples/whisper.wasm) | ||||
| - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)] | ||||
| - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166) | ||||
| - [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp) | ||||
|  | ||||
| The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp). | ||||
| The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library. | ||||
|  | ||||
| Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications. | ||||
| As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc) | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4 | ||||
|  | ||||
| You can also easily make your own offline voice assistant application: [command](examples/command) | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4 | ||||
|  | ||||
| On Apple Silicon, the inference runs fully on the GPU via Metal: | ||||
|  | ||||
| https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225 | ||||
|  | ||||
| ## Quick start | ||||
|  | ||||
| First clone the repository: | ||||
|  | ||||
| ```bash | ||||
| git clone https://github.com/ggerganov/whisper.cpp.git | ||||
| ``` | ||||
|  | ||||
| Navigate into the directory: | ||||
|  | ||||
| ``` | ||||
| cd whisper.cpp | ||||
| ``` | ||||
|  | ||||
| Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example: | ||||
|  | ||||
| ```bash | ||||
| sh ./models/download-ggml-model.sh base.en | ||||
| ``` | ||||
|  | ||||
| Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this: | ||||
|  | ||||
| ```bash | ||||
| # build the project | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
|  | ||||
| # transcribe an audio file | ||||
| ./build/bin/whisper-cli -f samples/jfk.wav | ||||
| ``` | ||||
|  | ||||
| --- | ||||
|  | ||||
| For a quick demo, simply run `make base.en`. | ||||
|  | ||||
| The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`. | ||||
|  | ||||
| For detailed usage instructions, run: `./build/bin/whisper-cli -h` | ||||
|  | ||||
| Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool. | ||||
| For example, you can use `ffmpeg` like this: | ||||
|  | ||||
| ```bash | ||||
| ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav | ||||
| ``` | ||||
|  | ||||
| ## More audio samples | ||||
|  | ||||
| If you want some extra audio samples to play with, simply run: | ||||
|  | ||||
| ``` | ||||
| make -j samples | ||||
| ``` | ||||
|  | ||||
| This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`. | ||||
|  | ||||
| You can download and run the other models as follows: | ||||
|  | ||||
| ``` | ||||
| make -j tiny.en | ||||
| make -j tiny | ||||
| make -j base.en | ||||
| make -j base | ||||
| make -j small.en | ||||
| make -j small | ||||
| make -j medium.en | ||||
| make -j medium | ||||
| make -j large-v1 | ||||
| make -j large-v2 | ||||
| make -j large-v3 | ||||
| make -j large-v3-turbo | ||||
| ``` | ||||
|  | ||||
| ## Memory usage | ||||
|  | ||||
| | Model  | Disk    | Mem     | | ||||
| | ------ | ------- | ------- | | ||||
| | tiny   | 75 MiB  | ~273 MB | | ||||
| | base   | 142 MiB | ~388 MB | | ||||
| | small  | 466 MiB | ~852 MB | | ||||
| | medium | 1.5 GiB | ~2.1 GB | | ||||
| | large  | 2.9 GiB | ~3.9 GB | | ||||
|  | ||||
| ## POWER VSX Intrinsics | ||||
|  | ||||
| `whisper.cpp` supports POWER architectures and includes code which | ||||
| significantly speeds operation on Linux running on POWER9/10, making it | ||||
| capable of faster-than-realtime transcription on underclocked Raptor | ||||
| Talos II. Ensure you have a BLAS package installed, and replace the | ||||
| standard cmake setup with: | ||||
|  | ||||
| ```bash | ||||
| # build with GGML_BLAS defined | ||||
| cmake -B build -DGGML_BLAS=1 | ||||
| cmake --build build --config Release | ||||
| ./build/bin/whisper-cli [ .. etc .. ] | ||||
| ``` | ||||
|  | ||||
| ## Quantization | ||||
|  | ||||
| `whisper.cpp` supports integer quantization of the Whisper `ggml` models. | ||||
| Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently. | ||||
|  | ||||
| Here are the steps for creating and using a quantized model: | ||||
|  | ||||
| ```bash | ||||
| # quantize a model with Q5_0 method | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
| ./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0 | ||||
|  | ||||
| # run the examples as usual, specifying the quantized model file | ||||
| ./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav | ||||
| ``` | ||||
|  | ||||
| ## Core ML support | ||||
|  | ||||
| On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant | ||||
| speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`: | ||||
|  | ||||
| - Install Python dependencies needed for the creation of the Core ML model: | ||||
|  | ||||
|   ```bash | ||||
|   pip install ane_transformers | ||||
|   pip install openai-whisper | ||||
|   pip install coremltools | ||||
|   ``` | ||||
|  | ||||
|   - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools. | ||||
|   - Python 3.10 is recommended. | ||||
|   - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination. | ||||
|   - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step: | ||||
|     - To create an environment, use: `conda create -n py310-whisper python=3.10 -y` | ||||
|     - To activate the environment, use: `conda activate py310-whisper` | ||||
|  | ||||
| - Generate a Core ML model. For example, to generate a `base.en` model, use: | ||||
|  | ||||
|   ```bash | ||||
|   ./models/generate-coreml-model.sh base.en | ||||
|   ``` | ||||
|  | ||||
|   This will generate the folder `models/ggml-base.en-encoder.mlmodelc` | ||||
|  | ||||
| - Build `whisper.cpp` with Core ML support: | ||||
|  | ||||
|   ```bash | ||||
|   # using CMake | ||||
|   cmake -B build -DWHISPER_COREML=1 | ||||
|   cmake --build build -j --config Release | ||||
|   ``` | ||||
|  | ||||
| - Run the examples as usual. For example: | ||||
|  | ||||
|   ```text | ||||
|   $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav | ||||
|  | ||||
|   ... | ||||
|  | ||||
|   whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc' | ||||
|   whisper_init_state: first run on a device may take a while ... | ||||
|   whisper_init_state: Core ML model loaded | ||||
|  | ||||
|   system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 | | ||||
|  | ||||
|   ... | ||||
|   ``` | ||||
|  | ||||
|   The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format. | ||||
|   Next runs are faster. | ||||
|  | ||||
| For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566). | ||||
|  | ||||
| ## OpenVINO support | ||||
|  | ||||
| On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed | ||||
| on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete). | ||||
|  | ||||
| This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`: | ||||
|  | ||||
| - First, setup python virtual env. and install python dependencies. Python 3.10 is recommended. | ||||
|  | ||||
|   Windows: | ||||
|  | ||||
|   ```powershell | ||||
|   cd models | ||||
|   python -m venv openvino_conv_env | ||||
|   openvino_conv_env\Scripts\activate | ||||
|   python -m pip install --upgrade pip | ||||
|   pip install -r requirements-openvino.txt | ||||
|   ``` | ||||
|  | ||||
|   Linux and macOS: | ||||
|  | ||||
|   ```bash | ||||
|   cd models | ||||
|   python3 -m venv openvino_conv_env | ||||
|   source openvino_conv_env/bin/activate | ||||
|   python -m pip install --upgrade pip | ||||
|   pip install -r requirements-openvino.txt | ||||
|   ``` | ||||
|  | ||||
| - Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use: | ||||
|  | ||||
|   ``` | ||||
|   python convert-whisper-to-openvino.py --model base.en | ||||
|   ``` | ||||
|  | ||||
|   This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that | ||||
|   is the default location that the OpenVINO extension will search at runtime. | ||||
|  | ||||
| - Build `whisper.cpp` with OpenVINO support: | ||||
|  | ||||
|   Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0). | ||||
|  | ||||
|   After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example: | ||||
|  | ||||
|   Linux: | ||||
|  | ||||
|   ```bash | ||||
|   source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh | ||||
|   ``` | ||||
|  | ||||
|   Windows (cmd): | ||||
|  | ||||
|   ```powershell | ||||
|   C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat | ||||
|   ``` | ||||
|  | ||||
|   And then build the project using cmake: | ||||
|  | ||||
|   ```bash | ||||
|   cmake -B build -DWHISPER_OPENVINO=1 | ||||
|   cmake --build build -j --config Release | ||||
|   ``` | ||||
|  | ||||
| - Run the examples as usual. For example: | ||||
|  | ||||
|   ```text | ||||
|   $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav | ||||
|  | ||||
|   ... | ||||
|  | ||||
|   whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml' | ||||
|   whisper_ctx_init_openvino_encoder: first run on a device may take a while ... | ||||
|   whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache | ||||
|   whisper_ctx_init_openvino_encoder: OpenVINO model loaded | ||||
|  | ||||
|   system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 | | ||||
|  | ||||
|   ... | ||||
|   ``` | ||||
|  | ||||
|   The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get | ||||
|   cached for the next run. | ||||
|  | ||||
| For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037). | ||||
|  | ||||
| ## NVIDIA GPU support | ||||
|  | ||||
| With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels. | ||||
| First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads | ||||
|  | ||||
| Now build `whisper.cpp` with CUDA support: | ||||
|  | ||||
| ``` | ||||
| cmake -B build -DGGML_CUDA=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| ## Vulkan GPU support | ||||
| Cross-vendor solution which allows you to accelerate workload on your GPU. | ||||
| First, make sure your graphics card driver provides support for Vulkan API. | ||||
|  | ||||
| Now build `whisper.cpp` with Vulkan support: | ||||
| ``` | ||||
| cmake -B build -DGGML_VULKAN=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| ## BLAS CPU support via OpenBLAS | ||||
|  | ||||
| Encoder processing can be accelerated on the CPU via OpenBLAS. | ||||
| First, make sure you have installed `openblas`: https://www.openblas.net/ | ||||
|  | ||||
| Now build `whisper.cpp` with OpenBLAS support: | ||||
|  | ||||
| ``` | ||||
| cmake -B build -DGGML_BLAS=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| ## Ascend NPU support | ||||
|  | ||||
| Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores. | ||||
|  | ||||
| First, check if your Ascend NPU device is supported: | ||||
|  | ||||
| **Verified devices** | ||||
| | Ascend NPU                    | Status  | | ||||
| |:-----------------------------:|:-------:| | ||||
| | Atlas 300T A2                 | Support | | ||||
|  | ||||
| Then, make sure you have installed the [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended. | ||||
|  | ||||
| Now build `whisper.cpp` with CANN support: | ||||
|  | ||||
| ``` | ||||
| cmake -B build -DGGML_CANN=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| Run the inference examples as usual, for example: | ||||
|  | ||||
| ``` | ||||
| ./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8 | ||||
| ``` | ||||
|  | ||||
| *Notes:* | ||||
|  | ||||
| - If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag. | ||||
| - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`. | ||||
|  | ||||
| ## Docker | ||||
|  | ||||
| ### Prerequisites | ||||
|  | ||||
| - Docker must be installed and running on your system. | ||||
| - Create a folder to store big models & intermediate files (ex. /whisper/models) | ||||
|  | ||||
| ### Images | ||||
|  | ||||
| We have two Docker images available for this project: | ||||
|  | ||||
| 1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) | ||||
| 2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) | ||||
|  | ||||
| ### Usage | ||||
|  | ||||
| ```shell | ||||
| # download model and persist it in a local folder | ||||
| docker run -it --rm \ | ||||
|   -v path/to/models:/models \ | ||||
|   whisper.cpp:main "./models/download-ggml-model.sh base /models" | ||||
| # transcribe an audio file | ||||
| docker run -it --rm \ | ||||
|   -v path/to/models:/models \ | ||||
|   -v path/to/audios:/audios \ | ||||
|   whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav" | ||||
| # transcribe an audio file in samples folder | ||||
| docker run -it --rm \ | ||||
|   -v path/to/models:/models \ | ||||
|   whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav" | ||||
| ``` | ||||
|  | ||||
| ## Installing with Conan | ||||
|  | ||||
| You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command: | ||||
|  | ||||
| ``` | ||||
| conan install --requires="whisper-cpp/[*]" --build=missing | ||||
| ``` | ||||
|  | ||||
| For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/). | ||||
|  | ||||
| ## Limitations | ||||
|  | ||||
| - Inference only | ||||
|  | ||||
| ## Real-time audio input example | ||||
|  | ||||
| This is a naive example of performing real-time inference on audio from your microphone. | ||||
| The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously. | ||||
| More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10). | ||||
|  | ||||
| ```bash | ||||
| cmake -B build -DWHISPER_SDL2=ON | ||||
| cmake --build build --config Release | ||||
| ./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4 | ||||
|  | ||||
| ## Confidence color-coding | ||||
|  | ||||
| Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy | ||||
| to highlight words with high or low confidence: | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors | ||||
| ``` | ||||
|  | ||||
| <img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png"> | ||||
|  | ||||
| ## Controlling the length of the generated text segments (experimental) | ||||
|  | ||||
| For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: | ||||
|  | ||||
| ```text | ||||
| $ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16 | ||||
|  | ||||
| whisper_model_load: loading model from './models/ggml-base.en.bin' | ||||
| ... | ||||
| system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | | ||||
|  | ||||
| main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... | ||||
|  | ||||
| [00:00:00.000 --> 00:00:00.850]   And so my | ||||
| [00:00:00.850 --> 00:00:01.590]   fellow | ||||
| [00:00:01.590 --> 00:00:04.140]   Americans, ask | ||||
| [00:00:04.140 --> 00:00:05.660]   not what your | ||||
| [00:00:05.660 --> 00:00:06.840]   country can do | ||||
| [00:00:06.840 --> 00:00:08.430]   for you, ask | ||||
| [00:00:08.430 --> 00:00:09.440]   what you can do | ||||
| [00:00:09.440 --> 00:00:10.020]   for your | ||||
| [00:00:10.020 --> 00:00:11.000]   country. | ||||
| ``` | ||||
|  | ||||
| ## Word-level timestamp (experimental) | ||||
|  | ||||
| The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`: | ||||
|  | ||||
| ```text | ||||
| $ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1 | ||||
|  | ||||
| whisper_model_load: loading model from './models/ggml-base.en.bin' | ||||
| ... | ||||
| system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | | ||||
|  | ||||
| main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... | ||||
|  | ||||
| [00:00:00.000 --> 00:00:00.320] | ||||
| [00:00:00.320 --> 00:00:00.370]   And | ||||
| [00:00:00.370 --> 00:00:00.690]   so | ||||
| [00:00:00.690 --> 00:00:00.850]   my | ||||
| [00:00:00.850 --> 00:00:01.590]   fellow | ||||
| [00:00:01.590 --> 00:00:02.850]   Americans | ||||
| [00:00:02.850 --> 00:00:03.300]  , | ||||
| [00:00:03.300 --> 00:00:04.140]   ask | ||||
| [00:00:04.140 --> 00:00:04.990]   not | ||||
| [00:00:04.990 --> 00:00:05.410]   what | ||||
| [00:00:05.410 --> 00:00:05.660]   your | ||||
| [00:00:05.660 --> 00:00:06.260]   country | ||||
| [00:00:06.260 --> 00:00:06.600]   can | ||||
| [00:00:06.600 --> 00:00:06.840]   do | ||||
| [00:00:06.840 --> 00:00:07.010]   for | ||||
| [00:00:07.010 --> 00:00:08.170]   you | ||||
| [00:00:08.170 --> 00:00:08.190]  , | ||||
| [00:00:08.190 --> 00:00:08.430]   ask | ||||
| [00:00:08.430 --> 00:00:08.910]   what | ||||
| [00:00:08.910 --> 00:00:09.040]   you | ||||
| [00:00:09.040 --> 00:00:09.320]   can | ||||
| [00:00:09.320 --> 00:00:09.440]   do | ||||
| [00:00:09.440 --> 00:00:09.760]   for | ||||
| [00:00:09.760 --> 00:00:10.020]   your | ||||
| [00:00:10.020 --> 00:00:10.510]   country | ||||
| [00:00:10.510 --> 00:00:11.000]  . | ||||
| ``` | ||||
|  | ||||
| ## Speaker segmentation via tinydiarize (experimental) | ||||
|  | ||||
| More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058 | ||||
|  | ||||
| Sample usage: | ||||
|  | ||||
| ```py | ||||
| # download a tinydiarize compatible model | ||||
| ./models/download-ggml-model.sh small.en-tdrz | ||||
|  | ||||
| # run as usual, adding the "-tdrz" command-line argument | ||||
| ./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz | ||||
| ... | ||||
| main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ... | ||||
| ... | ||||
| [00:00:00.000 --> 00:00:03.800]   Okay Houston, we've had a problem here. [SPEAKER_TURN] | ||||
| [00:00:03.800 --> 00:00:06.200]   This is Houston. Say again please. [SPEAKER_TURN] | ||||
| [00:00:06.200 --> 00:00:08.260]   Uh Houston we've had a problem. | ||||
| [00:00:08.260 --> 00:00:11.320]   We've had a main beam up on a volt. [SPEAKER_TURN] | ||||
| [00:00:11.320 --> 00:00:13.820]   Roger main beam interval. [SPEAKER_TURN] | ||||
| [00:00:13.820 --> 00:00:15.100]   Uh uh [SPEAKER_TURN] | ||||
| [00:00:15.100 --> 00:00:18.020]   So okay stand, by thirteen we're looking at it. [SPEAKER_TURN] | ||||
| [00:00:18.020 --> 00:00:25.740]   Okay uh right now uh Houston the uh voltage is uh is looking good um. | ||||
| [00:00:27.620 --> 00:00:29.940]   And we had a a pretty large bank or so. | ||||
| ``` | ||||
|  | ||||
| ## Karaoke-style movie generation (experimental) | ||||
|  | ||||
| The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the | ||||
| currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script. | ||||
| This requires to have `ffmpeg` installed. | ||||
|  | ||||
| Here are a few _"typical"_ examples: | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts | ||||
| source ./samples/jfk.wav.wts | ||||
| ffplay ./samples/jfk.wav.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts | ||||
| source ./samples/mm0.wav.wts | ||||
| ffplay ./samples/mm0.wav.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts | ||||
| source ./samples/gb0.wav.wts | ||||
| ffplay ./samples/gb0.wav.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Video comparison of different models | ||||
|  | ||||
| Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: | ||||
|  | ||||
| ```bash | ||||
| ./scripts/bench-wts.sh samples/jfk.wav | ||||
| ffplay ./samples/jfk.wav.all.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Benchmarks | ||||
|  | ||||
| In order to have an objective comparison of the performance of the inference across different system configurations, | ||||
| use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it | ||||
| took to execute it. The results are summarized in the following Github issue: | ||||
|  | ||||
| [Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89) | ||||
|  | ||||
| Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py). | ||||
|  | ||||
| You can run it with the following command, by default it will run against any standard model in the models folder. | ||||
|  | ||||
| ```bash | ||||
| python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2 | ||||
| ``` | ||||
|  | ||||
| It is written in python with the intention of being easy to modify and extend for your benchmarking use case. | ||||
|  | ||||
| It outputs a csv file with the results of the benchmarking. | ||||
|  | ||||
| ## `ggml` format | ||||
|  | ||||
| The original models are converted to a custom binary format. This allows to pack everything needed into a single file: | ||||
|  | ||||
| - model parameters | ||||
| - mel filters | ||||
| - vocabulary | ||||
| - weights | ||||
|  | ||||
| You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script | ||||
| or manually from here: | ||||
|  | ||||
| - https://huggingface.co/ggerganov/whisper.cpp | ||||
| - https://ggml.ggerganov.com | ||||
|  | ||||
| For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md). | ||||
|  | ||||
| ## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings) | ||||
|  | ||||
| - [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310) | ||||
| - [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309) | ||||
|   - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn) | ||||
| - [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312) | ||||
| - [x] Java: | ||||
|   - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni) | ||||
| - [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507) | ||||
| - [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313) | ||||
|   - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper) | ||||
| - [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422) | ||||
|   - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net) | ||||
|   - [NickDarvey/whisper](https://github.com/NickDarvey/whisper) | ||||
| - [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9) | ||||
|   - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython) | ||||
|   - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp) | ||||
|   - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11) | ||||
|   - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11) | ||||
| - [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper) | ||||
| - [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity) | ||||
|  | ||||
| ## Examples | ||||
|  | ||||
| There are various examples of using the library for different projects in the [examples](examples) folder. | ||||
| Some of the examples are even ported to run in the browser using WebAssembly. Check them out! | ||||
|  | ||||
| | Example                                             | Web                                   | Description                                                                                                                     | | ||||
| | --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | [whisper-cli](examples/cli)                         | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper                                                                       | | ||||
| | [whisper-bench](examples/bench)                     | [bench.wasm](examples/bench.wasm)     | Benchmark the performance of Whisper on your machine                                                                            | | ||||
| | [whisper-stream](examples/stream)                   | [stream.wasm](examples/stream.wasm)   | Real-time transcription of raw microphone capture                                                                               | | ||||
| | [whisper-command](examples/command)                 | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic                                                         | | ||||
| | [whisper-server](examples/server)                   |                                       | HTTP transcription server with OAI-like API                                                                                     | | ||||
| | [whisper-talk-llama](examples/talk-llama)           |                                       | Talk with a LLaMA bot                                                                                                           | | ||||
| | [whisper.objc](examples/whisper.objc)               |                                       | iOS mobile application using whisper.cpp                                                                                        | | ||||
| | [whisper.swiftui](examples/whisper.swiftui)         |                                       | SwiftUI iOS / macOS application using whisper.cpp                                                                               | | ||||
| | [whisper.android](examples/whisper.android)         |                                       | Android mobile application using whisper.cpp                                                                                    | | ||||
| | [whisper.nvim](examples/whisper.nvim)               |                                       | Speech-to-text plugin for Neovim                                                                                                | | ||||
| | [generate-karaoke.sh](examples/generate-karaoke.sh) |                                       | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture                           | | ||||
| | [livestream.sh](examples/livestream.sh)             |                                       | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185)                                           | | ||||
| | [yt-wsp.sh](examples/yt-wsp.sh)                     |                                       | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) | | ||||
| | [wchess](examples/wchess)                           | [wchess.wasm](examples/wchess)        | Voice-controlled chess                                                                                                          | | ||||
|  | ||||
| ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions) | ||||
|  | ||||
| If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic. | ||||
| You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category | ||||
| to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the | ||||
| [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion. | ||||
							
								
								
									
										249
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README_sycl.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README_sycl.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,249 @@ | ||||
| # whisper.cpp for SYCL | ||||
|  | ||||
| [Background](#background) | ||||
|  | ||||
| [OS](#os) | ||||
|  | ||||
| [Intel GPU](#intel-gpu) | ||||
|  | ||||
| [Linux](#linux) | ||||
|  | ||||
| [Environment Variable](#environment-variable) | ||||
|  | ||||
| [Known Issue](#known-issue) | ||||
|  | ||||
| [Todo](#todo) | ||||
|  | ||||
| ## Background | ||||
|  | ||||
| SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators — such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17. | ||||
|  | ||||
| oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms. | ||||
|  | ||||
| Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs. | ||||
|  | ||||
| To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool, [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)), to migrate to SYCL. | ||||
|  | ||||
| The whisper.cpp for SYCL is used to support Intel GPUs. | ||||
|  | ||||
| For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build). | ||||
|  | ||||
| ## OS | ||||
|  | ||||
| |OS|Status|Verified| | ||||
| |-|-|-| | ||||
| |Linux|Support|Ubuntu 22.04| | ||||
| |Windows|Ongoing| | | ||||
|  | ||||
|  | ||||
| ## Intel GPU | ||||
|  | ||||
| |Intel GPU| Status | Verified Model| | ||||
| |-|-|-| | ||||
| |Intel Data Center Max Series| Support| Max 1550| | ||||
| |Intel Data Center Flex Series| Support| Flex 170| | ||||
| |Intel Arc Series| Support| Arc 770| | ||||
| |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake| | ||||
| |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7| | ||||
|  | ||||
|  | ||||
| ## Linux | ||||
|  | ||||
| ### Setup Environment | ||||
|  | ||||
| 1. Install Intel GPU driver. | ||||
|  | ||||
| a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html). | ||||
|  | ||||
| Note: for iGPU, please install the client GPU driver. | ||||
|  | ||||
| b. Add user to group: video, render. | ||||
|  | ||||
| ``` | ||||
| sudo usermod -aG render username | ||||
| sudo usermod -aG video username | ||||
| ``` | ||||
|  | ||||
| Note: re-login to enable it. | ||||
|  | ||||
| c. Check | ||||
|  | ||||
| ``` | ||||
| sudo apt install clinfo | ||||
| sudo clinfo -l | ||||
| ``` | ||||
|  | ||||
| Output (example): | ||||
|  | ||||
| ``` | ||||
| Platform #0: Intel(R) OpenCL Graphics | ||||
|  `-- Device #0: Intel(R) Arc(TM) A770 Graphics | ||||
|  | ||||
|  | ||||
| Platform #0: Intel(R) OpenCL HD Graphics | ||||
|  `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49] | ||||
| ``` | ||||
|  | ||||
| 2. Install the Intel® oneAPI Base Toolkit. | ||||
|  | ||||
|  | ||||
| a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html). | ||||
|  | ||||
| We recommend installing to the default folder: **/opt/intel/oneapi**. | ||||
|  | ||||
| The following guide uses the default folder as an example. If you use a different folder, adjust the paths in this guide accordingly. | ||||
|  | ||||
| b. Check | ||||
|  | ||||
| ``` | ||||
| source /opt/intel/oneapi/setvars.sh | ||||
|  | ||||
| sycl-ls | ||||
| ``` | ||||
|  | ||||
| There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**. | ||||
|  | ||||
| Output (example): | ||||
| ``` | ||||
| [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.10.0.17_160000] | ||||
| [opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000] | ||||
| [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO  [23.30.26918.50] | ||||
| [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918] | ||||
|  | ||||
| ``` | ||||
|  | ||||
| 3. Build locally: | ||||
|  | ||||
| ``` | ||||
| mkdir -p build | ||||
| cd build | ||||
| source /opt/intel/oneapi/setvars.sh | ||||
|  | ||||
| #for FP16 | ||||
| #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON  | ||||
|  | ||||
| #for FP32 | ||||
| cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx | ||||
|  | ||||
| #build example/main only | ||||
| #cmake --build . --config Release --target main | ||||
|  | ||||
| #build all binary | ||||
| cmake --build . --config Release -v | ||||
|  | ||||
| ``` | ||||
|  | ||||
| or | ||||
|  | ||||
| ``` | ||||
| ./examples/sycl/build.sh | ||||
| ``` | ||||
|  | ||||
| Note: | ||||
|  | ||||
| - By default, all binaries are built, which takes more time. To reduce build time, we recommend building **example/main** only. | ||||
|  | ||||
| ### Run | ||||
|  | ||||
| 1. Put model file to folder **models** | ||||
|  | ||||
| 2. Enable oneAPI running environment | ||||
|  | ||||
| ``` | ||||
| source /opt/intel/oneapi/setvars.sh | ||||
| ``` | ||||
|  | ||||
| 3. List device ID | ||||
|  | ||||
| Run without parameter: | ||||
|  | ||||
| ``` | ||||
| ./build/bin/ls-sycl-device | ||||
|  | ||||
| or | ||||
|  | ||||
| ./build/bin/main | ||||
| ``` | ||||
|  | ||||
| Check the ID in startup log, like: | ||||
|  | ||||
| ``` | ||||
| found 4 SYCL devices: | ||||
|   Device 0: Intel(R) Arc(TM) A770 Graphics,	compute capability 1.3, | ||||
|     max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136 | ||||
|   Device 1: Intel(R) FPGA Emulation Device,	compute capability 1.2, | ||||
|     max compute_units 24,	max work group size 67108864,	max sub group size 64,	global mem size 67065057280 | ||||
|   Device 2: 13th Gen Intel(R) Core(TM) i7-13700K,	compute capability 3.0, | ||||
|     max compute_units 24,	max work group size 8192,	max sub group size 64,	global mem size 67065057280 | ||||
|   Device 3: Intel(R) Arc(TM) A770 Graphics,	compute capability 3.0, | ||||
|     max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136 | ||||
|  | ||||
| ``` | ||||
|  | ||||
| |Attribute|Note| | ||||
| |-|-| | ||||
| |compute capability 1.3|Level-zero running time, recommended | | ||||
| |compute capability 3.0|OpenCL running time, slower than level-zero in most cases| | ||||
|  | ||||
| 4. Set device ID and execute whisper.cpp | ||||
|  | ||||
| Set device ID = 0 by **GGML_SYCL_DEVICE=0** | ||||
|  | ||||
| ``` | ||||
| GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav | ||||
| ``` | ||||
| or run by script: | ||||
|  | ||||
| ``` | ||||
| ./examples/sycl/run_whisper.sh | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
| 5. Check the device ID in output | ||||
|  | ||||
| Like: | ||||
| ``` | ||||
| Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## Environment Variable | ||||
|  | ||||
| #### Build | ||||
|  | ||||
| |Name|Value|Function| | ||||
| |-|-|-| | ||||
| |WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.| | ||||
| |WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.| | ||||
| |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path| | ||||
| |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path| | ||||
|  | ||||
| #### Running | ||||
|  | ||||
|  | ||||
| |Name|Value|Function| | ||||
| |-|-|-| | ||||
| |GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output| | ||||
| |GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG| | ||||
|  | ||||
| ## Known Issue | ||||
|  | ||||
| - Error:  `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`. | ||||
|  | ||||
|   The oneAPI running environment is not enabled. | ||||
|  | ||||
|   Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`. | ||||
|  | ||||
|  | ||||
| - Hang during startup | ||||
|  | ||||
|   llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy may behave abnormally and block. | ||||
|  | ||||
|   Solution: add **--no-mmap**. | ||||
|  | ||||
| ## Todo | ||||
|  | ||||
| - Support to build in Windows. | ||||
|  | ||||
| - Support multiple cards. | ||||
							
								
								
									
										5
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/module.modulemap
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/module.modulemap
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| module whisper [system] { | ||||
|     header "whisper.h" | ||||
|     link "whisper" | ||||
|     export * | ||||
| } | ||||
							
								
								
									
										4
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/whisper.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/whisper.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <whisper.h> | ||||
|  | ||||
							
								
								
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/close-issue.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/close-issue.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| name: Close inactive issues | ||||
| on: | ||||
|   schedule: | ||||
|     - cron: "42 0 * * *" | ||||
|  | ||||
| # Fine-grant permission | ||||
| # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token | ||||
| permissions: | ||||
|   issues: write | ||||
|  | ||||
| jobs: | ||||
|   close-issues: | ||||
|     runs-on: ubuntu-latest | ||||
|     permissions: | ||||
|       issues: write | ||||
|       pull-requests: write | ||||
|     steps: | ||||
|       - uses: actions/stale@v5 | ||||
|         with: | ||||
|           exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap" | ||||
|           days-before-issue-stale: 30 | ||||
|           days-before-issue-close: 14 | ||||
|           stale-issue-label: "stale" | ||||
|           close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." | ||||
|           days-before-pr-stale: -1 | ||||
|           days-before-pr-close: -1 | ||||
|           operations-per-run: 10000 | ||||
|           repo-token: ${{ secrets.GITHUB_TOKEN }} | ||||
							
								
								
									
										16
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| # Set the default compile features and properties for a target. | ||||
|  | ||||
| if (NOT TARGET) | ||||
|     message(FATAL_ERROR "TARGET not set before including DefaultTargetOptions") | ||||
| endif() | ||||
|  | ||||
| target_compile_features(${TARGET} | ||||
|     PRIVATE | ||||
|         cxx_std_11 | ||||
|     ) | ||||
|  | ||||
| set_target_properties(${TARGET} | ||||
|     PROPERTIES | ||||
|         EXPORT_COMPILE_COMMANDS ON | ||||
|         RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin" | ||||
| ) | ||||
							
								
								
									
										163
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/FindFFmpeg.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/FindFFmpeg.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,163 @@ | ||||
| # From | ||||
| # https://github.com/snikulov/cmake-modules/blob/master/FindFFmpeg.cmake | ||||
| # | ||||
| # vim: ts=2 sw=2 | ||||
| # - Try to find the required ffmpeg components(default: AVFORMAT, AVUTIL, AVCODEC) | ||||
| # | ||||
| # Once done this will define | ||||
| #  FFMPEG_FOUND         - System has the all required components. | ||||
| #  FFMPEG_INCLUDE_DIRS  - Include directory necessary for using the required components headers. | ||||
| #  FFMPEG_LIBRARIES     - Link these to use the required ffmpeg components. | ||||
| #  FFMPEG_DEFINITIONS   - Compiler switches required for using the required ffmpeg components. | ||||
| # | ||||
| # For each of the components it will additionally set. | ||||
| #   - AVCODEC | ||||
| #   - AVDEVICE | ||||
| #   - AVFORMAT | ||||
| #   - AVFILTER | ||||
| #   - AVUTIL | ||||
| #   - POSTPROC | ||||
| #   - SWSCALE | ||||
| # the following variables will be defined | ||||
| #  <component>_FOUND        - System has <component> | ||||
| #  <component>_INCLUDE_DIRS - Include directory necessary for using the <component> headers | ||||
| #  <component>_LIBRARIES    - Link these to use <component> | ||||
| #  <component>_DEFINITIONS  - Compiler switches required for using <component> | ||||
| #  <component>_VERSION      - The components version | ||||
| # | ||||
| # Copyright (c) 2006, Matthias Kretz, <kretz@kde.org> | ||||
| # Copyright (c) 2008, Alexander Neundorf, <neundorf@kde.org> | ||||
| # Copyright (c) 2011, Michael Jansen, <kde@michael-jansen.biz> | ||||
| # | ||||
| # Redistribution and use is allowed according to the terms of the BSD license. | ||||
| # For details see the accompanying COPYING-CMAKE-SCRIPTS file. | ||||
|  | ||||
| include(FindPackageHandleStandardArgs) | ||||
|  | ||||
| # The default components were taken from a survey over other FindFFMPEG.cmake files | ||||
| if (NOT FFmpeg_FIND_COMPONENTS) | ||||
|   set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL SWRESAMPLE) | ||||
| endif() | ||||
|  | ||||
| # | ||||
| ### Macro: set_component_found | ||||
| # | ||||
| # Marks the given component as found if both *_LIBRARIES AND *_INCLUDE_DIRS is present. | ||||
| # | ||||
| macro(set_component_found _component ) | ||||
|   if (${_component}_LIBRARIES AND ${_component}_INCLUDE_DIRS) | ||||
|     message(DEBUG "  - ${_component} found.") | ||||
|     set(${_component}_FOUND TRUE) | ||||
|   else () | ||||
|   message(DEBUG "  - ${_component} not found.") | ||||
|   endif () | ||||
| endmacro() | ||||
|  | ||||
| # | ||||
| ### Macro: find_component | ||||
| # | ||||
| # Checks for the given component by invoking pkgconfig and then looking up the libraries and | ||||
| # include directories. | ||||
| # | ||||
| macro(find_component _component _pkgconfig _library _header) | ||||
|  | ||||
|   if (NOT WIN32) | ||||
|      # use pkg-config to get the directories and then use these values | ||||
|      # in the FIND_PATH() and FIND_LIBRARY() calls | ||||
|      find_package(PkgConfig) | ||||
|      if (PKG_CONFIG_FOUND) | ||||
|        pkg_check_modules(PC_${_component} ${_pkgconfig}) | ||||
|        message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDEDIR}") | ||||
|        message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDE_DIRS}") | ||||
|        message(STATUS "${PC_${_component}_CFLAGS}") | ||||
|      endif () | ||||
|   endif (NOT WIN32) | ||||
|  | ||||
|  | ||||
|   find_path(${_component}_INCLUDE_DIRS ${_header} | ||||
|     HINTS | ||||
|       ${PC_${_component}_INCLUDEDIR} | ||||
|       ${PC_${_component}_INCLUDE_DIRS} | ||||
|     PATH_SUFFIXES | ||||
|       ffmpeg | ||||
|   ) | ||||
|  | ||||
|   # CMake's default is to search first for shared libraries and then for static libraries. | ||||
|   # Todo later: add option to prefer static libs over dynamic: | ||||
|   find_library(${_component}_LIBRARIES NAMES ${_library} lib${_library}.a | ||||
|       HINTS | ||||
|       ${PC_${_component}_LIBDIR} | ||||
|       ${PC_${_component}_LIBRARY_DIRS} | ||||
|   ) | ||||
|  | ||||
|   set(${_component}_DEFINITIONS  ${PC_${_component}_CFLAGS_OTHER} CACHE STRING "The ${_component} CFLAGS.") | ||||
|   set(${_component}_VERSION      ${PC_${_component}_VERSION}      CACHE STRING "The ${_component} version number.") | ||||
|  | ||||
|   set_component_found(${_component}) | ||||
|  | ||||
|   mark_as_advanced( | ||||
|     ${_component}_INCLUDE_DIRS | ||||
|     ${_component}_LIBRARIES | ||||
|     ${_component}_DEFINITIONS | ||||
|     ${_component}_VERSION) | ||||
|  | ||||
| endmacro() | ||||
|  | ||||
|  | ||||
| # Check for cached results. If there are skip the costly part. | ||||
| if (NOT FFMPEG_LIBRARIES) | ||||
|  | ||||
|   # Check for all possible component. | ||||
|   find_component(AVCODEC    libavcodec    avcodec  libavcodec/avcodec.h) | ||||
|   find_component(AVFORMAT   libavformat   avformat libavformat/avformat.h) | ||||
|   find_component(AVDEVICE   libavdevice   avdevice libavdevice/avdevice.h) | ||||
|   #find_component(AVRESAMPLE libavresample avresample libavresample/avresample.h) # old name for swresample | ||||
|   find_component(AVUTIL     libavutil     avutil   libavutil/avutil.h) | ||||
|   find_component(AVFILTER   libavfilter   avfilter libavfilter/avfilter.h) | ||||
|   find_component(SWSCALE    libswscale    swscale  libswscale/swscale.h) | ||||
|   find_component(POSTPROC   libpostproc   postproc libpostproc/postprocess.h) | ||||
|   find_component(SWRESAMPLE libswresample swresample libswresample/swresample.h) | ||||
|  | ||||
|   # Check if the required components were found and add their stuff to the FFMPEG_* vars. | ||||
|   foreach (_component ${FFmpeg_FIND_COMPONENTS}) | ||||
|     if (${_component}_FOUND) | ||||
|       # message(STATUS "Required component ${_component} present.") | ||||
|       set(FFMPEG_LIBRARIES   ${FFMPEG_LIBRARIES}   ${${_component}_LIBRARIES}) | ||||
|       set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} ${${_component}_DEFINITIONS}) | ||||
|       list(APPEND FFMPEG_INCLUDE_DIRS ${${_component}_INCLUDE_DIRS}) | ||||
|     else () | ||||
|       # message(STATUS "Required component ${_component} missing.") | ||||
|     endif () | ||||
|   endforeach () | ||||
|  | ||||
|   # Build the include path with duplicates removed. | ||||
|   if (FFMPEG_INCLUDE_DIRS) | ||||
|     list(REMOVE_DUPLICATES FFMPEG_INCLUDE_DIRS) | ||||
|   endif () | ||||
|  | ||||
|   # cache the vars. | ||||
|   set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIRS} CACHE STRING "The FFmpeg include directories." FORCE) | ||||
|   set(FFMPEG_LIBRARIES    ${FFMPEG_LIBRARIES}    CACHE STRING "The FFmpeg libraries." FORCE) | ||||
|   set(FFMPEG_DEFINITIONS  ${FFMPEG_DEFINITIONS}  CACHE STRING "The FFmpeg cflags." FORCE) | ||||
|  | ||||
|   mark_as_advanced(FFMPEG_INCLUDE_DIRS | ||||
|                    FFMPEG_LIBRARIES | ||||
|                    FFMPEG_DEFINITIONS) | ||||
|  | ||||
| endif () | ||||
|  | ||||
| # Now set the noncached _FOUND vars for the components. | ||||
| # whisper.cpp does not need SWSCALE | ||||
| foreach (_component AVCODEC AVDEVICE AVFORMAT AVRESAMPLE AVUTIL POSTPROCESS) | ||||
|   set_component_found(${_component}) | ||||
| endforeach () | ||||
|  | ||||
| # Compile the list of required vars | ||||
| set(_FFmpeg_REQUIRED_VARS FFMPEG_LIBRARIES FFMPEG_INCLUDE_DIRS) | ||||
| foreach (_component ${FFmpeg_FIND_COMPONENTS}) | ||||
|   list(APPEND _FFmpeg_REQUIRED_VARS ${_component}_LIBRARIES ${_component}_INCLUDE_DIRS) | ||||
| endforeach () | ||||
|  | ||||
| # Give a nice error message if some of the required vars are missing. | ||||
| find_package_handle_standard_args(FFmpeg DEFAULT_MSG ${_FFmpeg_REQUIRED_VARS}) | ||||
|  | ||||
							
								
								
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/build-info.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/build-info.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| set(BUILD_NUMBER 0) | ||||
| set(BUILD_COMMIT "unknown") | ||||
| set(BUILD_COMPILER "unknown") | ||||
| set(BUILD_TARGET "unknown") | ||||
|  | ||||
| # Look for git | ||||
| find_package(Git) | ||||
| if(NOT Git_FOUND) | ||||
|     find_program(GIT_EXECUTABLE NAMES git git.exe) | ||||
|     if(GIT_EXECUTABLE) | ||||
|         set(Git_FOUND TRUE) | ||||
|         message(STATUS "Found Git: ${GIT_EXECUTABLE}") | ||||
|     else() | ||||
|         message(WARNING "Git not found. Build info will not be accurate.") | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| # Get the commit count and hash | ||||
| if(Git_FOUND) | ||||
|     execute_process( | ||||
|         COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE HEAD | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|         RESULT_VARIABLE RES | ||||
|     ) | ||||
|     if (RES EQUAL 0) | ||||
|         set(BUILD_COMMIT ${HEAD}) | ||||
|     endif() | ||||
|     execute_process( | ||||
|         COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE COUNT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|         RESULT_VARIABLE RES | ||||
|     ) | ||||
|     if (RES EQUAL 0) | ||||
|         set(BUILD_NUMBER ${COUNT}) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if(MSVC) | ||||
|     set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") | ||||
|     set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME}) | ||||
|     add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>") | ||||
|     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>") | ||||
| else() | ||||
|     execute_process( | ||||
|         COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER} | ||||
|         OUTPUT_VARIABLE OUT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
|     set(BUILD_COMPILER ${OUT}) | ||||
|     execute_process( | ||||
|         COMMAND ${CMAKE_C_COMPILER} -dumpmachine | ||||
|         OUTPUT_VARIABLE OUT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
|     set(BUILD_TARGET ${OUT}) | ||||
| endif() | ||||
							
								
								
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/git-vars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/git-vars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| find_package(Git) | ||||
|  | ||||
| # the commit's SHA1 | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_SHA1 | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the date of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_DATE | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the subject of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%s | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_COMMIT_SUBJECT | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
							
								
								
									
										65
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| set(WHISPER_VERSION      @WHISPER_INSTALL_VERSION@) | ||||
| set(WHISPER_BUILD_COMMIT @WHISPER_BUILD_COMMIT@) | ||||
| set(WHISPER_BUILD_NUMBER @WHISPER_BUILD_NUMBER@) | ||||
| set(WHISPER_SHARED_LIB   @BUILD_SHARED_LIBS@) | ||||
|  | ||||
| set(GGML_BLAS       @GGML_BLAS@) | ||||
| set(GGML_CUDA       @GGML_CUDA@) | ||||
| set(GGML_METAL      @GGML_METAL@) | ||||
| set(GGML_HIPBLAS    @GGML_HIPBLAS@) | ||||
| set(GGML_ACCELERATE @GGML_ACCELERATE@) | ||||
|  | ||||
| @PACKAGE_INIT@ | ||||
|  | ||||
| set_and_check(WHISPER_INCLUDE_DIR "@PACKAGE_WHISPER_INCLUDE_INSTALL_DIR@") | ||||
| set_and_check(WHISPER_LIB_DIR     "@PACKAGE_WHISPER_LIB_INSTALL_DIR@") | ||||
| set_and_check(WHISPER_BIN_DIR     "@PACKAGE_WHISPER_BIN_INSTALL_DIR@") | ||||
|  | ||||
| # Ensure transient dependencies satisfied | ||||
|  | ||||
| find_package(Threads REQUIRED) | ||||
|  | ||||
| if (APPLE AND GGML_ACCELERATE) | ||||
|     find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_BLAS) | ||||
|     find_package(BLAS REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_CUDA) | ||||
|     find_package(CUDAToolkit REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_METAL) | ||||
|     find_library(FOUNDATION_LIBRARY Foundation REQUIRED) | ||||
|     find_library(METAL_FRAMEWORK Metal REQUIRED) | ||||
|     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_HIPBLAS) | ||||
|     find_package(hip REQUIRED) | ||||
|     find_package(hipblas REQUIRED) | ||||
|     find_package(rocblas REQUIRED) | ||||
| endif() | ||||
|  | ||||
| find_library(whisper_LIBRARY whisper | ||||
|     REQUIRED | ||||
|     HINTS ${WHISPER_LIB_DIR}) | ||||
|  | ||||
| set(_whisper_link_deps "Threads::Threads" "@WHISPER_EXTRA_LIBS@") | ||||
| set(_whisper_transient_defines "@WHISPER_TRANSIENT_DEFINES@") | ||||
|  | ||||
| add_library(whisper UNKNOWN IMPORTED) | ||||
|  | ||||
| set_target_properties(whisper | ||||
|     PROPERTIES | ||||
|     INTERFACE_INCLUDE_DIRECTORIES "${WHISPER_INCLUDE_DIR}" | ||||
|         INTERFACE_LINK_LIBRARIES "${_whisper_link_deps}" | ||||
|         INTERFACE_COMPILE_DEFINITIONS "${_whisper_transient_defines}" | ||||
|         IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" | ||||
|         IMPORTED_LOCATION "${whisper_LIBRARY}" | ||||
|         INTERFACE_COMPILE_FEATURES cxx_std_11 | ||||
|         POSITION_INDEPENDENT_CODE ON ) | ||||
|  | ||||
| check_required_components(whisper) | ||||
							
								
								
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper.pc.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper.pc.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| prefix=@CMAKE_INSTALL_PREFIX@ | ||||
| exec_prefix=${prefix} | ||||
| libdir=${exec_prefix}/lib | ||||
| includedir=${prefix}/include | ||||
|  | ||||
| Name: whisper | ||||
| Description: Port of OpenAI's Whisper model in C/C++ | ||||
| Version: @PROJECT_VERSION@ | ||||
| Libs: -L${libdir} -lggml  -lggml-base -lwhisper | ||||
| Cflags: -I${includedir} | ||||
							
								
								
									
										1
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| src/ggml-metal-embed.metal | ||||
							
								
								
									
										343
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										343
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,343 @@ | ||||
| cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. | ||||
| project("ggml" C CXX) | ||||
| include(CheckIncludeFileCXX) | ||||
|  | ||||
| set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||||
|  | ||||
| if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) | ||||
|     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||||
|     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") | ||||
| endif() | ||||
|  | ||||
| if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||||
|     set(GGML_STANDALONE ON) | ||||
|  | ||||
|     set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) | ||||
|  | ||||
|     # configure project version | ||||
|     # TODO | ||||
| else() | ||||
|     set(GGML_STANDALONE OFF) | ||||
| endif() | ||||
|  | ||||
| if (EMSCRIPTEN) | ||||
|     set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|  | ||||
|     option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON) | ||||
| else() | ||||
|     if (MINGW) | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|     else() | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT ON) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| # remove the lib prefix on win32 mingw | ||||
| if (WIN32) | ||||
|     set(CMAKE_STATIC_LIBRARY_PREFIX "") | ||||
|     set(CMAKE_SHARED_LIBRARY_PREFIX "") | ||||
|     set(CMAKE_SHARED_MODULE_PREFIX  "") | ||||
| endif() | ||||
|  | ||||
| option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) | ||||
| option(GGML_BACKEND_DL   "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF) | ||||
|  | ||||
| # | ||||
| # option list | ||||
| # | ||||
|  | ||||
| # TODO: mark all options as advanced when not GGML_STANDALONE | ||||
|  | ||||
| if (APPLE) | ||||
|     set(GGML_METAL_DEFAULT ON) | ||||
|     set(GGML_BLAS_DEFAULT ON) | ||||
|     set(GGML_BLAS_VENDOR_DEFAULT "Apple") | ||||
| else() | ||||
|     set(GGML_METAL_DEFAULT OFF) | ||||
|     set(GGML_BLAS_DEFAULT OFF) | ||||
|     set(GGML_BLAS_VENDOR_DEFAULT "Generic") | ||||
| endif() | ||||
|  | ||||
| if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH}) | ||||
|     message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF") | ||||
|     set(GGML_NATIVE_DEFAULT OFF) | ||||
| else() | ||||
|     set(GGML_NATIVE_DEFAULT ON) | ||||
| endif() | ||||
|  | ||||
| # defaults | ||||
| if (NOT GGML_LLAMAFILE_DEFAULT) | ||||
|     set(GGML_LLAMAFILE_DEFAULT OFF) | ||||
| endif() | ||||
|  | ||||
| if (NOT GGML_CUDA_GRAPHS_DEFAULT) | ||||
|     set(GGML_CUDA_GRAPHS_DEFAULT OFF) | ||||
| endif() | ||||
|  | ||||
| # general | ||||
| option(GGML_STATIC "ggml: static link libraries"                     OFF) | ||||
| option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT}) | ||||
| option(GGML_LTO    "ggml: enable link time optimization"             OFF) | ||||
| option(GGML_CCACHE "ggml: use ccache if available"                   ON) | ||||
|  | ||||
| # debug | ||||
| option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings"                   ON) | ||||
| option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) | ||||
| option(GGML_GPROF                  "ggml: enable gprof"                                   OFF) | ||||
|  | ||||
| # build | ||||
| option(GGML_FATAL_WARNINGS    "ggml: enable -Werror flag"    OFF) | ||||
|  | ||||
| # sanitizers | ||||
| option(GGML_SANITIZE_THREAD    "ggml: enable thread sanitizer"    OFF) | ||||
| option(GGML_SANITIZE_ADDRESS   "ggml: enable address sanitizer"   OFF) | ||||
| option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF) | ||||
|  | ||||
| # instruction set specific | ||||
| if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT) | ||||
|     set(INS_ENB OFF) | ||||
| else() | ||||
|     set(INS_ENB ON) | ||||
| endif() | ||||
|  | ||||
| option(GGML_CPU_HBM          "ggml: use memkind for CPU HBM" OFF) | ||||
| option(GGML_CPU_AARCH64      "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) | ||||
| option(GGML_AVX              "ggml: enable AVX"              ${INS_ENB}) | ||||
| option(GGML_AVX_VNNI         "ggml: enable AVX-VNNI"         OFF) | ||||
| option(GGML_AVX2             "ggml: enable AVX2"             ${INS_ENB}) | ||||
| option(GGML_AVX512           "ggml: enable AVX512F"          OFF) | ||||
| option(GGML_AVX512_VBMI      "ggml: enable AVX512-VBMI"      OFF) | ||||
| option(GGML_AVX512_VNNI      "ggml: enable AVX512-VNNI"      OFF) | ||||
| option(GGML_AVX512_BF16      "ggml: enable AVX512-BF16"      OFF) | ||||
| if (NOT MSVC) | ||||
|     # in MSVC F16C and FMA is implied with AVX2/AVX512 | ||||
|     option(GGML_FMA          "ggml: enable FMA"              ${INS_ENB}) | ||||
|     option(GGML_F16C         "ggml: enable F16C"             ${INS_ENB}) | ||||
|     # MSVC does not seem to support AMX | ||||
|     option(GGML_AMX_TILE     "ggml: enable AMX-TILE"         OFF) | ||||
|     option(GGML_AMX_INT8     "ggml: enable AMX-INT8"         OFF) | ||||
|     option(GGML_AMX_BF16     "ggml: enable AMX-BF16"         OFF) | ||||
| endif() | ||||
| option(GGML_LASX             "ggml: enable lasx"             ON) | ||||
| option(GGML_LSX              "ggml: enable lsx"              ON) | ||||
| option(GGML_RVV              "ggml: enable rvv"              ON) | ||||
|  | ||||
| option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) | ||||
| set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") | ||||
|  | ||||
|  | ||||
| if (WIN32) | ||||
|     set(GGML_WIN_VER "0x602" CACHE STRING   "ggml: Windows version") | ||||
| endif() | ||||
|  | ||||
| # ggml core | ||||
| set(GGML_SCHED_MAX_COPIES  "4" CACHE STRING "ggml: max input copies for pipeline parallelism") | ||||
| option(GGML_CPU                             "ggml: enable CPU backend"                        ON) | ||||
|  | ||||
| # 3rd party libs / backends | ||||
| option(GGML_ACCELERATE                      "ggml: enable Accelerate framework"               ON) | ||||
| option(GGML_BLAS                            "ggml: use BLAS"                                  ${GGML_BLAS_DEFAULT}) | ||||
| set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING | ||||
|                                             "ggml: BLAS library vendor") | ||||
| option(GGML_LLAMAFILE                       "ggml: use LLAMAFILE"                             ${GGML_LLAMAFILE_DEFAULT}) | ||||
|  | ||||
| option(GGML_CUDA                            "ggml: use CUDA"                                  OFF) | ||||
| option(GGML_MUSA                            "ggml: use MUSA"                                  OFF) | ||||
| option(GGML_CUDA_FORCE_MMQ                  "ggml: use mmq kernels instead of cuBLAS"         OFF) | ||||
| option(GGML_CUDA_FORCE_CUBLAS               "ggml: always use cuBLAS instead of mmq kernels"  OFF) | ||||
| option(GGML_CUDA_F16                        "ggml: use 16 bit floats for some calculations"   OFF) | ||||
| set   (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING | ||||
|                                             "ggml: max. batch size for using peer access") | ||||
| option(GGML_CUDA_NO_PEER_COPY               "ggml: do not use peer to peer copies"            OFF) | ||||
| option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"                OFF) | ||||
| option(GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"     OFF) | ||||
| option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"          ${GGML_CUDA_GRAPHS_DEFAULT}) | ||||
|  | ||||
| option(GGML_HIP                             "ggml: use HIP"                                   OFF) | ||||
| option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF) | ||||
| option(GGML_HIP_NO_VMM                      "ggml: do not try to use HIP VMM"                 ON) | ||||
| option(GGML_HIP_UMA                         "ggml: use HIP unified memory architecture"       OFF) | ||||
| option(GGML_VULKAN                          "ggml: use Vulkan"                                OFF) | ||||
| option(GGML_VULKAN_CHECK_RESULTS            "ggml: run Vulkan op checks"                      OFF) | ||||
| option(GGML_VULKAN_DEBUG                    "ggml: enable Vulkan debug output"                OFF) | ||||
| option(GGML_VULKAN_MEMORY_DEBUG             "ggml: enable Vulkan memory debug output"         OFF) | ||||
| option(GGML_VULKAN_SHADER_DEBUG_INFO        "ggml: enable Vulkan shader debug info"           OFF) | ||||
| option(GGML_VULKAN_PERF                     "ggml: enable Vulkan perf output"                 OFF) | ||||
| option(GGML_VULKAN_VALIDATE                 "ggml: enable Vulkan validation"                  OFF) | ||||
| option(GGML_VULKAN_RUN_TESTS                "ggml: run Vulkan tests"                          OFF) | ||||
| option(GGML_KOMPUTE                         "ggml: use Kompute"                               OFF) | ||||
| option(GGML_METAL                           "ggml: use Metal"                                 ${GGML_METAL_DEFAULT}) | ||||
| option(GGML_METAL_USE_BF16                  "ggml: use bfloat if available"                   OFF) | ||||
| option(GGML_METAL_NDEBUG                    "ggml: disable Metal debugging"                   OFF) | ||||
| option(GGML_METAL_SHADER_DEBUG              "ggml: compile Metal with -fno-fast-math"         OFF) | ||||
| option(GGML_METAL_EMBED_LIBRARY             "ggml: embed Metal library"                       ${GGML_METAL}) | ||||
| set   (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING | ||||
|                                             "ggml: metal minimum macOS version") | ||||
| set   (GGML_METAL_STD "" CACHE STRING       "ggml: metal standard version (-std flag)") | ||||
| option(GGML_OPENMP                          "ggml: use OpenMP"                                ON) | ||||
| option(GGML_RPC                             "ggml: use RPC"                                   OFF) | ||||
| option(GGML_SYCL                            "ggml: use SYCL"                                  OFF) | ||||
| option(GGML_SYCL_F16                        "ggml: use 16 bit floats for sycl calculations"   OFF) | ||||
| set   (GGML_SYCL_TARGET "INTEL" CACHE STRING | ||||
|                                             "ggml: sycl target device") | ||||
| set   (GGML_SYCL_DEVICE_ARCH "" CACHE STRING | ||||
|                                             "ggml: sycl device architecture") | ||||
|  | ||||
| option(GGML_OPENCL                          "ggml: use OpenCL"                                OFF) | ||||
| option(GGML_OPENCL_PROFILING                "ggml: use OpenCL profiling (increases overhead)" OFF) | ||||
| option(GGML_OPENCL_EMBED_KERNELS            "ggml: embed kernels"                             ON) | ||||
| option(GGML_OPENCL_USE_ADRENO_KERNELS       "ggml: use optimized kernels for Adreno"          ON) | ||||
|  | ||||
| # toolchain for vulkan-shaders-gen | ||||
| set   (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen") | ||||
|  | ||||
| # extra artifacts | ||||
| option(GGML_BUILD_TESTS    "ggml: build tests"    ${GGML_STANDALONE}) | ||||
| option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) | ||||
|  | ||||
| # | ||||
| # dependencies | ||||
| # | ||||
|  | ||||
| set(CMAKE_C_STANDARD 11) | ||||
| set(CMAKE_C_STANDARD_REQUIRED true) | ||||
|  | ||||
| set(CMAKE_CXX_STANDARD 17) | ||||
| set(CMAKE_CXX_STANDARD_REQUIRED true) | ||||
|  | ||||
| set(THREADS_PREFER_PTHREAD_FLAG ON) | ||||
|  | ||||
| find_package(Threads REQUIRED) | ||||
|  | ||||
| # | ||||
| # build the library | ||||
| # | ||||
|  | ||||
| add_subdirectory(src) | ||||
|  | ||||
| # | ||||
| # tests and examples | ||||
| # | ||||
|  | ||||
| if (GGML_BUILD_TESTS) | ||||
|     enable_testing() | ||||
|     add_subdirectory(tests) | ||||
| endif () | ||||
|  | ||||
| if (GGML_BUILD_EXAMPLES) | ||||
|     add_subdirectory(examples) | ||||
| endif () | ||||
|  | ||||
| # | ||||
| # install | ||||
| # | ||||
|  | ||||
| include(GNUInstallDirs) | ||||
| include(CMakePackageConfigHelpers) | ||||
|  | ||||
| # all public headers | ||||
| set(GGML_PUBLIC_HEADERS | ||||
|     include/ggml.h | ||||
|     include/ggml-cpu.h | ||||
|     include/ggml-alloc.h | ||||
|     include/ggml-backend.h | ||||
|     include/ggml-blas.h | ||||
|     include/ggml-cann.h | ||||
|     include/ggml-cuda.h | ||||
|     include/ggml-kompute.h | ||||
|     include/ggml-opt.h | ||||
|     include/ggml-metal.h | ||||
|     include/ggml-rpc.h | ||||
|     include/ggml-sycl.h | ||||
|     include/ggml-vulkan.h | ||||
|     include/gguf.h) | ||||
|  | ||||
| set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") | ||||
| #if (GGML_METAL) | ||||
| #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") | ||||
| #endif() | ||||
| install(TARGETS ggml LIBRARY PUBLIC_HEADER) | ||||
| install(TARGETS ggml-base LIBRARY) | ||||
|  | ||||
| if (GGML_STANDALONE) | ||||
|     configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc | ||||
|         @ONLY) | ||||
|  | ||||
|     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc | ||||
|         DESTINATION share/pkgconfig) | ||||
| endif() | ||||
|  | ||||
| # | ||||
| # Create CMake package | ||||
| # | ||||
|  | ||||
| # Generate version info based on git commit. | ||||
|  | ||||
| if(NOT DEFINED GGML_BUILD_NUMBER) | ||||
|     find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH) | ||||
|     execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE GGML_BUILD_NUMBER | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
|  | ||||
|     if(GGML_BUILD_NUMBER EQUAL 1) | ||||
|         message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.") | ||||
|     endif() | ||||
|  | ||||
|     execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE GGML_BUILD_COMMIT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
| endif() | ||||
|  | ||||
|  | ||||
| # Capture variables prefixed with GGML_. | ||||
|  | ||||
| set(variable_set_statements | ||||
| " | ||||
| ####### Expanded from @GGML_VARIABLES_EXPANED@ by configure_package_config_file() ####### | ||||
| ####### Any changes to this file will be overwritten by the next CMake run        ####### | ||||
|  | ||||
| ") | ||||
|  | ||||
| set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS}) | ||||
|  | ||||
| get_cmake_property(all_variables VARIABLES) | ||||
| foreach(variable_name IN LISTS all_variables) | ||||
|     if(variable_name MATCHES "^GGML_") | ||||
|         string(REPLACE ";" "\\;" | ||||
|                variable_value "${${variable_name}}") | ||||
|  | ||||
|         set(variable_set_statements | ||||
|             "${variable_set_statements}set(${variable_name} \"${variable_value}\")\n") | ||||
|     endif() | ||||
| endforeach() | ||||
|  | ||||
| set(GGML_VARIABLES_EXPANDED ${variable_set_statements}) | ||||
|  | ||||
| # Create the CMake package and set install location. | ||||
|  | ||||
| set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER}) | ||||
| set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header  files") | ||||
| set(GGML_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files") | ||||
| set(GGML_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files") | ||||
|  | ||||
| configure_package_config_file( | ||||
|         ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake | ||||
|     INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml | ||||
|     PATH_VARS GGML_INCLUDE_INSTALL_DIR | ||||
|               GGML_LIB_INSTALL_DIR | ||||
|               GGML_BIN_INSTALL_DIR) | ||||
|  | ||||
| write_basic_package_version_file( | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake | ||||
|     VERSION ${GGML_INSTALL_VERSION} | ||||
|     COMPATIBILITY SameMajorVersion) | ||||
|  | ||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake | ||||
|               ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake | ||||
|         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) | ||||
							
								
								
									
										54
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| # Add new build types | ||||
|  | ||||
| # ReleaseGG - Release with enabled asserts | ||||
|  | ||||
| SET(CMAKE_CXX_FLAGS_RELEASEGG | ||||
|     "-O3" | ||||
|     CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_C_FLAGS_RELEASEGG | ||||
|     "-O3" | ||||
|     CACHE STRING "Flags used by the compiler during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used for linking binaries during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| MARK_AS_ADVANCED( | ||||
|     CMAKE_CXX_FLAGS_RELEASEGG | ||||
|     CMAKE_C_FLAGS_RELEASEGG | ||||
|     CMAKE_EXE_LINKER_FLAGS_RELEASEGG | ||||
|     CMAKE_SHARED_LINKER_FLAGS_RELEASEGG ) | ||||
|  | ||||
| # RelWithDebInfoGG - RelWithDebInfo with enabled asserts | ||||
|  | ||||
| SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG | ||||
|     "-O2 -g" | ||||
|     CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG | ||||
|     "-O2 -g" | ||||
|     CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| MARK_AS_ADVANCED( | ||||
|     CMAKE_CXX_FLAGS_RELWITHDEBINFOGG | ||||
|     CMAKE_C_FLAGS_RELWITHDEBINFOGG | ||||
|     CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG | ||||
|     CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG ) | ||||
|  | ||||
| if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) | ||||
|     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||||
|     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG") | ||||
| endif() | ||||
							
								
								
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/GitVars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/GitVars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| find_package(Git) | ||||
|  | ||||
| # the commit's SHA1 | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_SHA1 | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the date of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_DATE | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the subject of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%s | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_COMMIT_SUBJECT | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
							
								
								
									
										147
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | ||||
|  | ||||
| @GGML_VARIABLES_EXPANDED@ | ||||
|  | ||||
| @PACKAGE_INIT@ | ||||
|  | ||||
| set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@") | ||||
| set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@") | ||||
| set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@") | ||||
|  | ||||
| find_package(Threads REQUIRED) | ||||
|  | ||||
| find_library(GGML_LIBRARY ggml | ||||
|     REQUIRED | ||||
|     HINTS ${GGML_LIB_DIR} | ||||
|     NO_CMAKE_FIND_ROOT_PATH) | ||||
|  | ||||
| add_library(ggml::ggml UNKNOWN IMPORTED) | ||||
| set_target_properties(ggml::ggml | ||||
|     PROPERTIES | ||||
|         IMPORTED_LOCATION "${GGML_LIBRARY}") | ||||
|  | ||||
| find_library(GGML_BASE_LIBRARY ggml-base | ||||
|     REQUIRED | ||||
|     HINTS ${GGML_LIB_DIR} | ||||
|     NO_CMAKE_FIND_ROOT_PATH) | ||||
|  | ||||
| add_library(ggml::ggml-base UNKNOWN IMPORTED) | ||||
| set_target_properties(ggml::ggml-base | ||||
|     PROPERTIES | ||||
|         IMPORTED_LOCATION "${GGML_BASE_LIBRARY}") | ||||
|  | ||||
| if (NOT GGML_SHARED_LIB) | ||||
|     if (APPLE AND GGML_ACCELERATE) | ||||
|         find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK}) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_OPENMP) | ||||
|         find_package(OpenMP REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_CPU_HBM) | ||||
|         find_library(memkind memkind REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_BLAS) | ||||
|         find_package(BLAS REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES}) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS   ${BLAS_LINKER_FLAGS}) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_CUDA) | ||||
|         find_package(CUDAToolkit REQUIRED) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_METAL) | ||||
|         find_library(FOUNDATION_LIBRARY Foundation REQUIRED) | ||||
|         find_library(METAL_FRAMEWORK    Metal REQUIRED) | ||||
|         find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) | ||||
|  | ||||
|         list(APPEND GGML_METAL_INTERFACE_LINK_LIBRARIES | ||||
|                     ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK}) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_VULKAN) | ||||
|         find_package(Vulkan REQUIRED) | ||||
|         list(APPEND GGML_VULKAN_INTERFACE_LINK_LIBRARIES Vulkan::Vulkan) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_HIP) | ||||
|         find_package(hip     REQUIRED) | ||||
|         find_package(hipblas REQUIRED) | ||||
|         find_package(rocblas REQUIRED) | ||||
|         list(APPEND GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_SYCL) | ||||
|         find_package(DNNL) | ||||
|         if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") | ||||
|             list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl) | ||||
|         endif() | ||||
|         if (WIN32) | ||||
|             find_package(IntelSYCL REQUIRED) | ||||
|             find_package(MKL       REQUIRED) | ||||
|             list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) | ||||
|         endif() | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| set(_ggml_all_targets "") | ||||
| foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS}) | ||||
|     string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}") | ||||
|     string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx) | ||||
|  | ||||
|     find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend} | ||||
|         REQUIRED | ||||
|         HINTS ${GGML_LIB_DIR} | ||||
|         NO_CMAKE_FIND_ROOT_PATH) | ||||
|  | ||||
|     message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}") | ||||
|  | ||||
|     add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED) | ||||
|     set_target_properties(ggml::${_ggml_backend} | ||||
|         PROPERTIES | ||||
|             INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}" | ||||
|             IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" | ||||
|             IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}" | ||||
|             INTERFACE_COMPILE_FEATURES c_std_90 | ||||
|             POSITION_INDEPENDENT_CODE ON) | ||||
|  | ||||
|     string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}") | ||||
|     if(is_cpu_variant) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base") | ||||
|         set_target_properties(ggml::${_ggml_backend} | ||||
|            PROPERTIES | ||||
|                INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}") | ||||
|  | ||||
|         if(GGML_CPU_INTERFACE_LINK_OPTIONS) | ||||
|             set_target_properties(ggml::${_ggml_backend} | ||||
|                 PROPERTIES | ||||
|                     INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}") | ||||
|         endif() | ||||
|  | ||||
|     else() | ||||
|         list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base") | ||||
|         set_target_properties(ggml::${_ggml_backend} | ||||
|             PROPERTIES | ||||
|                 INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}") | ||||
|  | ||||
|         if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS) | ||||
|             set_target_properties(ggml::${_ggml_backend} | ||||
|                 PROPERTIES | ||||
|                     INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}") | ||||
|         endif() | ||||
|     endif() | ||||
|  | ||||
|     list(APPEND _ggml_all_targets ggml::${_ggml_backend}) | ||||
| endforeach() | ||||
|  | ||||
| add_library(ggml::all INTERFACE IMPORTED) | ||||
| set_target_properties(ggml::all | ||||
|     PROPERTIES | ||||
|         INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}") | ||||
|  | ||||
| check_required_components(ggml) | ||||
							
								
								
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-alloc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-alloc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; | ||||
| typedef struct      ggml_backend_buffer * ggml_backend_buffer_t; | ||||
| typedef struct             ggml_backend * ggml_backend_t; | ||||
|  | ||||
| // Tensor allocator | ||||
| struct ggml_tallocr { | ||||
|     ggml_backend_buffer_t buffer; | ||||
|     void * base; | ||||
|     size_t alignment; | ||||
|     size_t offset; | ||||
| }; | ||||
|  | ||||
| GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); | ||||
| GGML_API void                ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); | ||||
|  | ||||
| // Graph allocator | ||||
| /* | ||||
|   Example usage: | ||||
|     ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); | ||||
|  | ||||
|     // optional: create a worst-case graph and reserve the buffers to avoid reallocations | ||||
|     ggml_gallocr_reserve(galloc, build_graph(max_batch)); | ||||
|  | ||||
|     // allocate the graph | ||||
|     struct ggml_cgraph * graph = build_graph(batch); | ||||
|     ggml_gallocr_alloc_graph(galloc, graph); | ||||
|  | ||||
|     printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); | ||||
|  | ||||
|     // evaluate the graph | ||||
|     ggml_backend_graph_compute(backend, graph); | ||||
| */ | ||||
|  | ||||
| // special tensor flags for use with the graph allocator: | ||||
| //   ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses | ||||
| //   ggml_set_output(): output tensors are never freed and never overwritten | ||||
|  | ||||
| typedef struct ggml_gallocr * ggml_gallocr_t; | ||||
|  | ||||
| GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); | ||||
| GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); | ||||
| GGML_API void           ggml_gallocr_free(ggml_gallocr_t galloc); | ||||
|  | ||||
| // pre-allocate buffers from a measure graph - does not allocate or modify the graph | ||||
| // call with a worst-case graph to avoid buffer reallocations | ||||
| // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed | ||||
| // returns false if the buffer allocation failed | ||||
| GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); | ||||
| GGML_API bool ggml_gallocr_reserve_n( | ||||
|     ggml_gallocr_t galloc, | ||||
|     struct ggml_cgraph * graph, | ||||
|     const int * node_buffer_ids, | ||||
|     const int * leaf_buffer_ids); | ||||
|  | ||||
| // automatic reallocation if the topology changes when using a single buffer | ||||
| // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) | ||||
| GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); | ||||
|  | ||||
| GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); | ||||
|  | ||||
| // Utils | ||||
| // Create a buffer and allocate all the tensors in a ggml_context | ||||
| GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); | ||||
| GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										354
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-backend.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										354
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-backend.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,354 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-alloc.h" | ||||
|  | ||||
| #ifdef GGML_BACKEND_SHARED | ||||
| #    if defined(_WIN32) && !defined(__MINGW32__) | ||||
| #        ifdef GGML_BACKEND_BUILD | ||||
| #            define GGML_BACKEND_API __declspec(dllexport) extern | ||||
| #        else | ||||
| #            define GGML_BACKEND_API __declspec(dllimport) extern | ||||
| #        endif | ||||
| #    else | ||||
| #        define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern | ||||
| #    endif | ||||
| #else | ||||
| #    define GGML_BACKEND_API extern | ||||
| #endif | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; | ||||
|     typedef struct ggml_backend_buffer * ggml_backend_buffer_t; | ||||
|     typedef struct ggml_backend_event * ggml_backend_event_t; | ||||
|     typedef struct ggml_backend * ggml_backend_t; | ||||
|     typedef void * ggml_backend_graph_plan_t; | ||||
|     typedef struct ggml_backend_reg * ggml_backend_reg_t; | ||||
|     typedef struct ggml_backend_device * ggml_backend_dev_t; | ||||
|  | ||||
|  | ||||
|     // | ||||
|     // Backend buffer type | ||||
|     // | ||||
|  | ||||
|     GGML_API const char *          ggml_backend_buft_name          (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer  (ggml_backend_buffer_type_t buft, size_t size); | ||||
|     GGML_API size_t                ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API size_t                ggml_backend_buft_get_max_size  (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API size_t                ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); | ||||
|     GGML_API bool                  ggml_backend_buft_is_host       (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API ggml_backend_dev_t    ggml_backend_buft_get_device    (ggml_backend_buffer_type_t buft); | ||||
|  | ||||
|     // | ||||
|     // Backend buffer | ||||
|     // | ||||
|  | ||||
|     enum ggml_backend_buffer_usage { | ||||
|         GGML_BACKEND_BUFFER_USAGE_ANY = 0, | ||||
|         GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1, | ||||
|         GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2, | ||||
|     }; | ||||
|  | ||||
|     GGML_API const char *                   ggml_backend_buffer_name          (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_free          (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void *                         ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); | ||||
|     GGML_API void                           ggml_backend_buffer_clear         (ggml_backend_buffer_t buffer, uint8_t value); | ||||
|     GGML_API bool                           ggml_backend_buffer_is_host       (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_set_usage     (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); | ||||
|     GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage     (ggml_backend_buffer_t buffer); | ||||
|     GGML_API ggml_backend_buffer_type_t     ggml_backend_buffer_get_type      (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_reset         (ggml_backend_buffer_t buffer); | ||||
|  | ||||
|     // tensor copy between different backends | ||||
|     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); | ||||
|  | ||||
|     // | ||||
|     // Backend (stream) | ||||
|     // | ||||
|  | ||||
|     GGML_API ggml_guid_t  ggml_backend_guid(ggml_backend_t backend); | ||||
|     GGML_API const char * ggml_backend_name(ggml_backend_t backend); | ||||
|     GGML_API void         ggml_backend_free(ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend); | ||||
|     GGML_API ggml_backend_buffer_t      ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size); | ||||
|     GGML_API size_t                     ggml_backend_get_alignment(ggml_backend_t backend); | ||||
|     GGML_API size_t                     ggml_backend_get_max_size(ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); | ||||
|     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size); | ||||
|  | ||||
|     // "offset" refers to the offset in tensor->data for setting/getting data | ||||
|     GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); | ||||
|     GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size); | ||||
|     GGML_API void ggml_backend_tensor_memset(   struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size); | ||||
|  | ||||
|     GGML_API void ggml_backend_synchronize(ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||||
|     GGML_API void                      ggml_backend_graph_plan_free  (ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||||
|  | ||||
|     GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||||
|     GGML_API enum ggml_status ggml_backend_graph_compute      (ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||||
|     GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||||
|  | ||||
|     // NOTE: will be removed, use device version instead | ||||
|     GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); | ||||
|     GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft); | ||||
|     GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op); | ||||
|  | ||||
|     // asynchronous copy | ||||
|     // the copy is performed after all the currently queued operations in backend_src | ||||
|     // backend_dst will wait for the copy to complete before performing other operations | ||||
|     // automatic fallback to sync copy if async is not supported | ||||
|     GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst); | ||||
|  | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend); | ||||
|  | ||||
|     // | ||||
|     // Events | ||||
|     // | ||||
|  | ||||
|     GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device); | ||||
|     GGML_API void                 ggml_backend_event_free(ggml_backend_event_t event); | ||||
|     GGML_API void                 ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend); | ||||
|     GGML_API void                 ggml_backend_event_synchronize(ggml_backend_event_t event); | ||||
|     GGML_API void                 ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event); | ||||
|  | ||||
|     // | ||||
|     // Backend device | ||||
|     // | ||||
|  | ||||
|     enum ggml_backend_dev_type { | ||||
|         // CPU device using system memory | ||||
|         GGML_BACKEND_DEVICE_TYPE_CPU, | ||||
|         // GPU device using dedicated memory | ||||
|         GGML_BACKEND_DEVICE_TYPE_GPU, | ||||
|         // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX) | ||||
|         GGML_BACKEND_DEVICE_TYPE_ACCEL | ||||
|     }; | ||||
|  | ||||
|     // functionality supported by the device | ||||
|     struct ggml_backend_dev_caps { | ||||
|         // asynchronous operations | ||||
|         bool async; | ||||
|         // pinned host buffer | ||||
|         bool host_buffer; | ||||
|         // creating buffers from host ptr | ||||
|         bool buffer_from_host_ptr; | ||||
|         // event synchronization | ||||
|         bool events; | ||||
|     }; | ||||
|  | ||||
|     // all the device properties | ||||
|     struct ggml_backend_dev_props { | ||||
|         const char * name; | ||||
|         const char * description; | ||||
|         size_t memory_free; | ||||
|         size_t memory_total; | ||||
|         enum ggml_backend_dev_type type; | ||||
|         struct ggml_backend_dev_caps caps; | ||||
|     }; | ||||
|  | ||||
|     GGML_API const char *                  ggml_backend_dev_name(ggml_backend_dev_t device); | ||||
|     GGML_API const char *                  ggml_backend_dev_description(ggml_backend_dev_t device); | ||||
|     GGML_API void                          ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total); | ||||
|     GGML_API enum ggml_backend_dev_type    ggml_backend_dev_type(ggml_backend_dev_t device); | ||||
|     GGML_API void                          ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props); | ||||
|     GGML_API ggml_backend_reg_t            ggml_backend_dev_backend_reg(ggml_backend_dev_t device); | ||||
|     GGML_API ggml_backend_t                ggml_backend_dev_init(ggml_backend_dev_t device, const char * params); | ||||
|     GGML_API ggml_backend_buffer_type_t    ggml_backend_dev_buffer_type(ggml_backend_dev_t device); | ||||
|     GGML_API ggml_backend_buffer_type_t    ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); | ||||
|     GGML_API ggml_backend_buffer_t         ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); | ||||
|  | ||||
|     GGML_API bool                          ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op); | ||||
|     GGML_API bool                          ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft); | ||||
|     GGML_API bool                          ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op); | ||||
|  | ||||
|     // | ||||
|     // Backend (reg) | ||||
|     // | ||||
|  | ||||
|     GGML_API const char *       ggml_backend_reg_name(ggml_backend_reg_t reg); | ||||
|     GGML_API size_t             ggml_backend_reg_dev_count(ggml_backend_reg_t reg); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index); | ||||
|     GGML_API void *             ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name); | ||||
|  | ||||
|     // Common functions that may be obtained using ggml_backend_reg_get_proc_address | ||||
|  | ||||
|     // Split buffer type for tensor parallelism | ||||
|     typedef ggml_backend_buffer_type_t   (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split); | ||||
|     // Set the number of threads for the backend | ||||
|     typedef void                         (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads); | ||||
|     // Get additional buffer types provided by the device (returns a NULL-terminated array) | ||||
|     typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device); | ||||
|     // Set the abort callback for the backend | ||||
|     typedef void                         (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data); | ||||
|     // Get a list of feature flags supported by the backend (returns a NULL-terminated array) | ||||
|     struct ggml_backend_feature { | ||||
|         const char * name; | ||||
|         const char * value; | ||||
|     }; | ||||
|     typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg); | ||||
|  | ||||
|     // | ||||
|     // Backend registry | ||||
|     // | ||||
|  | ||||
|     GGML_API void ggml_backend_device_register(ggml_backend_dev_t device); | ||||
|  | ||||
|     // Backend (reg) enumeration | ||||
|     GGML_API size_t             ggml_backend_reg_count(void); | ||||
|     GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index); | ||||
|     GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name); | ||||
|  | ||||
|     // Device enumeration | ||||
|     GGML_API size_t             ggml_backend_dev_count(void); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type); | ||||
|  | ||||
|     // Direct backend (stream) initialization | ||||
|     // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params) | ||||
|     GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params); | ||||
|     // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params) | ||||
|     GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params); | ||||
|     // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL) | ||||
|     GGML_API ggml_backend_t ggml_backend_init_best(void); | ||||
|  | ||||
|     // Load a backend from a dynamic library and register it | ||||
|     GGML_API ggml_backend_reg_t ggml_backend_load(const char * path); | ||||
|     // Unload a backend if loaded dynamically and unregister it | ||||
|     GGML_API void               ggml_backend_unload(ggml_backend_reg_t reg); | ||||
|     // Load all known backends from dynamic libraries | ||||
|     GGML_API void               ggml_backend_load_all(void); | ||||
|     GGML_API void               ggml_backend_load_all_from_path(const char * dir_path); | ||||
|  | ||||
|     // | ||||
|     // Backend scheduler | ||||
|     // | ||||
|  | ||||
|     // The backend scheduler allows for multiple backend devices to be used together | ||||
|     // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends | ||||
|     // The backends are selected based on: | ||||
|     // - the backend that supports the operation | ||||
|     // - the location of the pre-allocated tensors (e.g. the weights) | ||||
|     /* | ||||
|       Example usage: | ||||
|  | ||||
|         // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned | ||||
|         // preferrably to run on the same backend as the buffer | ||||
|         ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); | ||||
|  | ||||
|         sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false); | ||||
|  | ||||
|         // initialize buffers from a max size graph (optional) | ||||
|         reserve_graph = build_graph(sched, max_batch_size); | ||||
|  | ||||
|         // manually assign nodes to a backend (optional, should not be needed in most cases) | ||||
|         struct ggml_tensor * node = ggml_mul_mat(ctx, ...); | ||||
|         ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu); | ||||
|  | ||||
|         ggml_backend_sched_reserve(sched, reserve_graph); | ||||
|  | ||||
|         // compute | ||||
|         graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation | ||||
|         for (int i = 0; i < 10; ++i) { | ||||
|             ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically | ||||
|         } | ||||
|  | ||||
|         // if there are graph inputs: | ||||
|         graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called) | ||||
|         ggml_backend_sched_reset(sched); // clear the allocation of the previous graph | ||||
|         ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it | ||||
|         ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors | ||||
|         ggml_backend_sched_graph_compute(sched, graph); // execute the graph | ||||
|  | ||||
|         // as an alternative to the above it is also possible to assign the inputs to a dedicated context and | ||||
|         // allocate them statically via ggml_backend_alloc_ctx_tensors | ||||
|     } | ||||
|     */ | ||||
|  | ||||
|     typedef struct ggml_backend_sched * ggml_backend_sched_t; | ||||
|  | ||||
|     // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback) | ||||
|     // when ask == true, the scheduler wants to know if the user wants to observe this node | ||||
|     // this allows the scheduler to batch nodes together in order to evaluate them in a single call | ||||
|     // | ||||
|     // when ask == false, the scheduler is passing the node tensor to the user for observation | ||||
|     // if the user returns false, the scheduler will cancel the graph compute | ||||
|     // | ||||
|     typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); | ||||
|  | ||||
|     // Initialize a backend scheduler, backends with low index are given priority over backends with high index | ||||
|     GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel); | ||||
|     GGML_API void                 ggml_backend_sched_free(ggml_backend_sched_t sched); | ||||
|  | ||||
|     // Initialize backend buffers from a measure graph | ||||
|     GGML_API bool                 ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success | ||||
|  | ||||
|     GGML_API int                  ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched); | ||||
|     GGML_API ggml_backend_t       ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i); | ||||
|  | ||||
|     // Get the number of splits of the last graph | ||||
|     GGML_API int                  ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); | ||||
|     GGML_API int                  ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched); | ||||
|  | ||||
|     GGML_API size_t               ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API void                 ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); | ||||
|     GGML_API ggml_backend_t       ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); | ||||
|  | ||||
|     // Allocate and compute graph on the backend scheduler | ||||
|     GGML_API bool                 ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success | ||||
|     GGML_API enum ggml_status     ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); | ||||
|     GGML_API enum ggml_status     ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph); | ||||
|     GGML_API void                 ggml_backend_sched_synchronize(ggml_backend_sched_t sched); | ||||
|  | ||||
|     // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. | ||||
|     // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. | ||||
|     // The correct way to use this API is to discard the deallocated tensors and create new ones. | ||||
|     GGML_API void                 ggml_backend_sched_reset(ggml_backend_sched_t sched); | ||||
|  | ||||
|     // Set a callback to be called for each resulting node during graph compute | ||||
|     GGML_API void                 ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data); | ||||
|  | ||||
|     // | ||||
|     // Utils | ||||
|     // | ||||
|  | ||||
|     struct ggml_backend_graph_copy { | ||||
|         ggml_backend_buffer_t buffer; | ||||
|         struct ggml_context * ctx_allocated; | ||||
|         struct ggml_context * ctx_unallocated; | ||||
|         struct ggml_cgraph * graph; | ||||
|     }; | ||||
|  | ||||
|     // Copy a graph to a different backend | ||||
|     GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph); | ||||
|     GGML_API void                           ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy); | ||||
|  | ||||
|     typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); | ||||
|  | ||||
|     // Compare the output of two backends | ||||
|     GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data); | ||||
|  | ||||
|     // Tensor initialization | ||||
|     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); | ||||
|     GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor); | ||||
|  | ||||
|     // CPU buffer types are always available | ||||
|     GGML_API ggml_backend_buffer_t      ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); | ||||
|     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										25
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-blas.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-blas.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // backend API | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); | ||||
|  | ||||
| // number of threads used for conversion to float | ||||
| // for openblas and blis, this will also set the number of threads used for blas operations | ||||
| GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); | ||||
|  | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										123
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cann.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cann.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,123 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| /** | ||||
|  * @brief Maximum number of CANN devices supported. | ||||
|  */ | ||||
| #define GGML_CANN_MAX_DEVICES 16 | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void); | ||||
|  | ||||
| /** | ||||
|  * @brief Initializes the CANN backend for a specified device. | ||||
|  * | ||||
|  * This function initializes the CANN backend for the given device. | ||||
|  * It verifies the device index, allocates a context, and creates a backend | ||||
|  * instance. | ||||
|  * | ||||
|  * @param device The index of the device to initialize. | ||||
|  * @return A pointer to the initialized backend instance, or nullptr on failure. | ||||
|  */ | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device); | ||||
|  | ||||
| /** | ||||
|  * @brief Checks if a given backend is a CANN backend. | ||||
|  * | ||||
|  * This function verifies if the provided backend is a CANN backend by comparing | ||||
|  * its GUID with the CANN backend's GUID. | ||||
|  * | ||||
|  * @param backend The backend instance to check. | ||||
|  * @return True if the backend is a CANN backend, false otherwise. | ||||
|  */ | ||||
| GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the CANN buffer type for a specified device. | ||||
|  * | ||||
|  * This function initializes and returns the buffer type interface associated | ||||
|  * with the given device. It ensures thread-safe access using a mutex. | ||||
|  * | ||||
|  * @param device The device index for which to retrieve the buffer type. | ||||
|  * @return A pointer to the buffer type interface for the specified device, or | ||||
|  * nullptr if the device index is out of range. | ||||
|  */ | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t | ||||
| ggml_backend_cann_buffer_type(int32_t device); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the number of CANN devices available. | ||||
|  * | ||||
|  * This function returns the number of CANN devices available based on | ||||
|  * information obtained from `ggml_cann_info()`. | ||||
|  * | ||||
|  * @return The number of CANN devices available. | ||||
|  */ | ||||
| GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void); | ||||
|  | ||||
| /** | ||||
|  * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU. | ||||
|  * | ||||
|  * @return A pointer to the host buffer type interface. | ||||
|  */ | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the description of a specific CANN device. | ||||
|  * | ||||
|  * This function sets the specified device, retrieves the SoC name, | ||||
|  * and writes it into the provided description buffer. | ||||
|  * | ||||
|  * @param device The device index to retrieve the description for. | ||||
|  * @param description Pointer to a buffer where the description will be written. | ||||
|  * @param description_size Size of the description buffer. | ||||
|  */ | ||||
| GGML_BACKEND_API void ggml_backend_cann_get_device_description( | ||||
|     int32_t device, char* description, size_t description_size); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the memory information of a specific CANN device. | ||||
|  * | ||||
|  * This function sets the specified device, retrieves the free and total | ||||
|  * memory information of the specified type (ACL_HBM_MEM), and stores them | ||||
|  * in the provided pointers. | ||||
|  * | ||||
|  * @param device The device index to retrieve memory information for. | ||||
|  * @param free Pointer to a variable where the free memory size will be stored. | ||||
|  * @param total Pointer to a variable where the total memory size will be | ||||
|  * stored. | ||||
|  */ | ||||
| GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device, | ||||
|                                                   size_t* free, | ||||
|                                                   size_t* total); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										39
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpp.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpp.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| #pragma once | ||||
|  | ||||
| #ifndef __cplusplus | ||||
| #error "This header is for C++ only" | ||||
| #endif | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-alloc.h" | ||||
| #include "ggml-backend.h" | ||||
| #include "gguf.h" | ||||
| #include <memory> | ||||
|  | ||||
| // Smart pointers for ggml types | ||||
| // | ||||
| // Each *_deleter functor below forwards to the matching C free function, so | ||||
| // the std::unique_ptr aliases give RAII ownership of the raw ggml handles: | ||||
| // the handle is released exactly once, when the owning unique_ptr goes out | ||||
| // of scope. | ||||
|  | ||||
| // ggml | ||||
|  | ||||
| struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } }; | ||||
| struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } }; | ||||
|  | ||||
| typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr; | ||||
| typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr; | ||||
|  | ||||
| // ggml-alloc | ||||
|  | ||||
| struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } }; | ||||
|  | ||||
| // NOTE(review): unlike the other aliases, this one names ggml_gallocr_t | ||||
| // rather than a bare struct tag; if ggml_gallocr_t is itself a pointer | ||||
| // typedef in ggml-alloc.h, the unique_ptr element type would be a | ||||
| // pointer-to-pointer and would not match the deleter's parameter — | ||||
| // TODO confirm against ggml-alloc.h. | ||||
| typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr; | ||||
|  | ||||
| // ggml-backend | ||||
|  | ||||
| struct ggml_backend_deleter        { void operator()(ggml_backend_t backend)       { ggml_backend_free(backend); } }; | ||||
| struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } }; | ||||
| struct ggml_backend_event_deleter  { void operator()(ggml_backend_event_t event)   { ggml_backend_event_free(event); } }; | ||||
| struct ggml_backend_sched_deleter  { void operator()(ggml_backend_sched_t sched)   { ggml_backend_sched_free(sched); } }; | ||||
|  | ||||
| typedef std::unique_ptr<ggml_backend,        ggml_backend_deleter>        ggml_backend_ptr; | ||||
| typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr; | ||||
| typedef std::unique_ptr<ggml_backend_event,  ggml_backend_event_deleter>  ggml_backend_event_ptr; | ||||
| typedef std::unique_ptr<ggml_backend_sched,  ggml_backend_sched_deleter>  ggml_backend_sched_ptr; | ||||
							
								
								
									
										135
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpu.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpu.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| // ggml CPU backend public API: graph compute plans, NUMA and threadpool | ||||
| // control, scalar tensor-element accessors, CPU feature detection, and | ||||
| // construction/configuration of the CPU ggml_backend. | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     // the compute plan that needs to be prepared for ggml_graph_compute() | ||||
|     // since https://github.com/ggerganov/ggml/issues/287 | ||||
|     struct ggml_cplan { | ||||
|         size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()` | ||||
|         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()` | ||||
|  | ||||
|         int n_threads; | ||||
|         struct ggml_threadpool * threadpool; | ||||
|  | ||||
|         // abort ggml_graph_compute when true | ||||
|         ggml_abort_callback abort_callback; | ||||
|         void *              abort_callback_data; | ||||
|     }; | ||||
|  | ||||
|     // numa strategies | ||||
|     enum ggml_numa_strategy { | ||||
|         GGML_NUMA_STRATEGY_DISABLED   = 0, | ||||
|         GGML_NUMA_STRATEGY_DISTRIBUTE = 1, | ||||
|         GGML_NUMA_STRATEGY_ISOLATE    = 2, | ||||
|         GGML_NUMA_STRATEGY_NUMACTL    = 3, | ||||
|         GGML_NUMA_STRATEGY_MIRROR     = 4, | ||||
|         GGML_NUMA_STRATEGY_COUNT | ||||
|     }; | ||||
|  | ||||
|     GGML_BACKEND_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems | ||||
|     GGML_BACKEND_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node | ||||
|  | ||||
|     // convenience constructors for single-value tensors | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value); | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value); | ||||
|  | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value); | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value); | ||||
|  | ||||
|     // scalar element accessors, addressed either by flat 1-d index or by | ||||
|     // per-dimension (i0..i3) indices | ||||
|     GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i); | ||||
|     GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value); | ||||
|  | ||||
|     GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3); | ||||
|     GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value); | ||||
|  | ||||
|     GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i); | ||||
|     GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value); | ||||
|  | ||||
|     GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3); | ||||
|     GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value); | ||||
|  | ||||
|     GGML_BACKEND_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params  * params); | ||||
|     GGML_BACKEND_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool); | ||||
|     GGML_BACKEND_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool); | ||||
|     GGML_BACKEND_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool); | ||||
|     GGML_BACKEND_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool); | ||||
|  | ||||
|     // ggml_graph_plan() has to be called before ggml_graph_compute() | ||||
|     // when plan.work_size > 0, caller must allocate memory for plan.work_data | ||||
|     GGML_BACKEND_API struct ggml_cplan ggml_graph_plan( | ||||
|                   const struct ggml_cgraph * cgraph, | ||||
|                                        int   n_threads, /* = GGML_DEFAULT_N_THREADS */ | ||||
|                     struct ggml_threadpool * threadpool /* = NULL */ ); | ||||
|     GGML_BACKEND_API enum ggml_status  ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); | ||||
|  | ||||
|     // same as ggml_graph_compute() but the work data is allocated as a part of the context | ||||
|     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data | ||||
|     GGML_BACKEND_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); | ||||
|  | ||||
|     // | ||||
|     // system info | ||||
|     // | ||||
|     // each query returns a non-zero value when the feature was detected | ||||
|  | ||||
|     // x86 | ||||
|     GGML_BACKEND_API int ggml_cpu_has_sse3       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_ssse3      (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx2       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_f16c       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_fma        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512     (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void); | ||||
|     // ARM | ||||
|     GGML_BACKEND_API int ggml_cpu_has_neon       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_dotprod    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_sve        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void);  // sve vector length in bytes | ||||
|     // other | ||||
|     GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_vsx        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_llamafile  (void); | ||||
|  | ||||
|     // Internal types and functions exposed for tests and benchmarks | ||||
|  | ||||
|     typedef void (*ggml_vec_dot_t)  (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx, | ||||
|                                        const void * GGML_RESTRICT y, size_t by, int nrc); | ||||
|  | ||||
|     // per-type CPU dispatch table entry (see ggml_get_type_traits_cpu) | ||||
|     struct ggml_type_traits_cpu { | ||||
|         ggml_from_float_t        from_float; | ||||
|         ggml_vec_dot_t           vec_dot; | ||||
|         enum ggml_type           vec_dot_type; | ||||
|         int64_t                  nrows; // number of rows to process simultaneously | ||||
|     }; | ||||
|  | ||||
|     GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type); | ||||
|  | ||||
|     GGML_BACKEND_API void ggml_cpu_init(void); | ||||
|  | ||||
|     // | ||||
|     // CPU backend | ||||
|     // | ||||
|  | ||||
|     GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void); | ||||
|  | ||||
|     GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend); | ||||
|     GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads); | ||||
|     GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool); | ||||
|     GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data); | ||||
|  | ||||
|     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										47
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cuda.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cuda.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| // ggml CUDA backend public API. The same header serves ROCm (GGML_USE_HIP) | ||||
| // and MUSA (GGML_USE_MUSA) builds; only the reported backend/BLAS names | ||||
| // change via the macros below. | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_HIP | ||||
| #define GGML_CUDA_NAME "ROCm" | ||||
| #define GGML_CUBLAS_NAME "hipBLAS" | ||||
| #elif defined(GGML_USE_MUSA) | ||||
| #define GGML_CUDA_NAME "MUSA" | ||||
| #define GGML_CUBLAS_NAME "muBLAS" | ||||
| #else | ||||
| #define GGML_CUDA_NAME "CUDA" | ||||
| #define GGML_CUBLAS_NAME "cuBLAS" | ||||
| #endif | ||||
| #define GGML_CUDA_MAX_DEVICES       16 | ||||
|  | ||||
| // backend API | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend); | ||||
|  | ||||
| // device buffer | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device); | ||||
|  | ||||
| // split tensor buffer that splits matrices by rows across multiple devices | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split); | ||||
|  | ||||
| // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void); | ||||
|  | ||||
| // device enumeration and memory introspection | ||||
| GGML_BACKEND_API int  ggml_backend_cuda_get_device_count(void); | ||||
| GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size); | ||||
| GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total); | ||||
|  | ||||
| // pin/unpin an existing host allocation for faster host<->device transfers | ||||
| GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size); | ||||
| GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										50
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-kompute.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-kompute.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| // ggml Kompute (Vulkan compute) backend public API: Vulkan device | ||||
| // enumeration/selection plus the standard backend constructors. | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #include <stdbool.h> | ||||
| #include <stddef.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define GGML_KOMPUTE_MAX_DEVICES 16 | ||||
|  | ||||
| // description of one available Vulkan device | ||||
| struct ggml_vk_device { | ||||
|     int index; | ||||
|     int type; // same as VkPhysicalDeviceType | ||||
|     size_t heapSize; | ||||
|     const char * name; | ||||
|     const char * vendor; | ||||
|     int subgroupSize; | ||||
|     uint64_t bufferAlignment; | ||||
|     uint64_t maxAlloc; | ||||
| }; | ||||
|  | ||||
| // NOTE(review): unlike the backend functions below, the ggml_vk_* device | ||||
| // helpers are declared without GGML_BACKEND_API — confirm whether they are | ||||
| // meant to be exported from a shared-library build. | ||||
| // NOTE(review): ownership of the array returned by ggml_vk_available_devices | ||||
| // is not documented here — presumably the caller frees it; verify in the | ||||
| // implementation. | ||||
| struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count); | ||||
| bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name); | ||||
| bool ggml_vk_has_vulkan(void); | ||||
| bool ggml_vk_has_device(void); | ||||
| struct ggml_vk_device ggml_vk_current_device(void); | ||||
|  | ||||
| // | ||||
| // backend API | ||||
| // | ||||
|  | ||||
| // forward declaration | ||||
| // NOTE(review): ggml_backend_t is already typedef'd by ggml-backend.h, which | ||||
| // is included above; the duplicate typedef is legal in C11 but redundant. | ||||
| typedef struct ggml_backend * ggml_backend_t; | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										66
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-metal.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-metal.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| // Note: this description is outdated | ||||
| // | ||||
| // An interface allowing to compute ggml_cgraph with Metal | ||||
| // | ||||
| // This is a fully functional interface that extends ggml with GPU support for Apple devices. | ||||
| // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.) | ||||
| // | ||||
| // How it works? | ||||
| // | ||||
| // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this | ||||
| // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you | ||||
| // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.) | ||||
| // | ||||
| // You only need to make sure that all memory buffers that you used during the graph creation | ||||
| // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is | ||||
| // used during the graph evaluation to determine the arguments of the compute kernels. | ||||
| // | ||||
| // Synchronization between device and host memory (for example for input and output tensors) | ||||
| // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions. | ||||
| // | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #include <stddef.h> | ||||
| #include <stdbool.h> | ||||
|  | ||||
| struct ggml_tensor; | ||||
| struct ggml_cgraph; | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // | ||||
| // backend API | ||||
| // user-code should use only these functions | ||||
| // | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend); | ||||
|  | ||||
| GGML_DEPRECATED( | ||||
|         GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size), | ||||
|         "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713"); | ||||
|  | ||||
| GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void); | ||||
|  | ||||
| // helper to check if the device supports a specific family | ||||
| // ideally, the user code should be doing these checks | ||||
| // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf | ||||
| GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family); | ||||
|  | ||||
| // capture all command buffers committed the next time `ggml_backend_graph_compute` is called | ||||
| GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend); | ||||
|  | ||||
| // backend-registry entry point for the Metal backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										26
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opencl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opencl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| // ggml OpenCL backend public API. | ||||
| // NOTE(review): this header uses classic include guards while the sibling | ||||
| // backend headers use #pragma once — stylistic inconsistency only. | ||||
| #ifndef GGML_OPENCL_H | ||||
| #define GGML_OPENCL_H | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // | ||||
| // backend API | ||||
| // | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void); | ||||
| GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void); | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif // GGML_OPENCL_H | ||||
							
								
								
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opt.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opt.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,216 @@ | ||||
| // This file contains functionality for training models using GGML. | ||||
| // It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets. | ||||
| // At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code. | ||||
| // | ||||
| // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #include <stdint.h> | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     // opaque handle types; definitions live in the implementation | ||||
|     struct ggml_opt_dataset; | ||||
|     struct ggml_opt_context; | ||||
|     struct ggml_opt_result; | ||||
|  | ||||
|     typedef struct ggml_opt_dataset * ggml_opt_dataset_t; | ||||
|     typedef struct ggml_opt_context * ggml_opt_context_t; | ||||
|     typedef struct ggml_opt_result  * ggml_opt_result_t; | ||||
|  | ||||
|     // ====== Loss ====== | ||||
|  | ||||
|     // built-in loss types, i.e. the built-in quantities minimized by the optimizer | ||||
|     // custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value | ||||
|     enum ggml_opt_loss_type { | ||||
|         GGML_OPT_LOSS_TYPE_MEAN, | ||||
|         GGML_OPT_LOSS_TYPE_SUM, | ||||
|         GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, | ||||
|         GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, | ||||
|     }; | ||||
|  | ||||
|     // ====== Dataset ====== | ||||
|  | ||||
|     GGML_API ggml_opt_dataset_t ggml_opt_dataset_init( | ||||
|             int64_t ne_datapoint, // number of elements per datapoint | ||||
|             int64_t ne_label,     // number of elements per label | ||||
|             int64_t ndata,        // total number of datapoints/labels | ||||
|             int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied) | ||||
|     GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset); | ||||
|  | ||||
|     // get underlying tensors that store the data | ||||
|     GGML_API struct ggml_tensor * ggml_opt_dataset_data  (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata] | ||||
|     GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label,     ndata] | ||||
|  | ||||
|     // shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative | ||||
|     GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata); | ||||
|  | ||||
|     // get batch at position ibatch from dataset and copy the data to data_batch and labels_batch | ||||
|     GGML_API void ggml_opt_dataset_get_batch( | ||||
|             ggml_opt_dataset_t   dataset, | ||||
|             struct ggml_tensor * data_batch,   // shape = [ne_datapoint, ndata_batch] | ||||
|             struct ggml_tensor * labels_batch, // shape = [ne_label,     ndata_batch] | ||||
|             int64_t              ibatch); | ||||
|  | ||||
|     // ====== Model / Context ====== | ||||
|  | ||||
|     // how much of the compute graph to build: forward only, forward+grad, or forward+grad+optimizer step | ||||
|     enum ggml_opt_build_type { | ||||
|         GGML_OPT_BUILD_TYPE_FORWARD, | ||||
|         GGML_OPT_BUILD_TYPE_GRAD, | ||||
|         GGML_OPT_BUILD_TYPE_OPT, | ||||
|     }; | ||||
|  | ||||
|     // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss | ||||
|     struct ggml_opt_optimizer_params { | ||||
|         // AdamW optimizer parameters | ||||
|         struct { | ||||
|             float alpha; // learning rate | ||||
|             float beta1; | ||||
|             float beta2; | ||||
|             float eps;   // epsilon for numerical stability | ||||
|             float wd;    // weight decay for AdamW, use 0.0f to disable | ||||
|         } adamw; | ||||
|     }; | ||||
|  | ||||
|     // callback to calculate optimizer parameters prior to a backward pass | ||||
|     // userdata can be used to pass arbitrary data | ||||
|     typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata); | ||||
|  | ||||
|     // returns the default optimizer params (constant) | ||||
|     // userdata is not used | ||||
|     GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata); | ||||
|  | ||||
|     // parameters for initializing a new optimization context | ||||
|     struct ggml_opt_params { | ||||
|         ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs | ||||
|  | ||||
|         struct ggml_context * ctx_compute; // created in user code, holds non-static tensors | ||||
|  | ||||
|         // the forward graph is defined by inputs and outputs | ||||
|         // those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts | ||||
|         struct ggml_tensor * inputs; | ||||
|         struct ggml_tensor * outputs; | ||||
|  | ||||
|         enum ggml_opt_loss_type  loss_type; | ||||
|         enum ggml_opt_build_type build_type; | ||||
|  | ||||
|         int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done | ||||
|  | ||||
|         ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters | ||||
|         void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters | ||||
|     }; | ||||
|  | ||||
|     // get parameters for an optimization context with defaults set where possible | ||||
|     // parameters for which no sensible defaults exist are supplied as arguments to this function | ||||
|     // NOTE(review): ggml_opt_params is used here (and ggml_context/ggml_tensor | ||||
|     // in ggml_opt_fit below) without the struct keyword and no typedef is | ||||
|     // visible in this header; that compiles as C++ but presumably not as | ||||
|     // plain C inside this extern "C" block — verify against upstream. | ||||
|     GGML_API ggml_opt_params ggml_opt_default_params( | ||||
|             ggml_backend_sched_t      backend_sched, | ||||
|             struct ggml_context     * ctx_compute, | ||||
|             struct ggml_tensor      * inputs, | ||||
|             struct ggml_tensor      * outputs, | ||||
|             enum ggml_opt_loss_type   loss_type); | ||||
|  | ||||
|     GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params); | ||||
|     GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx); | ||||
|  | ||||
|     // set gradients to zero, initialize loss, and optionally reset the optimizer | ||||
|     GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer); | ||||
|  | ||||
|     // get underlying tensors that store data | ||||
|     GGML_API struct ggml_tensor * ggml_opt_inputs(  ggml_opt_context_t opt_ctx); // forward graph input tensor | ||||
|     GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor | ||||
|     GGML_API struct ggml_tensor * ggml_opt_labels(  ggml_opt_context_t opt_ctx); // labels to compare outputs against | ||||
|     GGML_API struct ggml_tensor * ggml_opt_loss(    ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss | ||||
|     GGML_API struct ggml_tensor * ggml_opt_pred(    ggml_opt_context_t opt_ctx); // predictions made by outputs | ||||
|     GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels | ||||
|  | ||||
|     GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node); | ||||
|  | ||||
|     // ====== Optimization Result ====== | ||||
|  | ||||
|     // NOTE(review): empty parentheses declare an unspecified-parameter | ||||
|     // function in C prior to C23; for a C-facing prototype this should be | ||||
|     // (void) — confirm against upstream before changing vendored code. | ||||
|     GGML_API ggml_opt_result_t ggml_opt_result_init(); | ||||
|     GGML_API void ggml_opt_result_free(ggml_opt_result_t result); | ||||
|     GGML_API void ggml_opt_result_reset(ggml_opt_result_t result); | ||||
|  | ||||
|     // get data from result, uncertainties are optional and can be ignored by passing NULL | ||||
|     GGML_API void ggml_opt_result_ndata(   ggml_opt_result_t result, int64_t * ndata);                  // writes 1 value, number of datapoints | ||||
|     GGML_API void ggml_opt_result_loss(    ggml_opt_result_t result, double  * loss,     double * unc); // writes 1 value | ||||
|     GGML_API void ggml_opt_result_pred(    ggml_opt_result_t result, int32_t * pred);                   // writes ndata values | ||||
|     GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double  * accuracy, double * unc); // writes 1 value | ||||
|  | ||||
|     // ====== Computation ====== | ||||
|  | ||||
|     // do forward pass, increment result if not NULL | ||||
|     GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); | ||||
|  | ||||
|     // do forward pass, increment result if not NULL, do backward pass | ||||
|     GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); | ||||
|  | ||||
|     // ############################################################################ | ||||
|     // ## The high-level functions start here. They do not depend on any private ## | ||||
|     // ## functions or structs and can be copied to and adapted for user code.   ## | ||||
|     // ############################################################################ | ||||
|  | ||||
|     // ====== Intended Usage ====== | ||||
|     // | ||||
|     // 1. Select the appropriate loss for your problem. | ||||
|     // 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them. | ||||
|     //    Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster). | ||||
|     // 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors. | ||||
|     //    The first context should contain the model parameters and inputs and be allocated statically in user code. | ||||
|     //    The second context should contain all other tensors and will be (re)allocated automatically. | ||||
|     //    Due to this automated allocation the data of the second context is not defined when accessed in user code. | ||||
|     //    Note that the second dimension of the inputs/outputs are interpreted as the number of datapoints in those tensors. | ||||
|     // 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead. | ||||
|  | ||||
|     // signature for a callback while evaluating opt_ctx on dataset, called after an evaluation | ||||
|     typedef void (*ggml_opt_epoch_callback)( | ||||
|             bool               train,       // true after training evaluation, false after validation evaluation | ||||
|             ggml_opt_context_t opt_ctx, | ||||
|             ggml_opt_dataset_t dataset, | ||||
|             ggml_opt_result_t  result,      // result associated with the dataset subsection | ||||
|             int64_t            ibatch,      // number of batches that have been evaluated so far | ||||
|             int64_t            ibatch_max,  // total number of batches in this dataset subsection | ||||
|             int64_t            t_start_us); // time at which the evaluation on the dataset subsection was started | ||||
|  | ||||
|     // do training on front of dataset, do evaluation only on back of dataset | ||||
|     GGML_API void ggml_opt_epoch( | ||||
|             ggml_opt_context_t      opt_ctx, | ||||
|             ggml_opt_dataset_t      dataset, | ||||
|             ggml_opt_result_t       result_train,   // result to increment during training, ignored if NULL | ||||
|             ggml_opt_result_t       result_eval,    // result to increment during evaluation, ignored if NULL | ||||
|             int64_t                 idata_split,    // data index at which to split training and evaluation | ||||
|             ggml_opt_epoch_callback callback_train, | ||||
|             ggml_opt_epoch_callback callback_eval); | ||||
|  | ||||
|     // callback that prints a progress bar on stderr | ||||
|     GGML_API void ggml_opt_epoch_callback_progress_bar( | ||||
|             bool               train, | ||||
|             ggml_opt_context_t opt_ctx, | ||||
|             ggml_opt_dataset_t dataset, | ||||
|             ggml_opt_result_t  result, | ||||
|             int64_t            ibatch, | ||||
|             int64_t            ibatch_max, | ||||
|             int64_t            t_start_us); | ||||
|  | ||||
|     // fit model defined by inputs and outputs to dataset | ||||
|     GGML_API void ggml_opt_fit( | ||||
|             ggml_backend_sched_t            backend_sched,  // backend scheduler for constructing the compute graphs | ||||
|             ggml_context                  * ctx_compute,    // context with temporarily allocated tensors to calculate the outputs | ||||
|             ggml_tensor                   * inputs,         // input tensor with shape [ne_datapoint, ndata_batch] | ||||
|             ggml_tensor                   * outputs,        // output tensor, must have shape [ne_label, ndata_batch] if labels are used | ||||
|             ggml_opt_dataset_t              dataset,        // dataset with data and optionally also labels | ||||
|             enum ggml_opt_loss_type         loss_type,      // loss to minimize | ||||
|             ggml_opt_get_optimizer_params   get_opt_pars,   // callback to get optimizer params, userdata is pointer to epoch (of type int64_t) | ||||
|             int64_t                         nepoch,         // how many times the dataset should be iterated over | ||||
|             int64_t                         nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs | ||||
|             float                           val_split,      // fraction of the dataset to use for validation, must be in [0.0f, 1.0f) | ||||
|             bool                            silent);        // whether or not info prints to stderr should be suppressed | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-rpc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-rpc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // upper bound on the number of RPC servers a single build can address | ||||
| #define GGML_RPC_MAX_SERVERS       16 | ||||
|  | ||||
| // backend API | ||||
| // create a backend that forwards work to the RPC server at `endpoint` | ||||
| // (endpoint format not defined here -- presumably "host:port"; confirm in the ggml-rpc implementation) | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint); | ||||
| // returns true if `backend` was created by ggml_backend_rpc_init | ||||
| GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend); | ||||
|  | ||||
| // buffer type whose allocations are backed by the server at `endpoint` -- TODO confirm | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint); | ||||
|  | ||||
| // query the remote server's device memory; `free` and `total` are out-parameters (bytes) | ||||
| GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total); | ||||
|  | ||||
| // serve the local `backend` over RPC on `endpoint`, advertising the given memory sizes | ||||
| // NOTE(review): likely blocks the calling thread -- verify against the implementation | ||||
| GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem); | ||||
|  | ||||
| // backend registry entry for the RPC backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); | ||||
|  | ||||
| // register the server at `endpoint` as a ggml device | ||||
| GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										49
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-sycl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-sycl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| // | ||||
| //  MIT license | ||||
| //  Copyright (C) 2024 Intel Corporation | ||||
| //  SPDX-License-Identifier: MIT | ||||
| // | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #define GGML_SYCL_NAME "SYCL" | ||||
| // upper bound on the number of SYCL devices the backend enumerates | ||||
| #define GGML_SYCL_MAX_DEVICES 48 | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // backend API | ||||
| // create a backend on SYCL device number `device` | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device); | ||||
|  | ||||
| // returns true if `backend` was created by ggml_backend_sycl_init | ||||
| GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend); | ||||
|  | ||||
| // device buffer | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device); | ||||
|  | ||||
| // split tensor buffer that splits matrices by rows across multiple devices | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split); | ||||
|  | ||||
| // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void); | ||||
|  | ||||
| GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void); | ||||
| // fill `id_list` with up to `max_len` SYCL GPU ids -- presumably unused slots get a sentinel; confirm in implementation | ||||
| GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len); | ||||
| // copy a human-readable device name into `description` (at most `description_size` bytes -- presumably truncating) | ||||
| GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device, | ||||
|                                                        char *description, | ||||
|                                                        size_t description_size); | ||||
| // NOTE(review): `()` declares an unspecified parameter list in C (pre-C23); prefer `(void)` for a true prototype | ||||
| GGML_BACKEND_API int  ggml_backend_sycl_get_device_count(); | ||||
| // query free/total memory (bytes) of `device` via the out-parameters | ||||
| GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total); | ||||
|  | ||||
| // SYCL doesn't support registering host memory, keep here for reference | ||||
| // GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size); | ||||
| // GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer); | ||||
|  | ||||
| // backend registry entry for the SYCL backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										31
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-vulkan.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-vulkan.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define GGML_VK_NAME "Vulkan" | ||||
| // upper bound on the number of Vulkan devices the backend enumerates | ||||
| #define GGML_VK_MAX_DEVICES 16 | ||||
|  | ||||
| // initialize the global Vulkan instance -- TODO confirm whether repeated calls are safe | ||||
| GGML_BACKEND_API void ggml_vk_instance_init(void); | ||||
|  | ||||
| // backend API | ||||
| // create a backend on Vulkan device number `dev_num` | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num); | ||||
|  | ||||
| // returns true if `backend` was created by ggml_backend_vk_init | ||||
| GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend); | ||||
| GGML_BACKEND_API int  ggml_backend_vk_get_device_count(void); | ||||
| // copy a human-readable device name into `description` (at most `description_size` bytes -- presumably truncating) | ||||
| GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size); | ||||
| // query free/total memory (bytes) of `device` via the out-parameters | ||||
| GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total); | ||||
|  | ||||
| // buffer type for allocations on Vulkan device `dev_num` | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num); | ||||
| // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void); | ||||
|  | ||||
| // backend registry entry for the Vulkan backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										2193
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2193
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										202
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/gguf.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/gguf.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | ||||
| // This file contains functionality related to "GGUF" files, the binary file format used by ggml. | ||||
| // GGUF files have the following structure: | ||||
| // | ||||
| // 1. File magic "GGUF" (4 bytes). | ||||
| // 2. File version (uint32_t). | ||||
| // 3. Number of ggml tensors in file (int64_t). | ||||
| // 4. Number of key-value-pairs in file (int64_t). | ||||
| // 5. For each KV pair: | ||||
| //   1. The key (string). | ||||
| //   2. The value type (gguf_type). | ||||
| //   3a. If the value type is GGUF_TYPE_ARRAY: | ||||
| //     1. The type of the array (gguf_type). | ||||
| //     2. The number of elements in the array (uint64_t). | ||||
| //     3. The binary representation of each element in the array. | ||||
| //   3b. Otherwise: | ||||
| //     1. The binary representation of the value. | ||||
| // 6. For each ggml tensor: | ||||
| //   1. The tensor name (string). | ||||
| //   2. The number of dimensions of the tensor (uint32_t). | ||||
| //   3. For each dimension: | ||||
| //     1. The size of the tensor in the dimension (int64_t). | ||||
| //   4. The tensor data type (ggml_type). | ||||
| //   5. The tensor data offset in the tensor data binary blob (uint64_t). | ||||
| // 7. The tensor data binary blob (optional, aligned). | ||||
| // | ||||
| // Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator. | ||||
| // All enums are stored as int32_t. | ||||
| // All bool values are stored as int8_t. | ||||
| // If the special key "general.alignment" (uint32_t) is defined it is used for alignment, | ||||
| //   otherwise GGUF_DEFAULT_ALIGNMENT is used. | ||||
| // | ||||
| // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
|  | ||||
| #include <stdbool.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| #define GGUF_MAGIC   "GGUF" | ||||
| #define GGUF_VERSION 3 | ||||
|  | ||||
| #define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment" | ||||
|  | ||||
| #define GGUF_DEFAULT_ALIGNMENT 32 | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     // types that can be stored as GGUF KV data | ||||
|     enum gguf_type { | ||||
|         GGUF_TYPE_UINT8   = 0, | ||||
|         GGUF_TYPE_INT8    = 1, | ||||
|         GGUF_TYPE_UINT16  = 2, | ||||
|         GGUF_TYPE_INT16   = 3, | ||||
|         GGUF_TYPE_UINT32  = 4, | ||||
|         GGUF_TYPE_INT32   = 5, | ||||
|         GGUF_TYPE_FLOAT32 = 6, | ||||
|         GGUF_TYPE_BOOL    = 7, | ||||
|         GGUF_TYPE_STRING  = 8, | ||||
|         GGUF_TYPE_ARRAY   = 9, | ||||
|         GGUF_TYPE_UINT64  = 10, | ||||
|         GGUF_TYPE_INT64   = 11, | ||||
|         GGUF_TYPE_FLOAT64 = 12, | ||||
|         GGUF_TYPE_COUNT,       // marks the end of the enum | ||||
|     }; | ||||
|  | ||||
|     struct gguf_context; | ||||
|  | ||||
|     struct gguf_init_params { | ||||
|         bool no_alloc; | ||||
|  | ||||
|         // if not NULL, create a ggml_context and allocate the tensor data in it | ||||
|         struct ggml_context ** ctx; | ||||
|     }; | ||||
|  | ||||
|     GGML_API struct gguf_context * gguf_init_empty(void); | ||||
|     GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); | ||||
|     //GGML_API struct gguf_context * gguf_init_from_buffer(..); | ||||
|  | ||||
|     GGML_API void gguf_free(struct gguf_context * ctx); | ||||
|  | ||||
|     GGML_API const char * gguf_type_name(enum gguf_type type); | ||||
|  | ||||
|     GGML_API uint32_t gguf_get_version    (const struct gguf_context * ctx); | ||||
|     GGML_API size_t   gguf_get_alignment  (const struct gguf_context * ctx); | ||||
|     GGML_API size_t   gguf_get_data_offset(const struct gguf_context * ctx); | ||||
|  | ||||
|     GGML_API int64_t      gguf_get_n_kv(const struct gguf_context * ctx); | ||||
|     GGML_API int64_t      gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found | ||||
|     GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     // will abort if the wrong type is used for the key | ||||
|     GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API size_t       gguf_get_arr_n   (const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     // get raw pointer to the first element of the array with the given key_id | ||||
|     // for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference) | ||||
|     GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     // get ith C string from array with given key_id | ||||
|     GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i); | ||||
|  | ||||
|     GGML_API int64_t        gguf_get_n_tensors    (const struct gguf_context * ctx); | ||||
|     GGML_API int64_t        gguf_find_tensor      (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found | ||||
|     GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id); | ||||
|     GGML_API const char *   gguf_get_tensor_name  (const struct gguf_context * ctx, int64_t tensor_id); | ||||
|     GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int64_t tensor_id); | ||||
|     GGML_API size_t         gguf_get_tensor_size  (const struct gguf_context * ctx, int64_t tensor_id); | ||||
|  | ||||
|     // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist) | ||||
|     GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key); | ||||
|  | ||||
|     // overrides an existing KV pair or adds a new one, the new KV pair is always at the back | ||||
|     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t      val); | ||||
|     GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t       val); | ||||
|     GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t     val); | ||||
|     GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t      val); | ||||
|     GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t     val); | ||||
|     GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t      val); | ||||
|     GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float        val); | ||||
|     GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t     val); | ||||
|     GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t      val); | ||||
|     GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double       val); | ||||
|     GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool         val); | ||||
|     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); | ||||
|  | ||||
|     // creates a new array with n elements of the given type and copies the corresponding number of bytes from data | ||||
|     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n); | ||||
|  | ||||
|     // creates a new array with n strings and copies the corresponding strings from data | ||||
|     GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n); | ||||
|  | ||||
|     // set or add KV pairs from another context | ||||
|     GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src); | ||||
|  | ||||
|     // add tensor to GGUF context, tensor name must be unique | ||||
|     GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); | ||||
|  | ||||
|     // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated | ||||
|     //   in such a way that the tensor data remains as one contiguous block (except for padding) | ||||
|     GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type); | ||||
|  | ||||
|     // assumes that at least gguf_get_tensor_size bytes can be read from data | ||||
|     GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data); | ||||
|  | ||||
|     // writing gguf files can be done in 3 ways: | ||||
|     // | ||||
|     // - write the entire gguf_context to a binary file in a single pass: | ||||
|     // | ||||
|     //   gguf_write_to_file(ctx, fname, /*only_meta =*/ false); | ||||
|     // | ||||
|     // - write only the meta data to a file, then re-open the file and append the tensor data: | ||||
|     // | ||||
|     //   gguf_write_to_file(ctx, fname, /*only_meta =*/ true); | ||||
|     //   FILE * f = fopen(fname, "ab"); | ||||
|     //   fwrite(..., f); // write tensor data (fwrite takes the stream last) | ||||
|     //   fclose(f); | ||||
|     // | ||||
|     // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: | ||||
|     // | ||||
|     //   FILE * f = fopen(fname, "wb"); | ||||
|     //   const size_t size_meta = gguf_get_meta_size(ctx); | ||||
|     //   fseek(f, size_meta, SEEK_SET); | ||||
|     //   fwrite(..., f); // write tensor data (fwrite takes the stream last) | ||||
|     //   void * data = malloc(size_meta); | ||||
|     //   gguf_get_meta_data(ctx, data); | ||||
|     //   rewind(f); | ||||
|     //   fwrite(data, 1, size_meta, f); // fixed: was fwrite(data, 1, data, f) -- a pointer is not a valid element count | ||||
|     //   free(data); | ||||
|     //   fclose(f); | ||||
|     // | ||||
|  | ||||
|     // write the entire context to a binary file | ||||
|     GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); | ||||
|  | ||||
|     // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding | ||||
|     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); | ||||
|  | ||||
|     // writes the meta data to pointer "data" | ||||
|     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										357
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										357
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,357 @@ | ||||
| include(CheckCXXCompilerFlag) | ||||
|  | ||||
| add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) | ||||
|  | ||||
| # enable libstdc++ assertions for debug builds | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|     add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) | ||||
| endif() | ||||
|  | ||||
| if (NOT MSVC) | ||||
|     if (GGML_SANITIZE_THREAD) | ||||
|         add_compile_options(-fsanitize=thread) | ||||
|         link_libraries     (-fsanitize=thread) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_SANITIZE_ADDRESS) | ||||
|         add_compile_options(-fsanitize=address -fno-omit-frame-pointer) | ||||
|         link_libraries     (-fsanitize=address) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_SANITIZE_UNDEFINED) | ||||
|         add_compile_options(-fsanitize=undefined) | ||||
|         link_libraries     (-fsanitize=undefined) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| # collect extra per-compiler warning flags for compiler id CCID at version CCVER; | ||||
| # results are handed back to the caller via GF_C_FLAGS / GF_CXX_FLAGS (PARENT_SCOPE) | ||||
| function(ggml_get_flags CCID CCVER) | ||||
|     set(C_FLAGS "") | ||||
|     set(CXX_FLAGS "") | ||||
|  | ||||
|     if (CCID MATCHES "Clang") | ||||
|         set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return) | ||||
|         set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) | ||||
|  | ||||
|         # -Wdouble-promotion only on sufficiently new Clang / AppleClang | ||||
|         if ( | ||||
|             (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR | ||||
|             (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) | ||||
|         ) | ||||
|             list(APPEND C_FLAGS -Wdouble-promotion) | ||||
|         endif() | ||||
|     elseif (CCID STREQUAL "GNU") | ||||
|         set(C_FLAGS   -Wdouble-promotion) | ||||
|         set(CXX_FLAGS -Wno-array-bounds) | ||||
|  | ||||
|         # -Wextra-semi for C++ arrived in GCC 8.1 | ||||
|         if (CCVER VERSION_GREATER_EQUAL 8.1.0) | ||||
|             list(APPEND CXX_FLAGS -Wextra-semi) | ||||
|         endif() | ||||
|     endif() | ||||
|  | ||||
|     set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE) | ||||
|     set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) | ||||
| endfunction() | ||||
|  | ||||
| if (GGML_FATAL_WARNINGS) | ||||
|     if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") | ||||
|         list(APPEND C_FLAGS   -Werror) | ||||
|         list(APPEND CXX_FLAGS -Werror) | ||||
|     elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") | ||||
|         add_compile_options(/WX) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (GGML_ALL_WARNINGS) | ||||
|     if (NOT MSVC) | ||||
|         list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) | ||||
|         list(APPEND C_FLAGS       -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes | ||||
|                                   -Werror=implicit-int -Werror=implicit-function-declaration) | ||||
|         list(APPEND CXX_FLAGS     -Wmissing-declarations -Wmissing-noreturn) | ||||
|  | ||||
|         list(APPEND C_FLAGS   ${WARNING_FLAGS}) | ||||
|         list(APPEND CXX_FLAGS ${WARNING_FLAGS}) | ||||
|  | ||||
|         ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) | ||||
|  | ||||
|         add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" | ||||
|                             "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") | ||||
|     else() | ||||
|         # todo : msvc | ||||
|         set(C_FLAGS   "") | ||||
|         set(CXX_FLAGS "") | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (GGML_LTO) | ||||
|     include(CheckIPOSupported) | ||||
|     check_ipo_supported(RESULT result OUTPUT output) | ||||
|     if (result) | ||||
|         set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) | ||||
|     else() | ||||
|         message(WARNING "IPO is not supported: ${output}") | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (GGML_CCACHE) | ||||
|     find_program(GGML_CCACHE_FOUND ccache) | ||||
|     find_program(GGML_SCCACHE_FOUND sccache) | ||||
|  | ||||
|     if (GGML_CCACHE_FOUND OR GGML_SCCACHE_FOUND) | ||||
|         if(GGML_CCACHE_FOUND) | ||||
|             set(GGML_CCACHE_VARIANT ccache) | ||||
|         else() | ||||
|             set(GGML_CCACHE_VARIANT sccache) | ||||
|         endif() | ||||
|         # TODO: should not be set globally | ||||
|         set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}") | ||||
|         set(ENV{CCACHE_SLOPPINESS} time_macros) | ||||
|         message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.") | ||||
|     else() | ||||
|         message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF") | ||||
|     endif () | ||||
| endif() | ||||
|  | ||||
| # this version of Apple ld64 is buggy | ||||
| execute_process( | ||||
|     COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v | ||||
|     ERROR_VARIABLE output | ||||
|     OUTPUT_QUIET | ||||
| ) | ||||
|  | ||||
| if (output MATCHES "dyld-1015\.7") | ||||
|     add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) | ||||
| endif() | ||||
|  | ||||
| # architecture specific | ||||
| # TODO: probably these flags need to be tweaked on some architectures | ||||
| #       feel free to update the Makefile for your architecture and send a pull request or issue | ||||
| message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") | ||||
| if (MSVC) | ||||
|     string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) | ||||
|     message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") | ||||
| else () | ||||
|     set(CMAKE_GENERATOR_PLATFORM_LWR "") | ||||
| endif () | ||||
|  | ||||
| if (NOT MSVC) | ||||
|     if (GGML_STATIC) | ||||
|         add_link_options(-static) | ||||
|         if (MINGW) | ||||
|             add_link_options(-static-libgcc -static-libstdc++) | ||||
|         endif() | ||||
|     endif() | ||||
|     if (GGML_GPROF) | ||||
|         add_compile_options(-pg) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (MINGW) | ||||
|     # Target Windows 8 for PrefetchVirtualMemory | ||||
|     add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER}) | ||||
| endif() | ||||
|  | ||||
| # | ||||
| # POSIX conformance | ||||
| # | ||||
|  | ||||
| # clock_gettime came in POSIX.1b (1993) | ||||
| # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional | ||||
| # posix_memalign came in POSIX.1-2001 / SUSv3 | ||||
| # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) | ||||
|  | ||||
| # Somehow in OpenBSD whenever POSIX conformance is specified | ||||
| # some string functions rely on locale_t availability, | ||||
| # which was introduced in POSIX.1-2008, forcing us to go higher | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") | ||||
|     add_compile_definitions(_XOPEN_SOURCE=700) | ||||
| else() | ||||
|     add_compile_definitions(_XOPEN_SOURCE=600) | ||||
| endif() | ||||
|  | ||||
| # Data types, macros and functions related to controlling CPU affinity and | ||||
| # some memory allocation are available on Linux through GNU extensions in libc | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android") | ||||
|     add_compile_definitions(_GNU_SOURCE) | ||||
| endif() | ||||
|  | ||||
| # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, | ||||
| # and on macOS its availability depends on enabling Darwin extensions | ||||
| # similarly on DragonFly, enabling BSD extensions is necessary | ||||
| if ( | ||||
|     CMAKE_SYSTEM_NAME MATCHES "Darwin" OR | ||||
|     CMAKE_SYSTEM_NAME MATCHES "iOS"    OR | ||||
|     CMAKE_SYSTEM_NAME MATCHES "tvOS"   OR | ||||
|     CMAKE_SYSTEM_NAME MATCHES "DragonFly" | ||||
| ) | ||||
|     add_compile_definitions(_DARWIN_C_SOURCE) | ||||
| endif() | ||||
|  | ||||
| # alloca is a non-standard interface that is not visible on BSDs when | ||||
| # POSIX conformance is specified, but not all of them provide a clean way | ||||
| # to enable it in such cases | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") | ||||
|     add_compile_definitions(__BSD_VISIBLE) | ||||
| endif() | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") | ||||
|     add_compile_definitions(_NETBSD_SOURCE) | ||||
| endif() | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") | ||||
|     add_compile_definitions(_BSD_SOURCE) | ||||
| endif() | ||||
|  | ||||
| if (WIN32) | ||||
|     add_compile_definitions(_CRT_SECURE_NO_WARNINGS) | ||||
| endif() | ||||
|  | ||||
| # ggml | ||||
|  | ||||
| if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS) | ||||
|     message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS") | ||||
| endif() | ||||
|  | ||||
| add_library(ggml-base | ||||
|             ../include/ggml.h | ||||
|             ../include/ggml-alloc.h | ||||
|             ../include/ggml-backend.h | ||||
|             ../include/ggml-cpp.h | ||||
|             ../include/ggml-opt.h | ||||
|             ../include/gguf.h | ||||
|             ggml.c | ||||
|             ggml-alloc.c | ||||
|             ggml-backend.cpp | ||||
|             ggml-opt.cpp | ||||
|             ggml-threading.cpp | ||||
|             ggml-threading.h | ||||
|             ggml-quants.c | ||||
|             ggml-quants.h | ||||
|             gguf.cpp) | ||||
|  | ||||
| target_include_directories(ggml-base PRIVATE .) | ||||
|  | ||||
| add_library(ggml | ||||
|             ggml-backend-reg.cpp) | ||||
|  | ||||
| target_link_libraries(ggml PUBLIC ggml-base) | ||||
|  | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|     target_link_libraries(ggml PRIVATE dl) | ||||
| endif() | ||||
|  | ||||
| # declare a backend target `backend` built from the remaining arguments (ARGN): | ||||
| # as a dynamically loadable MODULE when GGML_BACKEND_DL is set, otherwise as a | ||||
| # regular library linked into (and installed with) the `ggml` umbrella target | ||||
| function(ggml_add_backend_library backend) | ||||
|     if (GGML_BACKEND_DL) | ||||
|         add_library(${backend} MODULE ${ARGN}) | ||||
|         # write the shared library to the output directory | ||||
|         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) | ||||
|         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL) | ||||
|         add_dependencies(ggml ${backend}) | ||||
|     else() | ||||
|         add_library(${backend} ${ARGN}) | ||||
|         target_link_libraries(ggml PUBLIC ${backend}) | ||||
|         install(TARGETS ${backend} LIBRARY) | ||||
|     endif() | ||||
|  | ||||
|     target_link_libraries(${backend} PRIVATE ggml-base) | ||||
|     target_include_directories(${backend} PRIVATE ..) | ||||
|  | ||||
|     if (${BUILD_SHARED_LIBS}) | ||||
|         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD) | ||||
|         target_compile_definitions(${backend} PUBLIC  GGML_BACKEND_SHARED) | ||||
|     endif() | ||||
|  | ||||
|     # record the backend in the GGML_AVAILABLE_BACKENDS cache list (deduplicated) | ||||
|     if(NOT GGML_AVAILABLE_BACKENDS) | ||||
|         set(GGML_AVAILABLE_BACKENDS "${backend}" | ||||
|             CACHE INTERNAL "List of backends for cmake package") | ||||
|     else() | ||||
|         list(FIND GGML_AVAILABLE_BACKENDS "${backend}" has_backend) | ||||
|         if(has_backend EQUAL -1) | ||||
|             set(GGML_AVAILABLE_BACKENDS "${GGML_AVAILABLE_BACKENDS};${backend}" | ||||
|                 CACHE INTERNAL "List of backends for cmake package") | ||||
|         endif() | ||||
|     endif() | ||||
| endfunction() | ||||
|  | ||||
# Adds the subdirectory for ${backend} when its GGML_<BACKEND> option is ON.
# For statically linked builds (no GGML_BACKEND_DL) it also defines
# GGML_USE_<BACKEND> on the ggml target so the core registers the backend at
# compile time instead of loading it dynamically.
function(ggml_add_backend backend)
    string(TOUPPER "GGML_${backend}" backend_id)
    if (${backend_id})
        string(TOLOWER "ggml-${backend}" backend_target)
        add_subdirectory(${backend_target})
        message(STATUS "Including ${backend} backend")
        if (NOT GGML_BACKEND_DL)
            string(TOUPPER "GGML_USE_${backend}" backend_use)
            target_compile_definitions(ggml PUBLIC ${backend_use})
        endif()
    endif()
endfunction()
|  | ||||
# Builds one CPU backend variant named ${tag_name} with exactly the ISA
# feature flags listed in ${ARGN} enabled. All known feature flags are
# forced OFF first so a variant never inherits flags from the cache or from
# a previously configured variant.
function(ggml_add_cpu_backend_variant tag_name)
    set(GGML_CPU_TAG_NAME ${tag_name})
    # other: OPENMP LLAMAFILE CPU_HBM
    foreach (feat NATIVE
                  AVX AVX2 AVX_VNNI FMA F16C
                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
                  AMX_TILE AMX_INT8 AMX_BF16)
        set(GGML_${feat} OFF)
    endforeach()

    # Turn on only the features requested for this variant.
    foreach (feat ${ARGN})
        set(GGML_${feat} ON)
    endforeach()

    ggml_add_cpu_backend_variant_impl(${tag_name})
endfunction()
|  | ||||
# The CPU backend is always considered; the remaining backends are opt-in.
ggml_add_backend(CPU)

# GGML_CPU_ALL_VARIANTS builds one dynamically loadable CPU backend per x86
# microarchitecture level; the registry picks the best match at runtime.
if (GGML_CPU_ALL_VARIANTS)
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
    endif()
    ggml_add_cpu_backend_variant(sandybridge    AVX)
    ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 FMA)
    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
    ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
    if (NOT MSVC)
        # MSVC doesn't support AMX
        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
    endif()
elseif (GGML_CPU)
    ggml_add_cpu_backend_variant_impl("")
endif()

ggml_add_backend(BLAS)
ggml_add_backend(CANN)
ggml_add_backend(CUDA)
ggml_add_backend(HIP)
ggml_add_backend(Kompute)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(OpenCL)

# Public include path and language levels for the two core targets.
foreach (target ggml-base ggml)
    target_include_directories(${target} PUBLIC    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
    target_compile_features   (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
endforeach()

target_link_libraries(ggml-base PRIVATE Threads::Threads)

# Link libm where it exists as a separate library.
# NOTE(review): the ONEAPI_ROOT exception presumably avoids clashing with the
# Intel toolchain's own math library on Windows — confirm before changing.
find_library(MATH_LIBRARY m)
if (MATH_LIBRARY)
    if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
        target_link_libraries(ggml-base PRIVATE m)
    endif()
endif()

# Android needs an explicit libdl for dlopen().
if (CMAKE_SYSTEM_NAME MATCHES "Android")
    target_link_libraries(ggml-base PRIVATE dl)
endif()

if (BUILD_SHARED_LIBS)
    foreach (target ggml-base ggml)
        set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
        # GGML_BUILD selects dllexport while building; GGML_SHARED selects
        # dllimport in consumers.
        target_compile_definitions(${target} PRIVATE GGML_BUILD)
        target_compile_definitions(${target} PUBLIC  GGML_SHARED)
    endforeach()
endif()
							
								
								
									
										1042
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-alloc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1042
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-alloc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										107
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
# Build the AMX backend only for x86-64 targets compiled with GCC newer than
# 11 (older compilers lack the AMX intrinsics).
# NOTE(review): the OR/AND mix in this condition is not fully parenthesized,
# and it gates on CMAKE_COMPILER_IS_GNUCC (the C compiler) while comparing
# CMAKE_CXX_COMPILER_VERSION — verify the intended precedence and compiler
# check against upstream before relying on it.
if (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$") AND
        CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
    message(STATUS "Using AMX")

    # All backend headers/sources in this directory, plus the public header.
    file(GLOB   GGML_HEADERS_AMX "*.h")
    list(APPEND GGML_HEADERS_AMX "../../include/ggml-amx.h")

    file(GLOB   GGML_SOURCES_AMX "*.cpp")

    add_library(ggml-amx
                ${GGML_HEADERS_AMX}
                ${GGML_SOURCES_AMX})

    target_link_libraries(ggml-amx PRIVATE ggml-base)
    target_include_directories(ggml-amx PRIVATE . ..)

    # this is duplicated from the CPU backend, since the AMX backend also depends on the architecture flags
    # TODO: integrate AMX backend into the CPU backend
    if (MSVC)
        # instruction set detection for MSVC only
        if (GGML_NATIVE)
            # TODO: improve, should not reference files from the parent folder
            include(../ggml-cpu/cmake/FindSIMD.cmake)
        endif ()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS /arch:AVX512)
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
            # Do it manually.
            if (GGML_AVX512_VBMI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
            endif()
            if (GGML_AVX512_VNNI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
            endif()
            if (GGML_AVX512_BF16)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
            endif()
            if (GGML_AMX_TILE)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
            endif()
            if (GGML_AMX_INT8)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
            endif()
            if (GGML_AMX_BF16)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
            endif()
        elseif (GGML_AVX2)
            list(APPEND ARCH_FLAGS /arch:AVX2)
        elseif (GGML_AVX)
            list(APPEND ARCH_FLAGS /arch:AVX)
        endif()
    else()
        # GCC/Clang: translate each feature option into its -m flag.
        if (GGML_NATIVE)
            list(APPEND ARCH_FLAGS -march=native)
        endif()
        if (GGML_F16C)
            list(APPEND ARCH_FLAGS -mf16c)
        endif()
        if (GGML_FMA)
            list(APPEND ARCH_FLAGS -mfma)
        endif()
        if (GGML_AVX)
            list(APPEND ARCH_FLAGS -mavx)
        endif()
        if (GGML_AVX2)
            list(APPEND ARCH_FLAGS -mavx2)
        endif()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS -mavx512f)
            list(APPEND ARCH_FLAGS -mavx512dq)
            list(APPEND ARCH_FLAGS -mavx512bw)
        endif()
        if (GGML_AVX512_VBMI)
            list(APPEND ARCH_FLAGS -mavx512vbmi)
        endif()
        if (GGML_AVX512_VNNI)
            list(APPEND ARCH_FLAGS -mavx512vnni)
        endif()
        if (GGML_AVX512_BF16)
            list(APPEND ARCH_FLAGS -mavx512bf16)
        endif()
        if (GGML_AMX_TILE)
            list(APPEND ARCH_FLAGS -mamx-tile)
        endif()
        if (GGML_AMX_INT8)
            list(APPEND ARCH_FLAGS -mamx-int8)
        endif()
        if (GGML_AMX_BF16)
            list(APPEND ARCH_FLAGS -mamx-bf16)
        endif()
    endif()

    target_compile_options(ggml-amx PRIVATE ${ARCH_FLAGS})
else()
    # Unsupported platform/compiler: disable the option in the parent scope
    # so the rest of the build does not expect this backend.
    set(GGML_AMX OFF PARENT_SCOPE)
    message(WARNING "AMX requires x86 and gcc version > 11.0. Turning off GGML_AMX.")
endif()
							
								
								
									
										94
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| // hack until AMX is moved into the CPU backend | ||||
| #include "../ggml-cpu/ggml-cpu-impl.h" // <immintrin.h> | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <memory> | ||||
| #include <type_traits> | ||||
|  | ||||
| #if defined(_OPENMP) | ||||
| #include <omp.h> | ||||
| #endif | ||||
|  | ||||
// AMX tile dimensions used by the matmul kernels.
// NOTE(review): from this header alone, TILE_M/TILE_N/TILE_K appear to be
// the M/N/K blocking of a single tile multiply — confirm against mmq.cpp.
#define TILE_M 16
#define TILE_N 16
#define TILE_K 32
// VNNI packs 4 int8 values per 32-bit lane.
#define VNNI_BLK 4

#define AMX_BLK_SIZE 32

// Symbolic names for the eight AMX tile registers tmm0..tmm7.
#define TMM0 0
#define TMM1 1
#define TMM2 2
#define TMM3 3
#define TMM4 4
#define TMM5 5
#define TMM6 6
#define TMM7 7
|  | ||||
// parallel routines

// Ceiling division for integral types: smallest q such that q * den >= num
// (for non-negative operands).
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T div_up(T num, T den) {
    return (num + den - 1) / den;
}
|  | ||||
// Splits n work items across nth workers and returns worker ith's half-open
// range [n_start, n_end). The active branch uses the PyTorch ATen pattern:
// every worker gets a chunk of ceil(n / nth) items, so trailing workers may
// receive an empty (clamped) range. The disabled branch preserves the
// oneDNN partitioning scheme for reference.
template <typename T>
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
#if 0
    // onednn partition pattern
    T& n_my = n_end;
    if (nth <= 1 || n == 0) {
        n_start = 0;
        n_my = n;
    } else {
        T n1 = div_up(n, nth);
        T n2 = n1 - 1;
        T T1 = n - n2 * nth;
        n_my = ith < T1 ? n1 : n2;
        n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2;
    }
    n_end += n_start;
#else
    // pytorch aten partition pattern
    T n_my = div_up(n, nth);
    n_start = ith * n_my;
    n_end = std::min(n_start + n_my, n);
#endif
}
|  | ||||
// Runs f(begin, end) over the range [0, n). With OpenMP, the range is
// partitioned via balance211() across an nth-thread parallel region;
// without OpenMP the whole range is processed by a single serial call.
template <typename func_t>
inline void parallel_for(int nth, int n, const func_t& f) {
#if defined(_OPENMP)
#pragma omp parallel num_threads(nth)
{
    //int nth = omp_get_num_threads();
    int ith = omp_get_thread_num();
    int tbegin, tend;
    balance211(n, nth, ith, tbegin, tend);
    f(tbegin, tend);
}
#else
    f(0, n);

    GGML_UNUSED(nth);
#endif
}
|  | ||||
| // quantized types that have AMX support | ||||
| inline bool qtype_has_amx_kernels(const enum ggml_type type) { | ||||
|     // TODO: fix padding for vnni format | ||||
|     return (type == GGML_TYPE_Q4_0) || | ||||
|         (type == GGML_TYPE_Q4_1); | ||||
|         //(type == GGML_TYPE_Q8_0) || | ||||
|         //(type == GGML_TYPE_Q4_K) || | ||||
|         //(type == GGML_TYPE_Q5_K) || | ||||
|         //(type == GGML_TYPE_Q6_K) || | ||||
|         //(type == GGML_TYPE_IQ4_XS); | ||||
| } | ||||
|  | ||||
// ggml backend context
// Per-backend-instance state: thread count plus an owned scratch buffer.
struct ggml_backend_amx_context {
    int n_threads = GGML_DEFAULT_N_THREADS;  // threads used for compute
    std::unique_ptr<char[]> work_data;       // scratch buffer (owned)
    size_t work_size = 0;                    // capacity of work_data in bytes
};
							
								
								
									
										446
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										446
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,446 @@ | ||||
| #include "ggml-amx.h" | ||||
| #include "ggml-amx/common.h" | ||||
| #include "ggml-amx/mmq.h" | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-impl.h" | ||||
|  | ||||
| #if defined(__gnu_linux__) | ||||
| #include <sys/syscall.h> | ||||
| #include <unistd.h> | ||||
| #endif | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
|  | ||||
| #if defined(__AMX_INT8__) | ||||
|  | ||||
// AMX buffer interface
// The buffer context is simply the raw allocation returned by aligned_alloc.
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}

static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *)(buffer->context);
}

static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
    memset((char *)tensor->data + offset, value, size);

    GGML_UNUSED(buffer);
}

// Uploads host data into the tensor. Types with AMX kernels are repacked
// into the AMX weight layout on the way in; everything else is a plain copy.
static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
    if (qtype_has_amx_kernels(tensor->type)) {
        ggml_backend_amx_convert_weight(tensor, data, offset, size);
    } else {
        memcpy((char *)tensor->data + offset, data, size);
    }

    GGML_UNUSED(buffer);
}

// Downloads tensor data to the host. Repacked (AMX-kernel) types cannot be
// read back, since the stored layout is not the canonical one.
static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));
    memcpy(data, (const char *)tensor->data + offset, size);

    GGML_UNUSED(buffer);
}

// Copies src into dst when src lives in a host buffer (repacking if the
// type has AMX kernels); returns false to ask the caller to fall back
// otherwise.
static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
    if (ggml_backend_buffer_is_host(src->buffer)) {
        if (qtype_has_amx_kernels(src->type)) {
            ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_backend_amx_get_alloc_size(dst));
        } else {
            memcpy(dst->data, src->data, ggml_nbytes(src));
        }
        return true;
    }
    return false;

    GGML_UNUSED(buffer);
}

static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    memset(buffer->context, value, buffer->size);
}

// Buffer vtable wired into ggml_backend_buffer_init() below.
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer     = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base        = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor     = */ NULL, // no initialization required
    /* .memset_tensor   = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor      = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor      = */ ggml_backend_amx_buffer_get_tensor,
    /* .cpy_tensor      = */ ggml_backend_amx_buffer_cpy_tensor,
    /* .clear           = */ ggml_backend_amx_buffer_clear,
    /* .reset           = */ NULL,
};
|  | ||||
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "AMX";

    GGML_UNUSED(buft);
}

// Allocates a TENSOR_ALIGNMENT-aligned host buffer for this buffer type.
// NOTE(review): C11 aligned_alloc() requires size to be a multiple of the
// alignment on some libcs — confirm callers always pass padded sizes.
static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = aligned_alloc(TENSOR_ALIGNMENT, size);
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
}

static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    return TENSOR_ALIGNMENT;

    GGML_UNUSED(buft);
}

// Storage size for a tensor in this buffer type; may differ from
// ggml_nbytes() because AMX-kernel types are stored repacked.
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor* tensor) {
    return ggml_backend_amx_get_alloc_size(tensor);

    GGML_UNUSED(buft);
}

// Not a host buffer type: tensor data may be repacked, so generic host
// pointer access is not valid.
static bool ggml_backend_amx_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
    return false;

    GGML_UNUSED(buft);
}

// Singleton buffer type for the AMX backend.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
            /* .get_name         = */ ggml_backend_amx_buffer_type_get_name,
            /* .alloc_buffer     = */ ggml_backend_amx_buffer_type_alloc_buffer,
            /* .get_alignment    = */ ggml_backend_amx_buffer_type_get_alignment,
            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
            /* .get_alloc_size   = */ ggml_backend_amx_buffer_type_get_alloc_size,
            /* .is_host          = */ ggml_backend_amx_buffer_type_is_host,
        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
        /* .context = */ NULL,
    };

    return &ggml_backend_buffer_type_amx;
}
|  | ||||
// backend interface

static const char * ggml_backend_amx_name(ggml_backend_t backend) {
    return "AMX";

    GGML_UNUSED(backend);
}

// Destroys a backend instance created by ggml_backend_amx_init(); frees the
// context and the backend object itself.
static void ggml_backend_amx_free(ggml_backend_t backend) {
    ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;
    delete ctx;
    delete backend;
}
|  | ||||
// Executes the graph node by node. Only MUL_MAT does real work here;
// reshape/view/permute/transpose are metadata-only no-ops. Any other op
// aborts — supports_op() is expected to have filtered them out upstream.
static enum ggml_status ggml_backend_amx_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        switch (node->op) {
        case GGML_OP_MUL_MAT:
            ggml_backend_amx_mul_mat(ctx, node);
            break;

        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            break;

        default:
            fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
            GGML_ASSERT(false);
        }
    }

    return GGML_STATUS_SUCCESS;

    GGML_UNUSED(backend);
}
|  | ||||
// Backend vtable: only synchronous graph_compute is implemented; async
// transfers, events and graph plans are not supported by this backend.
static struct ggml_backend_i ggml_backend_amx_i = {
    /* .get_name                = */ ggml_backend_amx_name,
    /* .free                    = */ ggml_backend_amx_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ NULL,
    /* .graph_plan_free         = */ NULL,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_amx_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
};

// Stable unique identifier used by ggml_backend_is_amx() to recognize
// backend instances of this implementation.
static ggml_guid_t ggml_backend_amx_guid() {
    static ggml_guid guid = { 0x13, 0xb8, 0xa4, 0xc4, 0xba, 0xfe, 0x51, 0x67, 0x87, 0x44, 0x55, 0x15, 0xb2, 0x35, 0x62, 0x3e };
    return &guid;
}
|  | ||||
// arch_prctl() sub-commands and XSAVE feature numbers for AMX state
// (see the Linux arch_prctl(2) man page).
#define ARCH_GET_XCOMP_PERM     0x1022
#define ARCH_REQ_XCOMP_PERM     0x1023
#define XFEATURE_XTILECFG       17
#define XFEATURE_XTILEDATA      18

// Requests permission from the OS to use the AMX tile-data state.
// On Linux this must be done once per process via arch_prctl() before any
// tile instruction executes; Windows enables the state automatically.
// Returns true when AMX state is (assumed) usable.
static bool ggml_amx_init() {
#if defined(__gnu_linux__)
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
        fprintf(stderr, "AMX is not ready to be used!\n");
        return false;
    }
    return true;
#elif defined(_WIN32)
    return true;
#else
    // Fix: the original had no return on this path, so the function fell
    // off the end (undefined behavior) on platforms that are neither Linux
    // nor Windows. There is no known way to request AMX permission there,
    // so report it as unavailable.
    return false;
#endif
}
|  | ||||
// Creates a new AMX backend instance with a fresh context.
ggml_backend_t ggml_backend_amx_init() {

    // invoke a Linux system call to request access to AMX features
    // NOTE(review): the result of ggml_amx_init() is ignored, so a backend
    // is created even if the kernel denied AMX permission — confirm whether
    // callers rely on runtime checks elsewhere.
    ggml_amx_init();

    // backend context
    ggml_backend_amx_context * ctx = new ggml_backend_amx_context;

    // ggml amx backend
    ggml_backend_t backend = new ggml_backend {
        /* .guid      = */ ggml_backend_amx_guid(),
        /* .interface = */ ggml_backend_amx_i,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
        /* .context   = */ ctx,
    };

    return backend;
}

// True when the backend was produced by this implementation (guid match).
bool ggml_backend_is_amx(ggml_backend_t backend) {
    return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_amx_guid());
}

// Updates the thread count used by subsequent graph computations.
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
    GGML_ASSERT(ggml_backend_is_amx(backend_amx));

    ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend_amx->context;
    ctx->n_threads = n_threads;
}
|  | ||||
// device interface

static const char * ggml_backend_amx_device_get_name(ggml_backend_dev_t dev) {
    return "AMX";

    GGML_UNUSED(dev);
}

static const char * ggml_backend_amx_device_get_description(ggml_backend_dev_t dev) {
    return "Intel Advanced Matrix Extensions";

    GGML_UNUSED(dev);
}

// Memory is not tracked for this backend; both values are reported as 0.
static void ggml_backend_amx_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    // TODO
    *free = 0;
    *total = 0;

    GGML_UNUSED(dev);
}

// AMX is exposed as an accelerator attached to the host CPU rather than a
// discrete device.
static enum ggml_backend_dev_type ggml_backend_amx_device_get_type(ggml_backend_dev_t dev) {
    return GGML_BACKEND_DEVICE_TYPE_ACCEL;

    GGML_UNUSED(dev);
}

// Aggregates the getters above into the property struct used by the
// registry; no optional capabilities are advertised.
static void ggml_backend_amx_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_amx_device_get_name(dev);
    props->description = ggml_backend_amx_device_get_description(dev);
    props->type        = ggml_backend_amx_device_get_type(dev);
    ggml_backend_amx_device_get_memory(dev, &props->memory_free, &props->memory_total);

    // `buffer_from_host_ptr` is intended to be used in mmap, when memory layout unchanged
    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ false,
        /* .events                = */ false,
    };
}
|  | ||||
// Creates a backend instance for this device; params are unused.
static ggml_backend_t ggml_backend_amx_device_init(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_amx_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}

static ggml_backend_buffer_type_t ggml_backend_amx_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_amx_buffer_type();

    GGML_UNUSED(dev);
}

// Op filter consulted by the scheduler before assigning nodes to this
// backend; must stay consistent with what graph_compute() can execute.
static bool ggml_backend_amx_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {

    // handle only 2d gemm for now
    auto is_contiguous_2d = [](const struct ggml_tensor * t) {
        return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
    };

    switch (op->op) {
        // metadata-only ops are always accepted
        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            return true;

        case GGML_OP_MUL_MAT: {
            const struct ggml_tensor * src0 = op->src[0];
            const struct ggml_tensor * src1 = op->src[1];

            const enum ggml_type type = src0->type;
            const int64_t ne0 = op->ne[0];

            // amx kernels enables for Q4_0, Q4_1, Q8_0, F16
            // Q4_K, Q5_K, Q6_K, IQ4_XS enabled for QK_K = 256
            // NOTE(review): qtype_has_amx_kernels() currently accepts only
            // Q4_0/Q4_1, so the list above overstates the enabled types.
            bool has_amx_kernels = qtype_has_amx_kernels(type) || (type == GGML_TYPE_F16);

            bool can_use_amx =
                is_contiguous_2d(src0) &&       // src0 must be contiguous
                is_contiguous_2d(src1) &&       // src1 must be contiguous
                src1->type == GGML_TYPE_F32 &&  // src1 must be float32
                has_amx_kernels &&              // with amx kernel impls
                ne0 % (TILE_N * 2) == 0;        // out_features is 32x

            return can_use_amx;
        }
        default:
            return false;
    }

    GGML_UNUSED(dev);
}

// Only buffers of the AMX buffer type are usable by this backend
// (identified by comparing the get_name function pointer).
static bool ggml_backend_amx_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    return buft->iface.get_name == ggml_backend_amx_buffer_type_get_name;

    GGML_UNUSED(dev);
}
|  | ||||
// Device vtable: host-buffer helpers and event support are not provided.
static const struct ggml_backend_device_i ggml_backend_amx_device_i = {
    /* .get_name             = */ ggml_backend_amx_device_get_name,
    /* .get_description      = */ ggml_backend_amx_device_get_description,
    /* .get_memory           = */ ggml_backend_amx_device_get_memory,
    /* .get_type             = */ ggml_backend_amx_device_get_type,
    /* .get_props            = */ ggml_backend_amx_device_get_props,
    /* .init_backend         = */ ggml_backend_amx_device_init,
    /* .get_buffer_type      = */ ggml_backend_amx_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ NULL,
    /* .supports_op          = */ ggml_backend_amx_device_supports_op,
    /* .supports_buft        = */ ggml_backend_amx_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
|  | ||||
| // backend reg interface | ||||
|  | ||||
| static const char * ggml_backend_amx_reg_get_name(ggml_backend_reg_t reg) { | ||||
|     return "AMX"; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
| static size_t ggml_backend_amx_reg_get_device_count(ggml_backend_reg_t reg) { | ||||
|     return 1; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
| static ggml_backend_dev_t ggml_backend_amx_reg_get_device(ggml_backend_reg_t reg, size_t index) { | ||||
|     GGML_ASSERT(index == 0); | ||||
|  | ||||
|     static ggml_backend_device ggml_backend_amx_device = { | ||||
|         /* .iface   = */ ggml_backend_amx_device_i, | ||||
|         /* .reg     = */ reg, | ||||
|         /* .context = */ nullptr, | ||||
|     }; | ||||
|  | ||||
|     return &ggml_backend_amx_device; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
|     GGML_UNUSED(index); | ||||
| } | ||||
|  | ||||
| static void * ggml_backend_amx_get_proc_address(ggml_backend_reg_t reg, const char * name) { | ||||
|     if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) { | ||||
|         return (void *)ggml_backend_amx_set_n_threads; | ||||
|     } | ||||
|     return NULL; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
|     GGML_UNUSED(name); | ||||
| } | ||||
|  | ||||
// Registry interface for the AMX backend: fixed name, a single device,
// and one custom proc address (ggml_backend_set_n_threads).
static const struct ggml_backend_reg_i ggml_backend_amx_reg_i = {
    /* .get_name         = */ ggml_backend_amx_reg_get_name,
    /* .get_device_count = */ ggml_backend_amx_reg_get_device_count,
    /* .get_device       = */ ggml_backend_amx_reg_get_device,
    /* .get_proc_address = */ ggml_backend_amx_get_proc_address,
};
|  | ||||
| ggml_backend_reg_t ggml_backend_amx_reg(void) { | ||||
|     static struct ggml_backend_reg ggml_backend_amx_reg = { | ||||
|         /* .iface   = */ ggml_backend_amx_reg_i, | ||||
|         /* .context = */ NULL, | ||||
|     }; | ||||
|  | ||||
|     return &ggml_backend_amx_reg; | ||||
| } | ||||
|  | ||||
| #else // if defined(__AMX_INT8__) | ||||
|  | ||||
// Stub for builds without AMX support (__AMX_INT8__ not defined):
// there is no AMX buffer type, so report none.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void) {
    return nullptr;
}
|  | ||||
// Stub: when AMX support is compiled out, no backend can be the AMX backend.
bool ggml_backend_is_amx(ggml_backend_t backend) {
    GGML_UNUSED(backend);
    return false;
}
|  | ||||
// Stub: initialization always fails when AMX support is compiled out.
// Callers must handle the nullptr result; a diagnostic is written to stderr.
ggml_backend_t ggml_backend_amx_init(void) {
    fprintf(stderr, "GGML is not compiled with AMX support!\n");
    return nullptr;
}
|  | ||||
| void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) { | ||||
|     fprintf(stderr, "GGML is not compiled with AMX support!\n"); | ||||
|  | ||||
|     GGML_UNUSED(backend_amx); | ||||
|     GGML_UNUSED(n_threads); | ||||
| } | ||||
|  | ||||
// Stub: no registry entry is available when AMX support is compiled out.
ggml_backend_reg_t ggml_backend_amx_reg(void) {
    return nullptr;
}
|  | ||||
| #endif | ||||
							
								
								
									
										2510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										17
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,17 @@ | ||||
#pragma once
#include "common.h"
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Number of bytes the AMX backend needs to store `tensor` in its buffer
// (may differ from ggml_nbytes — see ggml_backend_buffer_type_i.get_alloc_size).
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);

// Converts `size` bytes of weight data `data`, starting at `offset`, into the
// AMX backend's internal storage for `tensor`.
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

// Runs the AMX matrix-multiplication kernel for `dst`, using context `ctx`.
// NOTE(review): `ggml_backend_amx_context` appears without a `struct` tag, so
// this declaration only compiles where that type name is already visible
// (e.g. when included from C++) — confirm plain-C callers are not expected.
void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);

#ifdef __cplusplus
}
#endif
							
								
								
									
										255
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										255
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,255 @@ | ||||
#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef  __cplusplus
extern "C" {
#endif

    // Version of the backend interface described by this header. Dynamically
    // loaded backends must report the same value, or loading is rejected
    // (see load_backend in ggml-backend-reg.cpp).
    #define GGML_BACKEND_API_VERSION 1

    //
    // Backend buffer type
    //

    // Virtual table for a buffer type ("allocator"): how buffers of this type
    // are created and which alignment/size constraints they have.
    struct ggml_backend_buffer_type_i {
        const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
        // allocate a buffer of this type
        ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
        // tensor alignment
        size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
        // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
        size_t                (*get_max_size)  (ggml_backend_buffer_type_t buft);
        // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
        size_t                (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
        // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
        bool                  (*is_host)       (ggml_backend_buffer_type_t buft);
    };

    struct ggml_backend_buffer_type {
        struct ggml_backend_buffer_type_i  iface;
        ggml_backend_dev_t device;   // device this buffer type belongs to
        void * context;              // backend-specific state
    };
|  | ||||
    //
    // Backend buffer
    //

    // Virtual table for one allocated buffer: tensor data access plus optional
    // lifetime/initialization hooks.
    struct ggml_backend_buffer_i {
        // (optional) free the buffer
        void         (*free_buffer)  (ggml_backend_buffer_t buffer);
        // base address of the buffer
        void *       (*get_base)     (ggml_backend_buffer_t buffer);
        // (optional) initialize a tensor in the buffer (eg. add tensor extras)
        void         (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
        // tensor data access
        void         (*memset_tensor)(ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size);
        void         (*set_tensor)   (ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
        void         (*get_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
        // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
        bool         (*cpy_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
        // clear the entire buffer
        void         (*clear)        (ggml_backend_buffer_t buffer, uint8_t value);
        // (optional) reset any internal state due to tensor initialization, such as tensor extras
        void         (*reset)        (ggml_backend_buffer_t buffer);
    };

    struct ggml_backend_buffer {
        struct ggml_backend_buffer_i  iface;
        ggml_backend_buffer_type_t    buft;  // type this buffer was allocated from
        void * context;                      // backend-specific state
        size_t size;                         // size passed to ggml_backend_buffer_init
        enum ggml_backend_buffer_usage usage;
    };

    GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
                   ggml_backend_buffer_type_t buft,
            struct ggml_backend_buffer_i      iface,
                   void *                     context,
                   size_t                     size);

    // do not use directly, use ggml_backend_tensor_copy instead
    GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);

    // multi-buffer
    // buffer that contains a collection of buffers
    GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
    GGML_API bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
    GGML_API void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|  | ||||
    //
    // Backend (stream)
    //

    // Virtual table for a backend execution stream: tensor transfers, graph
    // computation, and optional async/event support.
    struct ggml_backend_i {
        const char * (*get_name)(ggml_backend_t backend);

        void (*free)(ggml_backend_t backend);

        // (optional) asynchronous tensor data access
        void (*set_tensor_async)(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
        void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
        bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);

        // (optional) complete all pending operations (required if the backend supports async operations)
        void (*synchronize)(ggml_backend_t backend);

        // (optional) graph plans (not used currently)
        // compute graph with a plan
        ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
        void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
        // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
        void                      (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
        // compute the graph with the plan
        enum ggml_status          (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

        // compute graph (always async if supported by the backend)
        enum ggml_status          (*graph_compute)     (ggml_backend_t backend, struct ggml_cgraph * cgraph);

        // (optional) event synchronization
        // record an event on this stream
        void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
        // wait for an event on on a different stream
        void (*event_wait)  (ggml_backend_t backend, ggml_backend_event_t event);
    };

    struct ggml_backend {
        ggml_guid_t guid;            // unique identifier for this backend implementation
        struct ggml_backend_i iface;
        ggml_backend_dev_t device;   // device this stream runs on
        void * context;              // backend-specific state
    };

    struct ggml_backend_event {
        struct ggml_backend_device * device;
        void * context;
    };
|  | ||||
    //
    // Backend device
    //

    // Note: if additional properties are needed, we should add a struct with all of them
    //       the current functions to obtain the properties can remain, since they are more convenient for often used properties
    struct ggml_backend_device_i {
        // device name: short identifier for this device, such as "CPU" or "CUDA0"
        const char * (*get_name)(ggml_backend_dev_t dev);

        // device description: short informative description of the device, could be the model name
        const char * (*get_description)(ggml_backend_dev_t dev);

        // device memory in bytes
        void         (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);

        // device type
        enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);

        // device properties
        void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);

        // backend (stream) initialization
        ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);

        // preferred buffer type
        ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);

        // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
        ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);

        // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
        ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);

        // check if the backend can compute an operation
        bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

        // check if the backend can use tensors allocated in a buffer type
        bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);

        // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
        // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
        bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

        // (optional) event synchronization
        ggml_backend_event_t (*event_new)         (ggml_backend_dev_t dev);
        void                 (*event_free)        (ggml_backend_dev_t dev, ggml_backend_event_t event);
        void                 (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event);
    };

    struct ggml_backend_device {
        struct ggml_backend_device_i iface;
        ggml_backend_reg_t reg;   // registry that owns this device
        void * context;           // backend-specific state
    };
|  | ||||
    //
    // Backend (reg)
    //

    // Virtual table for a backend registry: enumerates the devices one backend
    // provides and optionally exposes extra entry points by name.
    struct ggml_backend_reg_i {
        const char * (*get_name)(ggml_backend_reg_t reg);

        // enumerate available devices
        size_t             (*get_device_count)(ggml_backend_reg_t reg);
        ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index);

        // (optional) get a pointer to a function in the backend
        // backends can add custom functions that are not part of the standard ggml-backend interface
        void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
    };

    struct ggml_backend_reg {
        int api_version; // initialize to GGML_BACKEND_API_VERSION
        struct ggml_backend_reg_i iface;
        void * context;
    };

    // Internal backend registry API
    GGML_API void ggml_backend_register(ggml_backend_reg_t reg);

    // Add backend dynamic loading support to the backend

    // Initialize the backend
    typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
    // Optional: obtain a score for the backend based on the system configuration
    // Higher scores are preferred, 0 means the backend is not supported in the current system
    typedef int                (*ggml_backend_score_t)(void);
|  | ||||
// GGML_BACKEND_DL_IMPL / GGML_BACKEND_DL_SCORE_IMPL define the exported entry
// points (ggml_backend_init / ggml_backend_score) that the registry resolves
// with dl_get_sym when a backend is built as a dynamic library. Without
// GGML_BACKEND_DL they expand to nothing. The C++ variant adds the extern "C"
// wrapper so the symbols are unmangled.
#ifdef GGML_BACKEND_DL
#    ifdef __cplusplus
#        define GGML_BACKEND_DL_IMPL(reg_fn)                             \
            extern "C" {                                                 \
            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
            }                                                            \
            ggml_backend_reg_t ggml_backend_init(void) {                 \
                return reg_fn();                                         \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)       \
            extern "C" {                                   \
            GGML_BACKEND_API int ggml_backend_score(void); \
            }                                              \
            int ggml_backend_score(void) {                 \
                return score_fn();                         \
            }
#    else
#        define GGML_BACKEND_DL_IMPL(reg_fn)                              \
            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);  \
            ggml_backend_reg_t                  ggml_backend_init(void) { \
                return reg_fn();                                          \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)        \
            GGML_BACKEND_API int ggml_backend_score(void);  \
            int                  ggml_backend_score(void) { \
                return score_fn();                          \
            }
#    endif
#else
#    define GGML_BACKEND_DL_IMPL(reg_fn)
#    define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
#endif

#ifdef  __cplusplus
}
#endif
							
								
								
									
										582
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										582
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,582 @@ | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-impl.h" | ||||
| #include <algorithm> | ||||
| #include <codecvt> | ||||
| #include <cstring> | ||||
| #include <filesystem> | ||||
| #include <locale> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <type_traits> | ||||
| #include <vector> | ||||
|  | ||||
| #ifdef _WIN32 | ||||
| #    define WIN32_LEAN_AND_MEAN | ||||
| #    ifndef NOMINMAX | ||||
| #        define NOMINMAX | ||||
| #    endif | ||||
| #    include <windows.h> | ||||
| #elif defined(__APPLE__) | ||||
| #    include <mach-o/dyld.h> | ||||
| #    include <dlfcn.h> | ||||
| #else | ||||
| #    include <dlfcn.h> | ||||
| #    include <unistd.h> | ||||
| #endif | ||||
|  | ||||
| // Backend registry | ||||
| #ifdef GGML_USE_CPU | ||||
| #include "ggml-cpu.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_CUDA | ||||
| #include "ggml-cuda.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_METAL | ||||
| #include "ggml-metal.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_SYCL | ||||
| #include "ggml-sycl.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_VULKAN | ||||
| #include "ggml-vulkan.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_OPENCL | ||||
| #include "ggml-opencl.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_BLAS | ||||
| #include "ggml-blas.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_RPC | ||||
| #include "ggml-rpc.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_CANN | ||||
| #include "ggml-cann.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_KOMPUTE | ||||
| #include "ggml-kompute.h" | ||||
| #endif | ||||
|  | ||||
| // disable C++17 deprecation warning for std::codecvt_utf8 | ||||
| #if defined(__clang__) | ||||
| #    pragma clang diagnostic push | ||||
| #    pragma clang diagnostic ignored "-Wdeprecated-declarations" | ||||
| #endif | ||||
|  | ||||
// Convert a UTF-8 string to a UTF-16 wide string.
// std::wstring_convert/std::codecvt_utf8_utf16 are deprecated since C++17;
// the pragmas surrounding these helpers silence that warning.
static std::wstring utf8_to_utf16(const std::string & str) {
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().from_bytes(str);
}
|  | ||||
// Convert a UTF-16 wide string back to UTF-8 (inverse of utf8_to_utf16).
static std::string utf16_to_utf8(const std::wstring & str) {
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(str);
}
|  | ||||
| #if defined(__clang__) | ||||
| #    pragma clang diagnostic pop | ||||
| #endif | ||||
|  | ||||
// Minimal cross-platform dynamic-library abstraction used by the registry.
// Paths are kept as wide strings so the Windows branch can call the W-APIs
// directly; the POSIX branch converts back to UTF-8.
#ifdef _WIN32

using dl_handle = std::remove_pointer_t<HMODULE>;

struct dl_handle_deleter {
    void operator()(HMODULE handle) {
        FreeLibrary(handle);
    }
};

static dl_handle * dl_load_library(const std::wstring & path) {
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    HMODULE handle = LoadLibraryW(path.c_str());

    // restore the previous error mode
    SetErrorMode(old_mode);

    return handle;
}

static void * dl_get_sym(dl_handle * handle, const char * name) {
    // same error-dialog suppression as dl_load_library
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    void * p = (void *) GetProcAddress(handle, name);

    SetErrorMode(old_mode);

    return p;
}

#else

using dl_handle = void;

struct dl_handle_deleter {
    void operator()(void * handle) {
        dlclose(handle);
    }
};

static void * dl_load_library(const std::wstring & path) {
    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);

    return handle;
}

static void * dl_get_sym(dl_handle * handle, const char * name) {
    return dlsym(handle, name);
}

#endif
|  | ||||
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;

// One registered backend. `handle` is non-null only for backends loaded from
// a dynamic library; it keeps the library mapped for the entry's lifetime.
struct ggml_backend_reg_entry {
    ggml_backend_reg_t reg;
    dl_handle_ptr handle;
};
|  | ||||
// Process-wide registry of backends and their devices (see get_reg()).
struct ggml_backend_registry {
    // Registered backends, in registration order.
    std::vector<ggml_backend_reg_entry> backends;
    // All devices from all registered backends, in registration order.
    std::vector<ggml_backend_dev_t> devices;

    // Registers every backend enabled at compile time. The CPU backend is
    // registered last, so its devices come after any accelerator devices.
    ggml_backend_registry() {
#ifdef GGML_USE_CUDA
        register_backend(ggml_backend_cuda_reg());
#endif
#ifdef GGML_USE_METAL
        register_backend(ggml_backend_metal_reg());
#endif
#ifdef GGML_USE_SYCL
        register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
        register_backend(ggml_backend_vk_reg());
#endif
#ifdef GGML_USE_OPENCL
        register_backend(ggml_backend_opencl_reg());
#endif
#ifdef GGML_USE_CANN
        register_backend(ggml_backend_cann_reg());
#endif
#ifdef GGML_USE_BLAS
        register_backend(ggml_backend_blas_reg());
#endif
#ifdef GGML_USE_RPC
        register_backend(ggml_backend_rpc_reg());
#endif
#ifdef GGML_USE_KOMPUTE
        register_backend(ggml_backend_kompute_reg());
#endif
#ifdef GGML_USE_CPU
        register_backend(ggml_backend_cpu_reg());
#endif
    }

    ~ggml_backend_registry() {
        // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
        // since backend threads may still be running and accessing resources from the dynamic library
        for (auto & entry : backends) {
            if (entry.handle) {
                entry.handle.release(); // NOLINT
            }
        }
    }

    // Adds `reg` and all of its devices to the registry; a NULL reg is ignored.
    // `handle` (if any) keeps the backing dynamic library loaded.
    void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
        if (!reg) {
            return;
        }

#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
            __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
#endif
        backends.push_back({ reg, std::move(handle) });
        for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
            register_device(ggml_backend_reg_dev_get(reg, i));
        }
    }

    // Appends a device to the device list (no deduplication).
    void register_device(ggml_backend_dev_t device) {
#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
        devices.push_back(device);
    }

    // Loads a backend shared library from `path` and registers it.
    // Returns NULL (logging unless `silent`) when the library cannot be opened,
    // reports a score of 0 for this system, lacks a ggml_backend_init symbol,
    // or was built against a different GGML_BACKEND_API_VERSION.
    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
        dl_handle_ptr handle { dl_load_library(path) };
        if (!handle) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }

        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
        if (score_fn && score_fn() == 0) {
            if (!silent) {
                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }

        auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
        if (!backend_init_fn) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }

        ggml_backend_reg_t reg = backend_init_fn();
        if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
            if (!silent) {
                if (!reg) {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
                } else {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
                }
            }
            return nullptr;
        }

        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());

        register_backend(reg, std::move(handle));

        return reg;
    }

    // Removes `reg` and all of its devices from the registry.
    // Note: dropping the entry destroys its dl_handle_ptr, which unloads the
    // library (subject to the FIXME in the destructor above).
    void unload_backend(ggml_backend_reg_t reg, bool silent) {
        auto it = std::find_if(backends.begin(), backends.end(),
                               [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });

        if (it == backends.end()) {
            if (!silent) {
                GGML_LOG_ERROR("%s: backend not found\n", __func__);
            }
            return;
        }

        if (!silent) {
            GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
        }

        // remove devices
        devices.erase(
            std::remove_if(devices.begin(), devices.end(),
                            [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
            devices.end());

        // remove backend
        backends.erase(it);
    }
};
|  | ||||
// Meyers singleton: the registry is constructed on first use and shared by
// every function below.
static ggml_backend_registry & get_reg() {
    static ggml_backend_registry reg;
    return reg;
}
|  | ||||
// Internal API
// Register a statically linked backend and all of its devices.
void ggml_backend_register(ggml_backend_reg_t reg) {
    get_reg().register_backend(reg);
}

// Register an additional device directly (without a backend registry entry).
void ggml_backend_device_register(ggml_backend_dev_t device) {
    get_reg().register_device(device);
}
|  | ||||
| // Backend (reg) enumeration | ||||
// Backend (reg) enumeration helper: case-insensitive ASCII string equality.
// Both arguments must be non-NULL, NUL-terminated strings.
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        // FIX: cast to unsigned char before std::tolower — passing a negative
        // value (possible for bytes >= 0x80 when char is signed) is undefined
        // behavior (CERT STR37-C).
        if (std::tolower((unsigned char) *a) != std::tolower((unsigned char) *b)) {
            return false;
        }
    }
    // Equal only if both strings ended at the same position.
    return *a == *b;
}
|  | ||||
// Number of registered backend registries.
size_t ggml_backend_reg_count() {
    return get_reg().backends.size();
}

// Registry at `index`; asserts the index is in range.
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_reg_count());
    return get_reg().backends[index].reg;
}

// Linear search for a registry by case-insensitive name; NULL if not found.
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        if (striequals(ggml_backend_reg_name(reg), name)) {
            return reg;
        }
    }
    return nullptr;
}
|  | ||||
// Device enumeration
// Number of devices across all registered backends.
size_t ggml_backend_dev_count() {
    return get_reg().devices.size();
}

// Device at `index`; asserts the index is in range.
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_dev_count());
    return get_reg().devices[index];
}

// Linear search for a device by case-insensitive name; NULL if not found.
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (striequals(ggml_backend_dev_name(dev), name)) {
            return dev;
        }
    }
    return nullptr;
}

// First device of the given type in registration order; NULL if none.
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev) == type) {
            return dev;
        }
    }
    return nullptr;
}
|  | ||||
| // Convenience functions | ||||
| ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) { | ||||
|     ggml_backend_dev_t dev = ggml_backend_dev_by_name(name); | ||||
|     if (!dev) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     return ggml_backend_dev_init(dev, params); | ||||
| } | ||||
|  | ||||
| ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) { | ||||
|     ggml_backend_dev_t dev = ggml_backend_dev_by_type(type); | ||||
|     if (!dev) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     return ggml_backend_dev_init(dev, params); | ||||
| } | ||||
|  | ||||
| ggml_backend_t ggml_backend_init_best(void) { | ||||
|     ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU); | ||||
|     if (!dev) { | ||||
|         dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); | ||||
|     } | ||||
|     if (!dev) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     return ggml_backend_dev_init(dev, nullptr); | ||||
| } | ||||
|  | ||||
| // Dynamic loading | ||||
| ggml_backend_reg_t ggml_backend_load(const char * path) { | ||||
|     return get_reg().load_backend(utf8_to_utf16(path), false); | ||||
| } | ||||
|  | ||||
// Unregisters a dynamically loaded backend; the `true` flag asks the registry
// to also release the library handle (see registry::unload_backend).
void ggml_backend_unload(ggml_backend_reg_t reg) {
    get_reg().unload_backend(reg, true);
}
|  | ||||
// Returns the directory containing the current executable, including a
// trailing path separator, as a wide string. Returns an empty string on
// unsupported platforms or on failure (Windows).
static std::wstring get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        // _NSGetExecutablePath returns non-zero when the buffer is too small
        // and stores the required size in `size`; retry with a bigger buffer
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return utf8_to_utf16(base_path + "/");
#elif defined(__linux__) || defined(__FreeBSD__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
#    if defined(__linux__)
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
#    elif defined(__FreeBSD__)
        ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
#    endif
        if (len == -1) {
            break;
        }
        // readlink does not NUL-terminate and truncates silently:
        // len == path.size() may mean truncation, so grow and retry
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return utf8_to_utf16(base_path + "/");
#elif defined(_WIN32)
    // NOTE(review): paths longer than MAX_PATH are truncated here
    // (GetModuleFileNameW returns len == buffer size on truncation) — confirm
    // whether long-path support is needed.
    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
    if (len == 0) {
        return {};
    }
    std::wstring base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + L"\\";
#else
    return {};
#endif
}
|  | ||||
// Filename prefix of backend shared libraries:
// "ggml-" on Windows, "libggml-" elsewhere.
static std::wstring backend_filename_prefix() {
#ifdef _WIN32
    return L"ggml-";
#else
    return L"libggml-";
#endif
}
|  | ||||
// Filename suffix of backend shared libraries.
// NOTE(review): ".so" is used for all non-Windows platforms, including macOS
// (no ".dylib" case) — confirm the backends are built with that suffix there.
static std::wstring backend_filename_suffix() {
#ifdef _WIN32
    return L".dll";
#else
    return L".so";
#endif
}
|  | ||||
// Platform path separator as a wide string.
static std::wstring path_separator() {
#ifdef _WIN32
    return L"\\";
#else
    return L"/";
#endif
}
|  | ||||
// Finds and loads the best dynamic library for backend `name`.
// Variant libraries named [lib]ggml-<name>-<variant>.[so|dll] export a
// ggml_backend_score() function; the highest positive score wins. If no
// variant scores above zero, the unsuffixed base library [lib]ggml-<name> is
// tried instead. Returns nullptr when nothing loadable is found.
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
    // TODO: search system paths
    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
    std::vector<std::wstring> search_paths;
    if (user_search_path == nullptr) {
        // default search order: current directory, then the executable's directory
        search_paths.push_back(L"." + path_separator());
        search_paths.push_back(get_executable_path());
    } else {
        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
    }

    int best_score = 0;
    std::wstring best_path;

    namespace fs = std::filesystem;
    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            continue;
        }
        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
        for (const auto & entry : dir_it) {
            if (entry.is_regular_file()) {
                std::wstring filename = entry.path().filename().wstring();
                std::wstring ext = entry.path().extension().wstring();
                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
                    dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
                    if (!handle && !silent) {
                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
                    }
                    if (handle) {
                        // candidate variants report their suitability for the
                        // host through an exported ggml_backend_score symbol
                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                        if (score_fn) {
                            int s = score_fn();
#ifndef NDEBUG
                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
#endif
                            if (s > best_score) {
                                best_score = s;
                                best_path = entry.path().wstring();
                            }
                        } else {
                            if (!silent) {
                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
                            }
                        }
                    }
                }
            }
        }
    }

    if (best_score == 0) {
        // no scored variant was usable - try to load the base backend instead
        for (const auto & search_path : search_paths) {
            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
            if (fs::exists(path)) {
                return get_reg().load_backend(path, silent);
            }
        }
        return nullptr;
    }

    return get_reg().load_backend(best_path, silent);
}
|  | ||||
// Loads the best available dynamic build of every known backend from the
// default search paths (current directory and the executable's directory).
void ggml_backend_load_all() {
    ggml_backend_load_all_from_path(nullptr);
}
|  | ||||
| void ggml_backend_load_all_from_path(const char * dir_path) { | ||||
| #ifdef NDEBUG | ||||
|     bool silent = true; | ||||
| #else | ||||
|     bool silent = false; | ||||
| #endif | ||||
|  | ||||
|     ggml_backend_load_best("blas", silent, dir_path); | ||||
|     ggml_backend_load_best("cann", silent, dir_path); | ||||
|     ggml_backend_load_best("cuda", silent, dir_path); | ||||
|     ggml_backend_load_best("hip", silent, dir_path); | ||||
|     ggml_backend_load_best("kompute", silent, dir_path); | ||||
|     ggml_backend_load_best("metal", silent, dir_path); | ||||
|     ggml_backend_load_best("rpc", silent, dir_path); | ||||
|     ggml_backend_load_best("sycl", silent, dir_path); | ||||
|     ggml_backend_load_best("vulkan", silent, dir_path); | ||||
|     ggml_backend_load_best("opencl", silent, dir_path); | ||||
|     ggml_backend_load_best("musa", silent, dir_path); | ||||
|     ggml_backend_load_best("cpu", silent, dir_path); | ||||
|     // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend | ||||
|     const char * backend_path = std::getenv("GGML_BACKEND_PATH"); | ||||
|     if (backend_path) { | ||||
|         ggml_backend_load(backend_path); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										2002
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2002
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										87
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
if (GGML_STATIC)
    set(BLA_STATIC ON)
endif()
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
#    set(BLA_SIZEOF_INTEGER 8)
#endif()

set(BLA_VENDOR ${GGML_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
    message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

    ggml_add_backend_library(ggml-blas
                             ggml-blas.cpp
                            )

    if (${GGML_BLAS_VENDOR} MATCHES "Apple")
        add_compile_definitions(ACCELERATE_NEW_LAPACK)
        add_compile_definitions(ACCELERATE_LAPACK_ILP64)
        add_compile_definitions(GGML_BLAS_USE_ACCELERATE)
    elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
        # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
        # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
        find_package(PkgConfig REQUIRED)
        if (${GGML_BLAS_VENDOR} MATCHES "Generic")
            pkg_check_modules(DepBLAS blas)
        elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
            # As of openblas v0.3.22, the 64-bit is named openblas64.pc
            pkg_check_modules(DepBLAS openblas64)
            if (NOT DepBLAS_FOUND)
                pkg_check_modules(DepBLAS openblas)
            endif()
        elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
            add_compile_definitions(GGML_BLAS_USE_BLIS)
            pkg_check_modules(DepBLAS blis)
        elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
            pkg_check_modules(DepBLAS blas-atlas)
        elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
            pkg_check_modules(DepBLAS flexiblas_api)
        elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
            add_compile_definitions(GGML_BLAS_USE_MKL)
            # all Intel* libraries share the same include path
            pkg_check_modules(DepBLAS mkl-sdl)
        elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
            # this doesn't provide pkg-config
            # suggest to assign BLAS_INCLUDE_DIRS on your own
            if ("${NVHPC_VERSION}" STREQUAL "")
                message(WARNING "Better to set NVHPC_VERSION")
            else()
                set(DepBLAS_FOUND ON)
                set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
            endif()
        endif()
        if (DepBLAS_FOUND)
            set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
        else()
            message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
            " detected by pkgconfig, trying to find cblas.h from possible paths...")
            find_path(BLAS_INCLUDE_DIRS
                NAMES cblas.h
                HINTS
                    /usr/include
                    /usr/local/include
                    /usr/include/openblas
                    /opt/homebrew/opt/openblas/include
                    /usr/local/opt/openblas/include
                    /usr/include/x86_64-linux-gnu/openblas/include
            )
        endif()
    endif()

    message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

    target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})

    if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
        add_compile_definitions(GGML_BLAS_USE_MKL)
    endif()

    target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
    target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
else()
    # FATAL_ERROR stops configuration here; "ERROR" is not a valid message()
    # mode and would have been printed as plain text while configuration
    # continued, leading to a confusing failure at build time instead.
    message(FATAL_ERROR "BLAS not found, please refer to "
                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
                        " to set correct GGML_BLAS_VENDOR")
endif()
							
								
								
									
										517
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										517
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,517 @@ | ||||
| #include "ggml-impl.h" | ||||
| #include "ggml-blas.h" | ||||
| #include "ggml-backend-impl.h" | ||||
|  | ||||
| #include <future> | ||||
| #include <vector> | ||||
| #include <cstring> | ||||
|  | ||||
| #if defined(GGML_BLAS_USE_ACCELERATE) | ||||
| #   include <Accelerate/Accelerate.h> | ||||
| #elif defined(GGML_BLAS_USE_MKL) | ||||
| #   include <mkl.h> | ||||
| #elif defined(GGML_BLAS_USE_BLIS) | ||||
| #   include <blis.h> | ||||
| #elif defined(GGML_BLAS_USE_NVPL) | ||||
| #   include <nvpl_blas.h> | ||||
| #else | ||||
| #   include <cblas.h> | ||||
| #endif | ||||
|  | ||||
// Per-instance state of the BLAS backend.
struct ggml_backend_blas_context {
    int n_threads = GGML_DEFAULT_N_THREADS;  // threads used for src0 f32 conversion and BLAS libraries that expose a thread setter
    std::unique_ptr<char[]> work_data;       // scratch buffer holding src0 converted to f32
    size_t work_size = 0;                    // current capacity of work_data, in bytes
#ifndef GGML_USE_OPENMP
    std::vector<std::future<void>> tasks;    // pending std::async conversion tasks (fallback when OpenMP is unavailable)
#endif
};
|  | ||||
// Matrix multiplication via cblas_sgemm: dst = src0 * src1 (ggml semantics).
// Non-f32 src0 is first converted to f32 into ctx->work_data (parallelized
// with OpenMP or std::async); src0 is broadcast over the outer batch
// dimensions of src1 using factors r2/r3, one sgemm call per batch plane.
static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];
    const struct ggml_tensor * src1 = dst->src[1];

    GGML_TENSOR_BINARY_OP_LOCALS

    const enum ggml_type type = src0->type;

    GGML_ASSERT(ne0 == ne01);
    GGML_ASSERT(ne1 == ne11);
    GGML_ASSERT(ne2 == ne12);
    GGML_ASSERT(ne3 == ne13);

    // we don't support permuted src0 or src1
    GGML_ASSERT(nb00 == ggml_type_size(type));
    GGML_ASSERT(nb10 == ggml_type_size(src1->type));

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 == sizeof(float));
    GGML_ASSERT(nb0 <= nb1);
    GGML_ASSERT(nb1 <= nb2);
    GGML_ASSERT(nb2 <= nb3);

    // broadcast factors
    const int64_t r2 = ne12/ne02;
    const int64_t r3 = ne13/ne03;

    // one f32 plane of src0; conversion scratch is only needed for non-f32 src0
    const int64_t ne_plane      = ne01*ne00;
    const size_t  desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);

    // grow (never shrink) the scratch buffer
    if (ctx->work_size < desired_wsize) {
        ctx->work_data.reset(new char[desired_wsize]);
        ctx->work_size = desired_wsize;
    }
    void * wdata = ctx->work_data.get();

    // convert src0 to float
    if (type != GGML_TYPE_F32) {
        const auto * type_traits = ggml_get_type_traits(type);
        ggml_to_float_t const to_float = type_traits->to_float;

        for (int64_t i03 = 0; i03 < ne03; i03++) {
            for (int64_t i02 = 0; i02 < ne02; i02++) {
                const void  *       x      = (char *)  src0->data + i02*nb02          + i03*nb03;
                      float * const wplane = (float *) wdata      + i02*ne_plane      + i03*ne02*ne_plane;

                // cap the thread count so each thread converts a meaningful
                // amount of data (at least ~min_cols_per_thread elements)
                const int min_cols_per_thread = 4096;
                const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1);
                const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1);

#ifdef GGML_USE_OPENMP
                #pragma omp parallel for num_threads(n_threads)
                for (int64_t i01 = 0; i01 < ne01; i01++) {
                    to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                }
#else
                // split rows evenly across n_threads; tasks 1..n-1 run async
                for (int i = 1; i < n_threads; i++) {
                    const int64_t start =       i*ne01/n_threads;
                    const int64_t end   = (i + 1)*ne01/n_threads;
                    if (start < end) {
                        ctx->tasks.push_back(std::async(std::launch::async, [=]() {
                            for (int64_t i01 = start; i01 < end; i01++) {
                                to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                            }
                        }));
                    }
                }
                {
                    // reuse the current thread for the first task
                    const int64_t start = 0;
                    const int64_t end   = ne01/n_threads;
                    for (int64_t i01 = start; i01 < end; i01++) {
                        to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                    }
                }
#endif
            }
        }

#ifndef GGML_USE_OPENMP
        // wait for all tasks to finish
        for (auto & task : ctx->tasks) {
            task.get();
        }
        ctx->tasks.clear();
#endif
    }

#if defined(OPENBLAS_VERSION)
    openblas_set_num_threads(ctx->n_threads);
#endif

#if defined(GGML_BLAS_USE_BLIS)
    bli_thread_set_num_threads(ctx->n_threads);
#endif

#if defined(GGML_BLAS_USE_NVPL)
    nvpl_blas_set_num_threads(ctx->n_threads);
#endif

    // one sgemm per (i12, i13) batch plane; src0 planes repeat via r2/r3
    for (int64_t i13 = 0; i13 < ne13; i13++) {
        for (int64_t i12 = 0; i12 < ne12; i12++) {
            const int64_t i03 = i13/r3;
            const int64_t i02 = i12/r2;

            const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
            const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
                  float * d = (float *) ((char *)  dst->data + i12*nb2  + i13*nb3);

            if (type != GGML_TYPE_F32) {
                // use the converted f32 copy instead of the raw src0 data
                x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
            }

            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                        ne1, ne01, ne10,
                        1.0f,   y, ne10,
                                x, ne00,
                        0.0f,   d, ne01);
        }
    }
}
|  | ||||
// Outer product of two matrices via a single cblas_sgemm call.
// See the comment block below for the mapping between ggml operands and the
// sgemm a/b/c arguments; a transposed src1 is handled by flipping transposeA.
static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];
    const struct ggml_tensor * src1 = dst->src[1];

    GGML_TENSOR_BINARY_OP_LOCALS

    GGML_ASSERT(ne0  == ne00);
    GGML_ASSERT(ne1  == ne10);
    GGML_ASSERT(ne2  == ne02);
    GGML_ASSERT(ne02 == ne12);
    GGML_ASSERT(ne3  == ne13);
    GGML_ASSERT(ne03 == ne13);

    // we don't support permuted src0 or src1
    GGML_ASSERT(nb00 == sizeof(float));

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 == sizeof(float));
    // GGML_ASSERT(nb0 <= nb1);
    // GGML_ASSERT(nb1 <= nb2);
    // GGML_ASSERT(nb2 <= nb3);

    // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
    // src0: (k,n)
    // src1: (k,m)
    // dst:  (m,n)
    //
    // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
    // Also expressed as (major,minor)
    // a: (m,k): so src1 transposed
    // b: (k,n): so src0
    // c: (m,n)
    //
    // However, if ggml_is_transposed(src1) is true, then
    // src1->data already contains a transposed version, so sgemm mustn't
    // transpose it further.

    int n = src0->ne[0];
    int k = src0->ne[1];
    int m = src1->ne[0];

    CBLAS_TRANSPOSE transposeA;
    int lda;

    if (!ggml_is_transposed(src1)) {
        transposeA = CblasTrans;
        lda = m;
    } else {
        transposeA = CblasNoTrans;
        lda = k;
    }

    float * a = (float *) ((char *) src1->data);
    float * b = (float *) ((char *) src0->data);
    float * c = (float *) ((char *) dst->data);

    cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);

    GGML_UNUSED(ctx);
}
|  | ||||
| // backend interface | ||||
|  | ||||
| static const char * ggml_backend_blas_get_name(ggml_backend_t backend) { | ||||
|     return "BLAS"; | ||||
|  | ||||
|     GGML_UNUSED(backend); | ||||
| } | ||||
|  | ||||
| static void ggml_backend_blas_free(ggml_backend_t backend) { | ||||
|     ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context; | ||||
|     delete ctx; | ||||
|     delete backend; | ||||
| } | ||||
|  | ||||
// Executes a compute graph node by node. Only MUL_MAT and OUT_PROD perform
// work in this backend; view-like ops are no-ops. Aborts on any other op
// (the scheduler should have filtered those out via supports_op).
static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        switch (node->op) {
            case GGML_OP_MUL_MAT:
                ggml_backend_blas_mul_mat(ctx, node);
                break;

            case GGML_OP_OUT_PROD:
                ggml_backend_blas_out_prod(ctx, node);
                break;

            // view-like ops: no data movement or computation required
            case GGML_OP_NONE:
            case GGML_OP_RESHAPE:
            case GGML_OP_VIEW:
            case GGML_OP_PERMUTE:
            case GGML_OP_TRANSPOSE:
                break;

            default:
                GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
        }
    }

    return GGML_STATUS_SUCCESS;

    GGML_UNUSED(backend);
}
|  | ||||
// Backend vtable: the BLAS backend computes synchronously and has no graph
// plans or events, so only name/free/graph_compute are implemented.
static struct ggml_backend_i blas_backend_i = {
    /* .get_name                = */ ggml_backend_blas_get_name,
    /* .free                    = */ ggml_backend_blas_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ NULL,
    /* .graph_plan_free         = */ NULL,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_blas_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
};
|  | ||||
// Stable unique identifier of the BLAS backend; used by ggml_backend_is_blas
// to recognize instances. Must never change between builds.
static ggml_guid_t ggml_backend_blas_guid(void) {
    static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
    return &guid;
}
|  | ||||
// Creates a new BLAS backend instance. The returned backend (and its context)
// is freed by ggml_backend_blas_free via the vtable.
ggml_backend_t ggml_backend_blas_init(void) {
    ggml_backend_blas_context * ctx = new ggml_backend_blas_context;

    ggml_backend_t backend = new ggml_backend {
        /* .guid      = */ ggml_backend_blas_guid(),
        /* .interface = */ blas_backend_i,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
        /* .context   = */ ctx,
    };

    // warn when the BLAS library's threading model does not match ggml's
    // OpenMP usage (the two thread pools are configured independently)
#if defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP)
    if (openblas_get_parallel() != OPENBLAS_OPENMP) {
        GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
    }
#endif

#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
    GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
#endif

    return backend;
}
|  | ||||
| bool ggml_backend_is_blas(ggml_backend_t backend) { | ||||
|     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid()); | ||||
| } | ||||
|  | ||||
| void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) { | ||||
|     GGML_ASSERT(ggml_backend_is_blas(backend_blas)); | ||||
|  | ||||
|     ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context; | ||||
|     ctx->n_threads = n_threads; | ||||
| } | ||||
|  | ||||
| // device interface | ||||
|  | ||||
| static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) { | ||||
|     return "BLAS"; | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
// Human-readable description: the name of the BLAS implementation selected at
// compile time (falls back to the generic "BLAS").
static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
    #if defined(GGML_BLAS_USE_ACCELERATE)
        return "Accelerate";
    #elif defined(GGML_BLAS_USE_MKL)
        return "MKL";
    #elif defined(GGML_BLAS_USE_BLIS)
        return "BLIS";
    #elif defined(GGML_BLAS_USE_NVPL)
        return "NVPL";
    #elif defined(OPENBLAS_VERSION)
        return "OpenBLAS";
    #else
        return "BLAS";
    #endif

    GGML_UNUSED(dev);
}
|  | ||||
| static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { | ||||
|     // TODO | ||||
|     *free = 0; | ||||
|     *total = 0; | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
// BLAS is reported as an accelerator device; it operates on CPU buffers
// (see ggml_backend_blas_device_get_buffer_type below).
static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
    return GGML_BACKEND_DEVICE_TYPE_ACCEL;

    GGML_UNUSED(dev);
}
|  | ||||
// Fills in the device property struct: name/description/type/memory plus
// capability flags. Only buffer_from_host_ptr is supported (the backend can
// wrap existing host memory); there is no async, host-buffer, or event support.
static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_blas_device_get_name(dev);
    props->description = ggml_backend_blas_device_get_description(dev);
    props->type        = ggml_backend_blas_device_get_type(dev);
    ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ true,
        /* .events                = */ false,
    };
}
|  | ||||
// Device hook: creates a backend instance; `params` is unused.
static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_blas_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}
|  | ||||
// The BLAS backend computes directly on CPU buffers.
static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_cpu_buffer_type();

    GGML_UNUSED(dev);
}
|  | ||||
// Wraps existing host memory as a CPU buffer without copying.
static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
    return ggml_backend_cpu_buffer_from_ptr(ptr, size);

    GGML_UNUSED(dev);
    GGML_UNUSED(max_tensor_size);
}
|  | ||||
| static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { | ||||
|     const struct ggml_tensor * src0 = op->src[0]; | ||||
|     const struct ggml_tensor * src1 = op->src[1]; | ||||
|  | ||||
|     switch (op->op) { | ||||
|         case GGML_OP_NONE: | ||||
|         case GGML_OP_RESHAPE: | ||||
|         case GGML_OP_VIEW: | ||||
|         case GGML_OP_PERMUTE: | ||||
|         case GGML_OP_TRANSPOSE: | ||||
|             return true; | ||||
|  | ||||
|         case GGML_OP_MUL_MAT: | ||||
|         { | ||||
|             // BLAS usually is only faster for large matrices | ||||
|             const struct ggml_tensor * src0 = op->src[0]; | ||||
|             const struct ggml_tensor * src1 = op->src[1]; | ||||
|  | ||||
|             const int64_t ne10 = src1->ne[0]; | ||||
|  | ||||
|             const int64_t ne0 = op->ne[0]; | ||||
|             const int64_t ne1 = op->ne[1]; | ||||
|  | ||||
|             // TODO: find the optimal value | ||||
|             const int64_t min_batch = 32; | ||||
|  | ||||
|             return ggml_is_contiguous(src0) && | ||||
|                    ggml_is_contiguous(src1) && | ||||
|                    src1->type == GGML_TYPE_F32 && | ||||
|                    (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && | ||||
|                    (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); | ||||
|         } | ||||
|  | ||||
|         case GGML_OP_OUT_PROD: | ||||
|             return op->src[0]->type == GGML_TYPE_F32 && | ||||
|                    op->src[1]->type == GGML_TYPE_F32 && | ||||
|                    ggml_is_matrix(src0) && | ||||
|                    ggml_is_matrix(src1) && | ||||
|                    ggml_is_contiguous(src0) && | ||||
|                    (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) && | ||||
|                    (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); | ||||
|  | ||||
|         default: | ||||
|             return false; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
| static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { | ||||
|     return ggml_backend_buft_is_host(buft); | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
// Device vtable for the BLAS backend. Entries left NULL (host buffer type,
// offload_op, the event callbacks) are features this backend does not provide.
static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
    /* .get_name             = */ ggml_backend_blas_device_get_name,
    /* .get_description      = */ ggml_backend_blas_device_get_description,
    /* .get_memory           = */ ggml_backend_blas_device_get_memory,
    /* .get_type             = */ ggml_backend_blas_device_get_type,
    /* .get_props            = */ ggml_backend_blas_device_get_props,
    /* .init_backend         = */ ggml_backend_blas_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_blas_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
    /* .supports_op          = */ ggml_backend_blas_device_supports_op,
    /* .supports_buft        = */ ggml_backend_blas_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
|  | ||||
| // backend reg interface | ||||
|  | ||||
| static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) { | ||||
|     return "BLAS"; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
| static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) { | ||||
|     return 1; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
// Return the backend's single device object for index 0.
// NOTE(review): the device is a function-local static, so it is initialized
// exactly once — the `reg` captured in it is the one from the FIRST call.
// Subsequent calls with a different registry would still see the original;
// confirm callers always pass the same registry.
static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    static ggml_backend_device ggml_backend_blas_device = {
        /* .iface   = */ ggml_backend_blas_device_i,
        /* .reg     = */ reg,
        /* .context = */ nullptr,
    };

    return &ggml_backend_blas_device;

    GGML_UNUSED(reg);
    GGML_UNUSED(index);
}
|  | ||||
| static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) { | ||||
|     if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) { | ||||
|         return (void *)ggml_backend_blas_set_n_threads; | ||||
|     } | ||||
|     return NULL; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
|     GGML_UNUSED(name); | ||||
| } | ||||
|  | ||||
// Registry vtable for the BLAS backend.
static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
    /* .get_name         = */ ggml_backend_blas_reg_get_name,
    /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
    /* .get_device       = */ ggml_backend_blas_reg_get_device,
    /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
};
|  | ||||
// Public entry point: returns the process-wide registry entry for the BLAS
// backend. The struct is a function-local static, so it is built only once.
ggml_backend_reg_t ggml_backend_blas_reg(void) {
    static struct ggml_backend_reg ggml_backend_blas_reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface       = */ ggml_backend_blas_reg_i,
        /* .context     = */ NULL,
    };

    return &ggml_backend_blas_reg;
}
|  | ||||
// NOTE(review): presumably expands to the exported entry point used when ggml
// backends are built as dynamically loaded libraries — confirm against the
// GGML_BACKEND_DL_IMPL definition in ggml-backend-impl.h.
GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
							
								
								
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
# Locate the CANN toolkit from the environment when no install dir was given.
if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
    set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
    message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
endif()

# Auto-detect SoC type and SoC version; if detection fails, the build aborts.
# (Fix: corrected "Auto-detech" typo in comment and user-facing error message.)
set(SOC_VERSION "")
function(detect_ascend_soc_type SOC_VERSION)
    execute_process(
        COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'"
        OUTPUT_VARIABLE npu_info
        RESULT_VARIABLE npu_result
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    if("${npu_info}" STREQUAL "" OR ${npu_result})
        message(FATAL_ERROR "Auto-detect ascend soc type failed, please specify manually or check ascend device working normally.")
    endif()
    set(${SOC_VERSION} "Ascend${npu_info}" PARENT_SCOPE)
endfunction()

if(NOT SOC_TYPE)
    detect_ascend_soc_type(SOC_VERSION)
    set(SOC_TYPE "${SOC_VERSION}")
    message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
endif()

string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower

# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND_310P.
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)

if (CANN_INSTALL_DIR)
    # Only Support Linux.
    if (NOT UNIX)
        message(FATAL_ERROR "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}")
    endif()

    # Supported platforms: x86-64, arm64
    if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
    else()
        message(FATAL_ERROR "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}")
    endif()

    # Set header and libs
    set(CANN_INCLUDE_DIRS
        ${CANN_INSTALL_DIR}/include
        ${CANN_INSTALL_DIR}/include/aclnn
        ${CANN_INSTALL_DIR}/acllib/include
    )

    add_subdirectory(kernels)
    list(APPEND CANN_LIBRARIES
        ascendcl
        nnopbase
        opapi
        acl_op_compiler
        ascendc_kernels
    )

    file(GLOB GGML_SOURCES_CANN "*.cpp")

    ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
    target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
    target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
    target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)

    target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")

    message(STATUS "CANN: CANN_INCLUDE_DIRS =  ${CANN_INCLUDE_DIRS}")
    message(STATUS "CANN: CANN_LIBRARIES =  ${CANN_LIBRARIES}")
else()
    message(FATAL_ERROR "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh?")
endif()
							
								
								
									
										2579
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2579
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										175
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,175 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #include "acl_tensor.h" | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
|  | ||||
| aclDataType ggml_cann_type_mapping(ggml_type type) { | ||||
|     switch (type) { | ||||
|         case GGML_TYPE_F32: | ||||
|             return ACL_FLOAT; | ||||
|         case GGML_TYPE_F16: | ||||
|             return ACL_FLOAT16; | ||||
|         case GGML_TYPE_I8: | ||||
|             return ACL_INT8; | ||||
|         case GGML_TYPE_I16: | ||||
|             return ACL_INT16; | ||||
|         case GGML_TYPE_I32: | ||||
|             return ACL_INT32; | ||||
|         case GGML_TYPE_Q4_0: | ||||
|             return ACL_INT4; | ||||
|         case GGML_TYPE_Q8_0: | ||||
|             return ACL_INT8; | ||||
|         default: | ||||
|             return ACL_DT_UNDEFINED; | ||||
|     } | ||||
|     return ACL_DT_UNDEFINED; | ||||
| } | ||||
|  | ||||
// Create an aclTensor view over a ggml_tensor's data.
//
// When `ne`/`nb` are null, the tensor's own shape and strides are mirrored and
// the storage length is the tensor's full byte size. Otherwise the caller's
// (possibly broadcast-expanded) `dims`-dimensional shape is used. ACL expects
// dimensions ordered outermost-first and strides in elements, so both are
// converted before the aclCreateTensor call.
//
// NOTE(review): in the custom-shape branch acl_storage_len sums (ne[i]-1)*nb[i],
// i.e. the byte offset of the last element rather than the full byte extent —
// confirm this is what aclCreateTensor expects for its storage dims.
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
                                   size_t* nb, int64_t dims, aclFormat format,
                                   size_t offset) {
    // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
    // added.
    int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];

    int64_t acl_storage_len = 0;
    if (ne == nullptr) {
        // No custom shape: mirror the ggml tensor exactly.
        acl_storage_len = ggml_nbytes(tensor);
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            acl_ne[i] = tensor->ne[i];
            // The step size of acl is in elements.
            acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
        }
    } else {
        // With bcast
        for (int i = 0; i < dims; i++) {
            acl_storage_len += (ne[i] - 1) * nb[i];
            acl_ne[i] = ne[i];
            acl_stride[i] = nb[i] / ggml_element_size(tensor);
        }
    }

    // Reverse ne and stride.
    // ACL orders dimensions from outermost to innermost (opposite of ggml).
    int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
    std::reverse(acl_ne, acl_ne + final_dims);
    std::reverse(acl_stride, acl_stride + final_dims);

    // `offset` is given in bytes; ACL expects it in elements.
    aclTensor* acl_tensor = aclCreateTensor(
        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
        offset / ggml_element_size(tensor), format, &acl_storage_len, 1,
        tensor->data);

    return acl_tensor;
}
|  | ||||
| bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) { | ||||
|     for (int i = 0; i < GGML_MAX_DIMS; i++) { | ||||
|         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) { | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
|  | ||||
// Compute numpy-style broadcast shapes/strides so src1 can be repeated to
// match src0. For each dim where src0's extent is a multiple (nr > 1) of
// src1's, an extra dimension of size nr (for src0) / 1 (for src1) is inserted
// so the expanded views share the same rank. Returns the number of output
// dims; output arrays must hold up to GGML_MAX_DIMS * 2 entries.
int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
                                  const ggml_tensor* src1,
                                  int64_t* bcast_src0_ne,
                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
                                  size_t* bcast_src1_nb) {
    GGML_ASSERT(ggml_can_repeat(src1, src0));
    int bcast_dim_cnt = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        // nr = repetition factor needed for src1's dim i to cover src0's.
        int64_t nr = src0->ne[i] / src1->ne[i];
        bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
        bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
        bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
        bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
        bcast_dim_cnt++;
        if (nr != 1) {
            // Need to add an extra dim.
            // Its stride is contiguous w.r.t. the dim just emitted above.
            bcast_src0_ne[bcast_dim_cnt] = nr;
            bcast_src1_ne[bcast_dim_cnt] = 1;
            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
                                           bcast_src0_ne[bcast_dim_cnt - 1];
            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
                                           bcast_src1_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
|  | ||||
// Compute broadcast shapes/strides for batched matrix multiplication.
// Batch dims (i >= 2) where input is an exact multiple nr of weight are split
// into (input_ne[i]/nr, nr) with a 1-sized dim inserted into weight, so the
// matmul batch-broadcast rules are satisfied; the first two (matrix) dims are
// copied unchanged. Returns the resulting dim count (<= GGML_MAX_DIMS * 2).
int64_t ggml_cann_get_mulmat_bcast_shape(
    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
    // input and dst should have the same shape, except for the first two dims.
    GGML_ASSERT(input_ne[2] == dst_ne[2]);
    GGML_ASSERT(input_ne[3] == dst_ne[3]);

    int bcast_dim_cnt = 0;

    // For mul_mat, a dimension needs to be added before the dimension that
    // weight needs to be expanded to satisfy the bcast rule of matrix
    // multiplication.
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = input_ne[i] / weight_ne[i];
        // Do not use bcast in the first two dimensions because we only support
        // the bcast batch dimension. Just copy them.
        if (i < 2 || nr == 1) {
            bcast_input_ne[bcast_dim_cnt] = input_ne[i];
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];

            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_dim_cnt++;
        } else {
            // Need to add an extra dim.
            // First emit the repetition-count dim (size nr for input/dst,
            // size 1 for weight so it broadcasts)...
            bcast_input_ne[bcast_dim_cnt] = nr;
            bcast_dst_ne[bcast_dim_cnt] = nr;
            bcast_weight_ne[bcast_dim_cnt] = 1;
            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dim_cnt++;

            // ...then the reduced original dim, with strides made contiguous
            // relative to the dim just emitted.
            bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
                                            bcast_input_ne[bcast_dim_cnt - 1];
            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
                                          bcast_dst_ne[bcast_dim_cnt - 1];
            bcast_weight_nb[bcast_dim_cnt] =
                bcast_weight_nb[bcast_dim_cnt - 1] *
                bcast_weight_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
							
								
								
									
										258
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,258 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #ifndef CANN_ACL_TENSOR_H | ||||
| #define CANN_ACL_TENSOR_H | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
|  | ||||
| #include <aclnn/aclnn_base.h> | ||||
| #include "common.h" | ||||
|  | ||||
| /** | ||||
|  * @brief	Maps a ggml_type to its corresponding aclDataType. | ||||
|  * | ||||
|  * @details	This function takes a ggml_type as input and returns the corresponding | ||||
|  *			aclDataType. It supports mapping for various ggml_types. If the input type | ||||
|  *			does not match any of the predefined ggml_types, the function returns | ||||
|  *          ACL_DT_UNDEFINED. | ||||
|  * | ||||
|  * @param	type    The ggml_type to be mapped. | ||||
|  * @return	The corresponding aclDataType. If the input type is not recognized, | ||||
|  *			ACL_DT_UNDEFINED is returned. | ||||
|  */ | ||||
| aclDataType ggml_cann_type_mapping(ggml_type type); | ||||
|  | ||||
| /** | ||||
|  * @brief   Creates an ACL tensor from a ggml_tensor with optional shape. | ||||
|  * | ||||
|  * @details This function creates an ACL tensor based on the properties of the | ||||
|  *          provided ggml_tensor. It supports customer shape by adjusting dimensions | ||||
|  *          and strides accordingly. If customer shape is applied, additional | ||||
|  *          dimensions and strides are calculated based on the provided parameters. | ||||
|  * | ||||
|  * @param   tensor      Pointer to the ggml_tensor to be converted to ACL tensor. | ||||
|  * @param   ne          Pointer to an array containing dimensions. Defaults to nullptr | ||||
|  *                      if no customer shape is applied. | ||||
|  * @param   nb          Pointer to an array containing strides. Defaults to nullptr | ||||
|  *                      if no customer shape is applied. | ||||
|  * @param   dims        Number of dimensions in the tensor. Defaults to 0 if no customer | ||||
|  *                      shape is applied. | ||||
|  * @param   format      ACL tensor format. Defaults to ACL_FORMAT_ND. | ||||
|  * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0. | ||||
|  * @return  Pointer to the created ACL tensor. | ||||
|  */ | ||||
| aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr, | ||||
|                              size_t* nb = nullptr, int64_t dims = 0, | ||||
|                              aclFormat format = ACL_FORMAT_ND, | ||||
|                              size_t offset = 0); | ||||
|  | ||||
| /** | ||||
|  * @brief   Template for creating an ACL tensor from provided parameters. typename TYPE | ||||
|  *          should be size_t or float. | ||||
|  * | ||||
|  * @details This function creates an ACL tensor using the provided data pointer, | ||||
|  *          data type, dimensions, strides, format, offset, and additional parameters. | ||||
|  *          It calculates necessary dimensions and strides based on the provided ne and nb | ||||
|  *          arrays, adjusting them for the ACL tensor creation. The ACL storage length | ||||
|  *          is also calculated based on the provided dimensions and strides. | ||||
|  * | ||||
|  * @param   data_ptr    Pointer to the data buffer for the ACL tensor. | ||||
|  * @param   dtype       ACL data type of the tensor. | ||||
|  * @param   type_size   Size of each element in the tensor data buffer. | ||||
|  * @param   ne          Pointer to an array containing tensor dimensions. | ||||
|  * @param   nb          Pointer to an array containing tensor strides. | ||||
|  * @param   dims        Number of dimensions of the tensor. | ||||
|  * @param   format      ACL tensor format. Defaults to ACL_FORMAT_ND. | ||||
|  * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0. | ||||
|  * @return  Pointer to the created ACL tensor. | ||||
|  */ | ||||
| template<typename TYPE> | ||||
| aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype, | ||||
|                                    TYPE type_size, int64_t* ne, TYPE* nb, | ||||
|                                    int64_t dims, | ||||
|                                    aclFormat format = ACL_FORMAT_ND, | ||||
|                                    size_t offset = 0) { | ||||
|     int64_t tmp_ne[GGML_MAX_DIMS * 2]; | ||||
|     int64_t tmp_stride[GGML_MAX_DIMS * 2]; | ||||
|  | ||||
|     memcpy(tmp_ne, ne, dims * sizeof(int64_t)); | ||||
|     for (int i = 0; i < dims; i++) { | ||||
|         tmp_stride[i] = nb[i] / type_size; | ||||
|     } | ||||
|  | ||||
|     std::reverse(tmp_ne, tmp_ne + dims); | ||||
|     std::reverse(tmp_stride, tmp_stride + dims); | ||||
|  | ||||
|     int64_t acl_storage_len = 0; | ||||
|     for (int i = 0; i < dims; i++) { | ||||
|         acl_storage_len += (ne[i] - 1) * nb[i]; | ||||
|     } | ||||
|  | ||||
|     aclTensor* acl_tensor = | ||||
|         aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, | ||||
|                         format, &acl_storage_len, 1, data_ptr); | ||||
|  | ||||
|     return acl_tensor; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * @brief   Checks if tensors require broadcasting based on their shapes. | ||||
|  * | ||||
|  * @details This function determines if two ggml_tensors need to be broadcasted for | ||||
|  *          element-wise operations. Broadcasting is necessary if the shapes of the | ||||
|  *          tensors are not identical and no dimension in either tensor equals 1. | ||||
|  * | ||||
|  * @param   t0      Pointer to the first ggml_tensor. | ||||
|  * @param   t1      Pointer to the second ggml_tensor. | ||||
|  * @return  True if broadcasting is needed, False otherwise. | ||||
|  * | ||||
|  * @remarks This function iterates over the dimensions of t0 and t1. It checks if each | ||||
|  *          dimension in t1 differs from t0's corresponding dimension and is not equal | ||||
|  *          to 1. If such a dimension is found, broadcasting is required to align t1 | ||||
|  *          with t0 for element-wise operations. | ||||
|  */ | ||||
| bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes broadcast shapes and strides for two ggml_tensors. | ||||
|  * | ||||
|  * @details This function calculates the broadcast shapes and strides for two ggml_tensors, | ||||
|  *          following the broadcasting rules similar to numpy. It adjusts dimensions and | ||||
|  *          strides to ensure compatibility for element-wise operations where one tensor | ||||
|  *          can be broadcasted to match the shape of another tensor. | ||||
|  * | ||||
|  * @param   src0                Pointer to the first ggml_tensor. | ||||
|  * @param   src1                Pointer to the second ggml_tensor. | ||||
|  * @param   bcast_ne_src0       Output array to store broadcasted dimensions for src0. | ||||
|  * @param   bcast_ne_src1       Output array to store broadcasted dimensions for src1. | ||||
|  * @param   bcast_nb_src0       Output array to store broadcasted strides for src0. | ||||
|  * @param   bcast_nb_src1       Output array to store broadcasted strides for src1. | ||||
|  * @return  Number of dimensions in the broadcasted shape. | ||||
|  * | ||||
|  * @pre     ggml_can_repeat(src1, src0) must return true, indicating src1 can be broadcasted | ||||
|  *          to match src0. | ||||
|  * | ||||
|  * @remarks This function iterates over the dimensions of src0 and src1, calculating the | ||||
|  *          necessary broadcast dimensions and strides. If a dimension requires broadcasting | ||||
|  *          (i.e., its size in src1 is smaller than in src0), an additional dimension is | ||||
|  *          added with size calculated to match src0's dimension. This adjustment ensures | ||||
|  *          that src1 can be element-wise broadcasted to src0's shape. | ||||
|  * | ||||
|  *  How it works: | ||||
 *
 *  \code
 *  if dim0 has padding.
|  *  a -> (2, 2) padding = 2 | ||||
|  *   a: [[1, 2, *, *] | ||||
|  *       [2, 3, *, *]] | ||||
|  *  nb = (8, 4, 2) | ||||
|  * | ||||
|  *  if a should bcast with b -> (2, 4) | ||||
|  *  b' -> (2, 2, 2) | ||||
|  *  b : [[1, 2, 3, 4, *, *] | ||||
|  *       [5, 6, 7, 8, *, *]] | ||||
|  *  nb = (12, 6, 1) | ||||
|  * | ||||
|  *  after bcast: | ||||
|  *  a' -> (2, 1, 2) | ||||
|  *  a': [[[1, 2], *, *] | ||||
|  *       [[2, 3], *, *]] | ||||
|  *  nb = (8, 4, 2, 1) | ||||
|  * | ||||
|  *  b' : [[[1, 2], [3, 4], *, *] | ||||
|  *        [[5, 6], [7, 8], *, *]] | ||||
|  *  nb = (12, 6, 2, 1) | ||||
|  *  \endcode | ||||
|  * | ||||
|  *  dim1 in a inserted dim, should add nb for dim1, | ||||
|  *  and all other nb moves to next in order. | ||||
|  */ | ||||
| int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1, | ||||
|                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1, | ||||
|                         size_t* bcast_nb_src0, size_t* bcast_nb_src1); | ||||
|  | ||||
| // Bcast macro to avoid duplicate code. | ||||
| #define BCAST_SHAPE(src0, src1)                                              \ | ||||
|     int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2];                            \ | ||||
|     int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2];                            \ | ||||
|     size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2];                             \ | ||||
|     size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2];                             \ | ||||
|     int64_t bcast_dims = ggml_cann_get_bcast_shape(                          \ | ||||
|         src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \ | ||||
|         bcast_##src1##_nb); | ||||
|  | ||||
| #define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims | ||||
|  | ||||
| /** | ||||
|  * @brief Calculates broadcast shapes for matrix multiplication. | ||||
|  * | ||||
|  * @details This function computes the broadcast shapes required for matrix multiplication | ||||
|  *          based on the input, weight, and destination tensor shapes. It ensures that the | ||||
|  *          dimensions of weight tensors are expanded appropriately to satisfy matrix | ||||
|  *          multiplication broadcast rules. | ||||
|  * | ||||
|  * @param input_ne      Array containing the dimensions of the input tensor. | ||||
|  * @param weight_ne     Array containing the dimensions of the weight tensor. | ||||
|  * @param dst_ne        Array containing the dimensions of the destination tensor. | ||||
|  * @param input_nb      Array containing the strides of the input tensor. | ||||
|  * @param weight_nb     Array containing the strides of the weight tensor. | ||||
|  * @param dst_nb        Array containing the strides of the destination tensor. | ||||
|  * @param bcast_input_ne    Output array for broadcasted input tensor dimensions. | ||||
|  * @param bcast_weight_ne   Output array for broadcasted weight tensor dimensions. | ||||
|  * @param bcast_dst_ne      Output array for broadcasted destination tensor dimensions. | ||||
|  * @param bcast_input_nb    Output array for broadcasted input tensor strides. | ||||
|  * @param bcast_weight_nb   Output array for broadcasted weight tensor strides. | ||||
|  * @param bcast_dst_nb      Output array for broadcasted destination tensor strides. | ||||
|  * @return The number of dimensions in the broadcasted tensors. | ||||
|  * | ||||
|  * @remarks This function iterates over the tensor dimensions and calculates the broadcast | ||||
|  *          shapes needed for matrix multiplication. It ensures that dimensions where | ||||
|  *          weight tensor requires expansion are appropriately handled to conform with | ||||
|  *          broadcasting rules. | ||||
|  * @note compare with ggml_cann_get_bcast_shape, mul_mat broadcast need add this new dim | ||||
|  *       before cast dim. | ||||
|  * @sa ggml_cann_get_bcast_shape | ||||
|  */ | ||||
| int64_t ggml_cann_get_mulmat_bcast_shape( | ||||
|     const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne, | ||||
|     const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb, | ||||
|     int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne, | ||||
|     size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb); | ||||
|  | ||||
// Broadcast helper for matrix multiplication, to avoid duplicate code at
// call sites. Declares ne/nb arrays (sized GGML_MAX_DIMS * 2) for `input`,
// `weight` and `dst`, plus `bcast_dims`, and fills them via
// ggml_cann_get_mulmat_bcast_shape(). Unlike BCAST_SHAPE, the arguments here
// are ggml_tensor pointers (the macro dereferences ->ne / ->nb itself).
// Pass the results onward with BCAST_MUL_MAT_PARAM(tensor).
#define BCAST_MUL_MAT_SHAPE(input, weight, dst)                         \
    int64_t bcast_##input##_ne[GGML_MAX_DIMS * 2];                      \
    int64_t bcast_##weight##_ne[GGML_MAX_DIMS * 2];                     \
    int64_t bcast_##dst##_ne[GGML_MAX_DIMS * 2];                        \
    size_t bcast_##input##_nb[GGML_MAX_DIMS * 2];                       \
    size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2];                      \
    size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2];                         \
    int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape(              \
        input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
        bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne,      \
        bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
|  | ||||
// Expands to the ne/nb arrays and dimension count declared by a preceding
// BCAST_MUL_MAT_SHAPE() in the same scope.
#define BCAST_MUL_MAT_PARAM(tensor) \
    bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
|  | ||||
| #endif  // CANN_ACL_TENSOR_H | ||||
							
								
								
									
										3427
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										3427
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										592
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										592
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,592 @@ | ||||
| #ifndef CANN_ACLNN_OPS | ||||
| #define CANN_ACLNN_OPS | ||||
|  | ||||
| /** | ||||
 * @file    aclnn_ops
 * @brief   Declarations of CANN (aclnn) operator wrappers for ggml tensors,
 *          plus shared templates for element-wise binary and activation
 *          operators. (The @file name previously said acl_tensor — copy-paste.)
|  * @author  hipudding <huafengchun@gmail.com> | ||||
|  * @author  wangshuai09 <391746016@qq.com> | ||||
|  * @date    July 15, 2024 | ||||
|  * | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #include <aclnnop/aclnn_add.h> | ||||
| #include <aclnnop/aclnn_arange.h> | ||||
| #include <aclnnop/aclnn_argsort.h> | ||||
| #include <aclnnop/aclnn_cat.h> | ||||
| #include <aclnnop/aclnn_clamp.h> | ||||
| #include <aclnnop/aclnn_div.h> | ||||
| #include <aclnnop/aclnn_gelu.h> | ||||
| #include <aclnnop/aclnn_hardsigmoid.h> | ||||
| #include <aclnnop/aclnn_hardswish.h> | ||||
| #include <aclnnop/aclnn_leaky_relu.h> | ||||
| #include <aclnnop/aclnn_mul.h> | ||||
| #include <aclnnop/aclnn_relu.h> | ||||
| #include <aclnnop/aclnn_silu.h> | ||||
| #include <aclnnop/aclnn_tanh.h> | ||||
| #include "acl_tensor.h" | ||||
| #include "common.h" | ||||
|  | ||||
| /** | ||||
|  * @brief   Repeats a ggml tensor along each dimension to match the dimensions | ||||
|  *          of another tensor. | ||||
|  * | ||||
|  * @details This function repeats the elements of a source ggml tensor along | ||||
|  *          each dimension to create a destination tensor with the specified | ||||
|  *          dimensions. The operation is performed using the ACL backend and | ||||
|  *          executed asynchronously on the device. | ||||
|  * | ||||
|  * @param   ctx The CANN context used for operations. | ||||
|  * @param   dst The ggml tensor representing the destination, which op is | ||||
|  *              GGML_OP_REPEAT and specifies the desired dimensions. | ||||
|  */ | ||||
| void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Adds two ggml tensors using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs an element-wise addition of two tensors. In | ||||
|  *          case the tensors do not have the same shape, one or both tensors | ||||
|  *          will be broadcasted to match the shape of the other before the | ||||
 *          addition is performed. The formula for the operation is given by:
|  *          \f[ | ||||
|  *              \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1} | ||||
|  *          \f] | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The ggml tensor representing the destination, result of the | ||||
|  *            addition is stored at dst->data, and dst->op is `GGML_OP_ADD` | ||||
|  */ | ||||
| void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies the Leaky ReLU activation function to a tensor using the CANN | ||||
|  *          backend. | ||||
|  * | ||||
|  * @details This function computes the Leaky ReLU activation for each element of | ||||
|  *          the input tensor. The Leaky ReLU function allows a small gradient | ||||
|  *          when the unit is not active (i.e., when the input is negative). The | ||||
|  *          Leaky ReLU function is defined as: | ||||
|  *          \f[ | ||||
|  *              \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0, | ||||
|  *               src) | ||||
|  *          \f] | ||||
|  *          `negativeSlope` is in dst->params. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the result of the Leaky ReLU | ||||
|  *            activation is stored, which op is `GGML_OP_LEAKY_RELU` | ||||
|  */ | ||||
| void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief    Concatenates multiple tensors along a specified dimension using the | ||||
|  *           CANN backend. | ||||
|  * | ||||
|  * @param ctx        The CANN context used for operations. | ||||
|  * @param tensorList A pointer to the list of tensors to be concatenated. | ||||
|  * @param dst        The destination tensor where the result of the | ||||
|  *                   concatenation is stored. dst->op is `GGML_OP_CONCAT`. | ||||
|  * @param concat_dim The dimension along which the tensors are concatenated. | ||||
|  * | ||||
|  * @attention tensorList length should be 2 and the dimension using for concat | ||||
|  *            default to 1. | ||||
|  */ | ||||
| void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Generates a sequence of evenly spaced values within a specified | ||||
|  *          interval for a ggml tensor using the CANN backend. | ||||
|  * | ||||
 * @details This function creates a sequence of numbers over a specified
 *          interval, starting from `start`, ending before `stop`, and
|  *          incrementing by `step`. The sequence is stored in the destination | ||||
|  *          tensor `dst`. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the generated sequence will be stored. | ||||
|  *            `start`, 'stop' and 'step' are in dst->op_params and dst->op is | ||||
|  *            `GGML_OP_ARANGE`. | ||||
|  */ | ||||
| void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the square of the elements of a ggml tensor using the CANN | ||||
|  *          backend. | ||||
|  * @details The function sets the second source tensor of the destination | ||||
|  *          tensor `dst` to be equal to the first source tensor. This is | ||||
|  *          effectively squaring the elements since the multiplication becomes | ||||
|  *          `element * element`. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the squared values will be stored, | ||||
|  *            which dst->op is `GGML_OP_SQR`. | ||||
|  */ | ||||
| void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies a clamp operation to the elements of a ggml tensor using the | ||||
|  *          CANN backend. | ||||
|  * | ||||
|  * @details This function clamps the elements of the input tensor `src` to a | ||||
|  *          specified range defined by `min` and `max` values. The result is | ||||
|  *          stored in the destination tensor `dst`. The operation is defined as: | ||||
|  *          \f[ | ||||
|  *              y = \max(\min(x, max\_value), min\_value) | ||||
|  *           \f] | ||||
|  *          where `x` is an element of the input tensor, and `y` is the | ||||
|  *          corresponding element in the output tensor. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the clamped values will be stored. | ||||
|  *            dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params. | ||||
|  */ | ||||
| void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Scales the elements of a ggml tensor by a constant factor using the | ||||
|  *          CANN backend. | ||||
|  * | ||||
|  * @details This function multiplies each element of the input tensor `src` by | ||||
|  *          a scaling factor `scale`, storing the result in the destination | ||||
|  *          tensor `dst`. The operation is defined as: | ||||
|  *          \f[ | ||||
|  *             dst = src \times scale | ||||
|  *          \f] | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the scaled values will be stored. | ||||
|  *            dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params. | ||||
|  */ | ||||
| void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Sorts the elements of a ggml tensor and returns the indices that | ||||
|  *          would sort the tensor using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs an argsort operation on the input tensor | ||||
|  *          `src`. It sorts the elements of `src` in either ascending or | ||||
|  *          descending order, depending on the `GGML_SORT_ORDER_DESC`, | ||||
|  *          and returns the indices that would sort the original tensor. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the sorted indices will be stored. | ||||
|  *            dst->op is `GGML_OP_ARGSORT`. | ||||
|  */ | ||||
| void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the Layer Normalization for a ggml tensor using the CANN | ||||
|  *          backend. | ||||
|  * | ||||
|  * @details This function applies the Layer Normalization operation on the | ||||
|  *          input tensor `src` and stores the result in the destination tensor | ||||
|  *          `dst`. Layer Normalization normalizes the features at each sample in | ||||
|  *          a mini-batch independently. It is commonly used in neural networks | ||||
|  *          to normalize the activations of a layer by adjusting and scaling | ||||
|  *          the outputs. | ||||
|  *          The operation is defined as: | ||||
|  *          \f[ | ||||
|  *              \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}} | ||||
|  *          \f] | ||||
|  *          `Var` defaults dst->ne[0]. `eps` is in dst->params. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  * @attention `Var` defaults to dst->ne[0]. | ||||
|  */ | ||||
| void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief  Computes the Group Normalization for a ggml tensor using the CANN | ||||
|  *         backend. | ||||
|  * | ||||
 * @details This function applies the Group Normalization operation on the input
|  *         tensor `src` and stores the result in the destination tensor `dst`. | ||||
|  *         Group Normalization divides the channels into groups and normalizes | ||||
|  *         the features within each group across spatial locations. | ||||
|  *         It is commonly used in convolutional neural networks to improve | ||||
|  *         training stability and performance. | ||||
|  *         The operation is defined as: | ||||
|  *         \f[ | ||||
|  *             \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}} | ||||
|  *         \f] | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  *            `n_groups` is in dst->params, which split C channel to `n_groups`. | ||||
|  *            dst->op is `GGML_OP_GROUP_NORM`. | ||||
|  * | ||||
|  * @attention eps defaults to 1e-6f. | ||||
|  */ | ||||
| void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the accumulation of tensors using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs an accumulation operation on two tensors. | ||||
|  *          Depending on the `inplace` flag, it either updates the destination | ||||
|  *          tensor `dst` in place by adding `alpha * src1` to it, or it creates | ||||
|  *          a new tensor as the result of `src0 + alpha * src1` and stores it in | ||||
|  *          `dst`. | ||||
|  *          The operation is defined as: | ||||
|  *          \f[ | ||||
|  *               dst = src0 + alpha \times src1 | ||||
|  *          \f] | ||||
|  *          if `inplace` is `true`, `src0` is equal to 'dst'. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the accumulated values will be stored. | ||||
|  *            `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`. | ||||
|  */ | ||||
| void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the sum of elements along the last dimension of a ggml tensor | ||||
|  *          using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs a reduction sum operation along the last | ||||
|  *          dimension of the input tensor `src`. The result of the sum is stored | ||||
|  *          in the destination tensor `dst`. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
 * @param dst The destination tensor where the reduced values will be stored.
|  *            dst->op is `GGML_OP_SUM_ROWS`. | ||||
|  * | ||||
|  * @attention `reduce_dims` defaults to 3, which means the last dimension. | ||||
|  */ | ||||
| void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Upsamples a ggml tensor using nearest neighbor interpolation using | ||||
|  *          the CANN backend. | ||||
|  * | ||||
|  * @details This function performs upsampling of the input tensor `src` using | ||||
|  *          nearest neighbor interpolation. The upsampling is applied to the | ||||
|  *          height and width dimensions (last two dimensions) of the tensor. The | ||||
|  *          result is stored in the destination tensor `dst`, which must have | ||||
|  *          the appropriate dimensions for the upsampled output. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the upsampled values will be stored. | ||||
|  *            dst->op is `GGML_OP_UPSCALE`. | ||||
|  */ | ||||
| void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, | ||||
|                                   ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Pads a ggml tensor to match the dimensions of the destination tensor | ||||
|  *          using the CANN backend. | ||||
|  * | ||||
|  * @details This function pads the input tensor `src` so that it matches the | ||||
|  *          dimensions of the destination tensor `dst`. The amount of padding | ||||
|  *          is calculated based on the difference in sizes between `src` and | ||||
|  *          `dst` along each dimension. The padded tensor is stored in `dst`. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor, which specifies the target dimensions for | ||||
|  *            padding. dst->op is `GGML_OP_PAD`. | ||||
|  */ | ||||
| void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Executes a 2D pooling operation on a ggml tensor using the CANN | ||||
|  *          backend. | ||||
|  * | ||||
|  * @details This function dispatches the execution of a 2D pooling operation on | ||||
|  *          the input tensor `dst`. The type of pooling (average or max) is | ||||
|  *          determined by the `op` parameter, which is read from the operation | ||||
|  *          parameters of `dst`. The function supports average pooling | ||||
|  *          (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an | ||||
|  *          invalid operation is encountered, the function asserts a failure. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor on which the pooling operation is to be | ||||
|  *            performed. dst->op is `GGML_OP_POOL_2D`. | ||||
|  */ | ||||
| void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Duplicates a ggml tensor using the CANN backend. | ||||
|  * | ||||
|  * @details This function duplicates the contents of the source tensor `src` to | ||||
|  *          the destination tensor `dst`. The function supports various tensor | ||||
|  *          types and configurations, including handling of extra data, type | ||||
|  *          conversions, and special cases for contiguous and non-contiguous | ||||
|  *          tensors. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the duplicated data will be stored. | ||||
|  *            dst->op is `GGML_OP_DUP` | ||||
|  * | ||||
|  * @attention Only support Fp16/FP32. Not support when src and dst have | ||||
|  *            different shape and dst is no-contiguous. | ||||
 * @note      This function needs to be simplified.
|  */ | ||||
| void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the Root Mean Square (RMS) normalization of a ggml tensor | ||||
|  *          using the CANN backend. | ||||
|  * | ||||
|  * @details This function applies RMS normalization to the input tensor `src` | ||||
|  *          and stores the result in the destination tensor `dst`. RMS | ||||
|  *          normalization involves computing the root mean square of the input | ||||
|  *          tensor along a specified dimension and then dividing each element of | ||||
|  *          the tensor by this value, adjusted by a small epsilon value to | ||||
|  *          prevent division by zero. | ||||
|  *          The operation is defined as: | ||||
|  *          \f[ | ||||
|  *               \text{RmsNorm}\left(x_i\right)=\frac{x_i}{\text{Rms}(\mathbf{x})} g_i, | ||||
|  *               \quad \text { where } \text{Rms}(\mathbf{x})=\sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2+e p s} | ||||
|  *          \f] | ||||
|  *          `eps` is in dst->op_params. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  *            dst->op is `GGML_OP_RMS_NORM`. | ||||
|  */ | ||||
| void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies a diagonal mask to the tensor with a specified value. | ||||
|  * | ||||
|  * @details This function creates a mask tensor filled with ones, then applies | ||||
|  *          an upper triangular and lower triangular operation to it based on | ||||
|  *          the number of past elements specified. Afterward, it adds the masked | ||||
|  *          tensor to the destination tensor in-place. | ||||
|  * | ||||
|  * @param ctx The backend CANN context used for operations. | ||||
|  * @param dst The destination tensor where the result will be stored. dst->op is | ||||
|  *            `GGML_OP_DIAG_MASK` | ||||
|  * @param value The value to use for masking. | ||||
|  */ | ||||
| void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value); | ||||
|  | ||||
| /** | ||||
|  * @brief   Performs an image-to-column transformation on the input tensor. | ||||
|  * | ||||
|  * @details This function takes an input tensor and applies an image-to-column | ||||
|  *          operation, converting spatial dimensions into column-like | ||||
|  *          structures suitable for convolutional operations. It supports both | ||||
|  *          half-precision (F16) and single-precision (F32) floating-point data | ||||
|  *          types. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor that stores the result of the operation. | ||||
|  *            dst->op is `GGML_OP_IM2COL`. | ||||
|  */ | ||||
| void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes time step embeddings using sine and cosine functions. | ||||
|  * | ||||
|  * @details This function calculates time step embeddings by applying sine and | ||||
|  *          cosine transformations to a given input tensor, which is typically | ||||
|  *          used in temporal models like diffusion models or transformers to | ||||
|  *          encode time information effectively. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the result of the embedding operation | ||||
|  *            will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`. | ||||
|  */ | ||||
| void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| // @see ggml_cann_dup. | ||||
| void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the softmax activation with optional masking. | ||||
|  * | ||||
|  * @details This function computes the softmax activation over the input tensor, | ||||
|  *          optionally applying a mask and scaling factor. It supports both FP16 | ||||
|  *          and FP32 data types and can handle masking by broadcasting the mask | ||||
|  *          across rows if necessary. | ||||
|  *          The function performs the following steps: | ||||
|  *          1. Multiplies the input tensor by a scale factor. | ||||
|  *          2. Optionally casts the mask tensor to FP32 if it is in FP16 format. | ||||
|  *          3. Broadcasts the mask tensor if its dimensions do not match the | ||||
|  *             input tensor's dimensions. | ||||
|  *          4. Adds the mask to the scaled input tensor. | ||||
|  *          5. Applies the softmax activation function along the specified | ||||
|  *             dimension. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the result will be stored. dst->op is | ||||
|  *            `GGML_OP_SOFTMAX`. | ||||
|  */ | ||||
| void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Extracts specific rows from a tensor based on indices. | ||||
|  * | ||||
|  * @details This function retrieves rows from a source tensor src0 according to | ||||
|  *          the indices provided in another tensor src1 and stores the result in | ||||
|  *          a destination tensor (\p dst). It supports different data types | ||||
|  *          including F32, F16, Q4_0, and Q8_0. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the extracted rows will be stored. | ||||
|  *            dst->op is `GGML_OP_GET_ROWS`. | ||||
|  */ | ||||
| void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Executes matrix multiplication for the given tensor. | ||||
|  * | ||||
|  * @details This function performs matrix multiplication on the source tensors | ||||
|  *          associated with the destination tensor. It supports matrix | ||||
|  *          multiplication F32, F16, and Q8_0. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor for storing the result of the matrix | ||||
|  *            multiplication. dst->op is `GGML_OP_MUL_MAT`. | ||||
|  */ | ||||
| void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor. | ||||
|  * | ||||
|  * @details This function implements the RoPE mechanism, which is a method to | ||||
|  *          encode positional information into sequence data, particularly | ||||
|  *          useful in transformer models. It supports both F32 and F16 data | ||||
|  *          types. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the RoPE-transformed data will be | ||||
|  *            stored. dst->op is `GGML_OP_ROPE`. | ||||
|  * | ||||
|  * @note The function currently does not support cases where the n_dims is less | ||||
|  *       than the input tensor's first dimension. | ||||
|  * @note The function currently does not support cases where the freq_factors is | ||||
|  *       not NULL. | ||||
|  * @note The function currently does not support cases where the ext_factor is | ||||
|  *       not equal 0. | ||||
|  * @note The function currently does not support cases where the freq_scale is | ||||
|  *       not equal 1. | ||||
|  */ | ||||
| void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
/**
 * @brief   Generic element-wise binary operator template (e.g. mul / div).
 *
 * @details Instantiated with a matching aclnn two-phase pair: a
 *          `getWorkspaceSize` function that sizes the scratch buffer and
 *          creates the executor, and an `execute` function that launches the
 *          kernel. Operands are dst->src[0] and dst->src[1]; when their shapes
 *          differ and broadcasting is required, both sources (and dst) are
 *          described with broadcast-expanded ne/nb via BCAST_SHAPE /
 *          BCAST_PARAM.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor; the result of `src0 <op> src1` is
 *            stored at dst->data.
 */
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                       aclTensor*, uint64_t*, aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src0 = dst->src[0];
    ggml_tensor* src1 = dst->src[1];
    // src1 must be repeatable (broadcastable) to src0's shape, and dst must
    // have src0's shape.
    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

    aclTensor* acl_src0;
    aclTensor* acl_src1;
    aclTensor* acl_dst;

    // Need bcast
    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
        BCAST_SHAPE(src0, src1)
        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
        // dst shares src0's broadcast layout (asserted same shape above).
        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
    } else {
        acl_src0 = ggml_cann_create_tensor(src0);
        acl_src1 = ggml_cann_create_tensor(src1);
        acl_dst = ggml_cann_create_tensor(dst);
    }

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    // Phase 1: query required workspace size and obtain the executor.
    ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
                               &executor));
    if (workspaceSize > 0) {
        // NOTE(review): workspace_allocator is destroyed at the end of this
        // block, i.e. before execute() consumes workspaceAddr. This
        // presumably relies on the pool being stream-ordered so the buffer
        // cannot be reused before the kernel runs — confirm against
        // ggml_cann_pool_alloc's semantics.
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    // Phase 2: launch the operator asynchronously on the context stream.
    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    ACL_CHECK(aclDestroyTensor(acl_src0));
    ACL_CHECK(aclDestroyTensor(acl_src1));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}
|  | ||||
| // Activation functions template. | ||||
| template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*, | ||||
|                                        aclOpExecutor**), | ||||
|           aclnnStatus execute(void*, uint64_t, aclOpExecutor*, | ||||
|                               const aclrtStream)> | ||||
| void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_tensor* src = dst->src[0]; | ||||
|  | ||||
|     GGML_ASSERT(src->type == GGML_TYPE_F32); | ||||
|     GGML_ASSERT(dst->type == GGML_TYPE_F32); | ||||
|  | ||||
|     aclTensor* acl_src = ggml_cann_create_tensor(src); | ||||
|     aclTensor* acl_dst = ggml_cann_create_tensor(dst); | ||||
|  | ||||
|     uint64_t workspaceSize = 0; | ||||
|     aclOpExecutor* executor; | ||||
|     void* workspaceAddr = nullptr; | ||||
|  | ||||
|     ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor)); | ||||
|     if (workspaceSize > 0) { | ||||
|         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); | ||||
|         workspaceAddr = workspace_allocator.get(); | ||||
|     } | ||||
|  | ||||
|     aclrtStream main_stream = ctx.stream(); | ||||
|     ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream)); | ||||
|  | ||||
|     ACL_CHECK(aclDestroyTensor(acl_src)); | ||||
|     ACL_CHECK(aclDestroyTensor(acl_dst)); | ||||
| } | ||||
|  | ||||
| // Activation functions template for const aclTensors. | ||||
| template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*, | ||||
|                                        uint64_t*, aclOpExecutor**), | ||||
|           aclnnStatus execute(void*, uint64_t, aclOpExecutor*, | ||||
|                               const aclrtStream)> | ||||
| void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_tensor* src = dst->src[0]; | ||||
|  | ||||
|     GGML_ASSERT(src->type == GGML_TYPE_F32); | ||||
|     GGML_ASSERT(dst->type == GGML_TYPE_F32); | ||||
|  | ||||
|     aclTensor* acl_src = ggml_cann_create_tensor(src); | ||||
|     aclTensor* acl_dst = ggml_cann_create_tensor(dst); | ||||
|  | ||||
|     uint64_t workspaceSize = 0; | ||||
|     aclOpExecutor* executor; | ||||
|     void* workspaceAddr = nullptr; | ||||
|  | ||||
|     ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor)); | ||||
|     if (workspaceSize > 0) { | ||||
|         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); | ||||
|         workspaceAddr = workspace_allocator.get(); | ||||
|     } | ||||
|  | ||||
|     aclrtStream main_stream = ctx.stream(); | ||||
|     ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream)); | ||||
|  | ||||
|     ACL_CHECK(aclDestroyTensor(acl_src)); | ||||
|     ACL_CHECK(aclDestroyTensor(acl_dst)); | ||||
| } | ||||
|  | ||||
| #endif  // CANN_ACLNN_OPS | ||||
							
								
								
									
										286
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #ifndef CANN_COMMON_H | ||||
| #define CANN_COMMON_H | ||||
|  | ||||
| #include <acl/acl.h> | ||||
|  | ||||
| #include <cstdio> | ||||
| #include <iostream> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <vector> | ||||
|  | ||||
| #include "../include/ggml-cann.h" | ||||
| #include "../include/ggml.h" | ||||
|  | ||||
| #define MATRIX_ROW_PADDING 512 | ||||
| #define GGML_CANN_MAX_STREAMS 8 | ||||
|  | ||||
| /** | ||||
|  * @brief Handles CANN-related errors by printing an error message and | ||||
|  *        terminating the program. | ||||
|  * @param stmt The statement that caused the error. | ||||
|  * @param func The function in which the error occurred. | ||||
|  * @param file The file in which the error occurred. | ||||
|  * @param line The line number at which the error occurred. | ||||
|  * @param msg The error message. | ||||
|  */ | ||||
| [[noreturn]] void ggml_cann_error(const char* stmt, const char* func, | ||||
|                                   const char* file, int line, const char* msg); | ||||
|  | ||||
| /** | ||||
|  * @brief Checks the result of a CANN function call and invokes the error | ||||
|  *        handler if the call fails. | ||||
|  * @param stmt The CANN function call to check. | ||||
|  * @param success The success code that indicates the call was successful. | ||||
|  * @param error_fn The function to call to retrieve the error message. | ||||
|  */ | ||||
// Note: no trailing semicolon after `while (0)` — that is the point of the
// do/while(0) idiom, letting `if (x) ACL_CHECK(y); else ...` parse correctly.
#define ACL_CHECK_GEN(stmt, success, error_fn)                                \
    do {                                                                      \
        int err_code = (stmt);                                                \
        if (err_code != (success)) {                                          \
            ggml_cann_error(#stmt, __func__, __FILE__, __LINE__, error_fn()); \
        }                                                                     \
    } while (0)

#define ACL_CHECK(stmt) ACL_CHECK_GEN(stmt, 0, aclGetRecentErrMsg)
|  | ||||
| /** | ||||
|  * @brief Contains information about CANN devices. | ||||
|  */ | ||||
| struct ggml_cann_device_info { | ||||
|     /** | ||||
|      * @brief Number of CANN devices available. | ||||
|      */ | ||||
|     int32_t device_count; | ||||
|  | ||||
|     /** | ||||
|      * @brief Information about a single CANN device. | ||||
|      */ | ||||
|     struct cann_device_info { | ||||
|         int cc;                 /**< Compute capability.                   */ | ||||
|         size_t smpb;            /**< Maximum shared memory per block.      */ | ||||
|         bool vmm;               /**< Virtual memory support.               */ | ||||
|         size_t vmm_granularity; /**< Granularity of virtual memory.        */ | ||||
|         size_t total_vram;      /**< Total video RAM available on the device. */ | ||||
|     }; | ||||
|  | ||||
|     cann_device_info devices[GGML_CANN_MAX_DEVICES] = | ||||
|         {}; /**< Array of CANN device information. */ | ||||
| }; | ||||
|  | ||||
| const ggml_cann_device_info& ggml_cann_info(); | ||||
|  | ||||
| void ggml_cann_set_device(int32_t device); | ||||
| int32_t ggml_cann_get_device(); | ||||
|  | ||||
| /** | ||||
|  * @brief Abstract base class for memory pools used by CANN. | ||||
|  */ | ||||
| struct ggml_cann_pool { | ||||
|     /** | ||||
|      * @brief Virtual destructor for the memory pool. | ||||
|      */ | ||||
|     virtual ~ggml_cann_pool() = default; | ||||
|  | ||||
|     /** | ||||
|      * @brief Allocates memory from the pool. | ||||
|      * | ||||
|      * @param size         The size of the memory block to allocate. | ||||
|      * @param actual_size  Pointer to a variable where the actual allocated size | ||||
|      *                     will be stored. | ||||
|      * @return             Pointer to the allocated memory block. | ||||
|      */ | ||||
|     virtual void* alloc(size_t size, size_t* actual_size) = 0; | ||||
|  | ||||
|     /** | ||||
|      * @brief Frees a previously allocated memory block. | ||||
|      * | ||||
|      * @param ptr   Pointer to the memory block to free. | ||||
|      * @param size  Size of the memory block to free. | ||||
|      * @note Note that all CANN opertors are running async. Make sure memory is | ||||
|      *       still avaiable before this operator finished. | ||||
|      */ | ||||
|     virtual void free(void* ptr, size_t size) = 0; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief RAII wrapper for managing memory allocations from a CANN memory pool. | ||||
|  */ | ||||
| struct ggml_cann_pool_alloc { | ||||
|     ggml_cann_pool* pool = nullptr; /**< Pointer to the memory pool. */ | ||||
|     void* ptr = nullptr;    /**< Pointer to the allocated memory block. */ | ||||
|     size_t actual_size = 0; /**< Actual size of the allocated memory block. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Default constructor. | ||||
|      */ | ||||
|     ggml_cann_pool_alloc() = default; | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor that initializes the memory pool. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      */ | ||||
|     explicit ggml_cann_pool_alloc(ggml_cann_pool& pool) : pool(&pool) {} | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor that initializes the memory pool and allocates memory. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      */ | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) { | ||||
|         alloc(size); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Destructor that frees the allocated memory block. | ||||
|      */ | ||||
|     ~ggml_cann_pool_alloc() { | ||||
|         if (ptr != nullptr) { | ||||
|             pool->free(ptr, actual_size); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Allocates memory from the pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* alloc(size_t size) { | ||||
|         GGML_ASSERT(pool != nullptr); | ||||
|         GGML_ASSERT(ptr == nullptr); | ||||
|         ptr = pool->alloc(size, &this->actual_size); | ||||
|         return ptr; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Allocates memory from a specific memory pool. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* alloc(ggml_cann_pool& pool, size_t size) { | ||||
|         this->pool = &pool; | ||||
|         return alloc(size); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Gets the pointer to the allocated memory block. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* get() { return ptr; } | ||||
|  | ||||
|     // Deleted copy constructor | ||||
|     ggml_cann_pool_alloc(const ggml_cann_pool_alloc&) = delete; | ||||
|  | ||||
|     // Deleted move constructor | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool_alloc&&) = delete; | ||||
|  | ||||
|     // Deleted copy assignment operator | ||||
|     ggml_cann_pool_alloc& operator=(const ggml_cann_pool_alloc&) = delete; | ||||
|  | ||||
|     // Deleted move assignment operator | ||||
|     ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Context for managing CANN backend operations. | ||||
|  */ | ||||
| struct ggml_backend_cann_context { | ||||
|     int32_t device;                  /**< Device ID. */ | ||||
|     std::string name;                /**< Name of the device. */ | ||||
|     std::string description;         /**< Description of the device. */ | ||||
|     aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */ | ||||
|  | ||||
|     aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor for initializing the context with a given device. | ||||
|      * @param device Device ID. | ||||
|      */ | ||||
|     explicit ggml_backend_cann_context(int device) | ||||
|         : device(device), name("CANN" + std::to_string(device)) { | ||||
|         ggml_cann_set_device(device); | ||||
|         description = aclrtGetSocName(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Destructor for cleaning up resources. | ||||
|      */ | ||||
|     ~ggml_backend_cann_context() { | ||||
|         ggml_cann_set_device(device); | ||||
|         if (copy_event != nullptr) { | ||||
|             ACL_CHECK(aclrtDestroyEvent(copy_event)); | ||||
|         } | ||||
|         for (int i = 0; i < GGML_CANN_MAX_STREAMS; ++i) { | ||||
|             if (streams[i] != nullptr) { | ||||
|                 ACL_CHECK(aclrtDestroyStream(streams[i])); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Get or create a stream for a given index. | ||||
|      * @param stream Index of the stream. | ||||
|      * @return The stream corresponding to the given index. | ||||
|      */ | ||||
|     aclrtStream stream(int stream) { | ||||
|         if (streams[stream] == nullptr) { | ||||
|             ggml_cann_set_device(device); | ||||
|             ACL_CHECK(aclrtCreateStream(&streams[stream])); | ||||
|         } | ||||
|         return streams[stream]; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Get or create the default stream (index 0). | ||||
|      * @return The default stream. | ||||
|      */ | ||||
|     aclrtStream stream() { return stream(0); } | ||||
|  | ||||
|     // TODO: each stream should have a memory pool. | ||||
|     std::unique_ptr<ggml_cann_pool> | ||||
|         mem_pool; /**< Memory pool for the device. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Create a new memory pool for a given device. | ||||
|      * @param device Device ID. | ||||
|      * @return A unique pointer to the new memory pool. | ||||
|      */ | ||||
|     static std::unique_ptr<ggml_cann_pool> new_pool_for_device(int device); | ||||
|  | ||||
|     /** | ||||
|      * @brief Get or create the memory pool for the context. | ||||
|      * @return Reference to the memory pool. | ||||
|      */ | ||||
|     ggml_cann_pool& pool() { | ||||
|         if (mem_pool == nullptr) { | ||||
|             mem_pool = new_pool_for_device(device); | ||||
|         } | ||||
|         return *mem_pool; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| #endif  // CANN_COMMON_H | ||||
							
								
								
									
										2188
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2188
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										30
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
# AscendC kernel sources compiled into the static ascendc_kernels library.
file(GLOB SRC_FILES
    get_row_f32.cpp
    get_row_f16.cpp
    get_row_q4_0.cpp
    get_row_q8_0.cpp
    quantize_f32_q8_0.cpp
    quantize_f16_q8_0.cpp
    quantize_float_to_q4_0.cpp
    dup.cpp
)

set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")

# Locate the AscendC kernel cmake helpers shipped with the CANN toolkit.
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
else()
    message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
endif()
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)

ascendc_library(ascendc_kernels STATIC
    ${SRC_FILES}
)

message(STATUS "CANN: compile ascend kernels with SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
# ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
							
								
								
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| #ifndef ASCENDC_KERNELS_H | ||||
| #define ASCENDC_KERNELS_H | ||||
|  | ||||
| #include "aclrtlaunch_ascendc_get_row_f32.h" | ||||
| #include "aclrtlaunch_ascendc_get_row_f16.h" | ||||
| #include "aclrtlaunch_ascendc_get_row_q8_0.h" | ||||
| #include "aclrtlaunch_ascendc_get_row_q4_0.h" | ||||
|  | ||||
| #include "aclrtlaunch_ascendc_quantize_f32_q8_0.h" | ||||
| #include "aclrtlaunch_ascendc_quantize_f16_q8_0.h" | ||||
| #include "aclrtlaunch_ascendc_quantize_f16_to_q4_0.h" | ||||
| #include "aclrtlaunch_ascendc_quantize_f32_to_q4_0.h" | ||||
|  | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp16.h" | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp32.h" | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp32_to_fp16.h" | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32.h" | ||||
|  | ||||
| #endif  // ASCENDC_KERNELS_H | ||||
							
								
								
									
										236
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										236
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,236 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| #include <cmath> | ||||
|  | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| const int64_t SUPPORTED_MAX_DIM = 65535;  // currently the limit of max block dim supported by dup kernel is 65535 | ||||
|  | ||||
// Copies rows from src to dst, one row per kernel block, optionally casting
// elements from SRC_T to DST_T. src rows must be contiguous along dim 0;
// dst must be fully contiguous.
template <typename SRC_T, typename DST_T>
class DupByRows {
   public:
    __aicore__ inline DupByRows() {}
    // Derives this block's source/destination byte offsets from the tensor
    // shape (input_ne_ub) and strides (input_nb_ub), then sets up the
    // input/output queues sized for one row (rounded up to 32-byte blocks).
    __aicore__ inline void init(GM_ADDR src, GM_ADDR dst, int64_t *input_ne_ub,
                                size_t *input_nb_ub) {
        /* Dup by rows when src is contiguous on first dimension and dst is
        contiguous, each kernel process one row.
        */

        // Input has four dims.
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        // param
        num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];
        num_elem = input_ne_ub[0];

        // index for (ne[1], ne[2], ne[3]): (idx_ne1, idx_ne2, idx_ne3)
        idx_ne3 = op_block_idx / (input_ne_ub[1] * input_ne_ub[2]);
        idx_ne2 = (op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2]))
                  / (input_ne_ub[1]);
        idx_ne1 = op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2])
                - idx_ne2 * input_ne_ub[1];

        // src may not be contiguous in dims [1,2,3], so the byte offset is
        // decided by ne & nb
        src_stride = input_nb_ub[3] * idx_ne3 + input_nb_ub[2] * idx_ne2
                     + input_nb_ub[1] * idx_ne1;

        // dst is contiguous
        dst_stride = op_block_idx * (input_ne_ub[0] * sizeof(DST_T));

        src_gm.SetGlobalBuffer(reinterpret_cast<__gm__ SRC_T *>(src +
                                                                src_stride));
        dst_gm.SetGlobalBuffer(reinterpret_cast<__gm__ DST_T *>(dst +
                                                                dst_stride));

        // Queue buffers are rounded up to a multiple of 32 bytes (the
        // DataCopy granularity).
        pipe.InitBuffer(src_queue, BUFFER_NUM, (sizeof(SRC_T) * num_elem +
                                                32 - 1) / 32 * 32);
        pipe.InitBuffer(dst_queue, BUFFER_NUM, (sizeof(DST_T) * num_elem +
                                                32 - 1) / 32 * 32);
    }

    // Stages one row from global memory into the input queue. When the row
    // length is not a multiple of the 32-byte block, one extra element is
    // copied so the trailing partial block is covered.
    __aicore__ inline void copy_in() {
        LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
        const size_t elem_per_block = 32 / sizeof(SRC_T);
        size_t tail = num_elem % elem_per_block;
        size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
        DataCopy(src_local, src_gm, cpy_elements_len);
        src_queue.EnQue(src_local);
    }

    // Writes one row from the output queue back to global memory.
    __aicore__ inline void copy_out() {
        LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
#ifdef ASCEND_310P
        // 310P has no DataCopyPad: copy the aligned prefix, then handle the
        // tail by zero-filling the pad elements and using an atomic add so
        // the partial block does not clobber neighbouring data.
        const size_t elem_per_block = 32 / sizeof(DST_T);
        size_t tail = num_elem % elem_per_block;
        size_t len = num_elem & ~(elem_per_block - 1);
        if (len > 0) {
            DataCopy(dst_gm, dst_local, len);
        }
        if(tail != 0) {
            for (size_t i = tail; i < elem_per_block; i++) {
                dst_local[len + i].SetValue(0, 0);
            }
            SetAtomicAdd<float>();
            DataCopy(dst_gm[len], dst_local[len], elem_per_block);
            SetAtomicNone();
        }
#else
        // DataCopyPad handles the unaligned tail directly.
        DataCopyExtParams dataCopyParams;
        dataCopyParams.blockCount = 1;
        dataCopyParams.blockLen = num_elem * sizeof(DST_T);
        DataCopyPad(dst_gm, dst_local, dataCopyParams);
#endif
        dst_queue.FreeTensor(dst_local);
    }

    __aicore__ inline void dup() {
        // main process, copy one row data from src to dst.
        copy_in();

        LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
        LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();

        int32_t BLOCK_NUM = 32 / sizeof(DST_T);
        DataCopy(dst_local, src_local, (num_elem + BLOCK_NUM - 1)
                                        / BLOCK_NUM * BLOCK_NUM);
        dst_queue.EnQue<DST_T>(dst_local);

        src_queue.FreeTensor(src_local);
        copy_out();
    }

    __aicore__ inline void dup_with_cast() {
        // main process, copy one row data from src to dst.
        // cast dtype from src to dst.
        copy_in();

        LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
        LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();

        Cast(dst_local, src_local, RoundMode::CAST_NONE, num_elem);
        dst_queue.EnQue<DST_T>(dst_local);

        src_queue.FreeTensor(src_local);
        copy_out();
    }

   private:

    TPipe pipe;
    GlobalTensor<SRC_T> src_gm;  // this block's source row in global memory
    GlobalTensor<DST_T> dst_gm;  // this block's destination row in global memory

    int64_t num_rows;    // total number of rows (ne1*ne2*ne3)
    int64_t num_elem;    // elements per row (ne0)
    int64_t idx_ne3;     // this block's index along dim 3
    int64_t idx_ne2;     // this block's index along dim 2
    int64_t idx_ne1;     // this block's index along dim 1
    int64_t src_stride;  // source byte offset of this block's row
    int64_t dst_stride;  // destination byte offset of this block's row

    TQue<QuePosition::VECIN, BUFFER_NUM> src_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> dst_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: row-wise copy of an fp16 tensor to an fp16 destination.
// Each kernel block handles one row; the ne/nb parameter arrays (4 entries,
// 32 bytes each) are first staged from global memory into local buffers.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {

    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused:
    // DupByRows assumes the destination is contiguous.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<half, half> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup();
}
|  | ||||
// Kernel entry: row-wise copy of an fp32 tensor to an fp32 destination.
// NOTE(review): float_t (from <cmath>) is only guaranteed to be at least as
// wide as float; the GM data is fp32, so plain `float` may be the safer
// spelling — confirm against the AscendC toolchain.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<float_t, float_t> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup();
}
|  | ||||
// Kernel entry: row-wise copy with cast from fp32 source to fp16 destination.
// NOTE(review): float_t (from <cmath>) is only guaranteed to be at least as
// wide as float — confirm it is fp32 on the AscendC toolchain.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32_to_fp16(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {

    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<float_t, half> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup_with_cast();
}
|  | ||||
// Kernel entry: row-wise copy with cast from fp16 source to fp32 destination.
// NOTE(review): float_t (from <cmath>) is only guaranteed to be at least as
// wide as float — confirm it is fp32 on the AscendC toolchain.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16_to_fp32(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {

    // copy params from gm to ub.
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<half, float_t> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup_with_cast();
}
							
								
								
									
										197
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,197 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
| class GET_ROW_F16 { | ||||
|    public: | ||||
|     __aicore__ inline GET_ROW_F16() {} | ||||
|     __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output, | ||||
|                                 int64_t *input_ne_ub, size_t *input_nb_ub, | ||||
|                                 int64_t *indices_ne_ub, size_t *indices_nb_ub, | ||||
|                                 int64_t *output_ne_ub, size_t *output_nb_ub) { | ||||
|         // TODO, use template for F16/f32 | ||||
|         int64_t op_block_num = GetBlockNum(); | ||||
|         op_block_idx = GetBlockIdx(); | ||||
|  | ||||
|         for (int i = 0; i < 4; i++) { | ||||
|             input_ne[i] = input_ne_ub[i]; | ||||
|             input_stride[i] = input_nb_ub[i] / input_nb_ub[0]; | ||||
|  | ||||
|             indices_ne[i] = indices_ne_ub[i]; | ||||
|             indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0]; | ||||
|  | ||||
|             output_ne[i] = output_ne_ub[i]; | ||||
|             output_stride[i] = output_nb_ub[i] / output_nb_ub[0]; | ||||
|         } | ||||
|  | ||||
|         // Indices has two dims. n_elements = all rows should get. | ||||
|         // dr, all rows should this thread get. | ||||
|         uint64_t n_elements = | ||||
|             indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3]; | ||||
|         dr = n_elements / op_block_num; | ||||
|  | ||||
|         uint64_t tails = n_elements % op_block_num; | ||||
|         if (op_block_idx < tails) { | ||||
|             dr += 1; | ||||
|             ir = dr * op_block_idx; | ||||
|         } else { | ||||
|             ir = dr * op_block_idx + tails; | ||||
|         } | ||||
|  | ||||
|         input_gm.SetGlobalBuffer((__gm__ half *)input); | ||||
|         indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices); | ||||
|         output_gm.SetGlobalBuffer((__gm__ float *)output); | ||||
|  | ||||
|         uint64_t input_local_buffer_size = ((input_ne[0] * sizeof(half) + 31) | ||||
|                                              & ~31); | ||||
|         uint64_t output_local_buffer_size = ((input_ne[0] * sizeof(float) + 31) | ||||
|                                               & ~31); | ||||
|  | ||||
|         local_buffer_elems = input_local_buffer_size / sizeof(half); | ||||
|  | ||||
|         // TODO, consider long row that can't put in UB. | ||||
|         // All data should asign to 32. It's ok because all data is align to 32. | ||||
|         pipe.InitBuffer(input_queue, BUFFER_NUM, input_local_buffer_size); | ||||
|         pipe.InitBuffer(output_queue, BUFFER_NUM, output_local_buffer_size); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_in(uint32_t offset, size_t len) { | ||||
|         size_t origin_len = len; | ||||
|         LocalTensor<half> input_local = input_queue.AllocTensor<half>(); | ||||
|         const size_t elem_per_block = 32 / sizeof(half); | ||||
|         size_t tail = len % elem_per_block; | ||||
|         len = len & ~(elem_per_block - 1); | ||||
|         if(tail != 0) { | ||||
|             len += elem_per_block; | ||||
|         } | ||||
|         DataCopy(input_local, input_gm[offset], len); | ||||
|         input_queue.EnQue(input_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_out(uint32_t offset, size_t len) { | ||||
|         LocalTensor<float> output_local = output_queue.DeQue<float>(); | ||||
|         const size_t elem_per_block = 32 / sizeof(float); | ||||
|         size_t tail = len % elem_per_block; | ||||
|         len = len & ~(elem_per_block - 1); | ||||
|         if (len > 0) { | ||||
|             DataCopy(output_gm[offset], output_local, len); | ||||
|         } | ||||
|  | ||||
|         if(tail != 0) { | ||||
| #ifdef ASCEND_310P | ||||
|             for (size_t i = tail; i < elem_per_block; i++) { | ||||
|                 output_local[len + i].SetValue(0, 0); | ||||
|             } | ||||
|             SetAtomicAdd<float>(); | ||||
|             DataCopy(output_gm[offset + len], output_local[len], elem_per_block); | ||||
|             SetAtomicNone(); | ||||
| #else | ||||
|             DataCopyExtParams dataCopyParams; | ||||
|             dataCopyParams.blockCount = 1; | ||||
|             dataCopyParams.blockLen = tail * sizeof(float); | ||||
|             DataCopyPad(output_gm[offset + len], output_local[len], | ||||
|                         dataCopyParams); | ||||
| #endif | ||||
|         } | ||||
|         output_queue.FreeTensor(output_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void calculate_row(int64_t idx) { | ||||
|         const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]); | ||||
|         const int64_t indices_ne1_idx = | ||||
|             (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) / | ||||
|             indices_ne[0]; | ||||
|         const int64_t indices_ne0_idx = | ||||
|             (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] - | ||||
|              indices_ne1_idx * indices_ne[0]); | ||||
|  | ||||
|         const int64_t indices_offset = indices_ne0_idx * indices_stride[0] + | ||||
|                                        indices_ne1_idx * indices_stride[1] + | ||||
|                                        indices_ne2_idx * indices_stride[2]; | ||||
|         const int32_t selected_row_idx = indices_gm.GetValue(indices_offset); | ||||
|  | ||||
|         const int64_t input_offset = selected_row_idx * input_stride[1] + | ||||
|                                      indices_ne1_idx * input_stride[2] + | ||||
|                                      indices_ne2_idx * input_stride[3]; | ||||
|  | ||||
|         const int64_t output_offset = indices_ne0_idx * output_stride[1] + | ||||
|                                       indices_ne1_idx * output_stride[2] + | ||||
|                                       indices_ne2_idx * output_stride[3]; | ||||
|  | ||||
|         copy_in(input_offset, input_ne[0]); | ||||
|         LocalTensor<half> input_local = input_queue.DeQue<half>(); | ||||
|         LocalTensor<float> output_local = output_queue.AllocTensor<float>(); | ||||
|  | ||||
|         Cast(output_local, input_local, RoundMode::CAST_NONE, | ||||
|              local_buffer_elems); | ||||
|         output_queue.EnQue(output_local); | ||||
|         copy_out(output_offset, input_ne[0]); | ||||
|  | ||||
|         input_queue.FreeTensor(input_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void calculate() { | ||||
|         for (int64_t i = ir; i < ir + dr; i++) { | ||||
|             calculate_row(i); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|    private: | ||||
|     int64_t input_ne[4]; | ||||
|     size_t input_stride[4]; | ||||
|  | ||||
|     int64_t indices_ne[4]; | ||||
|     size_t indices_stride[4]; | ||||
|  | ||||
|     int64_t output_ne[4]; | ||||
|     size_t output_stride[4]; | ||||
|  | ||||
|     size_t local_buffer_elems; | ||||
|  | ||||
|     int64_t ir; | ||||
|     int64_t dr; | ||||
|  | ||||
|     TPipe pipe; | ||||
|     GlobalTensor<half> input_gm; | ||||
|     GlobalTensor<int32_t> indices_gm; | ||||
|     GlobalTensor<float> output_gm; | ||||
|     TQue<QuePosition::VECIN, BUFFER_NUM> input_queue; | ||||
|     TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue; | ||||
|     int64_t op_block_idx; | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_get_row_f16( | ||||
|     GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, | ||||
|     GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm, | ||||
|     GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t indices_ne_ub[4]; | ||||
|     size_t indices_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|     size_t output_nb_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(indices_ne_gm, indices_ne_ub, 32); | ||||
|     copy_to_ub(indices_nb_gm, indices_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|     copy_to_ub(output_nb_gm, output_nb_ub, 32); | ||||
|  | ||||
|     GET_ROW_F16 op; | ||||
|     op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub, | ||||
|             indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
							
								
								
									
										190
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										190
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,190 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
// Gathers rows of an fp32 source tensor selected by an int32 index tensor
// and writes them unchanged to an fp32 output (GET_ROWS for the F32 source
// type). No cast is needed, so rows are staged through UB with a DataCopy.
class GET_ROW_F32 {
   public:
    __aicore__ inline GET_ROW_F32() {}
    // `*_ne_ub` hold the 4-dim element counts and `*_nb_ub` the 4-dim byte
    // strides of the input, indices and output tensors (already staged in UB).
    __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
                                int64_t *input_ne_ub, size_t *input_nb_ub,
                                int64_t *indices_ne_ub, size_t *indices_nb_ub,
                                int64_t *output_ne_ub, size_t *output_nb_ub) {
        int64_t op_block_num = GetBlockNum();
        op_block_idx = GetBlockIdx();

        // Convert byte strides to element strides; nb[0] is the element size.
        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            input_stride[i] = input_nb_ub[i] / input_nb_ub[0];

            indices_ne[i] = indices_ne_ub[i];
            indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];

            output_ne[i] = output_ne_ub[i];
            output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
        }

        // Indices has two dims. n_elements = all rows should get.
        // dr = number of rows this block handles; ir = its first row index.
        uint64_t n_elements =
            indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
        dr = n_elements / op_block_num;

        // The first `tails` blocks each take one extra row of the remainder.
        uint64_t tails = n_elements % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        input_gm.SetGlobalBuffer((__gm__ float *)input);
        indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
        output_gm.SetGlobalBuffer((__gm__ float *)output);

        // Round one row's byte size up to a 32-byte multiple, the DataCopy
        // transfer granularity used below.
        uint64_t local_buffer_size = ((input_ne[0] * sizeof(float) + 31) & ~31);
        local_buffer_elems = local_buffer_size / sizeof(float);

        // TODO, consider long row that can't put in UB.
        // All transfers are in 32-byte units; OK because the buffer above is
        // sized to a 32-byte multiple.
        pipe.InitBuffer(input_queue, BUFFER_NUM, local_buffer_size);
        pipe.InitBuffer(output_queue, BUFFER_NUM, local_buffer_size);
    }

    // Copies `len` fp32 elements from GM offset `offset` into a local tensor.
    // The transfer length is rounded UP to whole 32-byte blocks; the local
    // buffer was sized in init() to absorb this over-read.
    __aicore__ inline void copy_in(uint32_t offset, size_t len) {
        LocalTensor<float> input_local = input_queue.AllocTensor<float>();
        const size_t elem_per_block = 32 / sizeof(float);
        size_t tail = len % elem_per_block;
        len = len & ~(elem_per_block - 1);
        if(tail != 0) {
            len += elem_per_block;
        }
        DataCopy(input_local, input_gm[offset], len);
        input_queue.EnQue(input_local);
    }

    // Writes `len` fp32 elements back to GM. Whole 32-byte blocks go out via
    // DataCopy; a partial tail block is written with DataCopyPad, or on 310P
    // (which lacks DataCopyPad here) by zeroing the pad lanes and atomically
    // ADDing a full block so neighbouring output bytes are not clobbered.
    __aicore__ inline void copy_out(uint32_t offset, size_t len) {
        LocalTensor<float> output_local = output_queue.DeQue<float>();
        const size_t elem_per_block = 32 / sizeof(float);
        size_t tail = len % elem_per_block;
        len = len & ~(elem_per_block - 1);
        if (len > 0) {
            DataCopy(output_gm[offset], output_local, len);
        }

        if(tail != 0) {
#ifdef ASCEND_310P
            // Zero the padding lanes so the atomic add leaves them unchanged.
            for (size_t i = tail; i < elem_per_block; i++) {
                output_local[len + i].SetValue(0, 0);
            }
            SetAtomicAdd<float>();
            DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
            SetAtomicNone();
#else
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = tail * sizeof(float);
            DataCopyPad(output_gm[offset + len], output_local[len],
                        dataCopyParams);
#endif
        }
        output_queue.FreeTensor(output_local);
    }

    // Gathers one row: decompose flat row index `idx` into indices-tensor
    // coordinates, read the selected source row number, then copy the row
    // in, duplicate it locally, and copy it out to the output position.
    __aicore__ inline void calculate_row(int64_t idx) {
        const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
        const int64_t indices_ne1_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
            indices_ne[0];
        const int64_t indices_ne0_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
             indices_ne1_idx * indices_ne[0]);

        const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
                                       indices_ne1_idx * indices_stride[1] +
                                       indices_ne2_idx * indices_stride[2];
        const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);

        const int64_t input_offset = selected_row_idx * input_stride[1] +
                                     indices_ne1_idx * input_stride[2] +
                                     indices_ne2_idx * input_stride[3];

        const int64_t output_offset = indices_ne0_idx * output_stride[1] +
                                      indices_ne1_idx * output_stride[2] +
                                      indices_ne2_idx * output_stride[3];

        copy_in(input_offset, input_ne[0]);
        LocalTensor<float> input_local = input_queue.DeQue<float>();
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        DataCopy(output_local, input_local, local_buffer_elems);
        output_queue.EnQue(output_local);
        copy_out(output_offset, input_ne[0]);

        input_queue.FreeTensor(input_local);
    }

    // Processes this block's assigned row range [ir, ir + dr).
    __aicore__ inline void calculate() {
        for (int64_t i = ir; i < ir + dr; i++) {
            calculate_row(i);
        }
    }

   private:
    int64_t input_ne[4];       // input element counts per dim
    size_t input_stride[4];    // input element strides per dim

    int64_t indices_ne[4];     // indices element counts per dim
    size_t indices_stride[4];  // indices element strides per dim

    int64_t output_ne[4];      // output element counts per dim
    size_t output_stride[4];   // output element strides per dim

    size_t local_buffer_elems; // fp32 elements per staged (32B-padded) row

    int64_t ir;                // first row assigned to this block
    int64_t dr;                // number of rows assigned to this block

    TPipe pipe;
    GlobalTensor<float> input_gm;
    GlobalTensor<int32_t> indices_gm;
    GlobalTensor<float> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    int64_t op_block_idx;      // this AI core's block index
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_get_row_f32( | ||||
|     GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, | ||||
|     GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm, | ||||
|     GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t indices_ne_ub[4]; | ||||
|     size_t indices_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|     size_t output_nb_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(indices_ne_gm, indices_ne_ub, 32); | ||||
|     copy_to_ub(indices_nb_gm, indices_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|     copy_to_ub(output_nb_gm, output_nb_ub, 32); | ||||
|  | ||||
|     GET_ROW_F32 op; | ||||
|     op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub, | ||||
|             indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
							
								
								
									
										204
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										204
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,204 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P // 310P not support 4bit get row | ||||
    // Stub for Ascend 310P, which has no 4-bit get-row support. Prints an
    // error instead of aborting so that subsequent test cases can keep
    // running; any test case that calls this operator will of course fail.
    extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
        GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
        GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
        GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
        printf("Ascend310P not support 4bit get row.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
| #define QK4_0 32 | ||||
|  | ||||
// Gathers rows of a Q4_0-quantized source tensor selected by an int32 index
// tensor, dequantizing to fp32 on output. The source buffer holds the int4
// payload first, followed by one fp16 scale per QK4_0-element group (the
// scale block starts at byte offset total_elements / 2, two int4 per byte).
class GET_ROW_Q4_0 {
   public:
    __aicore__ inline GET_ROW_Q4_0() {}
    // `*_ne_ub` hold 4-dim element counts, `*_nb_ub` 4-dim byte strides.
    // Note: no input_nb is passed; input strides are derived from the shape
    // below because the quantized layout is contiguous.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
                                int64_t *input_ne_ub, int64_t *indices_ne_ub,
                                size_t *indices_nb_ub, int64_t *output_ne_ub,
                                size_t *output_nb_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        // Convert byte strides to element strides; nb[0] is the element size.
        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            indices_ne[i] = indices_ne_ub[i];
            indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
            scale_ne[i] = input_ne_ub[i];
            output_ne[i] = output_ne_ub[i];
            output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
        }

        // one scale for a group.
        scale_ne[0] /= QK4_0;

        // Derive contiguous element strides for the input and scale layouts.
        input_stride[0] = 1;
        scale_stride[0] = 1;
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            input_stride[i] = input_stride[i - 1] * input_ne[i - 1];
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        // Byte offset of the fp16 scale block: the int4 payload packs two
        // elements per byte, so it spans total_elements / 2 bytes.
        group_size_in_row = input_ne[0] / QK4_0;
        int64_t scale_offset = input_ne[0] * input_ne[1] * input_ne[2] *
                               input_ne[3] / 2;

        // Indices has two dims. n_elements = all rows should get.
        // dr = number of rows this block handles; ir = its first row index.
        uint64_t n_elements =
            indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
        dr = n_elements / op_block_num;

        // The first `tails` blocks each take one extra row of the remainder.
        uint64_t tails = n_elements % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        input_gm.SetGlobalBuffer((__gm__ int4b_t *)input);
        scale_gm.SetGlobalBuffer((__gm__ half *)(input + scale_offset));
        indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
        output_gm.SetGlobalBuffer((__gm__ float *)output);

        // Buffers hold one quantization group (QK4_0 elements) at a time.
        pipe.InitBuffer(input_queue, BUFFER_NUM, QK4_0 * sizeof(int4b_t));
        pipe.InitBuffer(cast_queue, BUFFER_NUM, QK4_0 * sizeof(half));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK4_0 * sizeof(float));
    }

    // Copies one QK4_0-element int4 group from GM into a local tensor.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<int4b_t> input_local = input_queue.AllocTensor<int4b_t>();
        // 32 * sizeof(int4b_t) = 16, which is not aligned to 32, why no error?
        DataCopy(input_local, input_gm[offset], QK4_0);
        input_queue.EnQue(input_local);
    }

    // Writes one dequantized QK4_0-element fp32 group back to GM.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<float> output_local = output_queue.DeQue<float>();
        DataCopy(output_gm[offset], output_local, QK4_0);
        output_queue.FreeTensor(output_local);
    }

    // Dequantizes one group of one gathered row: decompose flat row index
    // `idx` into indices-tensor coordinates, read the selected source row,
    // then cast int4 -> half -> float and multiply by the group's scale.
    __aicore__ inline void calculate_group(int64_t idx, int64_t group) {
        const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
        const int64_t indices_ne1_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
            indices_ne[0];
        const int64_t indices_ne0_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
             indices_ne1_idx * indices_ne[0]);

        const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
                                       indices_ne1_idx * indices_stride[1] +
                                       indices_ne2_idx * indices_stride[2];
        const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);

        const int64_t input_offset = selected_row_idx * input_stride[1] +
                                     indices_ne1_idx * input_stride[2] +
                                     indices_ne2_idx * input_stride[3] +
                                     group * QK4_0;
        const int64_t scale_offset = selected_row_idx * scale_stride[1] +
                                     indices_ne1_idx * scale_stride[2] +
                                     indices_ne2_idx * scale_stride[3] + group;
        const int64_t output_offset = indices_ne0_idx * output_stride[1] +
                                      indices_ne1_idx * output_stride[2] +
                                      indices_ne2_idx * output_stride[3] +
                                      group * QK4_0;

        copy_in(input_offset);
        LocalTensor<int4b_t> input_local = input_queue.DeQue<int4b_t>();
        LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        // TODO: cast more data to speed up.
        // Two-step cast: int4 -> half, then half -> float.
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
        Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);

        // Only mul need compile by group.
        half scale = scale_gm.GetValue(scale_offset);

        Muls(output_local, output_local, (float)scale, QK4_0);

        input_queue.FreeTensor(input_local);
        cast_queue.FreeTensor(cast_local);
        output_queue.EnQue(output_local);

        copy_out(output_offset);
    }

    // Processes every quantization group of every row in [ir, ir + dr).
    __aicore__ inline void calculate() {
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                calculate_group(i, j);
            }
        }
    }

   private:
    int64_t input_ne[4];       // input element counts per dim
    size_t input_stride[4];    // input element strides per dim

    int64_t scale_ne[4];       // scale counts per dim (ne[0] / QK4_0)
    size_t scale_stride[4];    // scale element strides per dim

    int64_t indices_ne[4];     // indices element counts per dim
    size_t indices_stride[4];  // indices element strides per dim

    int64_t output_ne[4];      // output element counts per dim
    size_t output_stride[4];   // output element strides per dim

    int64_t ir;                // first row assigned to this block
    int64_t dr;                // number of rows assigned to this block

    int64_t group_size_in_row; // quantization groups per row

    TPipe pipe;
    GlobalTensor<int4b_t> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int32_t> indices_gm;
    GlobalTensor<float> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_get_row_q4_0( | ||||
|     GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, | ||||
|     GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm, | ||||
|     GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     int64_t indices_ne_ub[4]; | ||||
|     size_t indices_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|     size_t output_nb_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(indices_ne_gm, indices_ne_ub, 32); | ||||
|     copy_to_ub(indices_nb_gm, indices_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|     copy_to_ub(output_nb_gm, output_nb_ub, 32); | ||||
|  | ||||
|     GET_ROW_Q4_0 op; | ||||
|     op.init(input_gm, indices_gm, output_gm, input_ne_ub, indices_ne_ub, | ||||
|             indices_nb_ub, output_ne_ub, output_nb_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										191
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										191
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,191 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
| #define QK8_0 32 | ||||
|  | ||||
// Dequantizes rows selected by an index tensor from a Q8_0-quantized input
// into a float output (ggml GET_ROWS for Q8_0 on Ascend NPU).
// Q8_0 layout: each group of QK8_0 int8 values shares one half-precision
// scale; the scales are stored contiguously after the int8 payload
// (see scale_offset below).
class GET_ROW_Q8_0 {
   public:
    __aicore__ inline GET_ROW_Q8_0() {}

    // Caches shapes/strides, splits the selected rows across AI cores, and
    // binds the global-memory buffers and local staging queues.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
                                int64_t *input_ne_ub, int64_t *indices_ne_ub,
                                size_t *indices_nb_ub, int64_t *output_ne_ub,
                                size_t *output_nb_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            indices_ne[i] = indices_ne_ub[i];
            // Byte strides (nb) become element strides after dividing by the
            // element size (nb[0]).
            indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
            scale_ne[i] = input_ne_ub[i];
            output_ne[i] = output_ne_ub[i];
            output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
        }

        // one scale for a group.
        scale_ne[0] /= QK8_0;

        // Input and scale tensors are treated as contiguous: strides are
        // derived from the shapes.
        input_stride[0] = 1;
        scale_stride[0] = 1;
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            input_stride[i] = input_stride[i - 1] * input_ne[i - 1];
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        group_size_in_row = input_ne[0] / QK8_0;
        // The half scales start right after the int8 payload in global memory.
        int64_t scale_offset = input_ne[0] * input_ne[1] * input_ne[2] *
                               input_ne[3] * sizeof(int8_t);

        // Indices has two dims. n_elements = all rows should get.
        // dr, all rows should this thread get.
        uint64_t n_elements =
            indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
        dr = n_elements / op_block_num;

        // The first `tails` cores each take one extra row; `ir` is this
        // core's starting flat index into the indices tensor.
        uint64_t tails = n_elements % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        input_gm.SetGlobalBuffer((__gm__ int8_t *)input);
        scale_gm.SetGlobalBuffer((__gm__ half *)(input + scale_offset));
        indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
        output_gm.SetGlobalBuffer((__gm__ float *)output);

        // One group (QK8_0 elements) is staged per queue slot.
        pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
        pipe.InitBuffer(cast_queue, BUFFER_NUM, QK8_0 * sizeof(half));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(float));
    }

    // Stages one quantized group from global memory into the input queue.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<int8_t> input_local = input_queue.AllocTensor<int8_t>();
        DataCopy(input_local, input_gm[offset], QK8_0);
        input_queue.EnQue(input_local);
    }

    // Writes one dequantized float group back to global memory.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<float> output_local = output_queue.DeQue<float>();
        DataCopy(output_gm[offset], output_local, QK8_0);
        output_queue.FreeTensor(output_local);
    }

    // Dequantizes one group of the row selected by flat index `idx`.
    __aicore__ inline void calculate_group(int64_t idx, int64_t group) {
        // Decompose the flat index into 3D indices coordinates.
        const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
        const int64_t indices_ne1_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
            indices_ne[0];
        const int64_t indices_ne0_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
             indices_ne1_idx * indices_ne[0]);

        const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
                                       indices_ne1_idx * indices_stride[1] +
                                       indices_ne2_idx * indices_stride[2];
        // The index tensor holds the source row to gather.
        const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);

        const int64_t input_offset = selected_row_idx * input_stride[1] +
                                     indices_ne1_idx * input_stride[2] +
                                     indices_ne2_idx * input_stride[3] +
                                     group * QK8_0;
        const int64_t scale_offset = selected_row_idx * scale_stride[1] +
                                     indices_ne1_idx * scale_stride[2] +
                                     indices_ne2_idx * scale_stride[3] + group;
        const int64_t output_offset = indices_ne0_idx * output_stride[1] +
                                      indices_ne1_idx * output_stride[2] +
                                      indices_ne2_idx * output_stride[3] +
                                      group * QK8_0;

        copy_in(input_offset);
        LocalTensor<int8_t> input_local = input_queue.DeQue<int8_t>();
        LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        // TODO: cast more data to speed up.
        // Widen int8 -> half -> float, then apply the group's scale.
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK8_0);
        Cast(output_local, cast_local, RoundMode::CAST_NONE, QK8_0);

        // Only mul need compile by group.
        half scale = scale_gm.GetValue(scale_offset);
        Muls(output_local, output_local, (float)scale, QK8_0);

        input_queue.FreeTensor(input_local);
        cast_queue.FreeTensor(cast_local);
        output_queue.EnQue(output_local);

        copy_out(output_offset);
    }

    // Processes every group of every row assigned to this core.
    __aicore__ inline void calculate() {
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                calculate_group(i, j);
            }
        }
    }

   private:
    int64_t input_ne[4];
    size_t input_stride[4];

    int64_t scale_ne[4];
    size_t scale_stride[4];

    int64_t indices_ne[4];
    size_t indices_stride[4];

    int64_t output_ne[4];
    size_t output_stride[4];

    int64_t ir;  // first flat index handled by this core
    int64_t dr;  // number of indices handled by this core

    int64_t group_size_in_row;

    TPipe pipe;
    GlobalTensor<int8_t> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int32_t> indices_gm;
    GlobalTensor<float> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: copies the shape/stride parameter arrays (4 elements of
// 8 bytes each = 32 bytes per array) from global memory into stack buffers,
// then runs the GET_ROW_Q8_0 operator on this core's share of the rows.
extern "C" __global__ __aicore__ void ascendc_get_row_q8_0(
    GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
    GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
    GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
    int64_t input_ne_ub[4];
    int64_t indices_ne_ub[4];
    size_t indices_nb_ub[4];
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
    copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    GET_ROW_Q8_0 op;
    op.init(input_gm, indices_gm, output_gm, input_ne_ub, indices_ne_ub,
            indices_nb_ub, output_ne_ub, output_nb_ub);
    op.calculate();
}
							
								
								
									
										218
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,218 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P | ||||
    // Stub for Ascend 310P, which lacks f16->8bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f16->8bit quantization.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| #define QK8_0 32 | ||||
|  | ||||
// Quantizes a half-precision tensor to Q8_0: each group of QK8_0 values is
// stored as int8 with one shared half scale. The int8 payload fills the
// output tensor; the per-group scales are appended after it in global memory.
class QUANTIZE_F16_Q8_0 {
   public:
    __aicore__ inline QUANTIZE_F16_Q8_0() {}

    // Caches shapes/strides, splits the input rows across AI cores, and
    // binds the global-memory buffers and local staging queues.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR output,
                                int64_t *input_ne_ub, size_t *input_nb_ub,
                                int64_t *output_ne_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            // Byte strides (nb) become element strides after dividing by the
            // element size (nb[0]).
            input_stride[i] = input_nb_ub[i] / input_nb_ub[0];

            output_ne[i] = output_ne_ub[i];
        }

        // Output strides are derived from the shape (contiguous layout).
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
        }

        // One scale per QK8_0-sized group along dim 0.
        scale_ne = input_ne;
        scale_stride[0] = 1;
        scale_stride[1] = input_ne[0] / QK8_0;
        for (int i = 2; i < 4; i++) {
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        // split input tensor by rows.
        uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
        dr = nr / op_block_num;

        // The first `tails` cores take one extra row each; `ir` is this
        // core's first row.
        uint64_t tails = nr % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        group_size_in_row = scale_stride[1];
        int64_t output_size = output_ne[0] * output_ne[1] * output_ne[2] *
                              output_ne[3] * sizeof(uint8_t);

        input_gm.SetGlobalBuffer((__gm__ half *)input);
        output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
        // Scales live right after the int8 payload; each core writes its own
        // slice starting at its first row's group.
        scale_gm.SetGlobalBuffer((__gm__ half *)(output + output_size + ir *
                                                 group_size_in_row *
                                                 sizeof(half)));

        pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(half));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
        pipe.InitBuffer(work_queue, 1, 32);
        pipe.InitBuffer(max_queue, 1, 32);
        pipe.InitBuffer(abs_queue, 1, QK8_0 * sizeof(float));
        pipe.InitBuffer(scale_queue, 1, 32);
        pipe.InitBuffer(cast_queue ,1 ,QK8_0 * sizeof(float));
    }

    // Stages one group of half values from global memory.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<half> input_local = input_queue.AllocTensor<half>();
        DataCopy(input_local, input_gm[offset], QK8_0);
        input_queue.EnQue(input_local);
    }

    // Writes one quantized int8 group back to global memory.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<int8_t> output_local = output_queue.DeQue<int8_t>();
        DataCopy(output_gm[offset], output_local, QK8_0);
        output_queue.FreeTensor(output_local);
    }

    // Quantizes group `group` of row `row`; returns the group's scale.
    __aicore__ inline half calculate_group(int64_t row, int64_t group) {
        // Decompose the flat row index into (i1, i2, i3) coordinates.
        const int64_t i3 = row / (input_ne[1] * input_ne[2]);
        const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
        const int64_t i1 =
            row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];

        const int64_t input_offset = i1 * input_stride[1] +
                                     i2 * input_stride[2] +
                                     i3 * input_stride[3] + QK8_0 * group;

        const int64_t output_offset = i1 * output_stride[1] +
                                      i2 * output_stride[2] +
                                      i3 * output_stride[3] + QK8_0 * group;

        copy_in(input_offset);
        LocalTensor<half> input_local = input_queue.DeQue<half>();
        LocalTensor<int8_t> output_local = output_queue.AllocTensor<int8_t>();
        LocalTensor<float> work_local = work_queue.AllocTensor<float>();
        LocalTensor<float> abs_local = abs_queue.AllocTensor<float>();
        LocalTensor<float> max_local = max_queue.AllocTensor<float>();
        LocalTensor<float> cast_local = cast_queue.AllocTensor<float>();

        // Compute d = max(|x|) / 127 in float precision.
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK8_0);
        Abs(abs_local, cast_local, QK8_0);
        ReduceMax(max_local, abs_local, work_local, QK8_0);

        pipe_barrier(PIPE_ALL);
        float d = max_local.GetValue(0);
        d = d / ((1 << 7) - 1);
        if (d != 0) {
            // Scale values into [-127, 127]; skip if the group is all zeros.
            Muls(cast_local, cast_local, 1.0f / d, QK8_0);
        }

        // Round in float, then narrow float -> half -> int8 (no direct
        // float->int8 cast is used here).
        Cast(cast_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(input_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(output_local, input_local, RoundMode::CAST_ROUND, QK8_0);
        output_queue.EnQue(output_local);
        copy_out(output_offset);

        input_queue.FreeTensor(input_local);
        work_queue.FreeTensor(work_local);
        abs_queue.FreeTensor(abs_local);
        max_queue.FreeTensor(max_local);
        cast_queue.FreeTensor(cast_local);
        return (half)d;
    }

    // Quantizes this core's rows. Scales are buffered 16 at a time (one
    // 32-byte block) and flushed to scale_gm; the tail (<16) is flushed with
    // a byte-exact DataCopyPad.
    __aicore__ inline void calculate() {
        LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
        uint32_t scale_local_offset = 0;
        uint32_t scale_global_offset = 0;
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                half scale = calculate_group(i, j);
                scale_local.SetValue(scale_local_offset++, scale);
                if (scale_local_offset == 16) {
                    scale_local_offset = 0;
                    // TODO: OPTIMIZE ME
                    pipe_barrier(PIPE_ALL);
                    DataCopy(scale_gm[scale_global_offset], scale_local, 16);
                    pipe_barrier(PIPE_ALL);
                    scale_global_offset += 16;
                }
            }
        }

        if (scale_local_offset != 0) {
            pipe_barrier(PIPE_ALL);
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = scale_local_offset * sizeof(half);
            DataCopyPad(scale_gm[scale_global_offset], scale_local,
                        dataCopyParams);
            pipe_barrier(PIPE_ALL);
        }
    }

   private:
    int64_t input_ne[4];
    size_t input_stride[4];

    // Alias of input_ne (the scale tensor shares the input's outer dims).
    int64_t *scale_ne;
    size_t scale_stride[4];

    int64_t output_ne[4];
    size_t output_stride[4];

    int64_t group_size_in_row;

    int64_t ir;  // first row handled by this core
    int64_t dr;  // number of rows handled by this core

    TPipe pipe;
    GlobalTensor<half> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int8_t> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, 1> work_queue;
    TQue<QuePosition::VECOUT, 1> max_queue;
    TQue<QuePosition::VECIN, 1> abs_queue;
    TQue<QuePosition::VECOUT, 1> scale_queue;
    TQue<QuePosition::VECOUT, 1> cast_queue;

};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: copies the shape/stride parameter arrays (4 elements of
// 8 bytes each = 32 bytes per array) into stack buffers, then runs the
// f16 -> Q8_0 quantization operator on this core's share of the rows.
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
    GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
    GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    int64_t output_ne_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);

    QUANTIZE_F16_Q8_0 op;
    op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
    op.calculate();
}
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,216 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P // 310P not support f32->8bit quantization | ||||
    // Stub for Ascend 310P, which lacks f32->8bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f32->8bit quantization.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| #define QK8_0 32 | ||||
|  | ||||
// Quantizes a float tensor to Q8_0: each group of QK8_0 values is stored as
// int8 with one shared half scale. The int8 payload fills the output tensor;
// the per-group scales are appended after it in global memory.
class QUANTIZE_F32_Q8_0 {
   public:
    __aicore__ inline QUANTIZE_F32_Q8_0() {}

    // Caches shapes/strides, splits the input rows across AI cores, and
    // binds the global-memory buffers and local staging queues.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR output,
                                int64_t *input_ne_ub, size_t *input_nb_ub,
                                int64_t *output_ne_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            // Byte strides (nb) become element strides after dividing by the
            // element size (nb[0]).
            input_stride[i] = input_nb_ub[i] / input_nb_ub[0];

            output_ne[i] = output_ne_ub[i];
        }

        // Output strides are derived from the shape (contiguous layout).
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
        }

        // One scale per QK8_0-sized group along dim 0.
        scale_ne = input_ne;
        scale_stride[0] = 1;
        scale_stride[1] = input_ne[0] / QK8_0;
        for (int i = 2; i < 4; i++) {
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        // split input tensor by rows.
        uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
        dr = nr / op_block_num;

        // The first `tails` cores take one extra row each; `ir` is this
        // core's first row.
        uint64_t tails = nr % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        group_size_in_row = scale_stride[1];
        int64_t output_size = output_ne[0] * output_ne[1] * output_ne[2] *
                              output_ne[3] * sizeof(uint8_t);

        input_gm.SetGlobalBuffer((__gm__ float *)input);
        output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
        // Scales live right after the int8 payload; each core writes its own
        // slice starting at its first row's group.
        scale_gm.SetGlobalBuffer((__gm__ half *)(output + output_size +
                                                 ir * group_size_in_row *
                                                 sizeof(half)));

        pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(float));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
        pipe.InitBuffer(work_queue, 1, 32);
        pipe.InitBuffer(max_queue, 1, 32);
        pipe.InitBuffer(abs_queue, 1, QK8_0 * sizeof(float));
        pipe.InitBuffer(cast_queue, 1, QK8_0 * sizeof(half));
        pipe.InitBuffer(scale_queue, 1, 32);
    }

    // Stages one group of float values from global memory.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<float> input_local = input_queue.AllocTensor<float>();
        DataCopy(input_local, input_gm[offset], QK8_0);
        input_queue.EnQue(input_local);
    }

    // Writes one quantized int8 group back to global memory.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<int8_t> output_local = output_queue.DeQue<int8_t>();
        DataCopy(output_gm[offset], output_local, QK8_0);
        output_queue.FreeTensor(output_local);
    }

    // Quantizes group `group` of row `row`; returns the group's scale.
    __aicore__ inline half calculate_group(int64_t row, int64_t group) {
        // Decompose the flat row index into (i1, i2, i3) coordinates.
        const int64_t i3 = row / (input_ne[1] * input_ne[2]);
        const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
        const int64_t i1 =
            row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];

        const int64_t input_offset = i1 * input_stride[1] +
                                     i2 * input_stride[2] +
                                     i3 * input_stride[3] + QK8_0 * group;

        const int64_t output_offset = i1 * output_stride[1] +
                                      i2 * output_stride[2] +
                                      i3 * output_stride[3] + QK8_0 * group;

        copy_in(input_offset);
        LocalTensor<float> input_local = input_queue.DeQue<float>();
        LocalTensor<int8_t> output_local = output_queue.AllocTensor<int8_t>();
        LocalTensor<float> work_local = work_queue.AllocTensor<float>();
        LocalTensor<float> abs_local = abs_queue.AllocTensor<float>();
        LocalTensor<float> max_local = max_queue.AllocTensor<float>();
        LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();

        // Compute d = max(|x|) / 127.
        Abs(abs_local, input_local, QK8_0);
        ReduceMax(max_local, abs_local, work_local, QK8_0);
        pipe_barrier(PIPE_ALL);
        float d = max_local.GetValue(0);
        d = d / ((1 << 7) - 1);
        if (d != 0) {
            // Scale values into [-127, 127]; skip if the group is all zeros.
            Muls(input_local, input_local, 1.0f / d, QK8_0);
        }

        // Round in float, then narrow float -> half -> int8 (no direct
        // float->int8 cast is used here).
        Cast(input_local, input_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(cast_local, input_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(output_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
        output_queue.EnQue(output_local);
        copy_out(output_offset);

        input_queue.FreeTensor(input_local);
        work_queue.FreeTensor(work_local);
        abs_queue.FreeTensor(abs_local);
        max_queue.FreeTensor(max_local);
        cast_queue.FreeTensor(cast_local);

        return (half)d;
    }

    // Quantizes this core's rows. Scales are buffered 16 at a time (one
    // 32-byte block) and flushed to scale_gm; the tail (<16) is flushed with
    // a byte-exact DataCopyPad.
    __aicore__ inline void calculate() {
        LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
        uint32_t scale_local_offset = 0;
        uint32_t scale_global_offset = 0;
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                half scale = calculate_group(i, j);
                scale_local.SetValue(scale_local_offset++, scale);
                if (scale_local_offset == 16) {
                    scale_local_offset = 0;
                    // TODO: OPTIMIZE ME
                    pipe_barrier(PIPE_ALL);
                    DataCopy(scale_gm[scale_global_offset], scale_local, 16);
                    pipe_barrier(PIPE_ALL);
                    scale_global_offset += 16;
                }
            }
        }

        if (scale_local_offset != 0) {
            pipe_barrier(PIPE_ALL);
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = scale_local_offset * sizeof(half);
            DataCopyPad(scale_gm[scale_global_offset], scale_local,
                        dataCopyParams);
            pipe_barrier(PIPE_ALL);
        }
    }

   private:
    int64_t input_ne[4];
    size_t input_stride[4];

    // Alias of input_ne (the scale tensor shares the input's outer dims).
    int64_t *scale_ne;
    size_t scale_stride[4];

    int64_t output_ne[4];
    size_t output_stride[4];

    int64_t group_size_in_row;

    int64_t ir;  // first row handled by this core
    int64_t dr;  // number of rows handled by this core

    TPipe pipe;
    GlobalTensor<float> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int8_t> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, 1> work_queue;
    TQue<QuePosition::VECOUT, 1> max_queue;
    TQue<QuePosition::VECIN, 1> abs_queue;
    TQue<QuePosition::VECIN, 1> cast_queue;
    TQue<QuePosition::VECOUT, 1> scale_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: copies the shape/stride parameter arrays (4 elements of
// 8 bytes each = 32 bytes per array) into stack buffers, then runs the
// f32 -> Q8_0 quantization operator on this core's share of the rows.
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
    GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
    GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    int64_t output_ne_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);

    QUANTIZE_F32_Q8_0 op;
    op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
    op.calculate();
}
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										295
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										295
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,295 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P // 310P not support float->4bit quantization | ||||
    // Stub for Ascend 310P, which lacks f32->4bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f32->4bit quantization.\n");
    }
|  | ||||
    // Stub for Ascend 310P, which lacks f16->4bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f16->4bit quantization.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| #define Group_Size 32 | ||||
|  | ||||
| template <typename SRC_T> | ||||
| class QUANTIZE_FLOAT_TO_Q4_0 { | ||||
|    public: | ||||
|     __aicore__ inline QUANTIZE_FLOAT_TO_Q4_0() {} | ||||
|     __aicore__ inline void init(GM_ADDR input, GM_ADDR output, | ||||
|                                 int64_t *input_ne_ub, size_t *input_nb_ub, | ||||
|                                 int64_t *output_ne_ub) { | ||||
|         // TODO: fix test_case CPY(type_src=f16,type_dst=q4_0,ne=[256,4,4,4], | ||||
|         //                         permute=[0,0,0,0]): | ||||
|         // [CPY] NMSE = 0.000008343 > 0.000001000 FAIL | ||||
|         int64_t op_block_num = GetBlockNum(); | ||||
|         int64_t op_block_idx = GetBlockIdx(); | ||||
|  | ||||
|         // input stride of data elements | ||||
|         for (int i = 0; i < 4; i++) { | ||||
|             input_ne[i] = input_ne_ub[i]; | ||||
|             input_stride[i] = input_nb_ub[i] / input_nb_ub[0]; | ||||
|             output_ne[i] = output_ne_ub[i]; | ||||
|         } | ||||
|  | ||||
|         // output stride of data elements | ||||
|         output_stride[0] = 1; | ||||
|         for (int i = 1; i < 4; i++) { | ||||
|             output_stride[i] = output_stride[i - 1] * output_ne[i - 1]; | ||||
|         } | ||||
|  | ||||
|         // scale saved one by one after data:. [group1_scale, group2_scale, ...] | ||||
|         scale_ne = input_ne; | ||||
|         scale_stride[0] = 1; | ||||
|         scale_stride[1] = input_ne[0] / Group_Size; | ||||
|         for (int i = 2; i < 4; i++) { | ||||
|             scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1]; | ||||
|         } | ||||
|  | ||||
|         // split input tensor by rows. | ||||
|         uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3]; | ||||
|         dr = nr / op_block_num; | ||||
|  | ||||
|         uint64_t tails = nr % op_block_num; | ||||
|         if (op_block_idx < tails) { | ||||
|             dr += 1; | ||||
|             ir = dr * op_block_idx; | ||||
|         } else { | ||||
|             ir = dr * op_block_idx + tails; | ||||
|         } | ||||
|  | ||||
|         group_size_in_row = scale_stride[1]; | ||||
|         int64_t scale_offset = output_ne[0] * output_ne[1] * output_ne[2] * | ||||
|                               output_ne[3] * sizeof(uint8_t) / 2; | ||||
|  | ||||
|         input_gm.SetGlobalBuffer((__gm__ SRC_T *)input); | ||||
|         output_gm.SetGlobalBuffer((__gm__ int8_t *)output); | ||||
|         scale_gm.SetGlobalBuffer((__gm__ half *)(output + scale_offset + ir * | ||||
|                                                  group_size_in_row * | ||||
|                                                  sizeof(half))); | ||||
|  | ||||
|         pipe.InitBuffer(input_queue, BUFFER_NUM, Group_Size * sizeof(SRC_T)); | ||||
|         pipe.InitBuffer(output_queue, BUFFER_NUM, | ||||
|                             Group_Size * sizeof(int8_t) / 2); | ||||
|         pipe.InitBuffer(cast_queue , 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(work_queue, 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(max_queue, 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(min_queue, 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(scale_queue, 1, Group_Size / 2 * sizeof(half)); | ||||
|         pipe.InitBuffer(int8_queue, 1, Group_Size * sizeof(int8_t)); | ||||
|         pipe.InitBuffer(half_queue, 1, Group_Size * sizeof(half)); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_in(uint32_t offset) { | ||||
|         LocalTensor<SRC_T> input_local = input_queue.AllocTensor<SRC_T>(); | ||||
|         DataCopy(input_local, input_gm[offset], Group_Size); | ||||
|         input_queue.EnQue(input_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_out(uint32_t offset) { | ||||
|         // reinterpretcast Group_Size(32) * int4b_t to Group_Size / 2 * int8_t, | ||||
|         // and using DataCopyPad to avoid 32 bits align. | ||||
|         LocalTensor<int4b_t> output_local = output_queue.DeQue<int4b_t>(); | ||||
|         LocalTensor<int8_t> output_int8_local = | ||||
|                                     output_local.ReinterpretCast<int8_t>(); | ||||
|  | ||||
|         DataCopyExtParams dataCopyParams; | ||||
|         dataCopyParams.blockCount = 1; | ||||
|         dataCopyParams.blockLen = Group_Size / 2  * sizeof(int8_t); | ||||
|         DataCopyPad(output_gm[offset], output_int8_local, dataCopyParams); | ||||
|  | ||||
|         output_queue.FreeTensor(output_local); | ||||
|     } | ||||
|  | ||||
    // float source: the data is already float, so a plain copy suffices.
    __aicore__ inline void input_to_cast(LocalTensor<float> cast_local,
                                         LocalTensor<float> input_local) {
        DataCopy(cast_local, input_local, Group_Size);
    }
|  | ||||
    // half source: widen each element to float before further arithmetic.
    __aicore__ inline void input_to_cast(LocalTensor<float> cast_local,
                                         LocalTensor<half> input_local) {
        Cast(cast_local, input_local, RoundMode::CAST_NONE, Group_Size);
    }
|  | ||||
|     __aicore__ inline half calculate_group(int64_t row, int64_t group) { | ||||
|         const int64_t i3 = row / (input_ne[1] * input_ne[2]); | ||||
|         const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1]; | ||||
|         const int64_t i1 = | ||||
|             row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1]; | ||||
|  | ||||
|         const int64_t input_offset = i1 * input_stride[1] + | ||||
|                                      i2 * input_stride[2] + | ||||
|                                      i3 * input_stride[3] + Group_Size * group; | ||||
|  | ||||
|         // output_offset is stride for output_gm which datatype is int8_t and | ||||
|         // divided by 2 is needed for int4b_t. | ||||
|         const int64_t output_offset = (i1 * output_stride[1] + | ||||
|                                        i2 * output_stride[2] + | ||||
|                                        i3 * output_stride[3] + | ||||
|                                        Group_Size * group) / 2; | ||||
|         copy_in(input_offset); | ||||
|  | ||||
|         LocalTensor<SRC_T> input_local = input_queue.DeQue<SRC_T>(); | ||||
|         LocalTensor<int4b_t> output_local = output_queue.AllocTensor<int4b_t>(); | ||||
|         LocalTensor<float> cast_local = cast_queue.AllocTensor<float>(); | ||||
|         LocalTensor<float> work_local = work_queue.AllocTensor<float>(); | ||||
|         LocalTensor<float> max_local = max_queue.AllocTensor<float>(); | ||||
|         LocalTensor<float> min_local = min_queue.AllocTensor<float>(); | ||||
|         LocalTensor<int8_t> int8_local = int8_queue.AllocTensor<int8_t>(); | ||||
|         LocalTensor<half> half_local = half_queue.AllocTensor<half>(); | ||||
|  | ||||
|         input_to_cast(cast_local, input_local); | ||||
|  | ||||
|         ReduceMax(max_local, cast_local, work_local, Group_Size); | ||||
|         ReduceMin(min_local, cast_local, work_local, Group_Size); | ||||
|         const float max_value = max_local.GetValue(0); | ||||
|         const float min_value = min_local.GetValue(0); | ||||
|         float d = max_value; | ||||
|         if (min_value < 0 && (-1 * min_value) > max_value) { | ||||
|             d = min_value; | ||||
|         } | ||||
|  | ||||
|         d = d / (-8); | ||||
|         if (d != 0) { | ||||
|             Muls(cast_local, cast_local, 1.0f / d, Group_Size); | ||||
|         } | ||||
|  | ||||
|         // range: [-8,8] -> [0.5,16.5] -> [0,16] -> [0,15] -> [-8,7] | ||||
|         float scalar = 8.5f; | ||||
|         Adds(cast_local, cast_local, scalar, Group_Size); | ||||
|         Cast(cast_local, cast_local, RoundMode::CAST_FLOOR, Group_Size); | ||||
|         scalar = 15.0f; | ||||
|         Mins(cast_local, cast_local, scalar, Group_Size); | ||||
|         scalar = -8.0f; | ||||
|         Adds(cast_local, cast_local, scalar, Group_Size); | ||||
|  | ||||
|         // float->half->int4b | ||||
|         Cast(half_local, cast_local, RoundMode::CAST_NONE, Group_Size); | ||||
|         Cast(output_local, half_local, RoundMode::CAST_NONE, Group_Size); | ||||
|  | ||||
|         output_queue.EnQue(output_local); | ||||
|         copy_out(output_offset); | ||||
|  | ||||
|         input_queue.FreeTensor(input_local); | ||||
|         work_queue.FreeTensor(work_local); | ||||
|         max_queue.FreeTensor(max_local); | ||||
|         min_queue.FreeTensor(min_local); | ||||
|         int8_queue.FreeTensor(int8_local); | ||||
|         half_queue.FreeTensor(half_local); | ||||
|         cast_queue.FreeTensor(cast_local); | ||||
|         return (half)d; | ||||
|     } | ||||
|  | ||||
    // Quantizes every group in this core's row range [ir, ir + dr) and
    // writes the per-group scales to scale_gm.
    //
    // Scales are accumulated into a local tensor and flushed to global
    // memory Group_Size / 2 entries at a time; a partially filled batch is
    // flushed with DataCopyPad at the end.
    __aicore__ inline void calculate() {
        LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
        uint32_t scale_local_offset = 0;
        uint32_t scale_global_offset = 0;
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                half scale = calculate_group(i, j);
                scale_local.SetValue(scale_local_offset++, scale);
                // Copy Group_Size/2 length data each time.
                if (scale_local_offset == Group_Size / 2) {
                    scale_local_offset = 0;
                    // TODO: OPTIMIZE ME
                    // Full barriers bracket the copy so SetValue writes are
                    // visible before the transfer and the buffer is safe to
                    // reuse afterwards.
                    pipe_barrier(PIPE_ALL);
                    DataCopy(scale_gm[scale_global_offset], scale_local,
                                      Group_Size / 2);
                    pipe_barrier(PIPE_ALL);
                    scale_global_offset += Group_Size / 2;
                }
            }
        }

        // Flush the tail that did not fill a whole Group_Size / 2 batch;
        // DataCopyPad handles the arbitrary (non-32-byte-aligned) length.
        if (scale_local_offset != 0) {
            pipe_barrier(PIPE_ALL);
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = scale_local_offset * sizeof(half);
            DataCopyPad(scale_gm[scale_global_offset], scale_local,
                        dataCopyParams);
            pipe_barrier(PIPE_ALL);
        }
        scale_queue.FreeTensor(scale_local);
    }
|  | ||||
   private:
    int64_t input_ne[4];     // input extents per dimension (elements)
    size_t input_stride[4];  // input strides in elements (derived in init())

    int64_t *scale_ne;       // aliases input_ne (assigned in init())
    size_t scale_stride[4];  // strides of the per-group scale array

    int64_t output_ne[4];    // output extents per dimension (elements)
    size_t output_stride[4]; // contiguous output strides (computed in init())

    int64_t group_size_in_row; // number of Group_Size groups per row

    int64_t ir; // first row handled by this core
    int64_t dr; // number of rows handled by this core

    TPipe pipe;
    GlobalTensor<SRC_T> input_gm;   // source data in global memory
    GlobalTensor<half> scale_gm;    // per-group scales, placed after the quantized data
    GlobalTensor<int8_t> output_gm; // packed int4 output viewed as bytes
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, BUFFER_NUM> work_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> max_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> min_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> scale_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> cast_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> int8_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> half_queue;
| }; | ||||
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0( | ||||
|     GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, | ||||
|     GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|  | ||||
|     QUANTIZE_FLOAT_TO_Q4_0<half> op; | ||||
|     op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0( | ||||
|     GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, | ||||
|     GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|  | ||||
|     QUANTIZE_FLOAT_TO_Q4_0<float> op; | ||||
|     op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										1853
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1853
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										342
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										342
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,342 @@ | ||||
# Builds one CPU backend library variant.
#
# When tag_name is non-empty the library is named ggml-cpu-<tag_name>
# (used when several ISA-specific variants are built); otherwise it is the
# plain ggml-cpu library. Architecture-specific compiler flags and macro
# definitions are accumulated into ARCH_FLAGS / ARCH_DEFINITIONS and applied
# to the target at the end of the function.
function(ggml_add_cpu_backend_variant_impl tag_name)
    if (tag_name)
        set(GGML_CPU_NAME ggml-cpu-${tag_name})
    else()
        set(GGML_CPU_NAME ggml-cpu)
    endif()

    ggml_add_backend_library(${GGML_CPU_NAME})

    list (APPEND GGML_CPU_SOURCES
        ggml-cpu/ggml-cpu.c
        ggml-cpu/ggml-cpu.cpp
        ggml-cpu/ggml-cpu-aarch64.cpp
        ggml-cpu/ggml-cpu-aarch64.h
        ggml-cpu/ggml-cpu-hbm.cpp
        ggml-cpu/ggml-cpu-hbm.h
        ggml-cpu/ggml-cpu-quants.c
        ggml-cpu/ggml-cpu-quants.h
        ggml-cpu/ggml-cpu-traits.cpp
        ggml-cpu/ggml-cpu-traits.h
        ggml-cpu/amx/amx.cpp
        ggml-cpu/amx/amx.h
        ggml-cpu/amx/mmq.cpp
        ggml-cpu/amx/mmq.h
        ggml-cpu/ggml-cpu-impl.h
        )

    target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
    target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

    # Optional acceleration / threading libraries.
    if (APPLE AND GGML_ACCELERATE)
        find_library(ACCELERATE_FRAMEWORK Accelerate)
        if (ACCELERATE_FRAMEWORK)
            message(STATUS "Accelerate framework found")

            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
        else()
            message(WARNING "Accelerate framework not found")
        endif()
    endif()

    if (GGML_OPENMP)
        find_package(OpenMP)
        if (OpenMP_FOUND)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
        else()
            message(WARNING "OpenMP not found")
        endif()
    endif()

    if (GGML_LLAMAFILE)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)

        list(APPEND GGML_CPU_SOURCES
                    ggml-cpu/llamafile/sgemm.cpp
                    ggml-cpu/llamafile/sgemm.h)
    endif()

    if (GGML_CPU_HBM)
        find_library(memkind memkind REQUIRED)

        message(STATUS "Using memkind for CPU HBM")

        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)

        target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
    endif()

    # Per-architecture flag/definition selection.
    if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR
        CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
            CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))

        message(STATUS "ARM detected")

        if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
            message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
        else()
            check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
            if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
                list(APPEND ARCH_FLAGS -mfp16-format=ieee)
            endif()

            if (GGML_NATIVE)
                # -mcpu=native does not always enable all the features in some compilers,
                # so we check for them manually and enable them if available

                execute_process(
                    COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
                    INPUT_FILE "/dev/null"
                    OUTPUT_QUIET
                    ERROR_VARIABLE ARM_MCPU
                    RESULT_VARIABLE ARM_MCPU_RESULT
                )
                if (NOT ARM_MCPU_RESULT)
                    string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
                endif()
                if ("${ARM_MCPU_FLAG}" STREQUAL "")
                    set(ARM_MCPU_FLAG -mcpu=native)
                    message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
                endif()

                include(CheckCXXSourceRuns)

                # Probes one +<tag> ISA extension by compiling AND running a
                # snippet; appends +<tag> or +no<tag> to ARM_MCPU_FLAG_FIX.
                function(check_arm_feature tag code)
                    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
                    set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
                    check_cxx_source_runs(
                        "${code}"
                        GGML_MACHINE_SUPPORTS_${tag}
                    )
                    if (GGML_MACHINE_SUPPORTS_${tag})
                        set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
                    else()
                        set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
                    endif()
                    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
                endfunction()

                check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(i8mm    "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(sve     "#include <arm_sve.h>\nint main()  { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")

                list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
            else()
                if (GGML_CPU_ARM_ARCH)
                    list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
                endif()
            endif()

            # show enabled features
            if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
                set(FEAT_INPUT_FILE "NUL")
            else()
                set(FEAT_INPUT_FILE "/dev/null")
            endif()

            execute_process(
                COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
                INPUT_FILE ${FEAT_INPUT_FILE}
                OUTPUT_VARIABLE ARM_FEATURE
                RESULT_VARIABLE ARM_FEATURE_RESULT
            )
            if (ARM_FEATURE_RESULT)
                message(WARNING "Failed to get ARM features")
            else()
                foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
                    string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
                    if (NOT ${feature_pos} EQUAL -1)
                        message(STATUS "ARM feature ${feature} enabled")
                    endif()
                endforeach()
            endif()
        endif()
    elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
            (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
            CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))

        message(STATUS "x86 detected")

        if (MSVC)
            # instruction set detection for MSVC only
            if (GGML_NATIVE)
                include(ggml-cpu/cmake/FindSIMD.cmake)
            endif ()
            if (GGML_AVX512)
                list(APPEND ARCH_FLAGS /arch:AVX512)
                # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
                # MSVC has no compile-time flags enabling specific
                # AVX512 extensions, neither it defines the
                # macros corresponding to the extensions.
                # Do it manually.
                list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vbmi)
                    endif()
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vnni)
                    endif()
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512bf16)
                    endif()
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
                endif()
            elseif (GGML_AVX2)
                list(APPEND ARCH_FLAGS /arch:AVX2)
                list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
            elseif (GGML_AVX)
                list(APPEND ARCH_FLAGS /arch:AVX)
                list(APPEND ARCH_DEFINITIONS GGML_AVX)
            else ()
                list(APPEND ARCH_FLAGS /arch:SSE4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
            endif()
            if (GGML_AVX_VNNI)
                list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
            endif()
        else ()
            if (GGML_NATIVE)
                list(APPEND ARCH_FLAGS -march=native)
            else ()
                list(APPEND ARCH_FLAGS -msse4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
                if (GGML_F16C)
                    list(APPEND ARCH_FLAGS -mf16c)
                    list(APPEND ARCH_DEFINITIONS GGML_F16C)
                endif()
                if (GGML_FMA)
                    list(APPEND ARCH_FLAGS -mfma)
                    list(APPEND ARCH_DEFINITIONS GGML_FMA)
                endif()
                if (GGML_AVX)
                    list(APPEND ARCH_FLAGS -mavx)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX)
                endif()
                if (GGML_AVX2)
                    list(APPEND ARCH_FLAGS -mavx2)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX2)
                endif()
                if (GGML_AVX_VNNI)
                    list(APPEND ARCH_FLAGS -mavxvnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
                endif()
                if (GGML_AVX512)
                    list(APPEND ARCH_FLAGS -mavx512f)
                    list(APPEND ARCH_FLAGS -mavx512cd)
                    list(APPEND ARCH_FLAGS -mavx512vl)
                    list(APPEND ARCH_FLAGS -mavx512dq)
                    list(APPEND ARCH_FLAGS -mavx512bw)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                endif()
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_FLAGS -mavx512vbmi)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_FLAGS -mavx512vnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_FLAGS -mavx512bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_FLAGS -mamx-tile)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_FLAGS -mamx-int8)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_FLAGS -mamx-bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
                endif()
            endif()
        endif()
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
        message(STATUS "PowerPC detected")
        execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
        if (${POWER_M} MATCHES "POWER10")
            list(APPEND ARCH_FLAGS -mcpu=power10)
        elseif (${POWER_M} MATCHES "POWER9")
            list(APPEND ARCH_FLAGS -mcpu=power9)
        elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
            list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
        else()
            list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
        endif()
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
        message(STATUS "loongarch64 detected")

        list(APPEND ARCH_FLAGS -march=loongarch64)
        if (GGML_LASX)
            list(APPEND ARCH_FLAGS -mlasx)
        endif()
        if (GGML_LSX)
            list(APPEND ARCH_FLAGS -mlsx)
        endif()
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
        message(STATUS "RISC-V detected")
        if (GGML_RVV)
            list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
        endif()
    else()
        message(STATUS "Unknown architecture")
    endif()

    if (GGML_CPU_AARCH64)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
    endif()

    # Apply the accumulated sources, flags and definitions to the target.
    message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
    target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
    target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
    target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})

    if (GGML_BACKEND_DL)
        if (GGML_NATIVE)
            # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
            message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
        endif()

        # The feature detection code is compiled as a separate target so that
        # it can be built without the architecture flags
        # Since multiple variants of the CPU backend may be included in the same
        # build, using set_source_files_properties() to set the arch flags is not possible
        set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
        add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
        target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
        target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
        target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
        set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
        target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
    endif()

    if (EMSCRIPTEN)
        set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
    endif()
endfunction()
							
								
								
									
										220
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,220 @@ | ||||
| #include "amx.h" | ||||
| #include "common.h" | ||||
| #include "mmq.h" | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-impl.h" | ||||
| #include "ggml-cpu.h" | ||||
| #include "ggml-cpu-traits.h" | ||||
|  | ||||
| #if defined(__gnu_linux__) | ||||
| #include <sys/syscall.h> | ||||
| #include <unistd.h> | ||||
| #endif | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
|  | ||||
| #if defined(__AMX_INT8__) && defined(__AVX512VNNI__) | ||||
|  | ||||
| // AMX type_trais | ||||
| namespace ggml::cpu::amx { | ||||
| class tensor_traits : public ggml::cpu::tensor_traits { | ||||
|     bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override { | ||||
|         size = ggml_backend_amx_desired_wsize(op); | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override { | ||||
|         if (op->op == GGML_OP_MUL_MAT) { | ||||
|             ggml_backend_amx_mul_mat(params, op); | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
| }; | ||||
|  | ||||
// Returns the process-wide singleton tensor_traits instance; both arguments
// are ignored because the traits object carries no per-tensor state.
static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) {
    static tensor_traits traits;
    return &traits;
}
| }  // namespace ggml::cpu::amx | ||||
|  | ||||
| // AMX buffer interface | ||||
// Release the allocation backing an AMX buffer (buffer->context is the raw block).
// NOTE(review): the memory comes from ggml_aligned_malloc (see alloc_buffer below);
// confirm plain free() is the matching deallocator on every supported platform.
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}
|  | ||||
// The buffer context *is* the storage: return it as the base address.
static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *) (buffer->context);
}
|  | ||||
// Attach the shared AMX tensor_traits to each tensor placed in this buffer so
// that matmuls on it are routed through the AMX kernels (see compute_forward).
static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);

    GGML_UNUSED(buffer);
}
|  | ||||
// Fill `size` bytes of the tensor's storage, starting at `offset`, with `value`.
static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                                  uint8_t value, size_t offset, size_t size) {
    memset((char *) tensor->data + offset, value, size);

    GGML_UNUSED(buffer);
}
|  | ||||
// Upload data into a tensor stored in an AMX buffer. Quantized types with AMX
// kernels are repacked into the AMX weight layout on the way in
// (ggml_backend_amx_convert_weight); everything else is a plain byte copy.
static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                               const void * data, size_t offset, size_t size) {
    if (qtype_has_amx_kernels(tensor->type)) {
        GGML_LOG_DEBUG("%s: amx repack tensor %s of type %s\n", __func__, tensor->name, ggml_type_name(tensor->type));
        ggml_backend_amx_convert_weight(tensor, data, offset, size);
    } else {
        memcpy((char *) tensor->data + offset, data, size);
    }

    GGML_UNUSED(buffer);
}
|  | ||||
| /* | ||||
| // need to figure what we need to do with buffer->extra. | ||||
| static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { | ||||
|     GGML_ASSERT(!qtype_has_amx_kernels(tensor->type)); | ||||
|     memcpy(data, (const char *)tensor->data + offset, size); | ||||
|  | ||||
|     GGML_UNUSED(buffer); | ||||
| } | ||||
|  | ||||
| static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) { | ||||
|     if (ggml_backend_buffer_is_host(src->buffer)) { | ||||
|         if (qtype_has_amx_kernels(src->type)) { | ||||
|             ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_nbytes(dst)); | ||||
|         } else { | ||||
|             memcpy(dst->data, src->data, ggml_nbytes(src)); | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|     return false; | ||||
|  | ||||
|     GGML_UNUSED(buffer); | ||||
| } | ||||
| */ | ||||
|  | ||||
// Set the entire buffer contents to `value`.
static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    memset(buffer->context, value, buffer->size);
}
|  | ||||
// vtable for AMX buffers. get_tensor/cpy_tensor are nullptr: weights may be
// stored in a repacked layout (see set_tensor), so a raw byte copy out of the
// buffer is not generally valid (see the commented-out implementations above).
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer     = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base        = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor     = */ ggml_backend_amx_buffer_init_tensor,
    /* .memset_tensor   = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor      = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor      = */ nullptr,
    /* .cpy_tensor      = */ nullptr,
    /* .clear           = */ ggml_backend_amx_buffer_clear,
    /* .reset           = */ nullptr,
};
|  | ||||
// Human-readable name of this buffer type.
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "AMX";

    GGML_UNUSED(buft);
}
|  | ||||
// Allocate an aligned host block of `size` bytes and wrap it in a buffer that
// uses the AMX interface. Logs to stderr and returns NULL on allocation failure.
static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = ggml_aligned_malloc(size);
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
}
|  | ||||
// Required allocation alignment for this buffer type.
static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    return TENSOR_ALIGNMENT;

    GGML_UNUSED(buft);
}
|  | ||||
namespace ggml::cpu::amx {
// Decides which ops may take the AMX path and hands back the traits object
// stored on the weight tensor.
class extra_buffer_type : ggml::cpu::extra_buffer_type {
    bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
        // handle only 2d gemm for now
        auto is_contiguous_2d = [](const struct ggml_tensor * t) {
            return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
        };

        if (op->op == GGML_OP_MUL_MAT && is_contiguous_2d(op->src[0]) &&  // src0 must be contiguous
            is_contiguous_2d(op->src[1]) &&                               // src1 must be contiguous
            op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_amx_buffer_type() &&
            op->ne[0] % (TILE_N * 2) == 0 &&                              // out_features is 32x
            (qtype_has_amx_kernels(op->src[0]->type) || (op->src[0]->type == GGML_TYPE_F16))) {
            // src1 must be host buffer
            if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
                return false;
            }
            // src1 must be float32
            if (op->src[1]->type == GGML_TYPE_F32) {
                return true;
            }
        }
        return false;
    }

    // Return the AMX tensor_traits stashed in src0->extra (by init_tensor)
    // when the weight lives in an AMX buffer; nullptr otherwise.
    ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
        if (op->op == GGML_OP_MUL_MAT && op->src[0]->buffer &&
            op->src[0]->buffer->buft == ggml_backend_amx_buffer_type()) {
            return (ggml::cpu::tensor_traits *) op->src[0]->extra;
        }

        return nullptr;
    }
};
}  // namespace ggml::cpu::amx
|  | ||||
// Ask the AMX backend how many bytes `tensor` needs inside this buffer type
// (the repacked layout may require a different size than ggml_nbytes).
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
    return ggml_backend_amx_get_alloc_size(tensor);

    GGML_UNUSED(buft);
}
|  | ||||
| #define ARCH_GET_XCOMP_PERM     0x1022 | ||||
| #define ARCH_REQ_XCOMP_PERM     0x1023 | ||||
| #define XFEATURE_XTILECFG       17 | ||||
| #define XFEATURE_XTILEDATA      18 | ||||
|  | ||||
| static bool ggml_amx_init() { | ||||
| #if defined(__gnu_linux__) | ||||
|     if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) { | ||||
|         fprintf(stderr, "AMX is not ready to be used!\n"); | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| #elif defined(_WIN32) | ||||
|     return true; | ||||
| #endif | ||||
| } | ||||
|  | ||||
// Accessor for the AMX weight-buffer type (function-local static singleton).
// ggml_amx_init() is consulted on every call; when the OS refuses AMX this
// returns nullptr and callers treat the buffer type as unavailable.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
                        /* .get_name         = */ ggml_backend_amx_buffer_type_get_name,
                        /* .alloc_buffer     = */ ggml_backend_amx_buffer_type_alloc_buffer,
                        /* .get_alignment    = */ ggml_backend_amx_buffer_type_get_alignment,
                        /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
                        /* .get_alloc_size   = */ ggml_backend_amx_buffer_type_get_alloc_size,
                        /* .is_host          = */ nullptr,
                        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::amx::extra_buffer_type(),
    };

    if (!ggml_amx_init()) {
        return nullptr;
    }

    return &ggml_backend_buffer_type_amx;
}
|  | ||||
| #endif  // defined(__AMX_INT8__) && defined(__AVX512VNNI__) | ||||
							
								
								
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-cpu-impl.h" | ||||
|  | ||||
| // GGML internal header | ||||
|  | ||||
| #if defined(__AMX_INT8__) && defined(__AVX512VNNI__) | ||||
| ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void); | ||||
| #endif | ||||
							
								
								
									
										91
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-cpu-impl.h" | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <memory> | ||||
| #include <type_traits> | ||||
|  | ||||
| #if defined(GGML_USE_OPENMP) | ||||
| #include <omp.h> | ||||
| #endif | ||||
|  | ||||
| #define TILE_M 16 | ||||
| #define TILE_N 16 | ||||
| #define TILE_K 32 | ||||
| #define VNNI_BLK 4 | ||||
|  | ||||
| #define AMX_BLK_SIZE 32 | ||||
|  | ||||
| #define TMM0 0 | ||||
| #define TMM1 1 | ||||
| #define TMM2 2 | ||||
| #define TMM3 3 | ||||
| #define TMM4 4 | ||||
| #define TMM5 5 | ||||
| #define TMM6 6 | ||||
| #define TMM7 7 | ||||
|  | ||||
| // parallel routines | ||||
// Ceiling division for integral types: number of y-sized chunks covering x.
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T div_up(T x, T y) {
    const T biased = x + y - 1;  // round up before truncating division
    return biased / y;
}
|  | ||||
// Split n work items across nth workers; worker ith receives [n_start, n_end).
// Two partitioning schemes are kept: the onednn pattern (disabled, #if 0) and
// the pytorch/aten pattern (active): fixed ceil(n/nth)-sized chunks, with the
// final worker(s) possibly getting a short or empty range.
template <typename T>
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
#if 0
    // onednn partition pattern
    T& n_my = n_end;
    if (nth <= 1 || n == 0) {
        n_start = 0;
        n_my = n;
    } else {
        T n1 = div_up(n, nth);
        T n2 = n1 - 1;
        T T1 = n - n2 * nth;
        n_my = ith < T1 ? n1 : n2;
        n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2;
    }
    n_end += n_start;
#else
    // pytorch aten partition pattern
    T n_my = div_up(n, nth);
    n_start = ith * n_my;
    n_end = std::min(n_start + n_my, n);
#endif
}
|  | ||||
// Run f(begin, end) over [0, n): split across OpenMP threads when compiled
// with GGML_USE_OPENMP, otherwise executed as one chunk on the calling thread.
template <typename func_t>
inline void parallel_for(int n, const func_t& f) {
#if defined(GGML_USE_OPENMP)
#pragma omp parallel
{
    int nth = omp_get_num_threads();
    int ith = omp_get_thread_num();
    int tbegin, tend;
    balance211(n, nth, ith, tbegin, tend);
    f(tbegin, tend);
}
#else
    f(0, n);
#endif
}
|  | ||||
// Same range splitting as parallel_for, but uses the thread count (nth) and
// thread index (ith) supplied by ggml's compute params -- the threads are
// already running, so no thread pool is spawned here.
template <typename func_t>
inline void parallel_for_ggml(const ggml_compute_params * params, int n, const func_t & f) {
    int tbegin, tend;
    balance211(n, params->nth, params->ith, tbegin, tend);
    f(tbegin, tend);
}
|  | ||||
| // quantized types that have AMX support | ||||
| inline bool qtype_has_amx_kernels(const enum ggml_type type) { | ||||
|     // TODO: fix padding for vnni format | ||||
|     return (type == GGML_TYPE_Q4_0) || | ||||
|         (type == GGML_TYPE_Q4_1) || | ||||
|         (type == GGML_TYPE_Q8_0) || | ||||
|         (type == GGML_TYPE_Q4_K) || | ||||
|         (type == GGML_TYPE_Q5_K) || | ||||
|         (type == GGML_TYPE_Q6_K) || | ||||
|         (type == GGML_TYPE_IQ4_XS); | ||||
| } | ||||
							
								
								
									
										2511
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2511
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
#pragma once
#include "common.h"

// Internal API of the AMX matmul kernels (implemented in mmq.cpp).

// Scratch/work-buffer size the AMX matmul requires for the op `dst`.
size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst);

// Number of bytes needed to store `tensor` in the AMX buffer type.
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);

// Repack incoming weight bytes [offset, offset+size) into the tensor's AMX layout.
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

// Execute the GGML_OP_MUL_MAT op `dst` using the AMX kernels.
void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);
							
								
								
									
										100
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | ||||
# Detect x86 SIMD support by compiling AND running small test programs on the
# host, then set the corresponding GGML_* options. The flag lists passed to
# check_sse below are MSVC /arch options (see the note above the calls).
include(CheckCSourceRuns)

set(AVX_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 a;
        a = _mm256_set1_ps(0);
        return 0;
    }
")

set(AVX512_CODE "
    #include <immintrin.h>
    int main()
    {
        __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0);
        __m512i b = a;
        __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
        return 0;
    }
")

# NOTE(review): 'x' below is deliberately used to probe _mm256_extract_epi64
# support, but it is read uninitialized at run time -- confirm upstream intent.
set(AVX2_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256i a = {0};
        a = _mm256_abs_epi16(a);
        __m256i x;
        _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
        return 0;
    }
")

set(FMA_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 acc = _mm256_setzero_ps();
        const __m256 d = _mm256_setzero_ps();
        const __m256 p = _mm256_setzero_ps();
        acc = _mm256_fmadd_ps( d, p, acc );
        return 0;
    }
")

# check_sse(<type> <flags>): try each flag in <flags> (a ';'-separated list;
# the leading empty entry means "no extra flag") until ${type}_CODE runs.
# Caches ${type}_FOUND and the first working flag in ${type}_FLAGS.
macro(check_sse type flags)
    set(__FLAG_I 1)
    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
    foreach (__FLAG ${flags})
        if (NOT ${type}_FOUND)
            set(CMAKE_REQUIRED_FLAGS ${__FLAG})
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
            if (HAS_${type}_${__FLAG_I})
                set(${type}_FOUND TRUE CACHE BOOL "${type} support")
                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
            endif()
            math(EXPR __FLAG_I "${__FLAG_I}+1")
        endif()
    endforeach()
    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

    if (NOT ${type}_FOUND)
        set(${type}_FOUND FALSE CACHE BOOL "${type} support")
        set(${type}_FLAGS "" CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
endmacro()

# flags are for MSVC only!
check_sse("AVX" " ;/arch:AVX")
if (NOT ${AVX_FOUND})
    set(GGML_AVX OFF)
else()
    set(GGML_AVX ON)
endif()

# GGML_AVX2 requires both the AVX2 and FMA probes to succeed.
check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
    set(GGML_AVX2 OFF)
else()
    set(GGML_AVX2 ON)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (NOT ${AVX512_FOUND})
    set(GGML_AVX512 OFF)
else()
    set(GGML_AVX512 ON)
endif()
							
								
								
									
										323
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										323
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,323 @@ | ||||
| #include "ggml-backend-impl.h" | ||||
|  | ||||
| #if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) | ||||
|  | ||||
| #ifdef _MSC_VER | ||||
| #include <intrin.h> | ||||
| #endif | ||||
|  | ||||
| #include <cstring> | ||||
| #include <vector> | ||||
| #include <bitset> | ||||
| #include <array> | ||||
| #include <string> | ||||
|  | ||||
| // ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf | ||||
// Decodes x86 CPUID feature flags. Each accessor tests one bit of the cached
// leaf registers; vendor-specific flags are additionally gated on is_intel /
// is_amd because the same bit means different things across vendors.
struct cpuid_x86 {
    // leaf 1, ECX
    bool SSE3(void) { return f_1_ecx[0]; }
    bool PCLMULQDQ(void) { return f_1_ecx[1]; }
    bool MONITOR(void) { return f_1_ecx[3]; }
    bool SSSE3(void) { return f_1_ecx[9]; }
    bool FMA(void) { return f_1_ecx[12]; }
    bool CMPXCHG16B(void) { return f_1_ecx[13]; }
    bool SSE41(void) { return f_1_ecx[19]; }
    bool SSE42(void) { return f_1_ecx[20]; }
    bool MOVBE(void) { return f_1_ecx[22]; }
    bool POPCNT(void) { return f_1_ecx[23]; }
    bool AES(void) { return f_1_ecx[25]; }
    bool XSAVE(void) { return f_1_ecx[26]; }
    bool OSXSAVE(void) { return f_1_ecx[27]; }
    bool AVX(void) { return f_1_ecx[28]; }
    bool F16C(void) { return f_1_ecx[29]; }
    bool RDRAND(void) { return f_1_ecx[30]; }

    // leaf 1, EDX
    bool MSR(void) { return f_1_edx[5]; }
    bool CX8(void) { return f_1_edx[8]; }
    bool SEP(void) { return f_1_edx[11]; }
    bool CMOV(void) { return f_1_edx[15]; }
    bool CLFSH(void) { return f_1_edx[19]; }
    bool MMX(void) { return f_1_edx[23]; }
    bool FXSR(void) { return f_1_edx[24]; }
    bool SSE(void) { return f_1_edx[25]; }
    bool SSE2(void) { return f_1_edx[26]; }

    // leaf 7 (subleaf 0), EBX
    bool FSGSBASE(void) { return f_7_ebx[0]; }
    bool BMI1(void) { return f_7_ebx[3]; }
    bool HLE(void) { return is_intel && f_7_ebx[4]; }
    bool AVX2(void) { return f_7_ebx[5]; }
    bool BMI2(void) { return f_7_ebx[8]; }
    bool ERMS(void) { return f_7_ebx[9]; }
    bool INVPCID(void) { return f_7_ebx[10]; }
    bool RTM(void) { return is_intel && f_7_ebx[11]; }
    bool AVX512F(void) { return f_7_ebx[16]; }
    bool AVX512DQ(void) { return f_7_ebx[17]; }
    bool RDSEED(void) { return f_7_ebx[18]; }
    bool ADX(void) { return f_7_ebx[19]; }
    bool AVX512PF(void) { return f_7_ebx[26]; }
    bool AVX512ER(void) { return f_7_ebx[27]; }
    bool AVX512CD(void) { return f_7_ebx[28]; }
    bool AVX512BW(void) { return f_7_ebx[30]; }
    bool AVX512VL(void) { return f_7_ebx[31]; }

    bool SHA(void) { return f_7_ebx[29]; }

    // leaf 7 (subleaf 0), ECX
    bool PREFETCHWT1(void) { return f_7_ecx[0]; }

    // extended leaf 0x80000001, ECX (vendor-dependent bits)
    bool LAHF(void) { return f_81_ecx[0]; }
    bool LZCNT(void) { return is_intel && f_81_ecx[5]; }
    bool ABM(void) { return is_amd && f_81_ecx[5]; }
    bool SSE4a(void) { return is_amd && f_81_ecx[6]; }
    bool XOP(void) { return is_amd && f_81_ecx[11]; }
    bool TBM(void) { return is_amd && f_81_ecx[21]; }

    // extended leaf 0x80000001, EDX (vendor-dependent bits)
    bool SYSCALL(void) { return is_intel && f_81_edx[11]; }
    bool MMXEXT(void) { return is_amd && f_81_edx[22]; }
    bool RDTSCP(void) { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT(void) { return is_amd && f_81_edx[30]; }
    bool _3DNOW(void) { return is_amd && f_81_edx[31]; }

    // leaf 7 subleaf 0 ECX/EDX and subleaf 1 EAX: AVX-512 extensions and AMX
    bool AVX512_VBMI(void) { return f_7_ecx[1]; }
    bool AVX512_VNNI(void) { return f_7_ecx[11]; }
    bool AVX512_FP16(void) { return f_7_edx[23]; }
    bool AVX512_BF16(void) { return f_7_1_eax[5]; }
    bool AVX_VNNI(void) { return f_7_1_eax[4]; }

    bool AMX_TILE(void) { return f_7_edx[24]; }
    bool AMX_INT8(void) { return f_7_edx[25]; }
    bool AMX_FP16(void) { return f_7_1_eax[21]; }
    bool AMX_BF16(void) { return f_7_edx[22]; }

#ifdef _MSC_VER
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    // GCC/Clang: inline-asm CPUID; cpuid() fixes the subleaf (ECX) at 0.
    static void cpuid(int cpu_info[4], int eax) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    // Queries all basic and extended CPUID leaves once and caches the flag
    // registers in the bitsets below.
    cpuid_x86() {
        std::array<int, 4> cpui;
        std::vector<std::array<int, 4>> data;

        // calling __cpuid with 0x0 as the function_id argument
        // gets the number of the highest valid function ID.
        cpuid(cpui.data(), 0);
        int n_ids = cpui[0];

        for (int i = 0; i <= n_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            data.push_back(cpui);
        }

        // capture vendor string (12 chars packed into EBX, EDX, ECX of leaf 0)
        char vendor[0x20] = {};
        *reinterpret_cast<int *>(vendor)     = data[0][1];
        *reinterpret_cast<int *>(vendor + 4) = data[0][3];
        *reinterpret_cast<int *>(vendor + 8) = data[0][2];
        this->vendor = vendor;
        if (this->vendor == "GenuineIntel") {
            is_intel = true;
        } else if (this->vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // load bitset with flags for function 0x00000001
        if (n_ids >= 1) {
            f_1_ecx = data[1][2];
            f_1_edx = data[1][3];
        }

        // load bitset with flags for function 0x00000007
        if (n_ids >= 7) {
            f_7_ebx = data[7][1];
            f_7_ecx = data[7][2];
            f_7_edx = data[7][3];
            cpuidex(cpui.data(), 7, 1);
            f_7_1_eax = cpui[0];
        }

        // calling __cpuid with 0x80000000 as the function_id argument
        // gets the number of the highest valid extended ID.
        cpuid(cpui.data(), 0x80000000);
        unsigned int n_ex_ids = cpui[0];

        std::vector<std::array<int, 4>> ext_data;
        for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            ext_data.push_back(cpui);
        }

        // load bitset with flags for function 0x80000001
        if (n_ex_ids >= 0x80000001) {
            f_81_ecx = ext_data[1][2];
            f_81_edx = ext_data[1][3];
        }

        // interpret CPU brand string if reported (leaves 0x80000002..4)
        char brand[0x40] = {};
        if (n_ex_ids >= 0x80000004) {
            std::memcpy(brand, ext_data[2].data(), sizeof(cpui));
            std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui));
            std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui));
            this->brand = brand;
        }
    }

    bool is_intel = false;
    bool is_amd = false;
    std::string vendor;
    std::string brand;
    std::bitset<32> f_1_ecx;    // leaf 1 ECX
    std::bitset<32> f_1_edx;    // leaf 1 EDX
    std::bitset<32> f_7_ebx;    // leaf 7.0 EBX
    std::bitset<32> f_7_ecx;    // leaf 7.0 ECX
    std::bitset<32> f_7_edx;    // leaf 7.0 EDX
    std::bitset<32> f_7_1_eax;  // leaf 7.1 EAX
    std::bitset<32> f_81_ecx;   // leaf 0x80000001 ECX
    std::bitset<32> f_81_edx;   // leaf 0x80000001 EDX
};
|  | ||||
| #if 0 | ||||
| void test_x86_is() { | ||||
|     cpuid_x86 is; | ||||
|     printf("CPU Vendor: %s\n", is.vendor.c_str()); | ||||
|     printf("Brand: %s\n", is.brand.c_str()); | ||||
|     printf("is_intel: %d\n", is.is_intel); | ||||
|     printf("is_amd: %d\n", is.is_amd); | ||||
|     printf("sse3: %d\n", is.SSE3()); | ||||
|     printf("pclmulqdq: %d\n", is.PCLMULQDQ()); | ||||
|     printf("ssse3: %d\n", is.SSSE3()); | ||||
|     printf("fma: %d\n", is.FMA()); | ||||
|     printf("cmpxchg16b: %d\n", is.CMPXCHG16B()); | ||||
|     printf("sse41: %d\n", is.SSE41()); | ||||
|     printf("sse42: %d\n", is.SSE42()); | ||||
|     printf("movbe: %d\n", is.MOVBE()); | ||||
|     printf("popcnt: %d\n", is.POPCNT()); | ||||
|     printf("aes: %d\n", is.AES()); | ||||
|     printf("xsave: %d\n", is.XSAVE()); | ||||
|     printf("osxsave: %d\n", is.OSXSAVE()); | ||||
|     printf("avx: %d\n", is.AVX()); | ||||
|     printf("f16c: %d\n", is.F16C()); | ||||
|     printf("rdrand: %d\n", is.RDRAND()); | ||||
|     printf("msr: %d\n", is.MSR()); | ||||
|     printf("cx8: %d\n", is.CX8()); | ||||
|     printf("sep: %d\n", is.SEP()); | ||||
|     printf("cmov: %d\n", is.CMOV()); | ||||
|     printf("clflush: %d\n", is.CLFSH()); | ||||
|     printf("mmx: %d\n", is.MMX()); | ||||
|     printf("fxsr: %d\n", is.FXSR()); | ||||
|     printf("sse: %d\n", is.SSE()); | ||||
|     printf("sse2: %d\n", is.SSE2()); | ||||
|     printf("fsgsbase: %d\n", is.FSGSBASE()); | ||||
|     printf("bmi1: %d\n", is.BMI1()); | ||||
|     printf("hle: %d\n", is.HLE()); | ||||
|     printf("avx2: %d\n", is.AVX2()); | ||||
|     printf("bmi2: %d\n", is.BMI2()); | ||||
|     printf("erms: %d\n", is.ERMS()); | ||||
|     printf("invpcid: %d\n", is.INVPCID()); | ||||
|     printf("rtm: %d\n", is.RTM()); | ||||
|     printf("avx512f: %d\n", is.AVX512F()); | ||||
|     printf("rdseed: %d\n", is.RDSEED()); | ||||
|     printf("adx: %d\n", is.ADX()); | ||||
|     printf("avx512pf: %d\n", is.AVX512PF()); | ||||
|     printf("avx512er: %d\n", is.AVX512ER()); | ||||
|     printf("avx512cd: %d\n", is.AVX512CD()); | ||||
|     printf("sha: %d\n", is.SHA()); | ||||
|     printf("prefetchwt1: %d\n", is.PREFETCHWT1()); | ||||
|     printf("lahf: %d\n", is.LAHF()); | ||||
|     printf("lzcnt: %d\n", is.LZCNT()); | ||||
|     printf("abm: %d\n", is.ABM()); | ||||
|     printf("sse4a: %d\n", is.SSE4a()); | ||||
|     printf("xop: %d\n", is.XOP()); | ||||
|     printf("tbm: %d\n", is.TBM()); | ||||
|     printf("syscall: %d\n", is.SYSCALL()); | ||||
|     printf("mmxext: %d\n", is.MMXEXT()); | ||||
|     printf("rdtscp: %d\n", is.RDTSCP()); | ||||
|     printf("3dnowext: %d\n", is._3DNOWEXT()); | ||||
|     printf("3dnow: %d\n", is._3DNOW()); | ||||
|     printf("avx512_vbmi: %d\n", is.AVX512_VBMI()); | ||||
|     printf("avx512_vnni: %d\n", is.AVX512_VNNI()); | ||||
|     printf("avx512_fp16: %d\n", is.AVX512_FP16()); | ||||
|     printf("avx512_bf16: %d\n", is.AVX512_BF16()); | ||||
|     printf("amx_tile: %d\n", is.AMX_TILE()); | ||||
|     printf("amx_int8: %d\n", is.AMX_INT8()); | ||||
|     printf("amx_fp16: %d\n", is.AMX_FP16()); | ||||
|     printf("amx_bf16: %d\n", is.AMX_BF16()); | ||||
| } | ||||
| #endif | ||||
|  | ||||
// Score this CPU against the instruction-set extensions the binary was
// compiled with. Returns 0 as soon as any compiled-in requirement is missing;
// otherwise accumulates one bit per supported feature (more advanced features
// occupy higher bits), so the backend loader can pick the best variant.
static int ggml_backend_cpu_x86_score() {
    // FIXME: this does not check for OS support

    int score = 0;
    cpuid_x86 is;

#ifdef GGML_FMA
    if (!is.FMA()) { return 0; }
    score += 1;
#endif
#ifdef GGML_F16C
    if (!is.F16C()) { return 0; }
    score += 1<<1;
#endif
#ifdef GGML_SSE42
    if (!is.SSE42()) { return 0; }
    score += 1<<2;
#endif
#ifdef GGML_AVX
    if (!is.AVX()) { return 0; }
    score += 1<<4;
#endif
#ifdef GGML_AVX2
    if (!is.AVX2()) { return 0; }
    score += 1<<5;
#endif
#ifdef GGML_AVX_VNNI
    if (!is.AVX_VNNI()) { return 0; }
    score += 1<<6;
#endif
#ifdef GGML_AVX512
    // AVX-512 requires the full F/CD/VL/DQ/BW baseline used by the kernels.
    if (!is.AVX512F()) { return 0; }
    if (!is.AVX512CD()) { return 0; }
    if (!is.AVX512VL()) { return 0; }
    if (!is.AVX512DQ()) { return 0; }
    if (!is.AVX512BW()) { return 0; }
    score += 1<<7;
#endif
#ifdef GGML_AVX512_VBMI
    if (!is.AVX512_VBMI()) { return 0; }
    score += 1<<8;
#endif
#ifdef GGML_AVX512_BF16
    if (!is.AVX512_BF16()) { return 0; }
    score += 1<<9;
#endif
#ifdef GGML_AVX512_VNNI
    if (!is.AVX512_VNNI()) { return 0; }
    score += 1<<10;
#endif
#ifdef GGML_AMX_INT8
    if (!is.AMX_INT8()) { return 0; }
    score += 1<<11;
#endif

    return score;
}
|  | ||||
| GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score) | ||||
|  | ||||
| #endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) | ||||
							
								
								
									
										4247
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4247
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
#pragma once

#include "ggml-cpu-traits.h"
#include "ggml.h"

// GGML internal header

// Accessor for the aarch64 CPU backend's buffer type singleton; the
// definition lives in ggml-cpu-aarch64.cpp.
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
							
								
								
									
										55
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| #ifdef GGML_USE_CPU_HBM | ||||
|  | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-cpu.h" | ||||
| #include "ggml-impl.h" | ||||
|  | ||||
| #include "ggml-cpu-hbm.h" | ||||
|  | ||||
| // buffer type HBM | ||||
|  | ||||
| #include <hbwmalloc.h> | ||||
|  | ||||
| static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) { | ||||
|     return "CPU_HBM"; | ||||
|  | ||||
|     GGML_UNUSED(buft); | ||||
| } | ||||
|  | ||||
| static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) { | ||||
|     hbw_free(buffer->context); | ||||
| } | ||||
|  | ||||
| static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, | ||||
|                                                                            size_t                     size) { | ||||
|     void * ptr; | ||||
|     int    result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size); | ||||
|     if (result != 0) { | ||||
|         GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size); | ||||
|     buffer->buft                 = buft; | ||||
|     buffer->iface.free_buffer    = ggml_backend_cpu_hbm_buffer_free_buffer; | ||||
|  | ||||
|     return buffer; | ||||
| } | ||||
|  | ||||
| ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) { | ||||
|     static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = { | ||||
|         /* .iface    = */ { | ||||
|                            /* .get_name         = */ ggml_backend_cpu_hbm_buffer_type_get_name, | ||||
|                            /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer, | ||||
|                            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment, | ||||
|                            /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX | ||||
|                            /* .get_alloc_size   = */ nullptr,  // defaults to ggml_nbytes | ||||
|                            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host, | ||||
|                            }, | ||||
|         /* .context  = */ nullptr, | ||||
|     }; | ||||
|  | ||||
|     return &ggml_backend_cpu_buffer_type_hbm; | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
#pragma once

#include "ggml-backend.h"
#include "ggml.h"

// GGML CPU internal header

// Buffer type backed by high-bandwidth memory; the implementation in
// ggml-cpu-hbm.cpp is compiled only when GGML_USE_CPU_HBM is defined.
ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user