You've already forked joplin
							
							
				mirror of
				https://github.com/laurent22/joplin.git
				synced 2025-10-31 00:07:48 +02:00 
			
		
		
		
	Android: Switch default library used for Whisper voice typing (#11881)
This commit is contained in:
		| @@ -811,12 +811,11 @@ packages/app-mobile/services/e2ee/crypto.js | ||||
| packages/app-mobile/services/plugins/PlatformImplementation.js | ||||
| packages/app-mobile/services/profiles/index.js | ||||
| packages/app-mobile/services/voiceTyping/VoiceTyping.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.test.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.android.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.android.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.test.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.js | ||||
| packages/app-mobile/setupQuickActions.js | ||||
| packages/app-mobile/tools/buildInjectedJs/BundledFile.js | ||||
|   | ||||
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -786,12 +786,11 @@ packages/app-mobile/services/e2ee/crypto.js | ||||
| packages/app-mobile/services/plugins/PlatformImplementation.js | ||||
| packages/app-mobile/services/profiles/index.js | ||||
| packages/app-mobile/services/voiceTyping/VoiceTyping.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.test.js | ||||
| packages/app-mobile/services/voiceTyping/utils/splitWhisperText.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.android.js | ||||
| packages/app-mobile/services/voiceTyping/utils/unzip.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.android.js | ||||
| packages/app-mobile/services/voiceTyping/vosk.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.test.js | ||||
| packages/app-mobile/services/voiceTyping/whisper.js | ||||
| packages/app-mobile/setupQuickActions.js | ||||
| packages/app-mobile/tools/buildInjectedJs/BundledFile.js | ||||
|   | ||||
| @@ -33,6 +33,7 @@ | ||||
| 		"/packages/app-desktop/build/", | ||||
| 		"/packages/app-desktop/utils/checkForUpdatesUtilsTestData.ts", | ||||
| 		"/packages/app-desktop/vendor/", | ||||
| 		"/packages/app-mobile/android/vendor/", | ||||
| 		"/packages/app-mobile/ios/Pods/", | ||||
| 		"/packages/app-mobile/lib/rnInjectedJs", | ||||
| 		"/packages/app-mobile/pluginAssets", | ||||
|   | ||||
| @@ -70,6 +70,13 @@ def enableProguardInReleaseBuilds = false | ||||
| def jscFlavor = 'org.webkit:android-jsc:+' | ||||
|  | ||||
| android { | ||||
|  | ||||
|     externalNativeBuild { | ||||
|         cmake { | ||||
|             path file('src/main/cpp/CMakeLists.txt') | ||||
|             version '3.22.1' | ||||
|         } | ||||
|     } | ||||
|     ndkVersion rootProject.ext.ndkVersion | ||||
|     buildToolsVersion rootProject.ext.buildToolsVersion | ||||
|     compileSdk rootProject.ext.compileSdkVersion | ||||
| @@ -81,12 +88,17 @@ android { | ||||
|         targetSdkVersion rootProject.ext.targetSdkVersion | ||||
| 		versionCode 2097764 | ||||
| 		versionName "3.3.1" | ||||
| 		ndk { | ||||
| 			abiFilters "armeabi-v7a", "x86", "arm64-v8a", "x86_64" | ||||
| 		} | ||||
|         ndk { | ||||
|             abiFilters "armeabi-v7a", "x86", "arm64-v8a", "x86_64" | ||||
|         } | ||||
|  | ||||
|         // Needed to fix: The number of method references in a .dex file cannot exceed 64K | ||||
|         multiDexEnabled true | ||||
|         externalNativeBuild { | ||||
|             cmake { | ||||
|                 cppFlags '-DCMAKE_BUILD_TYPE=Release' | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     signingConfigs { | ||||
|         debug { | ||||
| @@ -95,14 +107,14 @@ android { | ||||
|             keyAlias 'androiddebugkey' | ||||
|             keyPassword 'android' | ||||
|         } | ||||
| 		release { | ||||
| 			if (project.hasProperty('JOPLIN_RELEASE_STORE_FILE')) { | ||||
| 				storeFile file(JOPLIN_RELEASE_STORE_FILE) | ||||
| 				storePassword JOPLIN_RELEASE_STORE_PASSWORD | ||||
| 				keyAlias JOPLIN_RELEASE_KEY_ALIAS | ||||
| 				keyPassword JOPLIN_RELEASE_KEY_PASSWORD | ||||
| 			} | ||||
| 		} | ||||
|         release { | ||||
|             if (project.hasProperty('JOPLIN_RELEASE_STORE_FILE')) { | ||||
|                 storeFile file(JOPLIN_RELEASE_STORE_FILE) | ||||
|                 storePassword JOPLIN_RELEASE_STORE_PASSWORD | ||||
|                 keyAlias JOPLIN_RELEASE_KEY_ALIAS | ||||
|                 keyPassword JOPLIN_RELEASE_KEY_PASSWORD | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     buildTypes { | ||||
|         debug { | ||||
| @@ -127,10 +139,6 @@ dependencies { | ||||
|     } else { | ||||
|         implementation jscFlavor | ||||
|     } | ||||
|  | ||||
|     // Needed for Whisper speech-to-text | ||||
|     implementation 'com.microsoft.onnxruntime:onnxruntime-android:latest.release' | ||||
|     implementation 'com.microsoft.onnxruntime:onnxruntime-extensions-android:latest.release' | ||||
| } | ||||
|  | ||||
| apply from: file("../../node_modules/@react-native-community/cli-platform-android/native_modules.gradle"); applyNativeModulesAppBuildGradle(project) | ||||
|   | ||||
							
								
								
									
										64
									
								
								packages/app-mobile/android/app/src/main/cpp/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								packages/app-mobile/android/app/src/main/cpp/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
|  | ||||
# CMake build for the native Whisper voice-typing library.
# See https://d.android.com/studio/projects/add-native-code.html and
# https://github.com/android/ndk-samples for background on CMake + Android Studio.

cmake_minimum_required(VERSION 3.22.1)

# The project name is available as ${CMAKE_PROJECT_NAME} in this top-level
# script; it is also the name passed to System.loadLibrary() on the Java/Kotlin
# side (and, for NativeActivity apps, in AndroidManifest.xml).
project("joplin")

# Shared library packaged into the APK by Gradle.
add_library(${CMAKE_PROJECT_NAME} SHARED
	# C/C++ sources, relative to this CMakeLists.txt.
	whisperWrapper.cpp
	utils/WhisperSession.cpp
	utils/findLongestSilence.cpp
	# NOTE(review): test code is compiled into the production library —
	# confirm this is intentional.
	utils/findLongestSilence_test.cpp
)

# Vendored whisper.cpp checkout.
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../vendor/whisper.cpp)

# Optimization/visibility flags based on the whisper.cpp Android example:
set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -O3 ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections")

# Build the out-of-tree whisper.cpp sources (explicit binary dir required;
# see https://stackoverflow.com/a/76290722):
add_subdirectory(${WHISPER_LIB_DIR} ./whisper)

# Header search paths.
# NOTE(review): ${PROJECT_BASE_DIR} is not defined in this file — verify the
# including environment sets it.
target_include_directories(
	${CMAKE_PROJECT_NAME}
	PUBLIC
	${PROJECT_BASE_DIR}/shared
	${WHISPER_LIB_DIR}/include
)

# Libraries linked into the target: whisper.cpp plus the Android system
# libraries needed for logging.
target_link_libraries(${CMAKE_PROJECT_NAME}
	whisper
	android
	log
)
| @@ -0,0 +1,154 @@ | ||||
| #include "WhisperSession.h" | ||||
|  | ||||
| #include <utility> | ||||
| #include <sstream> | ||||
| #include <algorithm> | ||||
| #include "whisper.h" | ||||
| #include "findLongestSilence.h" | ||||
| #include "androidUtil.h" | ||||
|  | ||||
| WhisperSession::WhisperSession(const std::string& modelPath, std::string lang, std::string prompt) | ||||
| 	: lang_ {std::move(lang)}, prompt_ {std::move(prompt)} { | ||||
| 	whisper_context_params contextParams = whisper_context_default_params(); | ||||
|  | ||||
| 	// Lifetime(pModelPath): Whisper.cpp creates a copy of pModelPath and stores it in a std::string. | ||||
| 	// whisper_init_from_file_with_params doesn't seem to otherwise save pModelPath. As such, it's | ||||
| 	// safe to pass a pointer to a std::string's representation: | ||||
| 	const char *pModelPath = modelPath.c_str(); | ||||
| 	pContext_ = whisper_init_from_file_with_params(pModelPath, contextParams); | ||||
|  | ||||
| 	if (pContext_ == nullptr) { | ||||
| 		throw std::runtime_error("Unable to initialize the Whisper context."); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| WhisperSession::~WhisperSession() { | ||||
| 	if (pContext_ != nullptr) { | ||||
| 		whisper_free(pContext_); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| whisper_full_params | ||||
| WhisperSession::buildWhisperParams_() { | ||||
| 	whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); | ||||
| 	// WHISPER_SAMPLING_BEAM_SEARCH is an alternative to greedy: | ||||
| 	// params.beam_search = { .beam_size = 2 }; | ||||
| 	params.print_realtime = false; | ||||
|     // Disable timestamps: They make creating custom Whisper models more difficult: | ||||
| 	params.print_timestamps = false; | ||||
|     params.no_timestamps = true; | ||||
|  | ||||
| 	params.print_progress = false; | ||||
| 	params.translate = false; | ||||
| 	params.offset_ms = 0; | ||||
| 	params.single_segment = true; | ||||
| 	// Avoid non-speech tokens (e.g. "(crackle)"). For now, this is disabled because it seems to | ||||
| 	// cause increased hallucinations (e.g. repeated "Thank you"s). | ||||
| 	// params.suppress_nst = true; | ||||
| 	params.temperature = 0; // Initial randomness | ||||
| 	// There's also a temperature_inc variable, which is used when decoding fails (Whisper increases | ||||
| 	// the temperature by temperature_inc and retries). | ||||
|  | ||||
| 	// Following the whisper streaming example in setting prompt_tokens to nullptr | ||||
| 	// when using VAD (Voice Activity Detection) | ||||
| 	params.initial_prompt = prompt_.c_str(); | ||||
| 	params.prompt_tokens = nullptr; | ||||
| 	params.prompt_n_tokens = 0; | ||||
|  | ||||
| 	// Lifetime: lifetime(params) < lifetime(lang_) = lifetime(this). | ||||
| 	params.language = lang_.c_str(); | ||||
|  | ||||
| 	return params; | ||||
| } | ||||
|  | ||||
| std::string | ||||
| WhisperSession::transcribe_(const std::vector<float>& audio, size_t transcribeCount) { | ||||
| 	int minTranscribeLength = WHISPER_SAMPLE_RATE / 2; // 0.5s | ||||
| 	if (transcribeCount < minTranscribeLength) { | ||||
| 		return ""; | ||||
| 	} | ||||
|  | ||||
| 	whisper_full_params params = buildWhisperParams_(); | ||||
| 	whisper_reset_timings(pContext_); | ||||
|  | ||||
| 	transcribeCount = std::min(audio.size(), transcribeCount); | ||||
|  | ||||
| 	if (whisper_full(pContext_, params, audio.data(), transcribeCount) != 0) { | ||||
| 		throw std::runtime_error("Failed to run Whisper (non-zero exit status)."); | ||||
| 	} else { | ||||
| 		whisper_print_timings(pContext_); | ||||
| 	} | ||||
|  | ||||
| 	// Tokens to be used as a prompt for the next run of Whisper | ||||
| 	unsigned int segmentCount = whisper_full_n_segments(pContext_); | ||||
|  | ||||
| 	// Build the results | ||||
| 	std::stringstream results; | ||||
| 	for (int i = 0; i < segmentCount; i++) { | ||||
| 		results << " " << whisper_full_get_segment_text(pContext_, i); | ||||
| 	} | ||||
|  | ||||
| 	std::string result = results.str(); | ||||
| 	LOGD("Transcribed: %s (audio len %.2f)", result.c_str(), audio.size() / (float) WHISPER_SAMPLE_RATE); | ||||
|  | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| std::string | ||||
| WhisperSession::splitAndTranscribeBefore_(int transcribeUpTo, int trimTo) { | ||||
| 	std::string result = transcribe_(audioBuffer_, transcribeUpTo); | ||||
|  | ||||
| 	// Trim | ||||
| 	LOGI("Trim to %.2f s, transcribe to %.2f s", (float) trimTo / WHISPER_SAMPLE_RATE, (float) transcribeUpTo / WHISPER_SAMPLE_RATE); | ||||
| 	audioBuffer_ = std::vector(audioBuffer_.begin() + trimTo, audioBuffer_.end()); | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| std::string | ||||
| WhisperSession::transcribeNextChunk(const float *pAudio, int sizeAudio) { | ||||
| 	std::string finalizedContent; | ||||
|  | ||||
| 	// Update the local audio buffer | ||||
| 	for (int i = 0; i < sizeAudio; i++) { | ||||
| 		audioBuffer_.push_back(pAudio[i]); | ||||
| 	} | ||||
|  | ||||
| 	// Does the audio buffer need to be split somewhere? | ||||
| 	int maximumSamples = WHISPER_SAMPLE_RATE * 25; | ||||
| 	if (audioBuffer_.size() >= maximumSamples) { | ||||
| 		float minSilenceSeconds = 0.3f; | ||||
| 		auto silenceRange = findLongestSilence( | ||||
| 			audioBuffer_, WHISPER_SAMPLE_RATE, minSilenceSeconds, maximumSamples | ||||
| 		); | ||||
|  | ||||
| 		// In this case, the audio is long enough that it needs to be split somewhere. If there's | ||||
| 		// no suitable pause available, default to splitting in the middle. | ||||
| 		int halfBufferSize = audioBuffer_.size() / 2; | ||||
| 		int transcribeTo = silenceRange.isValid ? silenceRange.start : halfBufferSize; | ||||
| 		int trimTo = silenceRange.isValid ? silenceRange.end : halfBufferSize; | ||||
|  | ||||
| 		finalizedContent = splitAndTranscribeBefore_(transcribeTo, trimTo); | ||||
| 	} else if (audioBuffer_.size() > WHISPER_SAMPLE_RATE * 3) { | ||||
| 		// Allow brief pauses to create new paragraphs: | ||||
| 		float minSilenceSeconds = 2.0f; | ||||
| 		auto splitPoint = findLongestSilence( | ||||
| 			audioBuffer_, WHISPER_SAMPLE_RATE, minSilenceSeconds, maximumSamples | ||||
| 		); | ||||
| 		if (splitPoint.isValid) { | ||||
| 			int tolerance = WHISPER_SAMPLE_RATE / 20; // 0.05s | ||||
| 			bool isCompletelySilent = splitPoint.start < tolerance && splitPoint.end > audioBuffer_.size() - tolerance; | ||||
| 			if (isCompletelySilent) { | ||||
| 				audioBuffer_.clear(); | ||||
| 			} else { | ||||
| 				finalizedContent = splitAndTranscribeBefore_(splitPoint.start, splitPoint.end); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	previewText_ = transcribe_(audioBuffer_, audioBuffer_.size()); | ||||
| 	return finalizedContent; | ||||
| } | ||||
|  | ||||
| std::string WhisperSession::getPreview() { | ||||
| 	return previewText_; | ||||
| } | ||||
| @@ -0,0 +1,27 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <string> | ||||
| #include "whisper.h" | ||||
|  | ||||
| class WhisperSession { | ||||
| public: | ||||
| 	WhisperSession(const std::string& modelPath, std::string lang, std::string prompt); | ||||
| 	~WhisperSession(); | ||||
| 	std::string transcribeNextChunk(const float *pAudio, int sizeAudio); | ||||
| 	std::string getPreview(); | ||||
|  | ||||
| private: | ||||
| 	// Current preview state | ||||
| 	std::string previewText_; | ||||
|  | ||||
| 	whisper_full_params buildWhisperParams_(); | ||||
| 	std::string transcribe_(const std::vector<float>& audio, size_t samplesToTranscribe); | ||||
| 	std::string splitAndTranscribeBefore_(int transcribeUpTo, int trimTo); | ||||
|  | ||||
| 	whisper_context *pContext_; | ||||
| 	const std::string lang_; | ||||
| 	const std::string prompt_; | ||||
|  | ||||
| 	std::vector<float> audioBuffer_; | ||||
| }; | ||||
|  | ||||
| @@ -0,0 +1,10 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <android/log.h> | ||||
|  | ||||
| // Use macros for these rather than functions. Functions generate a "may be unsafe" | ||||
| // warning because the compiler can't check that the first argument is a string | ||||
| // literal. | ||||
| #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, "Whisper::JNI", __VA_ARGS__); | ||||
| #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "Whisper::JNI", __VA_ARGS__); | ||||
| #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "Whisper::JNI", __VA_ARGS__); | ||||
| @@ -0,0 +1,111 @@ | ||||
| #include "findLongestSilence.h" | ||||
| #include "androidUtil.h" | ||||
|  | ||||
// First-order high-pass filter, in place: y[i] = α·y[i-1] + α·(x[i] - x[i-1]).
// See https://en.wikipedia.org/wiki/High-pass_filter and the example in
// whisper.cpp/streaming.
static void highpass(std::vector<float>& data, int sampleRate) {
	const float cutoffHz = 60.0f;
	const float RC = 1.0f / (2 * 3.1416f * cutoffHz);
	const float timePerSample = 1.0f / sampleRate;
	const float alpha = RC / (RC + timePerSample);

	float previousInput = data[0];
	for (size_t i = 1; i < data.size(); i++) {
		const float input = data[i];
		// data[i - 1] already holds the filtered value y[i-1] at this point.
		data[i] = alpha * data[i - 1] + alpha * (input - previousInput);
		previousInput = input;
	}
}
|  | ||||
| SilenceRange findLongestSilence( | ||||
| 	const std::vector<float>& audioData, | ||||
| 	int sampleRate, | ||||
| 	float minSilenceLengthSeconds, | ||||
| 	int maxSilencePosition | ||||
| ) { | ||||
| 	int bestCandidateLength = 0; | ||||
| 	int bestCandidateStart = -1; | ||||
| 	int bestCandidateEnd = -1; | ||||
|  | ||||
| 	int currentCandidateStart = -1; | ||||
|  | ||||
| 	std::vector<float> processedAudio { audioData }; | ||||
| 	highpass(processedAudio, sampleRate); | ||||
|  | ||||
| 	// Break into windows of size `windowSize`: | ||||
| 	int windowSize = 256; | ||||
| 	int windowsPerSecond = sampleRate / windowSize; | ||||
| 	int quietWindows = 0; | ||||
|  | ||||
| 	// Finishes the current candidate for longest silence | ||||
| 	auto finalizeCandidate = [&] (int currentOffset) { | ||||
| 		bool hasCandidate = currentCandidateStart >= 0; | ||||
| 		if (!hasCandidate) { | ||||
| 			return; | ||||
| 		} | ||||
|  | ||||
| 		int currentCandidateLength = currentOffset - currentCandidateStart; | ||||
| 		if (currentCandidateLength > bestCandidateLength && currentCandidateStart <= maxSilencePosition) { | ||||
| 			bestCandidateLength = currentCandidateLength; | ||||
| 			bestCandidateStart = currentCandidateStart; | ||||
| 			bestCandidateEnd = currentOffset; | ||||
| 			LOGD("New best candidate with length %d", currentCandidateLength); | ||||
| 		} | ||||
|  | ||||
| 		currentCandidateStart = -1; | ||||
| 	}; | ||||
|  | ||||
| 	int windowOffset; | ||||
| 	for (windowOffset = 0; windowOffset < processedAudio.size() && windowOffset <= maxSilencePosition; windowOffset += windowSize) { | ||||
| 		int rollingAverageSize = 24; | ||||
| 		float threshold = static_cast<float>(rollingAverageSize) / 80.0f; | ||||
|  | ||||
| 		// Count the number of samples that (when averaged with the nearby samples) | ||||
| 		// are below some threshold value. | ||||
| 		float absSum = 0; | ||||
| 		int silentSamples = 0; | ||||
| 		for (int i = windowOffset; i < windowOffset + windowSize && i < processedAudio.size(); i++) { | ||||
| 			absSum += abs(processedAudio[i]); | ||||
|  | ||||
| 			bool isSumComplete = i - rollingAverageSize >= windowOffset; | ||||
| 			if (isSumComplete) { | ||||
| 				absSum -= abs(processedAudio[i - rollingAverageSize]); | ||||
|  | ||||
| 				if (absSum < threshold) { | ||||
| 					silentSamples++; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// The window should be considered "quiet" if enough samples were below the threshold. | ||||
| 		// Don't require all of them to be to allow clicks and pops. | ||||
| 		if (silentSamples >= windowSize * 3 / 4) { | ||||
| 			quietWindows ++; | ||||
| 		} else { | ||||
| 			quietWindows = 0; | ||||
| 		} | ||||
|  | ||||
| 		int minQuietWindows = static_cast<int>(windowsPerSecond * minSilenceLengthSeconds); | ||||
| 		if (quietWindows >= minQuietWindows && currentCandidateStart == -1) { | ||||
| 			// Found a candidate. Start it. | ||||
| 			currentCandidateStart = windowOffset; | ||||
| 		} else if (quietWindows == 0) { | ||||
| 			// Ended a candidate. Is it better than the best? | ||||
| 			finalizeCandidate(windowOffset); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	finalizeCandidate(windowOffset); | ||||
|  | ||||
| 	// Return the best candidate. | ||||
| 	if (bestCandidateLength == 0) { | ||||
| 		return { .isValid = false, .start = 0, .end = 0 }; | ||||
| 	} else { | ||||
| 		return { | ||||
| 			.isValid=true, | ||||
| 			.start=bestCandidateStart, | ||||
| 			.end=bestCandidateEnd | ||||
| 		}; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -0,0 +1,24 @@ | ||||
#pragma once

#include <vector>
// NOTE(review): <optional> and <tuple> appear unused in this header — confirm
// no includer relies on them before removing.
#include <optional>
#include <tuple>

// Result of findLongestSilence: start/end are sample offsets into the input.
// When no qualifying silence exists, isValid is false and start/end are 0.
struct SilenceRange {
	bool isValid;
	int start;
	int end;
};

// Locates the longest quiet region in mono float audio.
SilenceRange findLongestSilence(
	const std::vector<float>& audioData,
	int sampleRate,

	// Minimum length of silence in seconds
	float minSilenceLengthSeconds,

	// Doesn't check for silence at a position greater than maxSilencePosition.
	// (Renamed from maximumSilenceStart to match the definition's parameter name.)
	int maxSilencePosition
);
|  | ||||
|  | ||||
| @@ -0,0 +1,169 @@ | ||||
| #include "findLongestSilence_test.h" | ||||
| #include "findLongestSilence.h" | ||||
| #include "androidUtil.h" | ||||
|  | ||||
| #include <string> | ||||
| #include <vector> | ||||
| #include <sstream> | ||||
| #include <cmath> | ||||
| #include <random> | ||||
|  | ||||
| static void testTones(); | ||||
| static void testToneWithPause(); | ||||
| static void testSilence(); | ||||
| static void testNoise(); | ||||
|  | ||||
| static void fail(const std::string& message); | ||||
|  | ||||
| struct GeneratedAudio { | ||||
| 	std::vector<float> data; | ||||
| 	int sampleRate; | ||||
| 	int sampleCount; | ||||
| }; | ||||
|  | ||||
| using AudioGenerator = std::function<const float(float)>; | ||||
| static GeneratedAudio makeAudio(const AudioGenerator& generator, int sampleRate, float duration); | ||||
| static void expectNoSilence(const GeneratedAudio& audio, const std::string& testLabel); | ||||
| static void expectSilenceBetween(const GeneratedAudio& audio, float startTimeSeconds, float stopTimeSeconds, const std::string& testLabel); | ||||
|  | ||||
|  | ||||
| void findLongestSilence_test() { | ||||
| 	testTones(); | ||||
| 	testToneWithPause(); | ||||
| 	testSilence(); | ||||
| 	testNoise(); | ||||
| } | ||||
|  | ||||
|  | ||||
| static void testTones() { | ||||
| 	for (int frequency = 440; frequency < 1600; frequency += 300) { | ||||
| 		std::stringstream messageBuilder; | ||||
| 		messageBuilder << "Should not find silence in tone with frequency " << frequency << " HZ."; | ||||
|  | ||||
| 		auto audioTone = makeAudio([frequency](float t) { | ||||
| 			// Also set the amplitude to 0.2f (to more closely match mic input). | ||||
| 			return std::sin(t * static_cast<float>(frequency)) * 0.2f; | ||||
| 		}, 15000, 10.0f); | ||||
|  | ||||
| 		expectNoSilence(audioTone, messageBuilder.str()); | ||||
| 	} | ||||
|  | ||||
| 	auto lowFrequencyTone = makeAudio([](float t) { | ||||
| 		return std::sin(t * 8) * 0.3f; | ||||
| 	}, 15000, 10.0f); | ||||
| 	expectSilenceBetween(lowFrequencyTone, 0.0f, 10.0f, "Should find silence in a very low-frequency tone"); | ||||
| } | ||||
|  | ||||
| static void testToneWithPause() { | ||||
| 	auto audioToneWithPause = makeAudio([](float t) { | ||||
| 		if (t < 5.0f || t > 6.0f) { | ||||
| 			return std::sin(t * 880); | ||||
| 		} else { | ||||
| 			return 0.0f; | ||||
| 		} | ||||
| 	}, 15000, 11.0f); | ||||
| 	expectSilenceBetween(audioToneWithPause, 5.0f, 6.0f, "Should find silence when completely silent in a region"); | ||||
|  | ||||
| 	auto audioToneWithTwoPauses = makeAudio([](float t) { | ||||
| 		if (t < 1.0f || (t > 8.0f && t < 10.0f)) { | ||||
| 			return 0.0f; | ||||
| 		} else { | ||||
| 			return std::sin(t * 880); | ||||
| 		} | ||||
| 	}, 15000, 20.0f); | ||||
| 	expectSilenceBetween(audioToneWithPause, 5.0f, 6.0f, "Should find silence when completely silent in a region"); | ||||
| } | ||||
|  | ||||
| static void testSilence() { | ||||
| 	auto silence = makeAudio([](float t) { | ||||
| 		return 0.0f; | ||||
| 	}, 16000, 10.0f); | ||||
| 	expectSilenceBetween(silence, 0.0f, 10.0f, "Should find silence in a completely silent signal"); | ||||
| } | ||||
|  | ||||
| static void testNoise() { | ||||
| 	std::minstd_rand randomness {2}; | ||||
| 	std::uniform_real_distribution noiseGenerator {-1.0, 1.0}; | ||||
| 	auto quietNoise = makeAudio([&](float t) { | ||||
| 		return noiseGenerator(randomness) * 0.02f; | ||||
| 	}, 16000, 5.0f); | ||||
| 	expectSilenceBetween(quietNoise, 0.0f, 5.0f, "Should find silence in a tone with low-amplitude noise"); | ||||
| } | ||||
|  | ||||
|  | ||||
// Aborts the current test by throwing; message describes the failure.
static void fail(const std::string& message) {
	throw std::runtime_error {message};
}
|  | ||||
| static GeneratedAudio makeAudio(const AudioGenerator& generator, int sampleRate, float duration) { | ||||
| 	std::vector<float> result { }; | ||||
|  | ||||
| 	int numSamples = static_cast<int>(static_cast<float>(sampleRate) * duration); | ||||
| 	for (int i = 0; i < numSamples; i++) { | ||||
| 		float time = static_cast<float>(i) / static_cast<float>(sampleRate); | ||||
| 		result.push_back(generator(time)); | ||||
| 	} | ||||
|  | ||||
| 	return { | ||||
| 		.data=result, | ||||
| 		.sampleRate=sampleRate, | ||||
| 		.sampleCount=numSamples, | ||||
| 	}; | ||||
| } | ||||
|  | ||||
| static void logTestPass(const std::string& message) { | ||||
| 	LOGI("Test PASS: %s", message.c_str()); | ||||
| } | ||||
|  | ||||
// Converts a sample offset to seconds at the given sample rate.
static float samplesToSeconds(int samples, int sampleRate) {
	return (float) samples / (float) sampleRate;
}
|  | ||||
| static void expectNoSilence(const GeneratedAudio& audio, const std::string& testLabel) { | ||||
| 	auto silence = findLongestSilence( | ||||
| 			audio.data, | ||||
| 			audio.sampleRate, | ||||
| 			0.02f, | ||||
| 			audio.sampleCount | ||||
| 	); | ||||
| 	if (silence.isValid) { | ||||
| 		std::stringstream errorBuilder; | ||||
| 		float startSeconds = samplesToSeconds(silence.start, audio.sampleRate); | ||||
| 		float stopSeconds = samplesToSeconds(silence.end, audio.sampleRate); | ||||
| 		errorBuilder << "Error: Found silence between " << startSeconds << "s and " << stopSeconds << "s"; | ||||
| 		errorBuilder << ": " << testLabel; | ||||
| 		fail(errorBuilder.str()); | ||||
| 	} | ||||
|  | ||||
| 	logTestPass(testLabel); | ||||
| } | ||||
|  | ||||
| static void expectSilenceBetween(const GeneratedAudio& audio, float startTimeSeconds, float stopTimeSeconds, const std::string& testLabel) { | ||||
| 	auto silenceResult = findLongestSilence( | ||||
| 			audio.data, | ||||
| 			audio.sampleRate, | ||||
| 			0.02f, | ||||
| 			audio.sampleCount | ||||
| 	); | ||||
|  | ||||
| 	if (!silenceResult.isValid) { | ||||
| 		fail("Error: No silence found: " + testLabel); | ||||
| 	} | ||||
|  | ||||
| 	auto checkEndpoint = [&] (int actualValueSamples, float expectedValueSeconds, const std::string& description) { | ||||
| 		float actualValueSeconds = samplesToSeconds(actualValueSamples, audio.sampleRate); | ||||
| 		float tolerance = 0.1f; // 100ms | ||||
| 		if (std::abs(expectedValueSeconds - actualValueSeconds) > tolerance) { | ||||
| 			std::stringstream messageBuilder; | ||||
| 			messageBuilder << "Error: Silence " << description << " mismatch: "; | ||||
| 			messageBuilder << "got " << actualValueSeconds << "s expected " << expectedValueSeconds << "s. "; | ||||
| 			messageBuilder << testLabel; | ||||
| 			fail(messageBuilder.str()); | ||||
| 		} | ||||
| 	}; | ||||
|  | ||||
| 	checkEndpoint(silenceResult.start, startTimeSeconds, "start time"); | ||||
| 	checkEndpoint(silenceResult.end, stopTimeSeconds, "stop time"); | ||||
|  | ||||
| 	logTestPass(testLabel); | ||||
| } | ||||
| @@ -0,0 +1,3 @@ | ||||
| #pragma once | ||||
|  | ||||
| void findLongestSilence_test(); | ||||
							
								
								
									
										125
									
								
								packages/app-mobile/android/app/src/main/cpp/whisperWrapper.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								packages/app-mobile/android/app/src/main/cpp/whisperWrapper.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,125 @@ | ||||
| // Write C++ code here. | ||||
| // | ||||
| // Do not forget to dynamically load the C++ library into your application. | ||||
| // | ||||
| // For instance, | ||||
| // | ||||
| // In MainActivity.java: | ||||
| //    static { | ||||
| //       System.loadLibrary("joplin"); | ||||
| //    } | ||||
| // | ||||
| // Or, in MainActivity.kt: | ||||
| //    companion object { | ||||
| //      init { | ||||
| //         System.loadLibrary("joplin") | ||||
| //      } | ||||
| //    } | ||||
| #include <jni.h> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <sstream> | ||||
| #include <android/log.h> | ||||
| #include "whisper.h" | ||||
| #include "utils/WhisperSession.h" | ||||
| #include "utils/androidUtil.h" | ||||
| #include "utils/findLongestSilence_test.h" | ||||
|  | ||||
| void log_android(enum ggml_log_level level, const char* message, void* user_data) { | ||||
| 	android_LogPriority priority = level == 4 ? ANDROID_LOG_ERROR : ANDROID_LOG_INFO; | ||||
| 	__android_log_print(priority, "Whisper::JNI::cpp", "%s", message); | ||||
| } | ||||
|  | ||||
| jstring stringToJava(JNIEnv *env, const std::string& source) { | ||||
| 	return env->NewStringUTF(source.c_str()); | ||||
| } | ||||
|  | ||||
| std::string stringToCXX(JNIEnv *env, jstring jString) { | ||||
| 	const char *jStringChars = env->GetStringUTFChars(jString, nullptr); | ||||
| 	std::string result { jStringChars }; | ||||
| 	env->ReleaseStringUTFChars(jString, jStringChars); | ||||
|  | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| void throwException(JNIEnv *env, const std::string& message) { | ||||
| 	jclass errorClass = env->FindClass("java/lang/Exception"); | ||||
| 	env->ThrowNew(errorClass, message.c_str()); | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT jlong JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_init( | ||||
| 		JNIEnv *env, | ||||
| 		jobject thiz, | ||||
| 		jstring modelPath, | ||||
| 		jstring language, | ||||
| 		jstring prompt | ||||
| ) { | ||||
| 	whisper_log_set(log_android, nullptr); | ||||
|  | ||||
| 	try { | ||||
| 		auto *pSession = new WhisperSession( | ||||
| 				stringToCXX(env, modelPath), stringToCXX(env, language), stringToCXX(env, prompt) | ||||
| 		); | ||||
| 		return (jlong) pSession; | ||||
| 	} catch (const std::exception& exception) { | ||||
| 		LOGW("Failed to init whisper: %s", exception.what()); | ||||
| 		throwException(env, exception.what()); | ||||
| 		return 0; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT void JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_free(JNIEnv *env, jobject thiz, | ||||
| 																 jlong pointer) { | ||||
| 	std::free(reinterpret_cast<WhisperSession *>(pointer)); | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT jstring JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_fullTranscribe(JNIEnv *env, | ||||
| 																		   jobject thiz, | ||||
| 																		   jlong pointer, | ||||
| 																		   jfloatArray audio_data) { | ||||
| 	auto *pSession = reinterpret_cast<WhisperSession *> (pointer); | ||||
| 	jfloat *pAudioData = env->GetFloatArrayElements(audio_data, nullptr); | ||||
| 	jsize lenAudioData = env->GetArrayLength(audio_data); | ||||
| 	std::string result; | ||||
|  | ||||
| 	try { | ||||
| 		LOGD("Starting Whisper, transcribe %d", lenAudioData); | ||||
| 		result = pSession->transcribeNextChunk(pAudioData, lenAudioData); | ||||
| 		auto preview = pSession->getPreview(); | ||||
| 		LOGD("Ran Whisper. Got %s (preview %s)", result.c_str(), preview.c_str()); | ||||
| 	} catch (const std::exception& exception) { | ||||
| 		LOGW("Failed to run whisper: %s", exception.what()); | ||||
| 		throwException(env, exception.what()); | ||||
| 	} | ||||
|  | ||||
| 	// JNI_ABORT: "free the buffer without copying back the possible changes", pass 0 to copy | ||||
| 	// changes (there should be no changes) | ||||
| 	env->ReleaseFloatArrayElements(audio_data, pAudioData, JNI_ABORT); | ||||
|  | ||||
| 	return stringToJava(env, result); | ||||
| } | ||||
| extern "C" | ||||
| JNIEXPORT jstring JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_getPreview( | ||||
| 		JNIEnv *env, jobject thiz, jlong pointer | ||||
| ) { | ||||
| 	auto *pSession = reinterpret_cast<WhisperSession *> (pointer); | ||||
| 	return stringToJava(env, pSession->getPreview()); | ||||
| } | ||||
|  | ||||
| extern "C" | ||||
| JNIEXPORT void JNICALL | ||||
| Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_runTests(JNIEnv *env, jobject thiz) { | ||||
| 	try { | ||||
| 		findLongestSilence_test(); | ||||
| 	} catch (const std::exception& exception) { | ||||
| 		LOGW("Failed to run tests: %s", exception.what()); | ||||
| 		throwException(env, exception.what()); | ||||
| 	} | ||||
| } | ||||
| @@ -21,7 +21,7 @@ class AudioRecorder(context: Context) : Closeable { | ||||
| 	private var bufferWriteOffset = 0 | ||||
|  | ||||
| 	// Accessor must not modify result | ||||
| 	val bufferedData: FloatArray get() = buffer.sliceArray(0 until bufferWriteOffset) | ||||
| 	private val bufferedData: FloatArray get() = buffer.sliceArray(0 until bufferWriteOffset) | ||||
| 	val bufferLengthSeconds: Double get() = bufferWriteOffset.toDouble() / sampleRate | ||||
|  | ||||
| 	init { | ||||
| @@ -74,11 +74,16 @@ class AudioRecorder(context: Context) : Closeable { | ||||
| 	} | ||||
|  | ||||
| 	// Pulls all available data from the audio recorder's buffer | ||||
| 	fun pullAvailable() { | ||||
| 		return read(maxBufferSize, AudioRecord.READ_NON_BLOCKING) | ||||
| 	fun pullAvailable(): FloatArray { | ||||
| 		read(maxBufferSize, AudioRecord.READ_NON_BLOCKING) | ||||
|  | ||||
| 		val result = bufferedData | ||||
| 		buffer.fill(0.0f, 0, maxBufferSize); | ||||
| 		bufferWriteOffset = 0 | ||||
| 		return result | ||||
| 	} | ||||
|  | ||||
| 	fun pullNextSeconds(seconds: Double) { | ||||
| 	fun pullNextSeconds(seconds: Double):FloatArray { | ||||
| 		val remainingSize = maxBufferSize - bufferWriteOffset | ||||
| 		val requestedSize = (seconds * sampleRate).toInt() | ||||
|  | ||||
| @@ -87,7 +92,8 @@ class AudioRecorder(context: Context) : Closeable { | ||||
| 			advanceStartBySamples(maxBufferSize / 3) | ||||
| 		} | ||||
|  | ||||
| 		return read(requestedSize, AudioRecord.READ_BLOCKING) | ||||
| 		read(requestedSize, AudioRecord.READ_BLOCKING) | ||||
| 		return pullAvailable() | ||||
| 	} | ||||
|  | ||||
| 	override fun close() { | ||||
|   | ||||
| @@ -0,0 +1,54 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import java.io.Closeable | ||||
|  | ||||
| class NativeWhisperLib( | ||||
| 	modelPath: String, | ||||
| 	languageCode: String, | ||||
| 	prompt: String, | ||||
| ) : Closeable { | ||||
| 	companion object { | ||||
| 		init { | ||||
| 			System.loadLibrary("joplin") | ||||
| 		} | ||||
|  | ||||
| 		external fun runTests(): Unit; | ||||
|  | ||||
| 		// TODO: The example whisper.cpp project transfers pointers as Longs to the Kotlin code. | ||||
| 		// This seems unsafe. Try changing how this is managed. | ||||
| 		private external fun init(modelPath: String, languageCode: String, prompt: String): Long; | ||||
| 		private external fun free(pointer: Long): Unit; | ||||
|  | ||||
| 		private external fun fullTranscribe(pointer: Long, audioData: FloatArray): String; | ||||
| 		private external fun getPreview(pointer: Long): String; | ||||
| 	} | ||||
|  | ||||
| 	private var closed = false | ||||
| 	private val pointer: Long = init(modelPath, languageCode, prompt) | ||||
|  | ||||
| 	fun transcribe(audioData: FloatArray): String { | ||||
| 		if (closed) { | ||||
| 			throw Exception("Cannot transcribe using a closed session") | ||||
| 		} | ||||
|  | ||||
| 		return fullTranscribe(pointer, audioData) | ||||
| 	} | ||||
|  | ||||
| 	fun getPreview(): String { | ||||
| 		if (closed) { | ||||
| 			throw Exception("Cannot get preview from a closed session") | ||||
| 		} | ||||
|  | ||||
| 		return getPreview(pointer) | ||||
| 	} | ||||
|  | ||||
| 	override fun close() { | ||||
| 		if (closed) { | ||||
| 			throw Exception("Cannot close a whisper session twice") | ||||
| 		} | ||||
|  | ||||
| 		closed = true | ||||
| 		free(pointer) | ||||
| 	} | ||||
|  | ||||
| } | ||||
| @@ -1,110 +1,33 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import ai.onnxruntime.OnnxTensor | ||||
| import ai.onnxruntime.OrtEnvironment | ||||
| import ai.onnxruntime.OrtSession | ||||
| import ai.onnxruntime.extensions.OrtxPackage | ||||
| import android.annotation.SuppressLint | ||||
| import android.content.Context | ||||
| import android.util.Log | ||||
| import java.io.Closeable | ||||
| import java.nio.FloatBuffer | ||||
| import java.nio.IntBuffer | ||||
| import kotlin.time.DurationUnit | ||||
| import kotlin.time.measureTimedValue | ||||
|  | ||||
| class SpeechToTextConverter( | ||||
| 	modelPath: String, | ||||
| 	locale: String, | ||||
| 	prompt: String, | ||||
| 	recorderFactory: AudioRecorderFactory, | ||||
| 	private val environment: OrtEnvironment, | ||||
| 	context: Context, | ||||
| ) : Closeable { | ||||
| 	private val recorder = recorderFactory(context) | ||||
| 	private val session: OrtSession = environment.createSession( | ||||
| 		modelPath, | ||||
| 		OrtSession.SessionOptions().apply { | ||||
| 			// Needed for audio decoding | ||||
| 			registerCustomOpLibrary(OrtxPackage.getLibraryPath()) | ||||
| 		}, | ||||
| 	) | ||||
| 	private val languageCode = Regex("_.*").replace(locale, "") | ||||
| 	private val decoderInputIds = when (languageCode) { | ||||
| 		// Add 50363 to the end to omit timestamps | ||||
| 		"en" -> intArrayOf(50258, 50259, 50359) | ||||
| 		"fr" -> intArrayOf(50258, 50265, 50359) | ||||
| 		"es" -> intArrayOf(50258, 50262, 50359) | ||||
| 		"de" -> intArrayOf(50258, 50261, 50359) | ||||
| 		"it" -> intArrayOf(50258, 50274, 50359) | ||||
| 		"nl" -> intArrayOf(50258, 50271, 50359) | ||||
| 		"ko" -> intArrayOf(50258, 50264, 50359) | ||||
| 		"th" -> intArrayOf(50258, 50289, 50359) | ||||
| 		"ru" -> intArrayOf(50258, 50263, 50359) | ||||
| 		"pt" -> intArrayOf(50258, 50267, 50359) | ||||
| 		"pl" -> intArrayOf(50258, 50269, 50359) | ||||
| 		"id" -> intArrayOf(50258, 50275, 50359) | ||||
| 		"hi" -> intArrayOf(50258, 50276, 50359) | ||||
| 		// Let Whisper guess the language | ||||
| 		else -> intArrayOf(50258) | ||||
| 	} | ||||
| 	private var whisper = NativeWhisperLib( | ||||
| 		modelPath, | ||||
| 		languageCode, | ||||
| 		prompt, | ||||
| 	) | ||||
|  | ||||
| 	fun start() { | ||||
| 		recorder.start() | ||||
| 	} | ||||
|  | ||||
| 	private fun getInputs(data: FloatArray): MutableMap<String, OnnxTensor> { | ||||
| 		fun intTensor(value: Int) = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			IntBuffer.wrap(intArrayOf(value)), | ||||
| 			longArrayOf(1), | ||||
| 		) | ||||
| 		fun floatTensor(value: Float) = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			FloatBuffer.wrap(floatArrayOf(value)), | ||||
| 			longArrayOf(1), | ||||
| 		) | ||||
| 		val audioPcmTensor = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			FloatBuffer.wrap(data), | ||||
| 			longArrayOf(1, data.size.toLong()), | ||||
| 		) | ||||
| 		val decoderInputIdsTensor = OnnxTensor.createTensor( | ||||
| 			environment, | ||||
| 			IntBuffer.wrap(decoderInputIds), | ||||
| 			longArrayOf(1, decoderInputIds.size.toLong()) | ||||
| 		) | ||||
|  | ||||
| 		return mutableMapOf( | ||||
| 			"audio_pcm" to audioPcmTensor, | ||||
| 			"max_length" to intTensor(412), | ||||
| 			"min_length" to intTensor(0), | ||||
| 			"num_return_sequences" to intTensor(1), | ||||
| 			"num_beams" to intTensor(1), | ||||
| 			"length_penalty" to floatTensor(1.1f), | ||||
| 			"repetition_penalty" to floatTensor(3f), | ||||
| 			"decoder_input_ids" to decoderInputIdsTensor, | ||||
|  | ||||
| 			// Required for timestamps | ||||
| 			"logits_processor" to intTensor(1) | ||||
| 		) | ||||
| 	} | ||||
|  | ||||
| 	// TODO .get() fails on older Android versions | ||||
| 	@SuppressLint("NewApi") | ||||
| 	private fun convert(data: FloatArray): String { | ||||
| 		val (inputs, convertInputsTime) = measureTimedValue { | ||||
| 			getInputs(data) | ||||
| 		} | ||||
| 		val (outputs, getOutputsTime) = measureTimedValue { | ||||
| 			session.run(inputs, setOf("str")) | ||||
| 		} | ||||
| 		val mainOutput = outputs.get("str").get().value as Array<Array<String>> | ||||
| 		outputs.close() | ||||
|  | ||||
| 		Log.i("Whisper", "Converted ${data.size / 16000}s of data in ${ | ||||
| 			getOutputsTime.toString(DurationUnit.SECONDS, 2) | ||||
| 		} converted inputs in ${convertInputsTime.inWholeMilliseconds}ms") | ||||
| 		return mainOutput[0][0] | ||||
| 		Log.d("Whisper", "Pre-transcribe data of size ${data.size}") | ||||
| 		val result = whisper.transcribe(data) | ||||
| 		Log.d("Whisper", "Post transcribe. Got $result") | ||||
| 		return result; | ||||
| 	} | ||||
|  | ||||
| 	fun dropFirstSeconds(seconds: Double) { | ||||
| @@ -114,23 +37,26 @@ class SpeechToTextConverter( | ||||
|  | ||||
| 	val bufferLengthSeconds: Double get() = recorder.bufferLengthSeconds | ||||
|  | ||||
| 	fun expandBufferAndConvert(seconds: Double): String { | ||||
| 		recorder.pullNextSeconds(seconds) | ||||
| 		// Also pull any extra available data, in case the speech-to-text converter | ||||
| 		// is lagging behind the audio recorder. | ||||
| 		recorder.pullAvailable() | ||||
|  | ||||
| 		return convert(recorder.bufferedData) | ||||
| 	fun convertNext(seconds: Double): String { | ||||
| 		val buffer = recorder.pullNextSeconds(seconds) | ||||
| 		val result = convert(buffer) | ||||
| 		dropFirstSeconds(seconds) | ||||
| 		return result | ||||
| 	} | ||||
|  | ||||
| 	// Converts as many seconds of buffered data as possible, without waiting | ||||
| 	fun expandBufferAndConvert(): String { | ||||
| 		recorder.pullAvailable() | ||||
| 		return convert(recorder.bufferedData) | ||||
| 	fun convertRemaining(): String { | ||||
| 		val buffer = recorder.pullAvailable() | ||||
| 		return convert(buffer) | ||||
| 	} | ||||
|  | ||||
| 	fun getPreview(): String { | ||||
| 		return whisper.getPreview() | ||||
| 	} | ||||
|  | ||||
| 	override fun close() { | ||||
| 		Log.d("Whisper", "Close") | ||||
| 		recorder.close() | ||||
| 		session.close() | ||||
| 		whisper.close() | ||||
| 	} | ||||
| } | ||||
| @@ -1,6 +1,5 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import ai.onnxruntime.OrtEnvironment | ||||
| import com.facebook.react.ReactPackage | ||||
| import com.facebook.react.bridge.LifecycleEventListener | ||||
| import com.facebook.react.bridge.NativeModule | ||||
| @@ -24,7 +23,6 @@ class SpeechToTextPackage : ReactPackage { | ||||
| 	class SpeechToTextModule( | ||||
| 		private var context: ReactApplicationContext, | ||||
| 	) : ReactContextBaseJavaModule(context), LifecycleEventListener { | ||||
| 		private var environment: OrtEnvironment? = null | ||||
| 		private val executorService: ExecutorService = Executors.newFixedThreadPool(1) | ||||
| 		private val sessionManager = SpeechToTextSessionManager(executorService) | ||||
|  | ||||
| @@ -32,21 +30,24 @@ class SpeechToTextPackage : ReactPackage { | ||||
|  | ||||
| 		override fun onHostResume() { } | ||||
| 		override fun onHostPause() { } | ||||
| 		override fun onHostDestroy() { | ||||
| 			environment?.close() | ||||
| 		override fun onHostDestroy() { } | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun runTests(promise: Promise) { | ||||
| 			try { | ||||
| 				NativeWhisperLib.runTests() | ||||
| 				promise.resolve(true) | ||||
| 			} catch (exception: Throwable) { | ||||
| 				promise.reject(exception) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun openSession(modelPath: String, locale: String, promise: Promise) { | ||||
| 		fun openSession(modelPath: String, locale: String, prompt: String, promise: Promise) { | ||||
| 			val appContext = context.applicationContext | ||||
| 			// Initialize environment as late as possible: | ||||
| 			val ortEnvironment = environment ?: OrtEnvironment.getEnvironment() | ||||
| 			if (environment != null) { | ||||
| 				environment = ortEnvironment | ||||
| 			} | ||||
|  | ||||
| 			try { | ||||
| 				val sessionId = sessionManager.openSession(modelPath, locale, ortEnvironment, appContext) | ||||
| 				val sessionId = sessionManager.openSession(modelPath, locale, prompt, appContext) | ||||
| 				promise.resolve(sessionId) | ||||
| 			} catch (exception: Throwable) { | ||||
| 				promise.reject(exception) | ||||
| @@ -69,8 +70,8 @@ class SpeechToTextPackage : ReactPackage { | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun expandBufferAndConvert(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 			sessionManager.expandBufferAndConvert(sessionId, duration, promise) | ||||
| 		fun convertNext(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 			sessionManager.convertNext(sessionId, duration, promise) | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| @@ -78,6 +79,11 @@ class SpeechToTextPackage : ReactPackage { | ||||
| 			sessionManager.convertAvailable(sessionId, promise) | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun getPreview(sessionId: Int, promise: Promise) { | ||||
| 			sessionManager.getPreview(sessionId, promise) | ||||
| 		} | ||||
|  | ||||
| 		@ReactMethod | ||||
| 		fun closeSession(sessionId: Int, promise: Promise) { | ||||
| 			sessionManager.closeSession(sessionId, promise) | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| package net.cozic.joplin.audio | ||||
|  | ||||
| import ai.onnxruntime.OrtEnvironment | ||||
| import android.content.Context | ||||
| import com.facebook.react.bridge.Promise | ||||
| import java.util.concurrent.Executor | ||||
| @@ -21,13 +20,13 @@ class SpeechToTextSessionManager( | ||||
| 	fun openSession( | ||||
| 		modelPath: String, | ||||
| 		locale: String, | ||||
| 		environment: OrtEnvironment, | ||||
| 		prompt: String, | ||||
| 		context: Context, | ||||
| 	): Int { | ||||
| 		val sessionId = nextSessionId++ | ||||
| 		sessions[sessionId] = SpeechToTextSession( | ||||
| 			SpeechToTextConverter( | ||||
| 				modelPath, locale, recorderFactory = AudioRecorder.factory, environment, context, | ||||
| 				modelPath, locale, prompt, recorderFactory = AudioRecorder.factory, context, | ||||
| 			) | ||||
| 		) | ||||
| 		return sessionId | ||||
| @@ -87,9 +86,9 @@ class SpeechToTextSessionManager( | ||||
| 	} | ||||
|  | ||||
| 	// Waits for the next [duration] seconds to become available, then converts | ||||
| 	fun expandBufferAndConvert(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 	fun convertNext(sessionId: Int, duration: Double, promise: Promise) { | ||||
| 		this.concurrentWithSession(sessionId, promise::reject) { session -> | ||||
| 			val result = session.converter.expandBufferAndConvert(duration) | ||||
| 			val result = session.converter.convertNext(duration) | ||||
| 			promise.resolve(result) | ||||
| 		} | ||||
| 	} | ||||
| @@ -97,7 +96,14 @@ class SpeechToTextSessionManager( | ||||
| 	// Converts all available recorded data | ||||
| 	fun convertAvailable(sessionId: Int, promise: Promise) { | ||||
| 		this.concurrentWithSession(sessionId, promise::reject) { session -> | ||||
| 			val result = session.converter.expandBufferAndConvert() | ||||
| 			val result = session.converter.convertRemaining() | ||||
| 			promise.resolve(result) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	fun getPreview(sessionId: Int, promise: Promise) { | ||||
| 		this.concurrentWithSession(sessionId, promise::reject) { session -> | ||||
| 			val result = session.converter.getPreview() | ||||
| 			promise.resolve(result) | ||||
| 		} | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										9
									
								
								packages/app-mobile/android/vendor/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								packages/app-mobile/android/vendor/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| whisper.cpp/.gitmodules | ||||
| whisper.cpp/scripts/ | ||||
| whisper.cpp/samples/ | ||||
| whisper.cpp/tests/ | ||||
| whisper.cpp/models/ | ||||
| whisper.cpp/examples/ | ||||
| whisper.cpp/.*/ | ||||
| whisper.cpp/bindings/ | ||||
| whisper.cpp/**/*.Dockerfile | ||||
							
								
								
									
										7
									
								
								packages/app-mobile/android/vendor/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								packages/app-mobile/android/vendor/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| # Vendored Android packages | ||||
|  | ||||
| This directory contains upstream packages that can't be added as direct dependencies (e.g. through `npm`). | ||||
|  | ||||
| ## whisper.cpp | ||||
|  | ||||
| `whisper.cpp` provides voice typing capabilities. It can be updated by replacing the contents of the `whisper.cpp` directory with the latest content from https://github.com/ggerganov/whisper.cpp. To decrease the size of the `whisper.cpp` directory, some files are ignored by the `.gitignore`. | ||||
							
								
								
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| *.o | ||||
| *.a | ||||
| *.d | ||||
| .cache/ | ||||
| .coreml/ | ||||
| .test/ | ||||
| .venv/ | ||||
| .vs/ | ||||
| .vscode/ | ||||
| .DS_Store | ||||
| .vimspector.json | ||||
| /CMakeSettings.json | ||||
| /talk-llama.dSYM/ | ||||
|  | ||||
| build/ | ||||
| build-*/ | ||||
|  | ||||
| # SPM | ||||
| .build/ | ||||
| .swiftpm | ||||
| *.metallib | ||||
|  | ||||
| ggml-metal-embed.metal | ||||
| ggml-metal-embed.metal.tmp | ||||
|  | ||||
| /main | ||||
| /stream | ||||
| /command | ||||
| /talk | ||||
| /talk-llama | ||||
| /bench | ||||
| /quantize | ||||
| /server | ||||
| /lsp | ||||
|  | ||||
| arm_neon.h | ||||
| sync.sh | ||||
| libwhisper.a | ||||
| libwhisper.so | ||||
| compile_commands.json | ||||
|  | ||||
| examples/arm_neon.h | ||||
| examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata | ||||
| examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/ | ||||
| examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata | ||||
|  | ||||
| extra/bench-gg.txt | ||||
|  | ||||
| models/*.mlmodel | ||||
| models/*.mlmodelc | ||||
| models/*.mlpackage | ||||
| bindings/java/.gradle/ | ||||
| bindings/java/.idea/ | ||||
| .idea/ | ||||
|  | ||||
| benchmark_results.csv | ||||
| cmake-build-debug/ | ||||
| .cxx/ | ||||
| .gradle/ | ||||
| local.properties | ||||
							
								
								
									
										510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/AUTHORS
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/AUTHORS
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,510 @@ | ||||
| # date: Tue Feb  4 13:03:35 EET 2025 | ||||
| # this file is auto-generated by scripts/gen-authors.sh | ||||
|  | ||||
| 0/0 <zero@imaskeleton.me> | ||||
| 0cc4m <picard12@live.de> | ||||
| 0xsourcecode <134374803+0xsourcecode@users.noreply.github.com> | ||||
| 65a <10104049+65a@users.noreply.github.com> | ||||
| AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com> | ||||
| AT <manyoso@users.noreply.github.com> | ||||
| Aarni Koskela <akx@iki.fi> | ||||
| Aaron Pham <29749331+aarnphm@users.noreply.github.com> | ||||
| Aaron Taylor <aaron@exphat.com> | ||||
| Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> | ||||
| Abitofevrything <54505189+abitofevrything@users.noreply.github.com> | ||||
| Adam Jones <domdomegg+git@gmail.com> | ||||
| Adrien Gallouët <adrien@gallouet.fr> | ||||
| Adrien Gallouët <angt@huggingface.co> | ||||
| AfryMask <AfryMask@163.com> | ||||
| Ahmad Bilal <ahmad.bilal@empglabs.com> | ||||
| Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> | ||||
| AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> | ||||
| AidanBeltonS <aidan.belton@codeplay.com> | ||||
| Akarshan Biswas <akarshan.biswas@gmail.com> | ||||
| Akarshan Biswas <akarshanbiswas@fedoraproject.org> | ||||
| Akash Mahajan <akash7190@gmail.com> | ||||
| Akash Mahajan <akashmjn@stanford.edu> | ||||
| Al Hoang <3811822-hoanga@users.noreply.gitlab.com> | ||||
| Alan <unknown> | ||||
| Albert Jin <albert.jin@gmail.com> | ||||
| Alberto Cabrera Pérez <alberto.cabrera@codeplay.com> | ||||
| Alberto Cabrera Pérez <alberto.cabrera@intel.com> | ||||
| Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com> | ||||
| Alex Azarov <alex@azarov.by> | ||||
| Alex Bacart <13940752+alex-bacart@users.noreply.github.com> | ||||
| Alex Evgrashin <aevgrashin@yandex.ru> | ||||
| Alex O'Connell <35843486+acon96@users.noreply.github.com> | ||||
| Alexandr Graschenkov <alexandr.graschenkov91@gmail.com> | ||||
| Alexandru Mariuti <alex@mariuti.com> | ||||
| Alexey Kharlamov <alexey@kharlamov.biz> | ||||
| Alfredo Montesinos <alfredo.montesinos@g.austincc.edu> | ||||
| Ali Alameh <ali.alameh@isae.edu.lb> | ||||
| Alter <0x7c48@gmail.com> | ||||
| Ananta Bastola <anantarajbastola@gmail.com> | ||||
| Andreas Kieslinger <47689530+aendk@users.noreply.github.com> | ||||
| Andreas Lubbe <git@lubbe.org> | ||||
| Andreu Huguet <andreuhuguet@gmail.com> | ||||
| Andrew Huynh <a5thuynh@gmail.com> | ||||
| Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> | ||||
| Andrew S <andrews54757@gmail.com> | ||||
| Andy Maloney <asmaloney@gmail.com> | ||||
| Anton Kostin <masguit42@users.noreply.github.com> | ||||
| Artyom Mezin <psycho.fading@gmail.com> | ||||
| Asad Memon <asad.lionpk@gmail.com> | ||||
| Ashraful Islam <ashraful.meche@gmail.com> | ||||
| AsukaMinato <asukaminato@nyan.eu.org> | ||||
| AustinMroz <austinmroz@utexas.edu> | ||||
| Avik Sengupta <avik@sengupta.net> | ||||
| Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com> | ||||
| Baffin Lee <baffinlee@gmail.com> | ||||
| Ben Ashbaugh <ben.ashbaugh@intel.com> | ||||
| Ben Nortier <bjnortier@gmail.com> | ||||
| Benjamin Heiniger <benjamin.heiniger@bluewin.ch> | ||||
| Bernhard M. Wiedemann <githubbmwprimary@lsmod.de> | ||||
| Binozo <70137898+Binozo@users.noreply.github.com> | ||||
| Bo-Yi Wu <appleboy.tw@gmail.com> | ||||
| Boris Bliznioukov <blib@mail.com> | ||||
| Borislav Stanimirov <b.stanimirov@abv.bg> | ||||
| Brad Murray <59848399+bradmurray-dt@users.noreply.github.com> | ||||
| Brian Murray <brian@bmurray.ca> | ||||
| CRD716 <crd716@gmail.com> | ||||
| Canis Lupus <Canis-UK@users.noreply.github.com> | ||||
| Carlos Zoido <mrgalleta@gmail.com> | ||||
| Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> | ||||
| CarterLi999 <664681047@qq.com> | ||||
| ChangSeok Oh <shivamidow@users.noreply.github.com> | ||||
| Changyeon Kim <cyzero.kim@samsung.com> | ||||
| Chaoqun <27287694+OpenWaygate@users.noreply.github.com> | ||||
| Charles Xu <63788048+chaxu01@users.noreply.github.com> | ||||
| Charles Xu <charles.xu@arm.com> | ||||
| Chen Xi <xi2.chen@intel.com> | ||||
| Chen Xi <xixichen08@foxmail.com> | ||||
| Chenguang Li <87689256+noemotiovon@users.noreply.github.com> | ||||
| Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com> | ||||
| Chidi Williams <williamschidi1@gmail.com> | ||||
| Chris Elrod <elrodc@gmail.com> | ||||
| Christian <12550267+iceychris@users.noreply.github.com> | ||||
| Christian Kastner <ckk@kvr.at> | ||||
| Clifford Heath <clifford.heath@gmail.com> | ||||
| Clint Herron <hanclinto@gmail.com> | ||||
| Colin <github@whoisc.cc> | ||||
| Conrad Kramer <conrad@conradkramer.com> | ||||
| Corey Earwood <iamcgn+github@gmail.com> | ||||
| CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> | ||||
| DAN™ <dranger003@gmail.com> | ||||
| DGdev91 <DGdev91@users.noreply.github.com> | ||||
| Damian Czaja <trojan295@protonmail.com> | ||||
| Dan Johansson <164997844+eddnjjn@users.noreply.github.com> | ||||
| Dan Johansson <dan.johansson@arm.com> | ||||
| Daniel Bevenius <daniel.bevenius@gmail.com> | ||||
| Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com> | ||||
| Daniel Ziegenberg <daniel@ziegenberg.at> | ||||
| Daniele <57776841+daniandtheweb@users.noreply.github.com> | ||||
| Dave <dave-fl@users.noreply.github.com> | ||||
| Dave Airlie <airlied@gmail.com> | ||||
| Dave Airlie <airlied@redhat.com> | ||||
| Daven Sanassy <daven@vochlea.co.uk> | ||||
| David <dnhkng@gmail.com> | ||||
| David Thorpe <djt@mutablelogic.com> | ||||
| DavidKorczynski <david@adalogics.com> | ||||
| Davidson Francis <davidsondfgl@gmail.com> | ||||
| Dener Stassun <denerstassun@gmail.com> | ||||
| Dibakar Gope <dibakar.gope@arm.com> | ||||
| Didzis Gosko <didzis@users.noreply.github.com> | ||||
| Diego Devesa <slarengh@gmail.com> | ||||
| Digipom <admin@digipom.com> | ||||
| Dimo <dimo@ieee.org> | ||||
| Djip007 <3705339+Djip007@users.noreply.github.com> | ||||
| Djip007 <djip.perois@free.fr> | ||||
| Dody Suria Wijaya <dodysw@gmail.com> | ||||
| Dou Xinpeng <15529241576@163.com> | ||||
| Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com> | ||||
| Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> | ||||
| Duncan McConnell <ddmcconnell4@gmail.com> | ||||
| Egor Egorov <me@egorfine.com> | ||||
| Elkana Bardugo <ttv200@gmail.com> | ||||
| Emmanuel Schmidbauer <eschmidbauer@gmail.com> | ||||
| Engininja2 <139037756+Engininja2@users.noreply.github.com> | ||||
| Eric Curtin <ericcurtin17@gmail.com> | ||||
| Eric Swanson <eswanson@alloscomp.com> | ||||
| Eric Tendian <erictendian@gmail.com> | ||||
| Eric Zhang <34133756+EZForever@users.noreply.github.com> | ||||
| Erik Scholz <Green-Sky@users.noreply.github.com> | ||||
| Evan Jones <evan.q.jones@gmail.com> | ||||
| Evan Martin <evan.martin@gmail.com> | ||||
| Eve <139727413+netrunnereve@users.noreply.github.com> | ||||
| Evgeny Kuznetsov <evgeny@kuznetsov.md> | ||||
| F1L1P <78918286+F1L1Pv2@users.noreply.github.com> | ||||
| Faisal Zaghloul <quic_fzaghlou@quicinc.com> | ||||
| Fangjun Kuang <csukuangfj@gmail.com> | ||||
| Felix <stenbackfelix@gmail.com> | ||||
| Finn Voorhees <finnvoorhees@gmail.com> | ||||
| FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> | ||||
| FlippFuzz <41221030+FlippFuzz@users.noreply.github.com> | ||||
| Frankie Robertson <frankier@users.noreply.github.com> | ||||
| Gang Chen <goncha@gmail.com> | ||||
| Gavin Cai <gavin1818@hotmail.com> | ||||
| George Hindle <george@georgehindle.com> | ||||
| Georgi Gerganov <ggerganov@gmail.com> | ||||
| Gilad S <7817232+giladgd@users.noreply.github.com> | ||||
| Gilad S <giladgd@users.noreply.github.com> | ||||
| Gilad S. <7817232+giladgd@users.noreply.github.com> | ||||
| GitAritron <103900385+GitAritron@users.noreply.github.com> | ||||
| GiviMAD <GiviMAD@users.noreply.github.com> | ||||
| Gleicon Moraes <gleicon@gmail.com> | ||||
| Gregor Jasny <gjasny@googlemail.com> | ||||
| Guillaume Wenzek <gwenzek@users.noreply.github.com> | ||||
| HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com> | ||||
| Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> | ||||
| Hang <bebound@gmail.com> | ||||
| Haus1 <haus.xda@gmail.com> | ||||
| Herman Semenov <GermanAizek@yandex.ru> | ||||
| HimariO <dsfhe49854@gmail.com> | ||||
| Hong Bo PENG <penghb@cn.ibm.com> | ||||
| Hrishikesh Barman <geekodour@users.noreply.github.com> | ||||
| Hugo <hugo@whynothugo.nl> | ||||
| Ian Bicking <ian@ianbicking.org> | ||||
| Ian Bull <irbull@eclipsesource.com> | ||||
| Ihar Hrachyshka <ihrachys@redhat.com> | ||||
| Ikko Ashimine <eltociear@gmail.com> | ||||
| Ikko Eltociear Ashimine <eltociear@gmail.com> | ||||
| InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com> | ||||
| Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com> | ||||
| Ivan <nekotekina@gmail.com> | ||||
| Ivan Filipov <159561759+vanaka11@users.noreply.github.com> | ||||
| Ivan Gorin <ivangorin21@gmail.com> | ||||
| Ivo von Putzer Reibegg <ivo.putzer@gmail.com> | ||||
| JJ <103335846+computerscienceiscool@users.noreply.github.com> | ||||
| Jack Mousseau <jmousseau@users.noreply.github.com> | ||||
| JacobLinCool <jacoblincool@gmail.com> | ||||
| Jakub Ráček <blizzcz@gmail.com> | ||||
| Jared Van Bortel <jared@nomic.ai> | ||||
| Jay Binks <jaybinks@gmail.com> | ||||
| Jayant <jayantyadav202@gmail.com> | ||||
| Jeff Bolz <jbolz@nvidia.com> | ||||
| Jeroen Mostert <jeroen.mostert@cm.com> | ||||
| Jhen-Jie Hong <developer@jhen.me> | ||||
| Jhen-Jie Hong <iainst0409@gmail.com> | ||||
| JidongZhang-THU <1119708529@qq.com> | ||||
| Jo Liss <joliss42@gmail.com> | ||||
| Joe Todd <joe.todd@codeplay.com> | ||||
| Johan <jr.raffin@gmail.com> | ||||
| Johannes Gäßler <johannesg@5d6.de> | ||||
| John Balis <phobossystems@gmail.com> | ||||
| JohnnyB <jboero@users.noreply.github.com> | ||||
| Jonathan Soo <jcsoo@agora.com> | ||||
| Jonno <1160532+razodactyl@users.noreply.github.com> | ||||
| Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi> | ||||
| Jose <34888496+Jerry-Master@users.noreply.github.com> | ||||
| Josh Bleecher Snyder <josharian@gmail.com> | ||||
| Josscii <jossciiweiyi@gmail.com> | ||||
| Judd <foldl@users.noreply.github.com> | ||||
| Jumper775 <78500318+jumpers775@users.noreply.github.com> | ||||
| Jun Hee Yoo <contact.jhyoo@gmail.com> | ||||
| Junil Kim <logyourself@gmail.com> | ||||
| Justina Cho <justcho5@gmail.com> | ||||
| Justine Tunney <jtunney@gmail.com> | ||||
| Justine Tunney <jtunney@mozilla.com> | ||||
| KITAITI Makoto <KitaitiMakoto@gmail.com> | ||||
| KP Kaiser <kirk@zothcorp.com> | ||||
| Kamilake <exjang0@gmail.com> | ||||
| Karol Kontny <82021046+kkontny@users.noreply.github.com> | ||||
| Karthick <j.karthic2004@gmail.com> | ||||
| Kartik Saranathan <278928+Kartiku@users.noreply.github.com> | ||||
| Kasumi <90275229+kasumi-1@users.noreply.github.com> | ||||
| Kawrakow <48489457+ikawrakow@users.noreply.github.com> | ||||
| Kendrick Taylor <kendrick@circuitsix.com> | ||||
| Kevin Brothaler <admin@digipom.com> | ||||
| Kevin Gibbons <bakkot@gmail.com> | ||||
| Konosuke Sakai <konosuke@konosuke.work> | ||||
| Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com> | ||||
| Kreijstal <rainb@tfwno.gf> | ||||
| Kylin <56434533+KyL0N@users.noreply.github.com> | ||||
| LBlue <153975653+lbluep@users.noreply.github.com> | ||||
| Larry Battle <larry.battle.tech@gmail.com> | ||||
| Laytan Laats <laytanlaats@hotmail.com> | ||||
| Leo Moll <leo.moll@yeasoft.com> | ||||
| Lexevolution <31176843+Lexevolution@users.noreply.github.com> | ||||
| LittleLoli <26589867+WhichWho@users.noreply.github.com> | ||||
| Lucas Zanek <57494138+LucasZNK@users.noreply.github.com> | ||||
| Luis Herrera <herrera-luis@users.noreply.github.com> | ||||
| Lukas Rist <glaslos@gmail.com> | ||||
| M. A. Ali <73258591+MightyStud@users.noreply.github.com> | ||||
| M. Eren Akbiyik <erenakbiyik@gmail.com> | ||||
| Ma Mingfei <mingfei.ma@intel.com> | ||||
| Maciek <maciek.mab122@gmail.com> | ||||
| Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> | ||||
| Marcin Mielniczuk <marmistrz.dev@zoho.eu> | ||||
| Mark Karpelès <MagicalTux@users.noreply.github.com> | ||||
| Mark Zhuang <zhuangqiubin@gmail.com> | ||||
| Markus Tavenrath <mtavenrath@users.noreply.github.com> | ||||
| Martin Delille <martin@delille.org> | ||||
| Martin Warnaar <martinwarnaar@gmail.com> | ||||
| Masaya, Kato <62578291+msy-kato@users.noreply.github.com> | ||||
| Matheus de Sousa <23645013+keyehzy@users.noreply.github.com> | ||||
| Mathieu Baudier <mbaudier@argeo.org> | ||||
| Mathijs de Bruin <mathijs@mathijsfietst.nl> | ||||
| Matija Pevec <mightymatth@users.noreply.github.com> | ||||
| Matt Stephenson <mstephenson6@users.noreply.github.com> | ||||
| Max Krasnyansky <max.krasnyansky@gmail.com> | ||||
| Max Krasnyansky <quic_maxk@quicinc.com> | ||||
| Maximiliano Levi <8160966+maxilevi@users.noreply.github.com> | ||||
| Meng, Hengyu <hengyu.meng@intel.com> | ||||
| Mengqing Cao <cmq0113@163.com> | ||||
| Michael Podvitskiy <podvitskiymichael@gmail.com> | ||||
| Michael Rienstra <mrienstra@gmail.com> | ||||
| Mikhail Grigorev <sleuthhound@gmail.com> | ||||
| Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com> | ||||
| Mohit Agarwal <mohit@sdf.org> | ||||
| Molly Sophia <mollysophia379@gmail.com> | ||||
| Murilo Santana <mvrilo@gmail.com> | ||||
| NETZkultur GmbH <mulholland@netzkultur.de> | ||||
| Natsu <chino@hotococoa.moe> | ||||
| Neil Chudleigh <nchudleigh@users.noreply.github.com> | ||||
| Neo Zhang <14088817+arthw@users.noreply.github.com> | ||||
| Neo Zhang Jianyu <jianyu.zhang@intel.com> | ||||
| Neuman Vong <neuman.vong@gmail.com> | ||||
| Nicholai Tukanov <nicholaitukanov@gmail.com> | ||||
| Nicholas Albion <nalbion@yahoo.com> | ||||
| Nico Bosshard <nico@bosshome.ch> | ||||
| Nicolò Scipione <nicolo.scipione@codeplay.com> | ||||
| Niels Mayer <Niels.Mayer@gmail.com> | ||||
| Nikita Sarychev <42014488+sARY77@users.noreply.github.com> | ||||
| Nikolaj Olsson <nikse.dk@gmail.com> | ||||
| Okabintaro <103938900+Okabintaro@users.noreply.github.com> | ||||
| Oleg Sidorov <me@whitebox.io> | ||||
| Oleg Sidorov <oleg@sidorov.nl> | ||||
| Olivier Chafik <ochafik@users.noreply.github.com> | ||||
| Ondrej Kokes <ondrej.kokes@gmail.com> | ||||
| Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com> | ||||
| PAB <pierreantoine.bannier@gmail.com> | ||||
| Paul Tsochantaris <ptsochantaris@icloud.com> | ||||
| Pedro Probst <pprobst@insiberia.net> | ||||
| Peng <hzp1024@qq.com> | ||||
| Peter <peter277@users.noreply.github.com> | ||||
| Philipp Zabel <philipp.zabel@gmail.com> | ||||
| Philippe Normand <phil@base-art.net> | ||||
| Philippe Normand <philn@igalia.com> | ||||
| Plamen Minev <pacominev@gmail.com> | ||||
| Prashant Vithule <119530321+Vithulep@users.noreply.github.com> | ||||
| Przemysław Pawełczyk <przemoc@gmail.com> | ||||
| Qianhe Chen <54462604+chenqianhe@users.noreply.github.com> | ||||
| R0CKSTAR <xiaodong.ye@mthreads.com> | ||||
| R0CKSTAR <yeahdongcn@gmail.com> | ||||
| Radoslav Gerganov <rgerganov@gmail.com> | ||||
| Radosław Gryta <radek.gryta@gmail.com> | ||||
| Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com> | ||||
| Raiya Araki <83504221+rai62@users.noreply.github.com> | ||||
| Reinforce-II <fate@eastal.com> | ||||
| Reinis Muiznieks <muiznieks.reinis@gmail.com> | ||||
| RelatedTitle <r3latedtitle@gmail.com> | ||||
| Rémy Oudompheng <oudomphe@phare.normalesup.org> | ||||
| RhinoDevel <RhinoDevel@users.noreply.github.com> | ||||
| Rich Jones <miserlou@gmail.com> | ||||
| Robert Ormandi <52251610+ormandi@users.noreply.github.com> | ||||
| Robin <robin.xw@hotmail.com> | ||||
| Roddur Dasgupta <roddurd@gmail.com> | ||||
| Roland Rabien <figbug@gmail.com> | ||||
| Romain Biessy <romain.biessy@codeplay.com> | ||||
| Ronsor <ronsor@ronsor.pw> | ||||
| Rotem Dan <rotemdan@gmail.com> | ||||
| Ryan Hitchman <hitchmanr@gmail.com> | ||||
| Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com> | ||||
| RyanChang <ftes90015@gmail.com> | ||||
| SRHMorris <69468379+SRHMorris@users.noreply.github.com> | ||||
| SXX <sxx1136965276@gmail.com> | ||||
| Sacha Arbonel <sacha.arbonel@hotmail.fr> | ||||
| Salman Faroz <stsfaroz@gmail.com> | ||||
| Salvatore Mesoraca <s.mesoraca16@gmail.com> | ||||
| Sam <49637763+Onlyartist9@users.noreply.github.com> | ||||
| Sam Pullara <spullara@gmail.com> | ||||
| Samuel Durante <44513615+samueldurantes@users.noreply.github.com> | ||||
| Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> | ||||
| Sandro Hanea <40202887+sandrohanea@users.noreply.github.com> | ||||
| Sergio López <slp@redhat.com> | ||||
| Sergio López <slp@sinrega.org> | ||||
| Shanshan Shen <467638484@qq.com> | ||||
| Shijie <821898965@qq.com> | ||||
| Shupei Fan <dymarkfan@outlook.com> | ||||
| Siddharth Ramakrishnan <srr2141@columbia.edu> | ||||
| Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> | ||||
| Simon Moisselin <simon.moisstoll@gmail.com> | ||||
| Sindre Sorhus <sindresorhus@gmail.com> | ||||
| Slava Primenko <primenko.s@gmail.com> | ||||
| Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> | ||||
| Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com> | ||||
| Stefan Sydow <s.sydow@heinlein-video.de> | ||||
| Stefan Sydow <stefan@sydow.email> | ||||
| Syahmi Azhar <prsyahmi@gmail.com> | ||||
| Syed Jafri <syedjafri97@gmail.com> | ||||
| Sơn Phan Trung <phantrungson17@gmail.com> | ||||
| Taisei Mima <bhbstar.me@gmail.com> | ||||
| Takeshi Inoue <inoue.takeshi@gmail.com> | ||||
| Tamotsu Takahashi <ttakah+github@gmail.com> | ||||
| Taras Glek <taras@thegp.com> | ||||
| Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com> | ||||
| Thamster <Thamster@users.noreply.github.com> | ||||
| Thijs Raymakers <thijs@raymakers.nl> | ||||
| Thomas Fitzsimmons <fitzsim@fitzsim.org> | ||||
| Tiago Fassoni <tiagofassoni@users.noreply.github.com> | ||||
| Tienshiao Ma <tienshiao@tienshiao.org> | ||||
| Tim Miller <drasticactions@users.noreply.github.com> | ||||
| Timothy Cronin <40186632+4imothy@users.noreply.github.com> | ||||
| Tobrun <tobrun.van.nuland@gmail.com> | ||||
| Todd <taf2@users.noreply.github.com> | ||||
| Toliver <teejae@gmail.com> | ||||
| Tong Li <31761981+litongjava@users.noreply.github.com> | ||||
| Tony Wasserka <4840017+neobrain@users.noreply.github.com> | ||||
| Topping1 <78745143+Topping1@users.noreply.github.com> | ||||
| Travis Cline <travis.cline@gmail.com> | ||||
| UEXTM.com <84163508+uextm@users.noreply.github.com> | ||||
| UsernamesLame <156965854+UsernamesLame@users.noreply.github.com> | ||||
| Vadim Peretokin <vperetokin@hey.com> | ||||
| Valentin Gosu <1454649+valenting@users.noreply.github.com> | ||||
| Vin Misra <vinith@alum.mit.edu> | ||||
| Vulcan <93451215+trholding@users.noreply.github.com> | ||||
| WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com> | ||||
| William Tambellini <william.tambellini@gmail.com> | ||||
| William Tambellini <wtambellini@sdl.com> | ||||
| Wilson Silva <wilson.dsigns@gmail.com> | ||||
| Xiang (Kevin) Li <kevinli020508@gmail.com> | ||||
| Xiao-Yong Jin <jinxiaoyong@gmail.com> | ||||
| XiaotaoChen <chenxiaotao1234@gmail.com> | ||||
| Xingchen Song(宋星辰) <xingchensong1996@163.com> | ||||
| Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> | ||||
| Xuan Son Nguyen <thichthat@gmail.com> | ||||
| Yajing Tang <phillis@google.com> | ||||
| Yang Shen <aplshenyang@gmail.com> | ||||
| Yunès <jean.baptiste.yunes@free.fr> | ||||
| Yuri Khrustalev <ykhrustalev@users.noreply.github.com> | ||||
| Yusuf Redžić <48274562+redzic@users.noreply.github.com> | ||||
| ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com> | ||||
| Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> | ||||
| Zhiyuan Li <lizhiyuan@uniartisan.com> | ||||
| Zhiyuan Li <uniartisan2017@gmail.com> | ||||
| Zigfrid Zvezdin <ziggerZZ@gmail.com> | ||||
| Zollner <24618122+Zolliner@users.noreply.github.com> | ||||
| a3sh <38979186+A3shTnT@users.noreply.github.com> | ||||
| ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> | ||||
| agray3 <agray3@users.noreply.github.com> | ||||
| ai-at-home <149282006+ai-at-home@users.noreply.github.com> | ||||
| aldorof <aldorof@users.noreply.github.com> | ||||
| alonfaraj <alonfaraj@gmail.com> | ||||
| amd-dwang <dong.wang@amd.com> | ||||
| amritahs-ibm <amritahs@linux.vnet.ibm.com> | ||||
| andypayne <apayne@gmail.com> | ||||
| ardfork <134447697+ardfork@users.noreply.github.com> | ||||
| arizhih <40765267+arizhih@users.noreply.github.com> | ||||
| automaticcat <daogiatuank54@gmail.com> | ||||
| bandoti <141645996+bandoti@users.noreply.github.com> | ||||
| be-next <jerome.ramette@gmail.com> | ||||
| bert hubert <bert@hubertnet.nl> | ||||
| billyct <billy_allen@126.com> | ||||
| bmwl <brian.marshall@tolko.com> | ||||
| bobqianic <129547291+bobqianic@users.noreply.github.com> | ||||
| bocytko <bocytko+github@gmail.com> | ||||
| boolemancer <48014766+boolemancer@users.noreply.github.com> | ||||
| boolemancer <boolemancer@gmail.com> | ||||
| bradmit <151883577+bradmit@users.noreply.github.com> | ||||
| brunofaustino <b.fa.amorim@gmail.com> | ||||
| bssrdf <merlintiger@hotmail.com> | ||||
| byte-6174 <88070277+byte-6174@users.noreply.github.com> | ||||
| cdosoftei <ciprian.dosoftei@gmail.com> | ||||
| clach04 <Chris.Clark@actian.com> | ||||
| compilade <113953597+compilade@users.noreply.github.com> | ||||
| compilade <git@compilade.net> | ||||
| conradg <conradjgodfrey@gmail.com> | ||||
| crummyh <elijah@crums.us> | ||||
| ddpasa <112642920+ddpasa@users.noreply.github.com> | ||||
| denersc <denerstassun@gmail.com> | ||||
| dscripka <dscripka@users.noreply.github.com> | ||||
| duthils <duthils@duthils.net> | ||||
| ecneladis <ecneladis@users.noreply.github.com> | ||||
| faker <nspyia2002@gmail.com> | ||||
| fitzsim <fitzsim@fitzsim.org> | ||||
| fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> | ||||
| fraxy-v <65565042+fraxy-v@users.noreply.github.com> | ||||
| genevera (she/her) <genevera@users.noreply.github.com> | ||||
| geniusnut <geniusnut@gmail.com> | ||||
| gilbertgong <gilbert.gong@gmail.com> | ||||
| gn64 <yukikaze.jp@gmail.com> | ||||
| goldwaving <77494627+goldwaving@users.noreply.github.com> | ||||
| greeshmay <greeshmay@gmail.com> | ||||
| haopeng <657407891@qq.com> | ||||
| hipudding <huafengchun@gmail.com> | ||||
| hsinhoyeh <yhh92u@gmail.com> | ||||
| hydai <z54981220@gmail.com> | ||||
| iamthad <thadeus.j.fleming@gmail.com> | ||||
| issixx <46835150+issixx@users.noreply.github.com> | ||||
| james wolf <contractorwolf@hotmail.com> | ||||
| jdomke <28772296+jdomke@users.noreply.github.com> | ||||
| jettoblack <jettoblack@gmail.com> | ||||
| jiez <373447296@qq.com> | ||||
| joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> | ||||
| jorismertz <35079666+jorismertz@users.noreply.github.com> | ||||
| junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> | ||||
| junkfood <69683722+JunkFood02@users.noreply.github.com> | ||||
| jwijffels <jwijffels@bnosac.be> | ||||
| k.h.lai <adrian.k.h.lai@outlook.com> | ||||
| kamranjon <kamranjon@gmail.com> | ||||
| katsu560 <katsu560oo-@docomo.ne.jp> | ||||
| kennethge <57784063+kenneth-ge@users.noreply.github.com> | ||||
| keyehzy <msamuel@aluno.puc-rio.br> | ||||
| kunnis <kunnis@users.noreply.github.com> | ||||
| l3utterfly <gc.pthzfoldr@gmail.com> | ||||
| leejet <leejet714@gmail.com> | ||||
| leo-pony <nengjunma@outlook.com> | ||||
| lhez <quic_lih@quicinc.com> | ||||
| litong <31761981+litongjava@users.noreply.github.com> | ||||
| liuwei-git <14815172+liuwei-git@users.noreply.github.com> | ||||
| lnyan <lkwq007@gmail.com> | ||||
| luoyu-intel <yu.luo@intel.com> | ||||
| m.bell <m.bell@techsmith.com> | ||||
| mahorozte <41834471+mahorozte@users.noreply.github.com> | ||||
| mashizora <30516315+mashizora@users.noreply.github.com> | ||||
| matt23654 <matthew.webber@protonmail.com> | ||||
| matteo <matteogeniaccio@yahoo.it> | ||||
| mgrachten <maarten@grachten.eu> | ||||
| mkiol <mkiol@users.noreply.github.com> | ||||
| mky_coder <47767389+mkycoder@users.noreply.github.com> | ||||
| novag <7754358+novag@users.noreply.github.com> | ||||
| pajowu <pajowu@pajowu.de> | ||||
| pengxin99 <pengxin.yuan@intel.com> | ||||
| petterreinholdtsen <pere-github@hungry.com> | ||||
| polarmoon <90010972+polarmoon@users.noreply.github.com> | ||||
| rlapray <lapray.romain@gmail.com> | ||||
| sandrohanea <40202887+sandrohanea@users.noreply.github.com> | ||||
| semiformal-net <84111142+semiformal-net@users.noreply.github.com> | ||||
| shibukazu <61775791+shibukazu@users.noreply.github.com> | ||||
| shikokuchuo <53399081+shikokuchuo@users.noreply.github.com> | ||||
| slaren <slarengh@gmail.com> | ||||
| slashlib <slashlib@users.noreply.github.com> | ||||
| snadampal <87143774+snadampal@users.noreply.github.com> | ||||
| someone13574 <81528246+someone13574@users.noreply.github.com> | ||||
| st-gr <38470677+st-gr@users.noreply.github.com> | ||||
| stduhpf <stephduh@live.fr> | ||||
| stormofice <58337328+stormofice@users.noreply.github.com> | ||||
| texmex76 <40733439+texmex76@users.noreply.github.com> | ||||
| thefinaldegree <thefinaldegree@gmail.com> | ||||
| thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> | ||||
| toboil-features <160222185+toboil-features@users.noreply.github.com> | ||||
| trixirt <trix@redhat.com> | ||||
| ulatekh <ulatekh@yahoo.com> | ||||
| undef <undefdev@gmail.com> | ||||
| uvos <devnull@uvos.xyz> | ||||
| uvos <philipp@uvos.xyz> | ||||
| valVk <valVk@users.noreply.github.com> | ||||
| venkr <venkateshrameshkumar+1@gmail.com> | ||||
| vicalloy <zbirder@gmail.com> | ||||
| wangshuai09 <391746016@qq.com> | ||||
| woachk <24752637+woachk@users.noreply.github.com> | ||||
| xctan <axunlei@gmail.com> | ||||
| xdrudis <xavierdrudis@yahoo.es> | ||||
| yuri@FreeBSD <yuri@FreeBSD> | ||||
| zhangjixiong <code.zjx@gmail.com> | ||||
| zhentaoyu <zhentao.yu@intel.com> | ||||
| zhouwg <6889919+zhouwg@users.noreply.github.com> | ||||
| zhouwg <zhouwg2000@gmail.com> | ||||
| 谢乃闻 <sienaiwun@users.noreply.github.com> | ||||
| 布客飞龙 <562826179@qq.com> | ||||
| Артём Земляк <azemlyak@smart-consulting.ru> | ||||
							
								
								
									
										185
									
								
								packages/app-mobile/android/vendor/whisper.cpp/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										185
									
								
								packages/app-mobile/android/vendor/whisper.cpp/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,185 @@ | ||||
| cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories. | ||||
| project("whisper.cpp" C CXX) | ||||
| project("whisper.cpp" VERSION 1.7.4) | ||||
| include(CheckIncludeFileCXX) | ||||
|  | ||||
| set(SOVERSION 1) | ||||
|  | ||||
| #set(CMAKE_WARN_DEPRECATED YES) | ||||
| set(CMAKE_WARN_UNUSED_CLI YES) | ||||
|  | ||||
| set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||||
|  | ||||
| if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) | ||||
|     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||||
|     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") | ||||
| endif() | ||||
|  | ||||
| # Add path to modules | ||||
| list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") | ||||
|  | ||||
| set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) | ||||
|  | ||||
| if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||||
|     set(WHISPER_STANDALONE ON) | ||||
|  | ||||
|     include(git-vars) | ||||
|  | ||||
|     # configure project version | ||||
|     configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY) | ||||
| else() | ||||
|     set(WHISPER_STANDALONE OFF) | ||||
| endif() | ||||
|  | ||||
| if (EMSCRIPTEN) | ||||
|     set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|  | ||||
|     option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON) | ||||
|  | ||||
|     # TODO: without these, we get the following error: | ||||
|     #       wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features. | ||||
|     set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -s TOTAL_STACK=5242880") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880") | ||||
| else() | ||||
|     if (MINGW) | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|     else() | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT ON) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) | ||||
|  | ||||
| # | ||||
| # option list | ||||
| # | ||||
|  | ||||
| # general | ||||
| option(WHISPER_CCACHE "whisper: use ccache if available" ON) | ||||
|  | ||||
| # debug | ||||
| option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings"                   ON) | ||||
| option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF) | ||||
|  | ||||
| # build | ||||
| option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF) | ||||
|  | ||||
| # sanitizers | ||||
| option(WHISPER_SANITIZE_THREAD    "whisper: enable thread sanitizer"    OFF) | ||||
| option(WHISPER_SANITIZE_ADDRESS   "whisper: enable address sanitizer"   OFF) | ||||
| option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF) | ||||
|  | ||||
| # extra artifacts | ||||
| option(WHISPER_BUILD_TESTS    "whisper: build tests"          ${WHISPER_STANDALONE}) | ||||
| option(WHISPER_BUILD_EXAMPLES "whisper: build examples"       ${WHISPER_STANDALONE}) | ||||
| option(WHISPER_BUILD_SERVER   "whisper: build server example" ${WHISPER_STANDALONE}) | ||||
|  | ||||
| # 3rd party libs | ||||
| option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF) | ||||
| option(WHISPER_SDL2 "whisper: support for libSDL2" OFF) | ||||
|  | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|     option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF) | ||||
| endif() | ||||
|  | ||||
| option(WHISPER_COREML                "whisper: enable Core ML framework"  OFF) | ||||
| option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF) | ||||
| option(WHISPER_OPENVINO              "whisper: support for OpenVINO"      OFF) | ||||
|  | ||||
| # Required for relocatable CMake package | ||||
| include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) | ||||
|  | ||||
| # override ggml options | ||||
| set(GGML_CCACHE             ${WHISPER_CCACHE}) | ||||
| set(GGML_SANITIZE_THREAD    ${WHISPER_SANITIZE_THREAD}) | ||||
| set(GGML_SANITIZE_ADDRESS   ${WHISPER_SANITIZE_ADDRESS}) | ||||
| set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED}) | ||||
| set(GGML_ALL_WARNINGS       ${WHISPER_ALL_WARNINGS}) | ||||
| set(GGML_FATAL_WARNINGS     ${WHISPER_FATAL_WARNINGS}) | ||||
|  | ||||
| # transition helpers | ||||
| function (whisper_option_depr TYPE OLD NEW) | ||||
|     if (${OLD}) | ||||
|         message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n") | ||||
|         set(${NEW} ON) | ||||
|     endif() | ||||
| endfunction() | ||||
|  | ||||
| whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS              GGML_CUDA) | ||||
| whisper_option_depr(WARNING     WHISPER_CUDA                GGML_CUDA) | ||||
| whisper_option_depr(WARNING     WHISPER_KOMPUTE             GGML_KOMPUTE) | ||||
| whisper_option_depr(WARNING     WHISPER_METAL               GGML_METAL) | ||||
| whisper_option_depr(WARNING     WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY) | ||||
| whisper_option_depr(WARNING     WHISPER_NATIVE              GGML_NATIVE) | ||||
| whisper_option_depr(WARNING     WHISPER_OPENMP              GGML_OPENMP) | ||||
| whisper_option_depr(WARNING     WHISPER_RPC                 GGML_RPC) | ||||
| whisper_option_depr(WARNING     WHISPER_SYCL                GGML_SYCL) | ||||
| whisper_option_depr(WARNING     WHISPER_SYCL_F16            GGML_SYCL_F16) | ||||
|  | ||||
| # | ||||
| # build the library | ||||
| # | ||||
|  | ||||
| if (NOT TARGET ggml) | ||||
|     add_subdirectory(ggml) | ||||
|     # ... otherwise assume ggml is added by a parent CMakeLists.txt | ||||
| endif() | ||||
| add_subdirectory(src) | ||||
|  | ||||
| # | ||||
| # install | ||||
| # | ||||
|  | ||||
| include(GNUInstallDirs) | ||||
| include(CMakePackageConfigHelpers) | ||||
|  | ||||
| set(WHISPER_BUILD_NUMBER        ${BUILD_NUMBER}) | ||||
| set(WHISPER_BUILD_COMMIT        ${BUILD_COMMIT}) | ||||
| set(WHISPER_INSTALL_VERSION     ${CMAKE_PROJECT_VERSION}) | ||||
|  | ||||
| set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header  files") | ||||
| set(WHISPER_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files") | ||||
| set(WHISPER_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files") | ||||
|  | ||||
| get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS) | ||||
|  | ||||
| set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h) | ||||
| install(TARGETS whisper LIBRARY PUBLIC_HEADER) | ||||
|  | ||||
| configure_package_config_file( | ||||
|         ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake | ||||
|     INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper | ||||
|     PATH_VARS | ||||
|     WHISPER_INCLUDE_INSTALL_DIR | ||||
|     WHISPER_LIB_INSTALL_DIR | ||||
|     WHISPER_BIN_INSTALL_DIR ) | ||||
|  | ||||
| write_basic_package_version_file( | ||||
|     ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake | ||||
|     VERSION ${WHISPER_INSTALL_VERSION} | ||||
|     COMPATIBILITY SameMajorVersion) | ||||
|  | ||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake | ||||
|               ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake | ||||
|         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper) | ||||
|  | ||||
| configure_file(cmake/whisper.pc.in | ||||
|         "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc" | ||||
|         @ONLY) | ||||
|  | ||||
| install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc" | ||||
|         DESTINATION lib/pkgconfig) | ||||
|  | ||||
| # | ||||
| # programs, examples and tests | ||||
| # | ||||
|  | ||||
| if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION) | ||||
|     #include(CTest) | ||||
|     #add_subdirectory(tests) | ||||
| endif () | ||||
|  | ||||
| if (WHISPER_BUILD_EXAMPLES) | ||||
|     add_subdirectory(examples) | ||||
| endif() | ||||
							
								
								
									
										21
									
								
								packages/app-mobile/android/vendor/whisper.cpp/LICENSE
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								packages/app-mobile/android/vendor/whisper.cpp/LICENSE
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| MIT License | ||||
|  | ||||
| Copyright (c) 2023-2024 The ggml authors | ||||
|  | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
|  | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
|  | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
							
								
								
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Package.swift
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Package.swift
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| // swift-tools-version:5.5 | ||||
|  | ||||
| import PackageDescription | ||||
|  | ||||
| let package = Package( | ||||
|     name: "whisper", | ||||
|     platforms: [ | ||||
|         .macOS(.v12), | ||||
|         .iOS(.v14), | ||||
|         .watchOS(.v4), | ||||
|         .tvOS(.v14) | ||||
|     ], | ||||
|     products: [ | ||||
|         .library(name: "whisper", targets: ["whisper"]), | ||||
|     ], | ||||
|     targets: [ | ||||
|         .systemLibrary(name: "whisper", pkgConfig: "whisper"), | ||||
|     ] | ||||
| ) | ||||
							
								
								
									
										679
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										679
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,679 @@ | ||||
| # whisper.cpp | ||||
|  | ||||
|  | ||||
|  | ||||
| [](https://github.com/ggerganov/whisper.cpp/actions) | ||||
| [](https://opensource.org/licenses/MIT) | ||||
| [](https://conan.io/center/whisper-cpp) | ||||
| [](https://www.npmjs.com/package/whisper.cpp/) | ||||
|  | ||||
| > [!NOTE] | ||||
| > New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788 | ||||
|  | ||||
| Stable: [v1.7.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126) | ||||
|  | ||||
| High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model: | ||||
|  | ||||
| - Plain C/C++ implementation without dependencies | ||||
| - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support) | ||||
| - AVX intrinsics support for x86 architectures | ||||
| - [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics) | ||||
| - Mixed F16 / F32 precision | ||||
| - [Integer quantization support](#quantization) | ||||
| - Zero memory allocations at runtime | ||||
| - [Vulkan support](#vulkan-gpu-support) | ||||
| - Support for CPU-only inference | ||||
| - [Efficient GPU support for NVIDIA](#nvidia-gpu-support) | ||||
| - [OpenVINO Support](#openvino-support) | ||||
| - [Ascend NPU Support](#ascend-npu-support) | ||||
| - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h) | ||||
|  | ||||
| Supported platforms: | ||||
|  | ||||
| - [x] Mac OS (Intel and Arm) | ||||
| - [x] [iOS](examples/whisper.objc) | ||||
| - [x] [Android](examples/whisper.android) | ||||
| - [x] [Java](bindings/java/README.md) | ||||
| - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264) | ||||
| - [x] [WebAssembly](examples/whisper.wasm) | ||||
| - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)] | ||||
| - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166) | ||||
| - [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp) | ||||
|  | ||||
| The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp). | ||||
| The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library. | ||||
|  | ||||
| Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications. | ||||
| As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc) | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4 | ||||
|  | ||||
| You can also easily make your own offline voice assistant application: [command](examples/command) | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4 | ||||
|  | ||||
| On Apple Silicon, the inference runs fully on the GPU via Metal: | ||||
|  | ||||
| https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225 | ||||
|  | ||||
| ## Quick start | ||||
|  | ||||
| First clone the repository: | ||||
|  | ||||
| ```bash | ||||
| git clone https://github.com/ggerganov/whisper.cpp.git | ||||
| ``` | ||||
|  | ||||
| Navigate into the directory: | ||||
|  | ||||
| ``` | ||||
| cd whisper.cpp | ||||
| ``` | ||||
|  | ||||
| Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example: | ||||
|  | ||||
| ```bash | ||||
| sh ./models/download-ggml-model.sh base.en | ||||
| ``` | ||||
|  | ||||
| Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this: | ||||
|  | ||||
| ```bash | ||||
| # build the project | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
|  | ||||
| # transcribe an audio file | ||||
| ./build/bin/whisper-cli -f samples/jfk.wav | ||||
| ``` | ||||
|  | ||||
| --- | ||||
|  | ||||
| For a quick demo, simply run `make base.en`. | ||||
|  | ||||
| The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`. | ||||
|  | ||||
| For detailed usage instructions, run: `./build/bin/whisper-cli -h` | ||||
|  | ||||
| Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool. | ||||
| For example, you can use `ffmpeg` like this: | ||||
|  | ||||
| ```bash | ||||
| ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav | ||||
| ``` | ||||
|  | ||||
| ## More audio samples | ||||
|  | ||||
| If you want some extra audio samples to play with, simply run: | ||||
|  | ||||
| ``` | ||||
| make -j samples | ||||
| ``` | ||||
|  | ||||
| This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`. | ||||
|  | ||||
| You can download and run the other models as follows: | ||||
|  | ||||
| ``` | ||||
| make -j tiny.en | ||||
| make -j tiny | ||||
| make -j base.en | ||||
| make -j base | ||||
| make -j small.en | ||||
| make -j small | ||||
| make -j medium.en | ||||
| make -j medium | ||||
| make -j large-v1 | ||||
| make -j large-v2 | ||||
| make -j large-v3 | ||||
| make -j large-v3-turbo | ||||
| ``` | ||||
|  | ||||
| ## Memory usage | ||||
|  | ||||
| | Model  | Disk    | Mem     | | ||||
| | ------ | ------- | ------- | | ||||
| | tiny   | 75 MiB  | ~273 MB | | ||||
| | base   | 142 MiB | ~388 MB | | ||||
| | small  | 466 MiB | ~852 MB | | ||||
| | medium | 1.5 GiB | ~2.1 GB | | ||||
| | large  | 2.9 GiB | ~3.9 GB | | ||||
|  | ||||
| ## POWER VSX Intrinsics | ||||
|  | ||||
| `whisper.cpp` supports POWER architectures and includes code which | ||||
| significantly speeds operation on Linux running on POWER9/10, making it | ||||
| capable of faster-than-realtime transcription on underclocked Raptor | ||||
| Talos II. Ensure you have a BLAS package installed, and replace the | ||||
| standard cmake setup with: | ||||
|  | ||||
| ```bash | ||||
| # build with GGML_BLAS defined | ||||
| cmake -B build -DGGML_BLAS=1 | ||||
| cmake --build build --config Release | ||||
| ./build/bin/whisper-cli [ .. etc .. ] | ||||
| ``` | ||||
|  | ||||
| ## Quantization | ||||
|  | ||||
| `whisper.cpp` supports integer quantization of the Whisper `ggml` models. | ||||
| Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently. | ||||
|  | ||||
| Here are the steps for creating and using a quantized model: | ||||
|  | ||||
| ```bash | ||||
| # quantize a model with Q5_0 method | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
| ./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0 | ||||
|  | ||||
| # run the examples as usual, specifying the quantized model file | ||||
| ./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav | ||||
| ``` | ||||
|  | ||||
| ## Core ML support | ||||
|  | ||||
| On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant | ||||
| speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`: | ||||
|  | ||||
| - Install Python dependencies needed for the creation of the Core ML model: | ||||
|  | ||||
|   ```bash | ||||
|   pip install ane_transformers | ||||
|   pip install openai-whisper | ||||
|   pip install coremltools | ||||
|   ``` | ||||
|  | ||||
|   - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools. | ||||
|   - Python 3.10 is recommended. | ||||
|   - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination. | ||||
|   - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step: | ||||
|     - To create an environment, use: `conda create -n py310-whisper python=3.10 -y` | ||||
|     - To activate the environment, use: `conda activate py310-whisper` | ||||
|  | ||||
| - Generate a Core ML model. For example, to generate a `base.en` model, use: | ||||
|  | ||||
|   ```bash | ||||
|   ./models/generate-coreml-model.sh base.en | ||||
|   ``` | ||||
|  | ||||
|   This will generate the folder `models/ggml-base.en-encoder.mlmodelc` | ||||
|  | ||||
| - Build `whisper.cpp` with Core ML support: | ||||
|  | ||||
|   ```bash | ||||
|   # using CMake | ||||
|   cmake -B build -DWHISPER_COREML=1 | ||||
|   cmake --build build -j --config Release | ||||
|   ``` | ||||
|  | ||||
| - Run the examples as usual. For example: | ||||
|  | ||||
|   ```text | ||||
|   $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav | ||||
|  | ||||
|   ... | ||||
|  | ||||
|   whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc' | ||||
|   whisper_init_state: first run on a device may take a while ... | ||||
|   whisper_init_state: Core ML model loaded | ||||
|  | ||||
|   system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 | | ||||
|  | ||||
|   ... | ||||
|   ``` | ||||
|  | ||||
|   The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format. | ||||
|   Next runs are faster. | ||||
|  | ||||
| For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566). | ||||
|  | ||||
| ## OpenVINO support | ||||
|  | ||||
| On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed | ||||
| on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete). | ||||
|  | ||||
| This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`: | ||||
|  | ||||
| - First, setup python virtual env. and install python dependencies. Python 3.10 is recommended. | ||||
|  | ||||
|   Windows: | ||||
|  | ||||
|   ```powershell | ||||
|   cd models | ||||
|   python -m venv openvino_conv_env | ||||
|   openvino_conv_env\Scripts\activate | ||||
|   python -m pip install --upgrade pip | ||||
|   pip install -r requirements-openvino.txt | ||||
|   ``` | ||||
|  | ||||
|   Linux and macOS: | ||||
|  | ||||
|   ```bash | ||||
|   cd models | ||||
|   python3 -m venv openvino_conv_env | ||||
|   source openvino_conv_env/bin/activate | ||||
|   python -m pip install --upgrade pip | ||||
|   pip install -r requirements-openvino.txt | ||||
|   ``` | ||||
|  | ||||
| - Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use: | ||||
|  | ||||
|   ``` | ||||
|   python convert-whisper-to-openvino.py --model base.en | ||||
|   ``` | ||||
|  | ||||
|   This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that | ||||
|   is the default location that the OpenVINO extension will search at runtime. | ||||
|  | ||||
| - Build `whisper.cpp` with OpenVINO support: | ||||
|  | ||||
|   Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0). | ||||
|  | ||||
|   After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example: | ||||
|  | ||||
|   Linux: | ||||
|  | ||||
|   ```bash | ||||
|   source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh | ||||
|   ``` | ||||
|  | ||||
|   Windows (cmd): | ||||
|  | ||||
|   ```powershell | ||||
|   C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat | ||||
|   ``` | ||||
|  | ||||
|   And then build the project using cmake: | ||||
|  | ||||
|   ```bash | ||||
|   cmake -B build -DWHISPER_OPENVINO=1 | ||||
|   cmake --build build -j --config Release | ||||
|   ``` | ||||
|  | ||||
| - Run the examples as usual. For example: | ||||
|  | ||||
|   ```text | ||||
|   $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav | ||||
|  | ||||
|   ... | ||||
|  | ||||
|   whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml' | ||||
|   whisper_ctx_init_openvino_encoder: first run on a device may take a while ... | ||||
|   whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache | ||||
|   whisper_ctx_init_openvino_encoder: OpenVINO model loaded | ||||
|  | ||||
|   system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 | | ||||
|  | ||||
|   ... | ||||
|   ``` | ||||
|  | ||||
|   The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get | ||||
|   cached for the next run. | ||||
|  | ||||
| For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037). | ||||
|  | ||||
| ## NVIDIA GPU support | ||||
|  | ||||
| With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels. | ||||
| First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads | ||||
|  | ||||
| Now build `whisper.cpp` with CUDA support: | ||||
|  | ||||
| ``` | ||||
| cmake -B build -DGGML_CUDA=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| ## Vulkan GPU support | ||||
| Cross-vendor solution which allows you to accelerate workload on your GPU. | ||||
| First, make sure your graphics card driver provides support for Vulkan API. | ||||
|  | ||||
| Now build `whisper.cpp` with Vulkan support: | ||||
| ``` | ||||
| cmake -B build -DGGML_VULKAN=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| ## BLAS CPU support via OpenBLAS | ||||
|  | ||||
| Encoder processing can be accelerated on the CPU via OpenBLAS. | ||||
| First, make sure you have installed `openblas`: https://www.openblas.net/ | ||||
|  | ||||
| Now build `whisper.cpp` with OpenBLAS support: | ||||
|  | ||||
| ``` | ||||
| cmake -B build -DGGML_BLAS=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| ## Ascend NPU support | ||||
|  | ||||
| Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores. | ||||
|  | ||||
| First, check if your Ascend NPU device is supported: | ||||
|  | ||||
| **Verified devices** | ||||
| | Ascend NPU                    | Status  | | ||||
| |:-----------------------------:|:-------:| | ||||
| | Atlas 300T A2                 | Support | | ||||
|  | ||||
| Then, make sure you have installed the [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended. | ||||
|  | ||||
| Now build `whisper.cpp` with CANN support: | ||||
|  | ||||
| ``` | ||||
| cmake -B build -DGGML_CANN=1 | ||||
| cmake --build build -j --config Release | ||||
| ``` | ||||
|  | ||||
| Run the inference examples as usual, for example: | ||||
|  | ||||
| ``` | ||||
| ./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8 | ||||
| ``` | ||||
|  | ||||
| *Notes:* | ||||
|  | ||||
| - If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag. | ||||
| - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`. | ||||
|  | ||||
| ## Docker | ||||
|  | ||||
| ### Prerequisites | ||||
|  | ||||
| - Docker must be installed and running on your system. | ||||
| - Create a folder to store big models & intermediate files (ex. /whisper/models) | ||||
|  | ||||
| ### Images | ||||
|  | ||||
| We have two Docker images available for this project: | ||||
|  | ||||
| 1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) | ||||
| 2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) | ||||
|  | ||||
| ### Usage | ||||
|  | ||||
| ```shell | ||||
| # download model and persist it in a local folder | ||||
| docker run -it --rm \ | ||||
|   -v path/to/models:/models \ | ||||
|   whisper.cpp:main "./models/download-ggml-model.sh base /models" | ||||
| # transcribe an audio file | ||||
| docker run -it --rm \ | ||||
|   -v path/to/models:/models \ | ||||
|   -v path/to/audios:/audios \ | ||||
|   whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav" | ||||
| # transcribe an audio file in samples folder | ||||
| docker run -it --rm \ | ||||
|   -v path/to/models:/models \ | ||||
|   whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav" | ||||
| ``` | ||||
|  | ||||
| ## Installing with Conan | ||||
|  | ||||
| You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command: | ||||
|  | ||||
| ``` | ||||
| conan install --requires="whisper-cpp/[*]" --build=missing | ||||
| ``` | ||||
|  | ||||
| For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/). | ||||
|  | ||||
| ## Limitations | ||||
|  | ||||
| - Inference only | ||||
|  | ||||
| ## Real-time audio input example | ||||
|  | ||||
| This is a naive example of performing real-time inference on audio from your microphone. | ||||
| The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously. | ||||
| More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10). | ||||
|  | ||||
| ```bash | ||||
| cmake -B build -DWHISPER_SDL2=ON | ||||
| cmake --build build --config Release | ||||
| ./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4 | ||||
|  | ||||
| ## Confidence color-coding | ||||
|  | ||||
| Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy | ||||
| to highlight words with high or low confidence: | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors | ||||
| ``` | ||||
|  | ||||
| <img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png"> | ||||
|  | ||||
| ## Controlling the length of the generated text segments (experimental) | ||||
|  | ||||
| For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: | ||||
|  | ||||
| ```text | ||||
| $ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16 | ||||
|  | ||||
| whisper_model_load: loading model from './models/ggml-base.en.bin' | ||||
| ... | ||||
| system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | | ||||
|  | ||||
| main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... | ||||
|  | ||||
| [00:00:00.000 --> 00:00:00.850]   And so my | ||||
| [00:00:00.850 --> 00:00:01.590]   fellow | ||||
| [00:00:01.590 --> 00:00:04.140]   Americans, ask | ||||
| [00:00:04.140 --> 00:00:05.660]   not what your | ||||
| [00:00:05.660 --> 00:00:06.840]   country can do | ||||
| [00:00:06.840 --> 00:00:08.430]   for you, ask | ||||
| [00:00:08.430 --> 00:00:09.440]   what you can do | ||||
| [00:00:09.440 --> 00:00:10.020]   for your | ||||
| [00:00:10.020 --> 00:00:11.000]   country. | ||||
| ``` | ||||
|  | ||||
| ## Word-level timestamp (experimental) | ||||
|  | ||||
| The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`: | ||||
|  | ||||
| ```text | ||||
| $ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1 | ||||
|  | ||||
| whisper_model_load: loading model from './models/ggml-base.en.bin' | ||||
| ... | ||||
| system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | | ||||
|  | ||||
| main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... | ||||
|  | ||||
| [00:00:00.000 --> 00:00:00.320] | ||||
| [00:00:00.320 --> 00:00:00.370]   And | ||||
| [00:00:00.370 --> 00:00:00.690]   so | ||||
| [00:00:00.690 --> 00:00:00.850]   my | ||||
| [00:00:00.850 --> 00:00:01.590]   fellow | ||||
| [00:00:01.590 --> 00:00:02.850]   Americans | ||||
| [00:00:02.850 --> 00:00:03.300]  , | ||||
| [00:00:03.300 --> 00:00:04.140]   ask | ||||
| [00:00:04.140 --> 00:00:04.990]   not | ||||
| [00:00:04.990 --> 00:00:05.410]   what | ||||
| [00:00:05.410 --> 00:00:05.660]   your | ||||
| [00:00:05.660 --> 00:00:06.260]   country | ||||
| [00:00:06.260 --> 00:00:06.600]   can | ||||
| [00:00:06.600 --> 00:00:06.840]   do | ||||
| [00:00:06.840 --> 00:00:07.010]   for | ||||
| [00:00:07.010 --> 00:00:08.170]   you | ||||
| [00:00:08.170 --> 00:00:08.190]  , | ||||
| [00:00:08.190 --> 00:00:08.430]   ask | ||||
| [00:00:08.430 --> 00:00:08.910]   what | ||||
| [00:00:08.910 --> 00:00:09.040]   you | ||||
| [00:00:09.040 --> 00:00:09.320]   can | ||||
| [00:00:09.320 --> 00:00:09.440]   do | ||||
| [00:00:09.440 --> 00:00:09.760]   for | ||||
| [00:00:09.760 --> 00:00:10.020]   your | ||||
| [00:00:10.020 --> 00:00:10.510]   country | ||||
| [00:00:10.510 --> 00:00:11.000]  . | ||||
| ``` | ||||
|  | ||||
| ## Speaker segmentation via tinydiarize (experimental) | ||||
|  | ||||
| More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058 | ||||
|  | ||||
| Sample usage: | ||||
|  | ||||
| ```py | ||||
| # download a tinydiarize compatible model | ||||
| ./models/download-ggml-model.sh small.en-tdrz | ||||
|  | ||||
| # run as usual, adding the "-tdrz" command-line argument | ||||
| ./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz | ||||
| ... | ||||
| main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ... | ||||
| ... | ||||
| [00:00:00.000 --> 00:00:03.800]   Okay Houston, we've had a problem here. [SPEAKER_TURN] | ||||
| [00:00:03.800 --> 00:00:06.200]   This is Houston. Say again please. [SPEAKER_TURN] | ||||
| [00:00:06.200 --> 00:00:08.260]   Uh Houston we've had a problem. | ||||
| [00:00:08.260 --> 00:00:11.320]   We've had a main beam up on a volt. [SPEAKER_TURN] | ||||
| [00:00:11.320 --> 00:00:13.820]   Roger main beam interval. [SPEAKER_TURN] | ||||
| [00:00:13.820 --> 00:00:15.100]   Uh uh [SPEAKER_TURN] | ||||
| [00:00:15.100 --> 00:00:18.020]   So okay stand, by thirteen we're looking at it. [SPEAKER_TURN] | ||||
| [00:00:18.020 --> 00:00:25.740]   Okay uh right now uh Houston the uh voltage is uh is looking good um. | ||||
| [00:00:27.620 --> 00:00:29.940]   And we had a a pretty large bank or so. | ||||
| ``` | ||||
|  | ||||
| ## Karaoke-style movie generation (experimental) | ||||
|  | ||||
| The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the | ||||
| currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script. | ||||
| This requires to have `ffmpeg` installed. | ||||
|  | ||||
| Here are a few _"typical"_ examples: | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts | ||||
| source ./samples/jfk.wav.wts | ||||
| ffplay ./samples/jfk.wav.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts | ||||
| source ./samples/mm0.wav.wts | ||||
| ffplay ./samples/mm0.wav.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ```bash | ||||
| ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts | ||||
| source ./samples/gb0.wav.wts | ||||
| ffplay ./samples/gb0.wav.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Video comparison of different models | ||||
|  | ||||
| Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: | ||||
|  | ||||
| ```bash | ||||
| ./scripts/bench-wts.sh samples/jfk.wav | ||||
| ffplay ./samples/jfk.wav.all.mp4 | ||||
| ``` | ||||
|  | ||||
| https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4 | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Benchmarks | ||||
|  | ||||
| In order to have an objective comparison of the performance of the inference across different system configurations, | ||||
| use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it | ||||
| took to execute it. The results are summarized in the following Github issue: | ||||
|  | ||||
| [Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89) | ||||
|  | ||||
| Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py). | ||||
|  | ||||
| You can run it with the following command, by default it will run against any standard model in the models folder. | ||||
|  | ||||
| ```bash | ||||
| python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2 | ||||
| ``` | ||||
|  | ||||
| It is written in python with the intention of being easy to modify and extend for your benchmarking use case. | ||||
|  | ||||
| It outputs a csv file with the results of the benchmarking. | ||||
|  | ||||
| ## `ggml` format | ||||
|  | ||||
| The original models are converted to a custom binary format. This allows to pack everything needed into a single file: | ||||
|  | ||||
| - model parameters | ||||
| - mel filters | ||||
| - vocabulary | ||||
| - weights | ||||
|  | ||||
| You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script | ||||
| or manually from here: | ||||
|  | ||||
| - https://huggingface.co/ggerganov/whisper.cpp | ||||
| - https://ggml.ggerganov.com | ||||
|  | ||||
| For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md). | ||||
|  | ||||
| ## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings) | ||||
|  | ||||
| - [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310) | ||||
| - [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309) | ||||
|   - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn) | ||||
| - [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312) | ||||
| - [x] Java: | ||||
|   - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni) | ||||
| - [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507) | ||||
| - [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313) | ||||
|   - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper) | ||||
| - [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422) | ||||
|   - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net) | ||||
|   - [NickDarvey/whisper](https://github.com/NickDarvey/whisper) | ||||
| - [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9) | ||||
|   - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython) | ||||
|   - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp) | ||||
|   - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11) | ||||
|   - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11) | ||||
| - [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper) | ||||
| - [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity) | ||||
|  | ||||
| ## Examples | ||||
|  | ||||
| There are various examples of using the library for different projects in the [examples](examples) folder. | ||||
| Some of the examples are even ported to run in the browser using WebAssembly. Check them out! | ||||
|  | ||||
| | Example                                             | Web                                   | Description                                                                                                                     | | ||||
| | --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | [whisper-cli](examples/cli)                         | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper                                                                       | | ||||
| | [whisper-bench](examples/bench)                     | [bench.wasm](examples/bench.wasm)     | Benchmark the performance of Whisper on your machine                                                                            | | ||||
| | [whisper-stream](examples/stream)                   | [stream.wasm](examples/stream.wasm)   | Real-time transcription of raw microphone capture                                                                               | | ||||
| | [whisper-command](examples/command)                 | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic                                                         | | ||||
| | [whisper-server](examples/server)                   |                                       | HTTP transcription server with OAI-like API                                                                                     | | ||||
| | [whisper-talk-llama](examples/talk-llama)           |                                       | Talk with a LLaMA bot                                                                                                           | | ||||
| | [whisper.objc](examples/whisper.objc)               |                                       | iOS mobile application using whisper.cpp                                                                                        | | ||||
| | [whisper.swiftui](examples/whisper.swiftui)         |                                       | SwiftUI iOS / macOS application using whisper.cpp                                                                               | | ||||
| | [whisper.android](examples/whisper.android)         |                                       | Android mobile application using whisper.cpp                                                                                    | | ||||
| | [whisper.nvim](examples/whisper.nvim)               |                                       | Speech-to-text plugin for Neovim                                                                                                | | ||||
| | [generate-karaoke.sh](examples/generate-karaoke.sh) |                                       | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture                           | | ||||
| | [livestream.sh](examples/livestream.sh)             |                                       | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185)                                           | | ||||
| | [yt-wsp.sh](examples/yt-wsp.sh)                     |                                       | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) | | ||||
| | [wchess](examples/wchess)                           | [wchess.wasm](examples/wchess)        | Voice-controlled chess                                                                                                          | | ||||
|  | ||||
| ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions) | ||||
|  | ||||
| If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic. | ||||
| You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category | ||||
| to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the | ||||
| [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion. | ||||
							
								
								
									
										249
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README_sycl.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								packages/app-mobile/android/vendor/whisper.cpp/README_sycl.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,249 @@ | ||||
| # whisper.cpp for SYCL | ||||
|  | ||||
| [Background](#background) | ||||
|  | ||||
| [OS](#os) | ||||
|  | ||||
| [Intel GPU](#intel-gpu) | ||||
|  | ||||
| [Linux](#linux) | ||||
|  | ||||
| [Environment Variable](#environment-variable) | ||||
|  | ||||
| [Known Issue](#known-issue) | ||||
|  | ||||
| [Todo](#todo) | ||||
|  | ||||
| ## Background | ||||
|  | ||||
| SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators — such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17. | ||||
|  | ||||
| oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms. | ||||
|  | ||||
| Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs. | ||||
|  | ||||
| To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool, [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)), to migrate to SYCL. | ||||
|  | ||||
| The whisper.cpp for SYCL is used to support Intel GPUs. | ||||
|  | ||||
| For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build). | ||||
|  | ||||
| ## OS | ||||
|  | ||||
| |OS|Status|Verified| | ||||
| |-|-|-| | ||||
| |Linux|Support|Ubuntu 22.04| | ||||
| |Windows|Ongoing| | | ||||
|  | ||||
|  | ||||
| ## Intel GPU | ||||
|  | ||||
| |Intel GPU| Status | Verified Model| | ||||
| |-|-|-| | ||||
| |Intel Data Center Max Series| Support| Max 1550| | ||||
| |Intel Data Center Flex Series| Support| Flex 170| | ||||
| |Intel Arc Series| Support| Arc 770| | ||||
| |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake| | ||||
| |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7| | ||||
|  | ||||
|  | ||||
| ## Linux | ||||
|  | ||||
| ### Setup Environment | ||||
|  | ||||
| 1. Install Intel GPU driver. | ||||
|  | ||||
| a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html). | ||||
|  | ||||
| Note: for iGPU, please install the client GPU driver. | ||||
|  | ||||
| b. Add user to group: video, render. | ||||
|  | ||||
| ``` | ||||
| sudo usermod -aG render username | ||||
| sudo usermod -aG video username | ||||
| ``` | ||||
|  | ||||
| Note: re-login to enable it. | ||||
|  | ||||
| c. Check | ||||
|  | ||||
| ``` | ||||
| sudo apt install clinfo | ||||
| sudo clinfo -l | ||||
| ``` | ||||
|  | ||||
| Output (example): | ||||
|  | ||||
| ``` | ||||
| Platform #0: Intel(R) OpenCL Graphics | ||||
|  `-- Device #0: Intel(R) Arc(TM) A770 Graphics | ||||
|  | ||||
|  | ||||
| Platform #0: Intel(R) OpenCL HD Graphics | ||||
|  `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49] | ||||
| ``` | ||||
|  | ||||
| 2. Install the Intel® oneAPI Base Toolkit. | ||||
|  | ||||
|  | ||||
| a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html). | ||||
|  | ||||
| We recommend installing to the default folder: **/opt/intel/oneapi**. | ||||
|  | ||||
| The following guide uses the default folder as an example. If you use a different folder, adjust the paths in this guide accordingly. | ||||
|  | ||||
| b. Check | ||||
|  | ||||
| ``` | ||||
| source /opt/intel/oneapi/setvars.sh | ||||
|  | ||||
| sycl-ls | ||||
| ``` | ||||
|  | ||||
| There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**. | ||||
|  | ||||
| Output (example): | ||||
| ``` | ||||
| [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.10.0.17_160000] | ||||
| [opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000] | ||||
| [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO  [23.30.26918.50] | ||||
| [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918] | ||||
|  | ||||
| ``` | ||||
|  | ||||
| 3. Build locally: | ||||
|  | ||||
| ``` | ||||
| mkdir -p build | ||||
| cd build | ||||
| source /opt/intel/oneapi/setvars.sh | ||||
|  | ||||
| #for FP16 | ||||
| #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON  | ||||
|  | ||||
| #for FP32 | ||||
| cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx | ||||
|  | ||||
| #build example/main only | ||||
| #cmake --build . --config Release --target main | ||||
|  | ||||
| #build all binary | ||||
| cmake --build . --config Release -v | ||||
|  | ||||
| ``` | ||||
|  | ||||
| or | ||||
|  | ||||
| ``` | ||||
| ./examples/sycl/build.sh | ||||
| ``` | ||||
|  | ||||
| Note: | ||||
|  | ||||
| - By default, all binaries are built, which takes more time. To reduce build time, we recommend building **example/main** only. | ||||
|  | ||||
| ### Run | ||||
|  | ||||
| 1. Put model file to folder **models** | ||||
|  | ||||
| 2. Enable oneAPI running environment | ||||
|  | ||||
| ``` | ||||
| source /opt/intel/oneapi/setvars.sh | ||||
| ``` | ||||
|  | ||||
| 3. List device ID | ||||
|  | ||||
| Run without parameter: | ||||
|  | ||||
| ``` | ||||
| ./build/bin/ls-sycl-device | ||||
|  | ||||
| or | ||||
|  | ||||
| ./build/bin/main | ||||
| ``` | ||||
|  | ||||
| Check the ID in startup log, like: | ||||
|  | ||||
| ``` | ||||
| found 4 SYCL devices: | ||||
|   Device 0: Intel(R) Arc(TM) A770 Graphics,	compute capability 1.3, | ||||
|     max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136 | ||||
|   Device 1: Intel(R) FPGA Emulation Device,	compute capability 1.2, | ||||
|     max compute_units 24,	max work group size 67108864,	max sub group size 64,	global mem size 67065057280 | ||||
|   Device 2: 13th Gen Intel(R) Core(TM) i7-13700K,	compute capability 3.0, | ||||
|     max compute_units 24,	max work group size 8192,	max sub group size 64,	global mem size 67065057280 | ||||
|   Device 3: Intel(R) Arc(TM) A770 Graphics,	compute capability 3.0, | ||||
|     max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136 | ||||
|  | ||||
| ``` | ||||
|  | ||||
| |Attribute|Note| | ||||
| |-|-| | ||||
| |compute capability 1.3|Level-zero running time, recommended | | ||||
| |compute capability 3.0|OpenCL running time, slower than level-zero in most cases| | ||||
|  | ||||
| 4. Set device ID and execute whisper.cpp | ||||
|  | ||||
| Set device ID = 0 by **GGML_SYCL_DEVICE=0** | ||||
|  | ||||
| ``` | ||||
| GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav | ||||
| ``` | ||||
| or run by script: | ||||
|  | ||||
| ``` | ||||
| ./examples/sycl/run_whisper.sh | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
| 5. Check the device ID in output | ||||
|  | ||||
| Like: | ||||
| ``` | ||||
| Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## Environment Variable | ||||
|  | ||||
| #### Build | ||||
|  | ||||
| |Name|Value|Function| | ||||
| |-|-|-| | ||||
| |WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.| | ||||
| |WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.| | ||||
| |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path| | ||||
| |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path| | ||||
|  | ||||
| #### Running | ||||
|  | ||||
|  | ||||
| |Name|Value|Function| | ||||
| |-|-|-| | ||||
| |GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output| | ||||
| |GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG| | ||||
|  | ||||
| ## Known Issue | ||||
|  | ||||
| - Error:  `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`. | ||||
|  | ||||
|   The oneAPI running environment is not enabled. | ||||
|  | ||||
|   Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`. | ||||
|  | ||||
|  | ||||
| - Hang during startup | ||||
|  | ||||
|   llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy may behave abnormally and block. | ||||
|  | ||||
|   Solution: add **--no-mmap**. | ||||
|  | ||||
| ## Todo | ||||
|  | ||||
| - Support to build in Windows. | ||||
|  | ||||
| - Support multiple cards. | ||||
							
								
								
									
										5
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/module.modulemap
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/module.modulemap
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| module whisper [system] { | ||||
|     header "whisper.h" | ||||
|     link "whisper" | ||||
|     export * | ||||
| } | ||||
							
								
								
									
										4
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/whisper.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/whisper.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <whisper.h> | ||||
|  | ||||
							
								
								
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/close-issue.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/close-issue.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| name: Close inactive issues | ||||
| on: | ||||
|   schedule: | ||||
|     - cron: "42 0 * * *" | ||||
|  | ||||
| # Fine-grant permission | ||||
| # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token | ||||
| permissions: | ||||
|   issues: write | ||||
|  | ||||
| jobs: | ||||
|   close-issues: | ||||
|     runs-on: ubuntu-latest | ||||
|     permissions: | ||||
|       issues: write | ||||
|       pull-requests: write | ||||
|     steps: | ||||
|       - uses: actions/stale@v5 | ||||
|         with: | ||||
|           exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap" | ||||
|           days-before-issue-stale: 30 | ||||
|           days-before-issue-close: 14 | ||||
|           stale-issue-label: "stale" | ||||
|           close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." | ||||
|           days-before-pr-stale: -1 | ||||
|           days-before-pr-close: -1 | ||||
|           operations-per-run: 10000 | ||||
|           repo-token: ${{ secrets.GITHUB_TOKEN }} | ||||
							
								
								
									
										16
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| # Set the default compile features and properties for a target. | ||||
|  | ||||
| if (NOT TARGET) | ||||
|     message(FATAL_ERROR "TARGET not set before including DefaultTargetOptions") | ||||
| endif() | ||||
|  | ||||
| target_compile_features(${TARGET} | ||||
|     PRIVATE | ||||
|         cxx_std_11 | ||||
|     ) | ||||
|  | ||||
| set_target_properties(${TARGET} | ||||
|     PROPERTIES | ||||
|         EXPORT_COMPILE_COMMANDS ON | ||||
|         RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin" | ||||
| ) | ||||
							
								
								
									
										163
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/FindFFmpeg.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/FindFFmpeg.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,163 @@ | ||||
| # From | ||||
| # https://github.com/snikulov/cmake-modules/blob/master/FindFFmpeg.cmake | ||||
| # | ||||
| # vim: ts=2 sw=2 | ||||
| # - Try to find the required ffmpeg components(default: AVFORMAT, AVUTIL, AVCODEC) | ||||
| # | ||||
| # Once done this will define | ||||
| #  FFMPEG_FOUND         - System has the all required components. | ||||
| #  FFMPEG_INCLUDE_DIRS  - Include directory necessary for using the required components headers. | ||||
| #  FFMPEG_LIBRARIES     - Link these to use the required ffmpeg components. | ||||
| #  FFMPEG_DEFINITIONS   - Compiler switches required for using the required ffmpeg components. | ||||
| # | ||||
| # For each of the components it will additionally set. | ||||
| #   - AVCODEC | ||||
| #   - AVDEVICE | ||||
| #   - AVFORMAT | ||||
| #   - AVFILTER | ||||
| #   - AVUTIL | ||||
| #   - POSTPROC | ||||
| #   - SWSCALE | ||||
| # the following variables will be defined | ||||
| #  <component>_FOUND        - System has <component> | ||||
| #  <component>_INCLUDE_DIRS - Include directory necessary for using the <component> headers | ||||
| #  <component>_LIBRARIES    - Link these to use <component> | ||||
| #  <component>_DEFINITIONS  - Compiler switches required for using <component> | ||||
| #  <component>_VERSION      - The components version | ||||
| # | ||||
| # Copyright (c) 2006, Matthias Kretz, <kretz@kde.org> | ||||
| # Copyright (c) 2008, Alexander Neundorf, <neundorf@kde.org> | ||||
| # Copyright (c) 2011, Michael Jansen, <kde@michael-jansen.biz> | ||||
| # | ||||
| # Redistribution and use is allowed according to the terms of the BSD license. | ||||
| # For details see the accompanying COPYING-CMAKE-SCRIPTS file. | ||||
|  | ||||
| include(FindPackageHandleStandardArgs) | ||||
|  | ||||
| # The default components were taken from a survey over other FindFFMPEG.cmake files | ||||
| if (NOT FFmpeg_FIND_COMPONENTS) | ||||
|   set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL SWRESAMPLE) | ||||
| endif() | ||||
|  | ||||
| # | ||||
| ### Macro: set_component_found | ||||
| # | ||||
| # Marks the given component as found if both *_LIBRARIES AND *_INCLUDE_DIRS is present. | ||||
| # | ||||
| macro(set_component_found _component ) | ||||
|   if (${_component}_LIBRARIES AND ${_component}_INCLUDE_DIRS) | ||||
|     message(DEBUG "  - ${_component} found.") | ||||
|     set(${_component}_FOUND TRUE) | ||||
|   else () | ||||
|   message(DEBUG "  - ${_component} not found.") | ||||
|   endif () | ||||
| endmacro() | ||||
|  | ||||
| # | ||||
| ### Macro: find_component | ||||
| # | ||||
| # Checks for the given component by invoking pkgconfig and then looking up the libraries and | ||||
| # include directories. | ||||
| # | ||||
| macro(find_component _component _pkgconfig _library _header) | ||||
|  | ||||
|   if (NOT WIN32) | ||||
|      # use pkg-config to get the directories and then use these values | ||||
|      # in the FIND_PATH() and FIND_LIBRARY() calls | ||||
|      find_package(PkgConfig) | ||||
|      if (PKG_CONFIG_FOUND) | ||||
|        pkg_check_modules(PC_${_component} ${_pkgconfig}) | ||||
|        message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDEDIR}") | ||||
|        message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDE_DIRS}") | ||||
|        message(STATUS "${PC_${_component}_CFLAGS}") | ||||
|      endif () | ||||
|   endif (NOT WIN32) | ||||
|  | ||||
|  | ||||
|   find_path(${_component}_INCLUDE_DIRS ${_header} | ||||
|     HINTS | ||||
|       ${PC_${_component}_INCLUDEDIR} | ||||
|       ${PC_${_component}_INCLUDE_DIRS} | ||||
|     PATH_SUFFIXES | ||||
|       ffmpeg | ||||
|   ) | ||||
|  | ||||
|   # CMake's default is to search first for shared libraries and then for static libraries. | ||||
|   # Todo later: add option to prefer static libs over dynamic: | ||||
|   find_library(${_component}_LIBRARIES NAMES ${_library} lib${_library}.a | ||||
|       HINTS | ||||
|       ${PC_${_component}_LIBDIR} | ||||
|       ${PC_${_component}_LIBRARY_DIRS} | ||||
|   ) | ||||
|  | ||||
|   set(${_component}_DEFINITIONS  ${PC_${_component}_CFLAGS_OTHER} CACHE STRING "The ${_component} CFLAGS.") | ||||
|   set(${_component}_VERSION      ${PC_${_component}_VERSION}      CACHE STRING "The ${_component} version number.") | ||||
|  | ||||
|   set_component_found(${_component}) | ||||
|  | ||||
|   mark_as_advanced( | ||||
|     ${_component}_INCLUDE_DIRS | ||||
|     ${_component}_LIBRARIES | ||||
|     ${_component}_DEFINITIONS | ||||
|     ${_component}_VERSION) | ||||
|  | ||||
| endmacro() | ||||
|  | ||||
|  | ||||
| # Check for cached results. If there are skip the costly part. | ||||
| if (NOT FFMPEG_LIBRARIES) | ||||
|  | ||||
|   # Check for all possible component. | ||||
|   find_component(AVCODEC    libavcodec    avcodec  libavcodec/avcodec.h) | ||||
|   find_component(AVFORMAT   libavformat   avformat libavformat/avformat.h) | ||||
|   find_component(AVDEVICE   libavdevice   avdevice libavdevice/avdevice.h) | ||||
|   #find_component(AVRESAMPLE libavresample avresample libavresample/avresample.h) # old name for swresample | ||||
|   find_component(AVUTIL     libavutil     avutil   libavutil/avutil.h) | ||||
|   find_component(AVFILTER   libavfilter   avfilter libavfilter/avfilter.h) | ||||
|   find_component(SWSCALE    libswscale    swscale  libswscale/swscale.h) | ||||
|   find_component(POSTPROC   libpostproc   postproc libpostproc/postprocess.h) | ||||
|   find_component(SWRESAMPLE libswresample swresample libswresample/swresample.h) | ||||
|  | ||||
|   # Check if the required components were found and add their stuff to the FFMPEG_* vars. | ||||
|   foreach (_component ${FFmpeg_FIND_COMPONENTS}) | ||||
|     if (${_component}_FOUND) | ||||
|       # message(STATUS "Required component ${_component} present.") | ||||
|       set(FFMPEG_LIBRARIES   ${FFMPEG_LIBRARIES}   ${${_component}_LIBRARIES}) | ||||
|       set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} ${${_component}_DEFINITIONS}) | ||||
|       list(APPEND FFMPEG_INCLUDE_DIRS ${${_component}_INCLUDE_DIRS}) | ||||
|     else () | ||||
|       # message(STATUS "Required component ${_component} missing.") | ||||
|     endif () | ||||
|   endforeach () | ||||
|  | ||||
|   # Build the include path with duplicates removed. | ||||
|   if (FFMPEG_INCLUDE_DIRS) | ||||
|     list(REMOVE_DUPLICATES FFMPEG_INCLUDE_DIRS) | ||||
|   endif () | ||||
|  | ||||
|   # cache the vars. | ||||
|   set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIRS} CACHE STRING "The FFmpeg include directories." FORCE) | ||||
|   set(FFMPEG_LIBRARIES    ${FFMPEG_LIBRARIES}    CACHE STRING "The FFmpeg libraries." FORCE) | ||||
|   set(FFMPEG_DEFINITIONS  ${FFMPEG_DEFINITIONS}  CACHE STRING "The FFmpeg cflags." FORCE) | ||||
|  | ||||
|   mark_as_advanced(FFMPEG_INCLUDE_DIRS | ||||
|                    FFMPEG_LIBRARIES | ||||
|                    FFMPEG_DEFINITIONS) | ||||
|  | ||||
| endif () | ||||
|  | ||||
| # Now set the noncached _FOUND vars for the components. | ||||
| # whisper.cpp does not need SWSCALE | ||||
| foreach (_component AVCODEC AVDEVICE AVFORMAT AVRESAMPLE AVUTIL POSTPROCESS) | ||||
|   set_component_found(${_component}) | ||||
| endforeach () | ||||
|  | ||||
| # Compile the list of required vars | ||||
| set(_FFmpeg_REQUIRED_VARS FFMPEG_LIBRARIES FFMPEG_INCLUDE_DIRS) | ||||
| foreach (_component ${FFmpeg_FIND_COMPONENTS}) | ||||
|   list(APPEND _FFmpeg_REQUIRED_VARS ${_component}_LIBRARIES ${_component}_INCLUDE_DIRS) | ||||
| endforeach () | ||||
|  | ||||
| # Give a nice error message if some of the required vars are missing. | ||||
| find_package_handle_standard_args(FFmpeg DEFAULT_MSG ${_FFmpeg_REQUIRED_VARS}) | ||||
|  | ||||
							
								
								
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/build-info.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/build-info.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| set(BUILD_NUMBER 0) | ||||
| set(BUILD_COMMIT "unknown") | ||||
| set(BUILD_COMPILER "unknown") | ||||
| set(BUILD_TARGET "unknown") | ||||
|  | ||||
| # Look for git | ||||
| find_package(Git) | ||||
| if(NOT Git_FOUND) | ||||
|     find_program(GIT_EXECUTABLE NAMES git git.exe) | ||||
|     if(GIT_EXECUTABLE) | ||||
|         set(Git_FOUND TRUE) | ||||
|         message(STATUS "Found Git: ${GIT_EXECUTABLE}") | ||||
|     else() | ||||
|         message(WARNING "Git not found. Build info will not be accurate.") | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| # Get the commit count and hash | ||||
| if(Git_FOUND) | ||||
|     execute_process( | ||||
|         COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE HEAD | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|         RESULT_VARIABLE RES | ||||
|     ) | ||||
|     if (RES EQUAL 0) | ||||
|         set(BUILD_COMMIT ${HEAD}) | ||||
|     endif() | ||||
|     execute_process( | ||||
|         COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE COUNT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|         RESULT_VARIABLE RES | ||||
|     ) | ||||
|     if (RES EQUAL 0) | ||||
|         set(BUILD_NUMBER ${COUNT}) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if(MSVC) | ||||
|     set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") | ||||
|     set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME}) | ||||
|     add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>") | ||||
|     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>") | ||||
| else() | ||||
|     execute_process( | ||||
|         COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER} | ||||
|         OUTPUT_VARIABLE OUT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
|     set(BUILD_COMPILER ${OUT}) | ||||
|     execute_process( | ||||
|         COMMAND ${CMAKE_C_COMPILER} -dumpmachine | ||||
|         OUTPUT_VARIABLE OUT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
|     set(BUILD_TARGET ${OUT}) | ||||
| endif() | ||||
							
								
								
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/git-vars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/git-vars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| find_package(Git) | ||||
|  | ||||
| # the commit's SHA1 | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_SHA1 | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the date of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_DATE | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the subject of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%s | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_COMMIT_SUBJECT | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
							
								
								
									
										65
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| set(WHISPER_VERSION      @WHISPER_INSTALL_VERSION@) | ||||
| set(WHISPER_BUILD_COMMIT @WHISPER_BUILD_COMMIT@) | ||||
| set(WHISPER_BUILD_NUMBER @WHISPER_BUILD_NUMBER@) | ||||
| set(WHISPER_SHARED_LIB   @BUILD_SHARED_LIBS@) | ||||
|  | ||||
| set(GGML_BLAS       @GGML_BLAS@) | ||||
| set(GGML_CUDA       @GGML_CUDA@) | ||||
| set(GGML_METAL      @GGML_METAL@) | ||||
| set(GGML_HIPBLAS    @GGML_HIPBLAS@) | ||||
| set(GGML_ACCELERATE @GGML_ACCELERATE@) | ||||
|  | ||||
| @PACKAGE_INIT@ | ||||
|  | ||||
| set_and_check(WHISPER_INCLUDE_DIR "@PACKAGE_WHISPER_INCLUDE_INSTALL_DIR@") | ||||
| set_and_check(WHISPER_LIB_DIR     "@PACKAGE_WHISPER_LIB_INSTALL_DIR@") | ||||
| set_and_check(WHISPER_BIN_DIR     "@PACKAGE_WHISPER_BIN_INSTALL_DIR@") | ||||
|  | ||||
| # Ensure transient dependencies satisfied | ||||
|  | ||||
| find_package(Threads REQUIRED) | ||||
|  | ||||
| if (APPLE AND GGML_ACCELERATE) | ||||
|     find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_BLAS) | ||||
|     find_package(BLAS REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_CUDA) | ||||
|     find_package(CUDAToolkit REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_METAL) | ||||
|     find_library(FOUNDATION_LIBRARY Foundation REQUIRED) | ||||
|     find_library(METAL_FRAMEWORK Metal REQUIRED) | ||||
|     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) | ||||
| endif() | ||||
|  | ||||
| if (GGML_HIPBLAS) | ||||
|     find_package(hip REQUIRED) | ||||
|     find_package(hipblas REQUIRED) | ||||
|     find_package(rocblas REQUIRED) | ||||
| endif() | ||||
|  | ||||
| find_library(whisper_LIBRARY whisper | ||||
|     REQUIRED | ||||
|     HINTS ${WHISPER_LIB_DIR}) | ||||
|  | ||||
| set(_whisper_link_deps "Threads::Threads" "@WHISPER_EXTRA_LIBS@") | ||||
| set(_whisper_transient_defines "@WHISPER_TRANSIENT_DEFINES@") | ||||
|  | ||||
| add_library(whisper UNKNOWN IMPORTED) | ||||
|  | ||||
| set_target_properties(whisper | ||||
|     PROPERTIES | ||||
|     INTERFACE_INCLUDE_DIRECTORIES "${WHISPER_INCLUDE_DIR}" | ||||
|         INTERFACE_LINK_LIBRARIES "${_whisper_link_deps}" | ||||
|         INTERFACE_COMPILE_DEFINITIONS "${_whisper_transient_defines}" | ||||
|         IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" | ||||
|         IMPORTED_LOCATION "${whisper_LIBRARY}" | ||||
|         INTERFACE_COMPILE_FEATURES cxx_std_11 | ||||
|         POSITION_INDEPENDENT_CODE ON ) | ||||
|  | ||||
| check_required_components(whisper) | ||||
							
								
								
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper.pc.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper.pc.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| prefix=@CMAKE_INSTALL_PREFIX@ | ||||
| exec_prefix=${prefix} | ||||
| libdir=${exec_prefix}/lib | ||||
| includedir=${prefix}/include | ||||
|  | ||||
| Name: whisper | ||||
| Description: Port of OpenAI's Whisper model in C/C++ | ||||
| Version: @PROJECT_VERSION@ | ||||
| Libs: -L${libdir} -lggml  -lggml-base -lwhisper | ||||
| Cflags: -I${includedir} | ||||
							
								
								
									
										1
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| src/ggml-metal-embed.metal | ||||
							
								
								
									
										343
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										343
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,343 @@ | ||||
| cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. | ||||
| project("ggml" C CXX) | ||||
| include(CheckIncludeFileCXX) | ||||
|  | ||||
| set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||||
|  | ||||
| if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) | ||||
|     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||||
|     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") | ||||
| endif() | ||||
|  | ||||
| if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||||
|     set(GGML_STANDALONE ON) | ||||
|  | ||||
|     set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) | ||||
|  | ||||
|     # configure project version | ||||
|     # TODO | ||||
| else() | ||||
|     set(GGML_STANDALONE OFF) | ||||
| endif() | ||||
|  | ||||
| if (EMSCRIPTEN) | ||||
|     set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|  | ||||
|     option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON) | ||||
| else() | ||||
|     if (MINGW) | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT OFF) | ||||
|     else() | ||||
|         set(BUILD_SHARED_LIBS_DEFAULT ON) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| # remove the lib prefix on win32 mingw | ||||
| if (WIN32) | ||||
|     set(CMAKE_STATIC_LIBRARY_PREFIX "") | ||||
|     set(CMAKE_SHARED_LIBRARY_PREFIX "") | ||||
|     set(CMAKE_SHARED_MODULE_PREFIX  "") | ||||
| endif() | ||||
|  | ||||
| option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) | ||||
| option(GGML_BACKEND_DL   "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF) | ||||
|  | ||||
| # | ||||
| # option list | ||||
| # | ||||
|  | ||||
| # TODO: mark all options as advanced when not GGML_STANDALONE | ||||
|  | ||||
| if (APPLE) | ||||
|     set(GGML_METAL_DEFAULT ON) | ||||
|     set(GGML_BLAS_DEFAULT ON) | ||||
|     set(GGML_BLAS_VENDOR_DEFAULT "Apple") | ||||
| else() | ||||
|     set(GGML_METAL_DEFAULT OFF) | ||||
|     set(GGML_BLAS_DEFAULT OFF) | ||||
|     set(GGML_BLAS_VENDOR_DEFAULT "Generic") | ||||
| endif() | ||||
|  | ||||
| if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH}) | ||||
|     message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF") | ||||
|     set(GGML_NATIVE_DEFAULT OFF) | ||||
| else() | ||||
|     set(GGML_NATIVE_DEFAULT ON) | ||||
| endif() | ||||
|  | ||||
| # defaults | ||||
| if (NOT GGML_LLAMAFILE_DEFAULT) | ||||
|     set(GGML_LLAMAFILE_DEFAULT OFF) | ||||
| endif() | ||||
|  | ||||
| if (NOT GGML_CUDA_GRAPHS_DEFAULT) | ||||
|     set(GGML_CUDA_GRAPHS_DEFAULT OFF) | ||||
| endif() | ||||
|  | ||||
| # general | ||||
| option(GGML_STATIC "ggml: static link libraries"                     OFF) | ||||
| option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT}) | ||||
| option(GGML_LTO    "ggml: enable link time optimization"             OFF) | ||||
| option(GGML_CCACHE "ggml: use ccache if available"                   ON) | ||||
|  | ||||
| # debug | ||||
| option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings"                   ON) | ||||
| option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) | ||||
| option(GGML_GPROF                  "ggml: enable gprof"                                   OFF) | ||||
|  | ||||
| # build | ||||
| option(GGML_FATAL_WARNINGS    "ggml: enable -Werror flag"    OFF) | ||||
|  | ||||
| # sanitizers | ||||
| option(GGML_SANITIZE_THREAD    "ggml: enable thread sanitizer"    OFF) | ||||
| option(GGML_SANITIZE_ADDRESS   "ggml: enable address sanitizer"   OFF) | ||||
| option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF) | ||||
|  | ||||
| # instruction set specific | ||||
| if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT) | ||||
|     set(INS_ENB OFF) | ||||
| else() | ||||
|     set(INS_ENB ON) | ||||
| endif() | ||||
|  | ||||
| option(GGML_CPU_HBM          "ggml: use memkind for CPU HBM" OFF) | ||||
| option(GGML_CPU_AARCH64      "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) | ||||
| option(GGML_AVX              "ggml: enable AVX"              ${INS_ENB}) | ||||
| option(GGML_AVX_VNNI         "ggml: enable AVX-VNNI"         OFF) | ||||
| option(GGML_AVX2             "ggml: enable AVX2"             ${INS_ENB}) | ||||
| option(GGML_AVX512           "ggml: enable AVX512F"          OFF) | ||||
| option(GGML_AVX512_VBMI      "ggml: enable AVX512-VBMI"      OFF) | ||||
| option(GGML_AVX512_VNNI      "ggml: enable AVX512-VNNI"      OFF) | ||||
| option(GGML_AVX512_BF16      "ggml: enable AVX512-BF16"      OFF) | ||||
| if (NOT MSVC) | ||||
|     # in MSVC F16C and FMA is implied with AVX2/AVX512 | ||||
|     option(GGML_FMA          "ggml: enable FMA"              ${INS_ENB}) | ||||
|     option(GGML_F16C         "ggml: enable F16C"             ${INS_ENB}) | ||||
|     # MSVC does not seem to support AMX | ||||
|     option(GGML_AMX_TILE     "ggml: enable AMX-TILE"         OFF) | ||||
|     option(GGML_AMX_INT8     "ggml: enable AMX-INT8"         OFF) | ||||
|     option(GGML_AMX_BF16     "ggml: enable AMX-BF16"         OFF) | ||||
| endif() | ||||
| option(GGML_LASX             "ggml: enable lasx"             ON) | ||||
| option(GGML_LSX              "ggml: enable lsx"              ON) | ||||
| option(GGML_RVV              "ggml: enable rvv"              ON) | ||||
|  | ||||
| option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) | ||||
| set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") | ||||
|  | ||||
|  | ||||
| if (WIN32) | ||||
|     set(GGML_WIN_VER "0x602" CACHE STRING   "ggml: Windows version") | ||||
| endif() | ||||
|  | ||||
| # ggml core | ||||
| set(GGML_SCHED_MAX_COPIES  "4" CACHE STRING "ggml: max input copies for pipeline parallelism") | ||||
| option(GGML_CPU                             "ggml: enable CPU backend"                        ON) | ||||
|  | ||||
| # 3rd party libs / backends | ||||
| option(GGML_ACCELERATE                      "ggml: enable Accelerate framework"               ON) | ||||
| option(GGML_BLAS                            "ggml: use BLAS"                                  ${GGML_BLAS_DEFAULT}) | ||||
| set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING | ||||
|                                             "ggml: BLAS library vendor") | ||||
| option(GGML_LLAMAFILE                       "ggml: use LLAMAFILE"                             ${GGML_LLAMAFILE_DEFAULT}) | ||||
|  | ||||
| option(GGML_CUDA                            "ggml: use CUDA"                                  OFF) | ||||
| option(GGML_MUSA                            "ggml: use MUSA"                                  OFF) | ||||
| option(GGML_CUDA_FORCE_MMQ                  "ggml: use mmq kernels instead of cuBLAS"         OFF) | ||||
| option(GGML_CUDA_FORCE_CUBLAS               "ggml: always use cuBLAS instead of mmq kernels"  OFF) | ||||
| option(GGML_CUDA_F16                        "ggml: use 16 bit floats for some calculations"   OFF) | ||||
| set   (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING | ||||
|                                             "ggml: max. batch size for using peer access") | ||||
| option(GGML_CUDA_NO_PEER_COPY               "ggml: do not use peer to peer copies"            OFF) | ||||
| option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"                OFF) | ||||
| option(GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"     OFF) | ||||
| option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"          ${GGML_CUDA_GRAPHS_DEFAULT}) | ||||
|  | ||||
| option(GGML_HIP                             "ggml: use HIP"                                   OFF) | ||||
| option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF) | ||||
| option(GGML_HIP_NO_VMM                      "ggml: do not try to use HIP VMM"                 ON) | ||||
| option(GGML_HIP_UMA                         "ggml: use HIP unified memory architecture"       OFF) | ||||
| option(GGML_VULKAN                          "ggml: use Vulkan"                                OFF) | ||||
| option(GGML_VULKAN_CHECK_RESULTS            "ggml: run Vulkan op checks"                      OFF) | ||||
| option(GGML_VULKAN_DEBUG                    "ggml: enable Vulkan debug output"                OFF) | ||||
| option(GGML_VULKAN_MEMORY_DEBUG             "ggml: enable Vulkan memory debug output"         OFF) | ||||
| option(GGML_VULKAN_SHADER_DEBUG_INFO        "ggml: enable Vulkan shader debug info"           OFF) | ||||
| option(GGML_VULKAN_PERF                     "ggml: enable Vulkan perf output"                 OFF) | ||||
| option(GGML_VULKAN_VALIDATE                 "ggml: enable Vulkan validation"                  OFF) | ||||
| option(GGML_VULKAN_RUN_TESTS                "ggml: run Vulkan tests"                          OFF) | ||||
| option(GGML_KOMPUTE                         "ggml: use Kompute"                               OFF) | ||||
| option(GGML_METAL                           "ggml: use Metal"                                 ${GGML_METAL_DEFAULT}) | ||||
| option(GGML_METAL_USE_BF16                  "ggml: use bfloat if available"                   OFF) | ||||
| option(GGML_METAL_NDEBUG                    "ggml: disable Metal debugging"                   OFF) | ||||
| option(GGML_METAL_SHADER_DEBUG              "ggml: compile Metal with -fno-fast-math"         OFF) | ||||
| option(GGML_METAL_EMBED_LIBRARY             "ggml: embed Metal library"                       ${GGML_METAL}) | ||||
| set   (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING | ||||
|                                             "ggml: metal minimum macOS version") | ||||
| set   (GGML_METAL_STD "" CACHE STRING       "ggml: metal standard version (-std flag)") | ||||
| option(GGML_OPENMP                          "ggml: use OpenMP"                                ON) | ||||
| option(GGML_RPC                             "ggml: use RPC"                                   OFF) | ||||
| option(GGML_SYCL                            "ggml: use SYCL"                                  OFF) | ||||
| option(GGML_SYCL_F16                        "ggml: use 16 bit floats for sycl calculations"   OFF) | ||||
| set   (GGML_SYCL_TARGET "INTEL" CACHE STRING | ||||
|                                             "ggml: sycl target device") | ||||
| set   (GGML_SYCL_DEVICE_ARCH "" CACHE STRING | ||||
|                                             "ggml: sycl device architecture") | ||||
|  | ||||
| option(GGML_OPENCL                          "ggml: use OpenCL"                                OFF) | ||||
| option(GGML_OPENCL_PROFILING                "ggml: use OpenCL profiling (increases overhead)" OFF) | ||||
| option(GGML_OPENCL_EMBED_KERNELS            "ggml: embed kernels"                             ON) | ||||
| option(GGML_OPENCL_USE_ADRENO_KERNELS       "ggml: use optimized kernels for Adreno"          ON) | ||||
|  | ||||
| # toolchain for vulkan-shaders-gen | ||||
| set   (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen") | ||||
|  | ||||
| # extra artifacts | ||||
| option(GGML_BUILD_TESTS    "ggml: build tests"    ${GGML_STANDALONE}) | ||||
| option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) | ||||
|  | ||||
| # | ||||
| # dependencies | ||||
| # | ||||
|  | ||||
| set(CMAKE_C_STANDARD 11) | ||||
| set(CMAKE_C_STANDARD_REQUIRED true) | ||||
|  | ||||
| set(CMAKE_CXX_STANDARD 17) | ||||
| set(CMAKE_CXX_STANDARD_REQUIRED true) | ||||
|  | ||||
| set(THREADS_PREFER_PTHREAD_FLAG ON) | ||||
|  | ||||
| find_package(Threads REQUIRED) | ||||
|  | ||||
| # | ||||
| # build the library | ||||
| # | ||||
|  | ||||
| add_subdirectory(src) | ||||
|  | ||||
| # | ||||
| # tests and examples | ||||
| # | ||||
|  | ||||
| if (GGML_BUILD_TESTS) | ||||
|     enable_testing() | ||||
|     add_subdirectory(tests) | ||||
| endif () | ||||
|  | ||||
| if (GGML_BUILD_EXAMPLES) | ||||
|     add_subdirectory(examples) | ||||
| endif () | ||||
|  | ||||
| # | ||||
| # install | ||||
| # | ||||
|  | ||||
| include(GNUInstallDirs) | ||||
| include(CMakePackageConfigHelpers) | ||||
|  | ||||
| # all public headers | ||||
| set(GGML_PUBLIC_HEADERS | ||||
|     include/ggml.h | ||||
|     include/ggml-cpu.h | ||||
|     include/ggml-alloc.h | ||||
|     include/ggml-backend.h | ||||
|     include/ggml-blas.h | ||||
|     include/ggml-cann.h | ||||
|     include/ggml-cuda.h | ||||
|     include/ggml-kompute.h | ||||
|     include/ggml-opt.h | ||||
|     include/ggml-metal.h | ||||
|     include/ggml-rpc.h | ||||
|     include/ggml-sycl.h | ||||
|     include/ggml-vulkan.h | ||||
|     include/gguf.h) | ||||
|  | ||||
| set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") | ||||
| #if (GGML_METAL) | ||||
| #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") | ||||
| #endif() | ||||
| install(TARGETS ggml LIBRARY PUBLIC_HEADER) | ||||
| install(TARGETS ggml-base LIBRARY) | ||||
|  | ||||
| if (GGML_STANDALONE) | ||||
|     configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc | ||||
|         @ONLY) | ||||
|  | ||||
|     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc | ||||
|         DESTINATION share/pkgconfig) | ||||
| endif() | ||||
|  | ||||
| # | ||||
| # Create CMake package | ||||
| # | ||||
|  | ||||
| # Generate version info based on git commit. | ||||
|  | ||||
| if(NOT DEFINED GGML_BUILD_NUMBER) | ||||
|     find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH) | ||||
|     execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE GGML_BUILD_NUMBER | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
|  | ||||
|     if(GGML_BUILD_NUMBER EQUAL 1) | ||||
|         message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.") | ||||
|     endif() | ||||
|  | ||||
|     execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         OUTPUT_VARIABLE GGML_BUILD_COMMIT | ||||
|         OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|     ) | ||||
| endif() | ||||
|  | ||||
|  | ||||
| # Capture variables prefixed with GGML_. | ||||
|  | ||||
| set(variable_set_statements | ||||
| " | ||||
| ####### Expanded from @GGML_VARIABLES_EXPANED@ by configure_package_config_file() ####### | ||||
| ####### Any changes to this file will be overwritten by the next CMake run        ####### | ||||
|  | ||||
| ") | ||||
|  | ||||
| set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS}) | ||||
|  | ||||
| get_cmake_property(all_variables VARIABLES) | ||||
| foreach(variable_name IN LISTS all_variables) | ||||
|     if(variable_name MATCHES "^GGML_") | ||||
|         string(REPLACE ";" "\\;" | ||||
|                variable_value "${${variable_name}}") | ||||
|  | ||||
|         set(variable_set_statements | ||||
|             "${variable_set_statements}set(${variable_name} \"${variable_value}\")\n") | ||||
|     endif() | ||||
| endforeach() | ||||
|  | ||||
| set(GGML_VARIABLES_EXPANDED ${variable_set_statements}) | ||||
|  | ||||
| # Create the CMake package and set install location. | ||||
|  | ||||
| set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER}) | ||||
| set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header  files") | ||||
| set(GGML_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files") | ||||
| set(GGML_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files") | ||||
|  | ||||
| configure_package_config_file( | ||||
|         ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake | ||||
|     INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml | ||||
|     PATH_VARS GGML_INCLUDE_INSTALL_DIR | ||||
|               GGML_LIB_INSTALL_DIR | ||||
|               GGML_BIN_INSTALL_DIR) | ||||
|  | ||||
| write_basic_package_version_file( | ||||
|         ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake | ||||
|     VERSION ${GGML_INSTALL_VERSION} | ||||
|     COMPATIBILITY SameMajorVersion) | ||||
|  | ||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake | ||||
|               ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake | ||||
|         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) | ||||
							
								
								
									
										54
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| # Add new build types | ||||
|  | ||||
| # ReleaseGG - Release with enabled asserts | ||||
|  | ||||
| SET(CMAKE_CXX_FLAGS_RELEASEGG | ||||
|     "-O3" | ||||
|     CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_C_FLAGS_RELEASEGG | ||||
|     "-O3" | ||||
|     CACHE STRING "Flags used by the compiler during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used for linking binaries during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts." | ||||
|     FORCE ) | ||||
| MARK_AS_ADVANCED( | ||||
|     CMAKE_CXX_FLAGS_RELEASEGG | ||||
|     CMAKE_C_FLAGS_RELEASEGG | ||||
|     CMAKE_EXE_LINKER_FLAGS_RELEASEGG | ||||
|     CMAKE_SHARED_LINKER_FLAGS_RELEASEGG ) | ||||
|  | ||||
| # RelWithDebInfoGG - RelWithDebInfo with enabled asserts | ||||
|  | ||||
| SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG | ||||
|     "-O2 -g" | ||||
|     CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG | ||||
|     "-O2 -g" | ||||
|     CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG | ||||
|     "" | ||||
|     CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts." | ||||
|     FORCE ) | ||||
| MARK_AS_ADVANCED( | ||||
|     CMAKE_CXX_FLAGS_RELWITHDEBINFOGG | ||||
|     CMAKE_C_FLAGS_RELWITHDEBINFOGG | ||||
|     CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG | ||||
|     CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG ) | ||||
|  | ||||
| if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) | ||||
|     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) | ||||
|     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG") | ||||
| endif() | ||||
							
								
								
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/GitVars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/GitVars.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| find_package(Git) | ||||
|  | ||||
| # the commit's SHA1 | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_SHA1 | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the date of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_DATE | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
|  | ||||
| # the subject of the commit | ||||
| execute_process(COMMAND | ||||
|     "${GIT_EXECUTABLE}" log -1 --format=%s | ||||
|     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" | ||||
|     OUTPUT_VARIABLE GIT_COMMIT_SUBJECT | ||||
|     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
							
								
								
									
										147
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | ||||
|  | ||||
| @GGML_VARIABLES_EXPANDED@ | ||||
|  | ||||
| @PACKAGE_INIT@ | ||||
|  | ||||
| set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@") | ||||
| set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@") | ||||
| set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@") | ||||
|  | ||||
| find_package(Threads REQUIRED) | ||||
|  | ||||
| find_library(GGML_LIBRARY ggml | ||||
|     REQUIRED | ||||
|     HINTS ${GGML_LIB_DIR} | ||||
|     NO_CMAKE_FIND_ROOT_PATH) | ||||
|  | ||||
| add_library(ggml::ggml UNKNOWN IMPORTED) | ||||
| set_target_properties(ggml::ggml | ||||
|     PROPERTIES | ||||
|         IMPORTED_LOCATION "${GGML_LIBRARY}") | ||||
|  | ||||
| find_library(GGML_BASE_LIBRARY ggml-base | ||||
|     REQUIRED | ||||
|     HINTS ${GGML_LIB_DIR} | ||||
|     NO_CMAKE_FIND_ROOT_PATH) | ||||
|  | ||||
| add_library(ggml::ggml-base UNKNOWN IMPORTED) | ||||
| set_target_properties(ggml::ggml-base | ||||
|     PROPERTIES | ||||
|         IMPORTED_LOCATION "${GGML_BASE_LIBRARY}") | ||||
|  | ||||
| if (NOT GGML_SHARED_LIB) | ||||
|     if (APPLE AND GGML_ACCELERATE) | ||||
|         find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK}) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_OPENMP) | ||||
|         find_package(OpenMP REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_CPU_HBM) | ||||
|         find_library(memkind memkind REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_BLAS) | ||||
|         find_package(BLAS REQUIRED) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES}) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS   ${BLAS_LINKER_FLAGS}) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_CUDA) | ||||
|         find_package(CUDAToolkit REQUIRED) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_METAL) | ||||
|         find_library(FOUNDATION_LIBRARY Foundation REQUIRED) | ||||
|         find_library(METAL_FRAMEWORK    Metal REQUIRED) | ||||
|         find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) | ||||
|  | ||||
|         list(APPEND GGML_METAL_INTERFACE_LINK_LIBRARIES | ||||
|                     ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK}) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_VULKAN) | ||||
|         find_package(Vulkan REQUIRED) | ||||
|         list(APPEND GGML_VULKAN_INTERFACE_LINK_LIBRARIES Vulkan::Vulkan) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_HIP) | ||||
|         find_package(hip     REQUIRED) | ||||
|         find_package(hipblas REQUIRED) | ||||
|         find_package(rocblas REQUIRED) | ||||
|         list(APPEND GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_SYCL) | ||||
|         find_package(DNNL) | ||||
|         if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") | ||||
|             list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl) | ||||
|         endif() | ||||
|         if (WIN32) | ||||
|             find_package(IntelSYCL REQUIRED) | ||||
|             find_package(MKL       REQUIRED) | ||||
|             list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) | ||||
|         endif() | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| set(_ggml_all_targets "") | ||||
| foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS}) | ||||
|     string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}") | ||||
|     string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx) | ||||
|  | ||||
|     find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend} | ||||
|         REQUIRED | ||||
|         HINTS ${GGML_LIB_DIR} | ||||
|         NO_CMAKE_FIND_ROOT_PATH) | ||||
|  | ||||
|     message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}") | ||||
|  | ||||
|     add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED) | ||||
|     set_target_properties(ggml::${_ggml_backend} | ||||
|         PROPERTIES | ||||
|             INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}" | ||||
|             IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" | ||||
|             IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}" | ||||
|             INTERFACE_COMPILE_FEATURES c_std_90 | ||||
|             POSITION_INDEPENDENT_CODE ON) | ||||
|  | ||||
|     string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}") | ||||
|     if(is_cpu_variant) | ||||
|         list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base") | ||||
|         set_target_properties(ggml::${_ggml_backend} | ||||
|            PROPERTIES | ||||
|                INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}") | ||||
|  | ||||
|         if(GGML_CPU_INTERFACE_LINK_OPTIONS) | ||||
|             set_target_properties(ggml::${_ggml_backend} | ||||
|                 PROPERTIES | ||||
|                     INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}") | ||||
|         endif() | ||||
|  | ||||
|     else() | ||||
|         list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base") | ||||
|         set_target_properties(ggml::${_ggml_backend} | ||||
|             PROPERTIES | ||||
|                 INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}") | ||||
|  | ||||
|         if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS) | ||||
|             set_target_properties(ggml::${_ggml_backend} | ||||
|                 PROPERTIES | ||||
|                     INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}") | ||||
|         endif() | ||||
|     endif() | ||||
|  | ||||
|     list(APPEND _ggml_all_targets ggml::${_ggml_backend}) | ||||
| endforeach() | ||||
|  | ||||
| add_library(ggml::all INTERFACE IMPORTED) | ||||
| set_target_properties(ggml::all | ||||
|     PROPERTIES | ||||
|         INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}") | ||||
|  | ||||
| check_required_components(ggml) | ||||
							
								
								
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-alloc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-alloc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; | ||||
| typedef struct      ggml_backend_buffer * ggml_backend_buffer_t; | ||||
| typedef struct             ggml_backend * ggml_backend_t; | ||||
|  | ||||
| // Tensor allocator | ||||
| struct ggml_tallocr { | ||||
|     ggml_backend_buffer_t buffer; | ||||
|     void * base; | ||||
|     size_t alignment; | ||||
|     size_t offset; | ||||
| }; | ||||
|  | ||||
| GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); | ||||
| GGML_API void                ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); | ||||
|  | ||||
| // Graph allocator | ||||
| /* | ||||
|   Example usage: | ||||
|     ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); | ||||
|  | ||||
|     // optional: create a worst-case graph and reserve the buffers to avoid reallocations | ||||
|     ggml_gallocr_reserve(galloc, build_graph(max_batch)); | ||||
|  | ||||
|     // allocate the graph | ||||
|     struct ggml_cgraph * graph = build_graph(batch); | ||||
|     ggml_gallocr_alloc_graph(galloc, graph); | ||||
|  | ||||
|     printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); | ||||
|  | ||||
|     // evaluate the graph | ||||
|     ggml_backend_graph_compute(backend, graph); | ||||
| */ | ||||
|  | ||||
| // special tensor flags for use with the graph allocator: | ||||
| //   ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses | ||||
| //   ggml_set_output(): output tensors are never freed and never overwritten | ||||
|  | ||||
| typedef struct ggml_gallocr * ggml_gallocr_t; | ||||
|  | ||||
| GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); | ||||
| GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); | ||||
| GGML_API void           ggml_gallocr_free(ggml_gallocr_t galloc); | ||||
|  | ||||
| // pre-allocate buffers from a measure graph - does not allocate or modify the graph | ||||
| // call with a worst-case graph to avoid buffer reallocations | ||||
| // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed | ||||
| // returns false if the buffer allocation failed | ||||
| GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); | ||||
| GGML_API bool ggml_gallocr_reserve_n( | ||||
|     ggml_gallocr_t galloc, | ||||
|     struct ggml_cgraph * graph, | ||||
|     const int * node_buffer_ids, | ||||
|     const int * leaf_buffer_ids); | ||||
|  | ||||
| // automatic reallocation if the topology changes when using a single buffer | ||||
| // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) | ||||
| GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); | ||||
|  | ||||
| GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); | ||||
|  | ||||
| // Utils | ||||
| // Create a buffer and allocate all the tensors in a ggml_context | ||||
| GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); | ||||
| GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										354
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-backend.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										354
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-backend.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,354 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-alloc.h" | ||||
|  | ||||
| #ifdef GGML_BACKEND_SHARED | ||||
| #    if defined(_WIN32) && !defined(__MINGW32__) | ||||
| #        ifdef GGML_BACKEND_BUILD | ||||
| #            define GGML_BACKEND_API __declspec(dllexport) extern | ||||
| #        else | ||||
| #            define GGML_BACKEND_API __declspec(dllimport) extern | ||||
| #        endif | ||||
| #    else | ||||
| #        define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern | ||||
| #    endif | ||||
| #else | ||||
| #    define GGML_BACKEND_API extern | ||||
| #endif | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; | ||||
|     typedef struct ggml_backend_buffer * ggml_backend_buffer_t; | ||||
|     typedef struct ggml_backend_event * ggml_backend_event_t; | ||||
|     typedef struct ggml_backend * ggml_backend_t; | ||||
|     typedef void * ggml_backend_graph_plan_t; | ||||
|     typedef struct ggml_backend_reg * ggml_backend_reg_t; | ||||
|     typedef struct ggml_backend_device * ggml_backend_dev_t; | ||||
|  | ||||
|  | ||||
|     // | ||||
|     // Backend buffer type | ||||
|     // | ||||
|  | ||||
|     GGML_API const char *          ggml_backend_buft_name          (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer  (ggml_backend_buffer_type_t buft, size_t size); | ||||
|     GGML_API size_t                ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API size_t                ggml_backend_buft_get_max_size  (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API size_t                ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); | ||||
|     GGML_API bool                  ggml_backend_buft_is_host       (ggml_backend_buffer_type_t buft); | ||||
|     GGML_API ggml_backend_dev_t    ggml_backend_buft_get_device    (ggml_backend_buffer_type_t buft); | ||||
|  | ||||
|     // | ||||
|     // Backend buffer | ||||
|     // | ||||
|  | ||||
|     enum ggml_backend_buffer_usage { | ||||
|         GGML_BACKEND_BUFFER_USAGE_ANY = 0, | ||||
|         GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1, | ||||
|         GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2, | ||||
|     }; | ||||
|  | ||||
|     GGML_API const char *                   ggml_backend_buffer_name          (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_free          (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void *                         ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer); | ||||
|     GGML_API size_t                         ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); | ||||
|     GGML_API void                           ggml_backend_buffer_clear         (ggml_backend_buffer_t buffer, uint8_t value); | ||||
|     GGML_API bool                           ggml_backend_buffer_is_host       (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_set_usage     (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); | ||||
|     GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage     (ggml_backend_buffer_t buffer); | ||||
|     GGML_API ggml_backend_buffer_type_t     ggml_backend_buffer_get_type      (ggml_backend_buffer_t buffer); | ||||
|     GGML_API void                           ggml_backend_buffer_reset         (ggml_backend_buffer_t buffer); | ||||
|  | ||||
|     // tensor copy between different backends | ||||
|     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); | ||||
|  | ||||
|     // | ||||
|     // Backend (stream) | ||||
|     // | ||||
|  | ||||
|     GGML_API ggml_guid_t  ggml_backend_guid(ggml_backend_t backend); | ||||
|     GGML_API const char * ggml_backend_name(ggml_backend_t backend); | ||||
|     GGML_API void         ggml_backend_free(ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend); | ||||
|     GGML_API ggml_backend_buffer_t      ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size); | ||||
|     GGML_API size_t                     ggml_backend_get_alignment(ggml_backend_t backend); | ||||
|     GGML_API size_t                     ggml_backend_get_max_size(ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); | ||||
|     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size); | ||||
|  | ||||
|     // "offset" refers to the offset in tensor->data for setting/getting data | ||||
|     GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); | ||||
|     GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size); | ||||
|     GGML_API void ggml_backend_tensor_memset(   struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size); | ||||
|  | ||||
|     GGML_API void ggml_backend_synchronize(ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||||
|     GGML_API void                      ggml_backend_graph_plan_free  (ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||||
|  | ||||
|     GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||||
|     GGML_API enum ggml_status ggml_backend_graph_compute      (ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||||
|     GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||||
|  | ||||
|     // NOTE: will be removed, use device version instead | ||||
|     GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); | ||||
|     GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft); | ||||
|     GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op); | ||||
|  | ||||
|     // asynchronous copy | ||||
|     // the copy is performed after all the currently queued operations in backend_src | ||||
|     // backend_dst will wait for the copy to complete before performing other operations | ||||
|     // automatic fallback to sync copy if async is not supported | ||||
|     GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst); | ||||
|  | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend); | ||||
|  | ||||
|     // | ||||
|     // Events | ||||
|     // | ||||
|  | ||||
|     GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device); | ||||
|     GGML_API void                 ggml_backend_event_free(ggml_backend_event_t event); | ||||
|     GGML_API void                 ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend); | ||||
|     GGML_API void                 ggml_backend_event_synchronize(ggml_backend_event_t event); | ||||
|     GGML_API void                 ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event); | ||||
|  | ||||
|     // | ||||
|     // Backend device | ||||
|     // | ||||
|  | ||||
|     enum ggml_backend_dev_type { | ||||
|         // CPU device using system memory | ||||
|         GGML_BACKEND_DEVICE_TYPE_CPU, | ||||
|         // GPU device using dedicated memory | ||||
|         GGML_BACKEND_DEVICE_TYPE_GPU, | ||||
|         // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX) | ||||
|         GGML_BACKEND_DEVICE_TYPE_ACCEL | ||||
|     }; | ||||
|  | ||||
|     // functionality supported by the device | ||||
|     struct ggml_backend_dev_caps { | ||||
|         // asynchronous operations | ||||
|         bool async; | ||||
|         // pinned host buffer | ||||
|         bool host_buffer; | ||||
|         // creating buffers from host ptr | ||||
|         bool buffer_from_host_ptr; | ||||
|         // event synchronization | ||||
|         bool events; | ||||
|     }; | ||||
|  | ||||
|     // all the device properties | ||||
|     struct ggml_backend_dev_props { | ||||
|         const char * name; | ||||
|         const char * description; | ||||
|         size_t memory_free; | ||||
|         size_t memory_total; | ||||
|         enum ggml_backend_dev_type type; | ||||
|         struct ggml_backend_dev_caps caps; | ||||
|     }; | ||||
|  | ||||
|     GGML_API const char *                  ggml_backend_dev_name(ggml_backend_dev_t device); | ||||
|     GGML_API const char *                  ggml_backend_dev_description(ggml_backend_dev_t device); | ||||
|     GGML_API void                          ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total); | ||||
|     GGML_API enum ggml_backend_dev_type    ggml_backend_dev_type(ggml_backend_dev_t device); | ||||
|     GGML_API void                          ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props); | ||||
|     GGML_API ggml_backend_reg_t            ggml_backend_dev_backend_reg(ggml_backend_dev_t device); | ||||
|     GGML_API ggml_backend_t                ggml_backend_dev_init(ggml_backend_dev_t device, const char * params); | ||||
|     GGML_API ggml_backend_buffer_type_t    ggml_backend_dev_buffer_type(ggml_backend_dev_t device); | ||||
|     GGML_API ggml_backend_buffer_type_t    ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); | ||||
|     GGML_API ggml_backend_buffer_t         ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); | ||||
|  | ||||
|     GGML_API bool                          ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op); | ||||
|     GGML_API bool                          ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft); | ||||
|     GGML_API bool                          ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op); | ||||
|  | ||||
|     // | ||||
|     // Backend (reg) | ||||
|     // | ||||
|  | ||||
|     GGML_API const char *       ggml_backend_reg_name(ggml_backend_reg_t reg); | ||||
|     GGML_API size_t             ggml_backend_reg_dev_count(ggml_backend_reg_t reg); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index); | ||||
|     GGML_API void *             ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name); | ||||
|  | ||||
|     // Common functions that may be obtained using ggml_backend_reg_get_proc_address | ||||
|  | ||||
|     // Split buffer type for tensor parallelism | ||||
|     typedef ggml_backend_buffer_type_t   (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split); | ||||
|     // Set the number of threads for the backend | ||||
|     typedef void                         (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads); | ||||
|     // Get additional buffer types provided by the device (returns a NULL-terminated array) | ||||
|     typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device); | ||||
|     // Set the abort callback for the backend | ||||
|     typedef void                         (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data); | ||||
|     // Get a list of feature flags supported by the backend (returns a NULL-terminated array) | ||||
|     struct ggml_backend_feature { | ||||
|         const char * name; | ||||
|         const char * value; | ||||
|     }; | ||||
|     typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg); | ||||
|  | ||||
|     // | ||||
|     // Backend registry | ||||
|     // | ||||
|  | ||||
|     GGML_API void ggml_backend_device_register(ggml_backend_dev_t device); | ||||
|  | ||||
|     // Backend (reg) enumeration | ||||
|     GGML_API size_t             ggml_backend_reg_count(void); | ||||
|     GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index); | ||||
|     GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name); | ||||
|  | ||||
|     // Device enumeration | ||||
|     GGML_API size_t             ggml_backend_dev_count(void); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); | ||||
|     GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type); | ||||
|  | ||||
|     // Direct backend (stream) initialization | ||||
|     // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params) | ||||
|     GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params); | ||||
|     // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params) | ||||
|     GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params); | ||||
|     // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL) | ||||
|     GGML_API ggml_backend_t ggml_backend_init_best(void); | ||||
|  | ||||
|     // Load a backend from a dynamic library and register it | ||||
|     GGML_API ggml_backend_reg_t ggml_backend_load(const char * path); | ||||
|     // Unload a backend if loaded dynamically and unregister it | ||||
|     GGML_API void               ggml_backend_unload(ggml_backend_reg_t reg); | ||||
|     // Load all known backends from dynamic libraries | ||||
|     GGML_API void               ggml_backend_load_all(void); | ||||
|     GGML_API void               ggml_backend_load_all_from_path(const char * dir_path); | ||||
|  | ||||
|     // | ||||
|     // Backend scheduler | ||||
|     // | ||||
|  | ||||
|     // The backend scheduler allows for multiple backend devices to be used together | ||||
|     // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends | ||||
|     // The backends are selected based on: | ||||
|     // - the backend that supports the operation | ||||
|     // - the location of the pre-allocated tensors (e.g. the weights) | ||||
|     /* | ||||
|       Example usage: | ||||
|  | ||||
|         // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned | ||||
|         // preferrably to run on the same backend as the buffer | ||||
|         ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); | ||||
|  | ||||
|         sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false); | ||||
|  | ||||
|         // initialize buffers from a max size graph (optional) | ||||
|         reserve_graph = build_graph(sched, max_batch_size); | ||||
|  | ||||
|         // manually assign nodes to a backend (optional, should not be needed in most cases) | ||||
|         struct ggml_tensor * node = ggml_mul_mat(ctx, ...); | ||||
|         ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu); | ||||
|  | ||||
|         ggml_backend_sched_reserve(sched, reserve_graph); | ||||
|  | ||||
|         // compute | ||||
|         graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation | ||||
|         for (int i = 0; i < 10; ++i) { | ||||
|             ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically | ||||
|         } | ||||
|  | ||||
|         // if there are graph inputs: | ||||
|         graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called) | ||||
|         ggml_backend_sched_reset(sched); // clear the allocation of the previous graph | ||||
|         ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it | ||||
|         ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors | ||||
|         ggml_backend_sched_graph_compute(sched, graph); // execute the graph | ||||
|  | ||||
|         // as an alternative to the above it is also possible to assign the inputs to a dedicated context and | ||||
|         // allocate them statically via ggml_backend_alloc_ctx_tensors | ||||
|     } | ||||
|     */ | ||||
|  | ||||
|     typedef struct ggml_backend_sched * ggml_backend_sched_t; | ||||
|  | ||||
|     // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback) | ||||
|     // when ask == true, the scheduler wants to know if the user wants to observe this node | ||||
|     // this allows the scheduler to batch nodes together in order to evaluate them in a single call | ||||
|     // | ||||
|     // when ask == false, the scheduler is passing the node tensor to the user for observation | ||||
|     // if the user returns false, the scheduler will cancel the graph compute | ||||
|     // | ||||
|     typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); | ||||
|  | ||||
|     // Initialize a backend scheduler, backends with low index are given priority over backends with high index | ||||
|     GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel); | ||||
|     GGML_API void                 ggml_backend_sched_free(ggml_backend_sched_t sched); | ||||
|  | ||||
|     // Initialize backend buffers from a measure graph | ||||
|     GGML_API bool                 ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success | ||||
|  | ||||
|     GGML_API int                  ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched); | ||||
|     GGML_API ggml_backend_t       ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i); | ||||
|  | ||||
|     // Get the number of splits of the last graph | ||||
|     GGML_API int                  ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); | ||||
|     GGML_API int                  ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched); | ||||
|  | ||||
|     GGML_API size_t               ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); | ||||
|  | ||||
|     GGML_API void                 ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); | ||||
|     GGML_API ggml_backend_t       ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); | ||||
|  | ||||
|     // Allocate and compute graph on the backend scheduler | ||||
|     GGML_API bool                 ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success | ||||
|     GGML_API enum ggml_status     ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); | ||||
|     GGML_API enum ggml_status     ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph); | ||||
|     GGML_API void                 ggml_backend_sched_synchronize(ggml_backend_sched_t sched); | ||||
|  | ||||
|     // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. | ||||
|     // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. | ||||
|     // The correct way to use this API is to discard the deallocated tensors and create new ones. | ||||
|     GGML_API void                 ggml_backend_sched_reset(ggml_backend_sched_t sched); | ||||
|  | ||||
|     // Set a callback to be called for each resulting node during graph compute | ||||
|     GGML_API void                 ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data); | ||||
|  | ||||
|     // | ||||
|     // Utils | ||||
|     // | ||||
|  | ||||
|     struct ggml_backend_graph_copy { | ||||
|         ggml_backend_buffer_t buffer; | ||||
|         struct ggml_context * ctx_allocated; | ||||
|         struct ggml_context * ctx_unallocated; | ||||
|         struct ggml_cgraph * graph; | ||||
|     }; | ||||
|  | ||||
|     // Copy a graph to a different backend | ||||
|     GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph); | ||||
|     GGML_API void                           ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy); | ||||
|  | ||||
|     typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); | ||||
|  | ||||
|     // Compare the output of two backends | ||||
|     GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data); | ||||
|  | ||||
|     // Tensor initialization | ||||
|     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); | ||||
|     GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor); | ||||
|  | ||||
|     // CPU buffer types are always available | ||||
|     GGML_API ggml_backend_buffer_t      ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); | ||||
|     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										25
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-blas.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-blas.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // backend API | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); | ||||
|  | ||||
| // number of threads used for conversion to float | ||||
| // for openblas and blis, this will also set the number of threads used for blas operations | ||||
| GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); | ||||
|  | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										123
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cann.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cann.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,123 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| /** | ||||
|  * @brief Maximum number of CANN devices supported. | ||||
|  */ | ||||
| #define GGML_CANN_MAX_DEVICES 16 | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void); | ||||
|  | ||||
| /** | ||||
|  * @brief Initializes the CANN backend for a specified device. | ||||
|  * | ||||
|  * This function initializes the CANN backend for the given device. | ||||
|  * It verifies the device index, allocates a context, and creates a backend | ||||
|  * instance. | ||||
|  * | ||||
|  * @param device The index of the device to initialize. | ||||
|  * @return A pointer to the initialized backend instance, or nullptr on failure. | ||||
|  */ | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device); | ||||
|  | ||||
| /** | ||||
|  * @brief Checks if a given backend is a CANN backend. | ||||
|  * | ||||
|  * This function verifies if the provided backend is a CANN backend by comparing | ||||
|  * its GUID with the CANN backend's GUID. | ||||
|  * | ||||
|  * @param backend The backend instance to check. | ||||
|  * @return True if the backend is a CANN backend, false otherwise. | ||||
|  */ | ||||
| GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the CANN buffer type for a specified device. | ||||
|  * | ||||
|  * This function initializes and returns the buffer type interface associated | ||||
|  * with the given device. It ensures thread-safe access using a mutex. | ||||
|  * | ||||
|  * @param device The device index for which to retrieve the buffer type. | ||||
|  * @return A pointer to the buffer type interface for the specified device, or | ||||
|  * nullptr if the device index is out of range. | ||||
|  */ | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t | ||||
| ggml_backend_cann_buffer_type(int32_t device); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the number of CANN devices available. | ||||
|  * | ||||
|  * This function returns the number of CANN devices available based on | ||||
|  * information obtained from `ggml_cann_info()`. | ||||
|  * | ||||
|  * @return The number of CANN devices available. | ||||
|  */ | ||||
| GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void); | ||||
|  | ||||
| /** | ||||
|  * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU. | ||||
|  * | ||||
|  * @return A pointer to the host buffer type interface. | ||||
|  */ | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the description of a specific CANN device. | ||||
|  * | ||||
|  * This function sets the specified device, retrieves the SoC name, | ||||
|  * and writes it into the provided description buffer. | ||||
|  * | ||||
|  * @param device The device index to retrieve the description for. | ||||
|  * @param description Pointer to a buffer where the description will be written. | ||||
|  * @param description_size Size of the description buffer. | ||||
|  */ | ||||
| GGML_BACKEND_API void ggml_backend_cann_get_device_description( | ||||
|     int32_t device, char* description, size_t description_size); | ||||
|  | ||||
| /** | ||||
|  * @brief Retrieves the memory information of a specific CANN device. | ||||
|  * | ||||
|  * This function sets the specified device, retrieves the free and total | ||||
|  * memory information of the specified type (ACL_HBM_MEM), and stores them | ||||
|  * in the provided pointers. | ||||
|  * | ||||
|  * @param device The device index to retrieve memory information for. | ||||
|  * @param free Pointer to a variable where the free memory size will be stored. | ||||
|  * @param total Pointer to a variable where the total memory size will be | ||||
|  * stored. | ||||
|  */ | ||||
| GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device, | ||||
|                                                   size_t* free, | ||||
|                                                   size_t* total); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										39
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpp.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpp.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| #pragma once | ||||
|  | ||||
| #ifndef __cplusplus | ||||
| #error "This header is for C++ only" | ||||
| #endif | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-alloc.h" | ||||
| #include "ggml-backend.h" | ||||
| #include "gguf.h" | ||||
| #include <memory> | ||||
|  | ||||
| // Smart pointers for ggml types | ||||
| // | ||||
| // Each *_deleter functor below forwards to the matching C free function, so | ||||
| // the std::unique_ptr aliases give RAII ownership of the raw ggml handles: | ||||
| // the handle is released exactly once, when the owning unique_ptr goes out | ||||
| // of scope. | ||||
|  | ||||
| // ggml | ||||
|  | ||||
| struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } }; | ||||
| struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } }; | ||||
|  | ||||
| typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr; | ||||
| typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr; | ||||
|  | ||||
| // ggml-alloc | ||||
|  | ||||
| struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } }; | ||||
|  | ||||
| // NOTE(review): unlike the other aliases, this one names ggml_gallocr_t | ||||
| // rather than a bare struct tag; if ggml_gallocr_t is itself a pointer | ||||
| // typedef in ggml-alloc.h, the unique_ptr element type would be a | ||||
| // pointer-to-pointer and would not match the deleter's parameter — | ||||
| // TODO confirm against ggml-alloc.h. | ||||
| typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr; | ||||
|  | ||||
| // ggml-backend | ||||
|  | ||||
| struct ggml_backend_deleter        { void operator()(ggml_backend_t backend)       { ggml_backend_free(backend); } }; | ||||
| struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } }; | ||||
| struct ggml_backend_event_deleter  { void operator()(ggml_backend_event_t event)   { ggml_backend_event_free(event); } }; | ||||
| struct ggml_backend_sched_deleter  { void operator()(ggml_backend_sched_t sched)   { ggml_backend_sched_free(sched); } }; | ||||
|  | ||||
| typedef std::unique_ptr<ggml_backend,        ggml_backend_deleter>        ggml_backend_ptr; | ||||
| typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr; | ||||
| typedef std::unique_ptr<ggml_backend_event,  ggml_backend_event_deleter>  ggml_backend_event_ptr; | ||||
| typedef std::unique_ptr<ggml_backend_sched,  ggml_backend_sched_deleter>  ggml_backend_sched_ptr; | ||||
							
								
								
									
										135
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpu.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cpu.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| // ggml CPU backend public API: graph compute plans, NUMA and threadpool | ||||
| // control, scalar tensor-element accessors, CPU feature detection, and | ||||
| // construction/configuration of the CPU ggml_backend. | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     // the compute plan that needs to be prepared for ggml_graph_compute() | ||||
|     // since https://github.com/ggerganov/ggml/issues/287 | ||||
|     struct ggml_cplan { | ||||
|         size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()` | ||||
|         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()` | ||||
|  | ||||
|         int n_threads; | ||||
|         struct ggml_threadpool * threadpool; | ||||
|  | ||||
|         // abort ggml_graph_compute when true | ||||
|         ggml_abort_callback abort_callback; | ||||
|         void *              abort_callback_data; | ||||
|     }; | ||||
|  | ||||
|     // numa strategies | ||||
|     enum ggml_numa_strategy { | ||||
|         GGML_NUMA_STRATEGY_DISABLED   = 0, | ||||
|         GGML_NUMA_STRATEGY_DISTRIBUTE = 1, | ||||
|         GGML_NUMA_STRATEGY_ISOLATE    = 2, | ||||
|         GGML_NUMA_STRATEGY_NUMACTL    = 3, | ||||
|         GGML_NUMA_STRATEGY_MIRROR     = 4, | ||||
|         GGML_NUMA_STRATEGY_COUNT | ||||
|     }; | ||||
|  | ||||
|     GGML_BACKEND_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems | ||||
|     GGML_BACKEND_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node | ||||
|  | ||||
|     // convenience constructors for single-value tensors | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value); | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value); | ||||
|  | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value); | ||||
|     GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value); | ||||
|  | ||||
|     // scalar element accessors, addressed either by flat 1-d index or by | ||||
|     // per-dimension (i0..i3) indices | ||||
|     GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i); | ||||
|     GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value); | ||||
|  | ||||
|     GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3); | ||||
|     GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value); | ||||
|  | ||||
|     GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i); | ||||
|     GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value); | ||||
|  | ||||
|     GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3); | ||||
|     GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value); | ||||
|  | ||||
|     GGML_BACKEND_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params  * params); | ||||
|     GGML_BACKEND_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool); | ||||
|     GGML_BACKEND_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool); | ||||
|     GGML_BACKEND_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool); | ||||
|     GGML_BACKEND_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool); | ||||
|  | ||||
|     // ggml_graph_plan() has to be called before ggml_graph_compute() | ||||
|     // when plan.work_size > 0, caller must allocate memory for plan.work_data | ||||
|     GGML_BACKEND_API struct ggml_cplan ggml_graph_plan( | ||||
|                   const struct ggml_cgraph * cgraph, | ||||
|                                        int   n_threads, /* = GGML_DEFAULT_N_THREADS */ | ||||
|                     struct ggml_threadpool * threadpool /* = NULL */ ); | ||||
|     GGML_BACKEND_API enum ggml_status  ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); | ||||
|  | ||||
|     // same as ggml_graph_compute() but the work data is allocated as a part of the context | ||||
|     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data | ||||
|     GGML_BACKEND_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); | ||||
|  | ||||
|     // | ||||
|     // system info | ||||
|     // | ||||
|     // each query returns a non-zero value when the feature was detected | ||||
|  | ||||
|     // x86 | ||||
|     GGML_BACKEND_API int ggml_cpu_has_sse3       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_ssse3      (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx2       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_f16c       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_fma        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512     (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void); | ||||
|     // ARM | ||||
|     GGML_BACKEND_API int ggml_cpu_has_neon       (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_dotprod    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_sve        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void);  // sve vector length in bytes | ||||
|     // other | ||||
|     GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_vsx        (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void); | ||||
|     GGML_BACKEND_API int ggml_cpu_has_llamafile  (void); | ||||
|  | ||||
|     // Internal types and functions exposed for tests and benchmarks | ||||
|  | ||||
|     typedef void (*ggml_vec_dot_t)  (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx, | ||||
|                                        const void * GGML_RESTRICT y, size_t by, int nrc); | ||||
|  | ||||
|     // per-type CPU dispatch table entry (see ggml_get_type_traits_cpu) | ||||
|     struct ggml_type_traits_cpu { | ||||
|         ggml_from_float_t        from_float; | ||||
|         ggml_vec_dot_t           vec_dot; | ||||
|         enum ggml_type           vec_dot_type; | ||||
|         int64_t                  nrows; // number of rows to process simultaneously | ||||
|     }; | ||||
|  | ||||
|     GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type); | ||||
|  | ||||
|     GGML_BACKEND_API void ggml_cpu_init(void); | ||||
|  | ||||
|     // | ||||
|     // CPU backend | ||||
|     // | ||||
|  | ||||
|     GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void); | ||||
|  | ||||
|     GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend); | ||||
|     GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads); | ||||
|     GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool); | ||||
|     GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data); | ||||
|  | ||||
|     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										47
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cuda.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-cuda.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| // ggml CUDA backend public API. The same header serves ROCm (GGML_USE_HIP) | ||||
| // and MUSA (GGML_USE_MUSA) builds; only the reported backend/BLAS names | ||||
| // change via the macros below. | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_HIP | ||||
| #define GGML_CUDA_NAME "ROCm" | ||||
| #define GGML_CUBLAS_NAME "hipBLAS" | ||||
| #elif defined(GGML_USE_MUSA) | ||||
| #define GGML_CUDA_NAME "MUSA" | ||||
| #define GGML_CUBLAS_NAME "muBLAS" | ||||
| #else | ||||
| #define GGML_CUDA_NAME "CUDA" | ||||
| #define GGML_CUBLAS_NAME "cuBLAS" | ||||
| #endif | ||||
| #define GGML_CUDA_MAX_DEVICES       16 | ||||
|  | ||||
| // backend API | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend); | ||||
|  | ||||
| // device buffer | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device); | ||||
|  | ||||
| // split tensor buffer that splits matrices by rows across multiple devices | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split); | ||||
|  | ||||
| // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void); | ||||
|  | ||||
| // device enumeration and memory introspection | ||||
| GGML_BACKEND_API int  ggml_backend_cuda_get_device_count(void); | ||||
| GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size); | ||||
| GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total); | ||||
|  | ||||
| // pin/unpin an existing host allocation for faster host<->device transfers | ||||
| GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size); | ||||
| GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										50
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-kompute.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-kompute.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| // ggml Kompute (Vulkan compute) backend public API: Vulkan device | ||||
| // enumeration/selection plus the standard backend constructors. | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #include <stdbool.h> | ||||
| #include <stddef.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define GGML_KOMPUTE_MAX_DEVICES 16 | ||||
|  | ||||
| // description of one available Vulkan device | ||||
| struct ggml_vk_device { | ||||
|     int index; | ||||
|     int type; // same as VkPhysicalDeviceType | ||||
|     size_t heapSize; | ||||
|     const char * name; | ||||
|     const char * vendor; | ||||
|     int subgroupSize; | ||||
|     uint64_t bufferAlignment; | ||||
|     uint64_t maxAlloc; | ||||
| }; | ||||
|  | ||||
| // NOTE(review): unlike the backend functions below, the ggml_vk_* device | ||||
| // helpers are declared without GGML_BACKEND_API — confirm whether they are | ||||
| // meant to be exported from a shared-library build. | ||||
| // NOTE(review): ownership of the array returned by ggml_vk_available_devices | ||||
| // is not documented here — presumably the caller frees it; verify in the | ||||
| // implementation. | ||||
| struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count); | ||||
| bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name); | ||||
| bool ggml_vk_has_vulkan(void); | ||||
| bool ggml_vk_has_device(void); | ||||
| struct ggml_vk_device ggml_vk_current_device(void); | ||||
|  | ||||
| // | ||||
| // backend API | ||||
| // | ||||
|  | ||||
| // forward declaration | ||||
| // NOTE(review): ggml_backend_t is already typedef'd by ggml-backend.h, which | ||||
| // is included above; the duplicate typedef is legal in C11 but redundant. | ||||
| typedef struct ggml_backend * ggml_backend_t; | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										66
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-metal.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-metal.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| // Note: this description is outdated | ||||
| // | ||||
| // An interface allowing to compute ggml_cgraph with Metal | ||||
| // | ||||
| // This is a fully functional interface that extends ggml with GPU support for Apple devices. | ||||
| // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.) | ||||
| // | ||||
| // How it works? | ||||
| // | ||||
| // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this | ||||
| // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you | ||||
| // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.) | ||||
| // | ||||
| // You only need to make sure that all memory buffers that you used during the graph creation | ||||
| // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is | ||||
| // used during the graph evaluation to determine the arguments of the compute kernels. | ||||
| // | ||||
| // Synchronization between device and host memory (for example for input and output tensors) | ||||
| // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions. | ||||
| // | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #include <stddef.h> | ||||
| #include <stdbool.h> | ||||
|  | ||||
| struct ggml_tensor; | ||||
| struct ggml_cgraph; | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // | ||||
| // backend API | ||||
| // user-code should use only these functions | ||||
| // | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void); | ||||
|  | ||||
| GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend); | ||||
|  | ||||
| GGML_DEPRECATED( | ||||
|         GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size), | ||||
|         "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713"); | ||||
|  | ||||
| GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void); | ||||
|  | ||||
| // helper to check if the device supports a specific family | ||||
| // ideally, the user code should be doing these checks | ||||
| // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf | ||||
| GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family); | ||||
|  | ||||
| // capture all command buffers committed the next time `ggml_backend_graph_compute` is called | ||||
| GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend); | ||||
|  | ||||
| // backend-registry entry point for the Metal backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										26
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opencl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opencl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| // ggml OpenCL backend public API. | ||||
| // NOTE(review): this header uses classic include guards while the sibling | ||||
| // backend headers use #pragma once — stylistic inconsistency only. | ||||
| #ifndef GGML_OPENCL_H | ||||
| #define GGML_OPENCL_H | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // | ||||
| // backend API | ||||
| // | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void); | ||||
| GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void); | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void); | ||||
|  | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif // GGML_OPENCL_H | ||||
							
								
								
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opt.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-opt.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,216 @@ | ||||
| // This file contains functionality for training models using GGML. | ||||
| // It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets. | ||||
| // At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code. | ||||
| // | ||||
| // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #include <stdint.h> | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     // opaque handle types; definitions live in the implementation | ||||
|     struct ggml_opt_dataset; | ||||
|     struct ggml_opt_context; | ||||
|     struct ggml_opt_result; | ||||
|  | ||||
|     typedef struct ggml_opt_dataset * ggml_opt_dataset_t; | ||||
|     typedef struct ggml_opt_context * ggml_opt_context_t; | ||||
|     typedef struct ggml_opt_result  * ggml_opt_result_t; | ||||
|  | ||||
|     // ====== Loss ====== | ||||
|  | ||||
|     // built-in loss types, i.e. the built-in quantities minimized by the optimizer | ||||
|     // custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value | ||||
|     enum ggml_opt_loss_type { | ||||
|         GGML_OPT_LOSS_TYPE_MEAN, | ||||
|         GGML_OPT_LOSS_TYPE_SUM, | ||||
|         GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, | ||||
|         GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, | ||||
|     }; | ||||
|  | ||||
|     // ====== Dataset ====== | ||||
|  | ||||
|     GGML_API ggml_opt_dataset_t ggml_opt_dataset_init( | ||||
|             int64_t ne_datapoint, // number of elements per datapoint | ||||
|             int64_t ne_label,     // number of elements per label | ||||
|             int64_t ndata,        // total number of datapoints/labels | ||||
|             int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied) | ||||
|     GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset); | ||||
|  | ||||
|     // get underlying tensors that store the data | ||||
|     GGML_API struct ggml_tensor * ggml_opt_dataset_data  (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata] | ||||
|     GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label,     ndata] | ||||
|  | ||||
|     // shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative | ||||
|     GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata); | ||||
|  | ||||
|     // get batch at position ibatch from dataset and copy the data to data_batch and labels_batch | ||||
|     GGML_API void ggml_opt_dataset_get_batch( | ||||
|             ggml_opt_dataset_t   dataset, | ||||
|             struct ggml_tensor * data_batch,   // shape = [ne_datapoint, ndata_batch] | ||||
|             struct ggml_tensor * labels_batch, // shape = [ne_label,     ndata_batch] | ||||
|             int64_t              ibatch); | ||||
|  | ||||
|     // ====== Model / Context ====== | ||||
|  | ||||
|     // how much of the compute graph to build: forward only, forward+grad, or forward+grad+optimizer step | ||||
|     enum ggml_opt_build_type { | ||||
|         GGML_OPT_BUILD_TYPE_FORWARD, | ||||
|         GGML_OPT_BUILD_TYPE_GRAD, | ||||
|         GGML_OPT_BUILD_TYPE_OPT, | ||||
|     }; | ||||
|  | ||||
|     // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss | ||||
|     struct ggml_opt_optimizer_params { | ||||
|         // AdamW optimizer parameters | ||||
|         struct { | ||||
|             float alpha; // learning rate | ||||
|             float beta1; | ||||
|             float beta2; | ||||
|             float eps;   // epsilon for numerical stability | ||||
|             float wd;    // weight decay for AdamW, use 0.0f to disable | ||||
|         } adamw; | ||||
|     }; | ||||
|  | ||||
|     // callback to calculate optimizer parameters prior to a backward pass | ||||
|     // userdata can be used to pass arbitrary data | ||||
|     typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata); | ||||
|  | ||||
|     // returns the default optimizer params (constant) | ||||
|     // userdata is not used | ||||
|     GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata); | ||||
|  | ||||
|     // parameters for initializing a new optimization context | ||||
|     struct ggml_opt_params { | ||||
|         ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs | ||||
|  | ||||
|         struct ggml_context * ctx_compute; // created in user code, holds non-static tensors | ||||
|  | ||||
|         // the forward graph is defined by inputs and outputs | ||||
|         // those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts | ||||
|         struct ggml_tensor * inputs; | ||||
|         struct ggml_tensor * outputs; | ||||
|  | ||||
|         enum ggml_opt_loss_type  loss_type; | ||||
|         enum ggml_opt_build_type build_type; | ||||
|  | ||||
|         int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done | ||||
|  | ||||
|         ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters | ||||
|         void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters | ||||
|     }; | ||||
|  | ||||
|     // get parameters for an optimization context with defaults set where possible | ||||
|     // parameters for which no sensible defaults exist are supplied as arguments to this function | ||||
|     // NOTE(review): ggml_opt_params is used here (and ggml_context/ggml_tensor | ||||
|     // in ggml_opt_fit below) without the struct keyword and no typedef is | ||||
|     // visible in this header; that compiles as C++ but presumably not as | ||||
|     // plain C inside this extern "C" block — verify against upstream. | ||||
|     GGML_API ggml_opt_params ggml_opt_default_params( | ||||
|             ggml_backend_sched_t      backend_sched, | ||||
|             struct ggml_context     * ctx_compute, | ||||
|             struct ggml_tensor      * inputs, | ||||
|             struct ggml_tensor      * outputs, | ||||
|             enum ggml_opt_loss_type   loss_type); | ||||
|  | ||||
|     GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params); | ||||
|     GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx); | ||||
|  | ||||
|     // set gradients to zero, initialize loss, and optionally reset the optimizer | ||||
|     GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer); | ||||
|  | ||||
|     // get underlying tensors that store data | ||||
|     GGML_API struct ggml_tensor * ggml_opt_inputs(  ggml_opt_context_t opt_ctx); // forward graph input tensor | ||||
|     GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor | ||||
|     GGML_API struct ggml_tensor * ggml_opt_labels(  ggml_opt_context_t opt_ctx); // labels to compare outputs against | ||||
|     GGML_API struct ggml_tensor * ggml_opt_loss(    ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss | ||||
|     GGML_API struct ggml_tensor * ggml_opt_pred(    ggml_opt_context_t opt_ctx); // predictions made by outputs | ||||
|     GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels | ||||
|  | ||||
|     GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node); | ||||
|  | ||||
|     // ====== Optimization Result ====== | ||||
|  | ||||
|     // NOTE(review): empty parentheses declare an unspecified-parameter | ||||
|     // function in C prior to C23; for a C-facing prototype this should be | ||||
|     // (void) — confirm against upstream before changing vendored code. | ||||
|     GGML_API ggml_opt_result_t ggml_opt_result_init(); | ||||
|     GGML_API void ggml_opt_result_free(ggml_opt_result_t result); | ||||
|     GGML_API void ggml_opt_result_reset(ggml_opt_result_t result); | ||||
|  | ||||
|     // get data from result, uncertainties are optional and can be ignored by passing NULL | ||||
|     GGML_API void ggml_opt_result_ndata(   ggml_opt_result_t result, int64_t * ndata);                  // writes 1 value, number of datapoints | ||||
|     GGML_API void ggml_opt_result_loss(    ggml_opt_result_t result, double  * loss,     double * unc); // writes 1 value | ||||
|     GGML_API void ggml_opt_result_pred(    ggml_opt_result_t result, int32_t * pred);                   // writes ndata values | ||||
|     GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double  * accuracy, double * unc); // writes 1 value | ||||
|  | ||||
|     // ====== Computation ====== | ||||
|  | ||||
|     // do forward pass, increment result if not NULL | ||||
|     GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); | ||||
|  | ||||
|     // do forward pass, increment result if not NULL, do backward pass | ||||
|     GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); | ||||
|  | ||||
|     // ############################################################################ | ||||
|     // ## The high-level functions start here. They do not depend on any private ## | ||||
|     // ## functions or structs and can be copied to and adapted for user code.   ## | ||||
|     // ############################################################################ | ||||
|  | ||||
|     // ====== Intended Usage ====== | ||||
|     // | ||||
|     // 1. Select the appropriate loss for your problem. | ||||
|     // 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them. | ||||
|     //    Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster). | ||||
|     // 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors. | ||||
|     //    The first context should contain the model parameters and inputs and be allocated statically in user code. | ||||
|     //    The second context should contain all other tensors and will be (re)allocated automatically. | ||||
|     //    Due to this automated allocation the data of the second context is not defined when accessed in user code. | ||||
|     //    Note that the second dimension of the inputs/outputs are interpreted as the number of datapoints in those tensors. | ||||
|     // 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead. | ||||
|  | ||||
|     // signature for a callback while evaluating opt_ctx on dataset, called after an evaluation | ||||
|     typedef void (*ggml_opt_epoch_callback)( | ||||
|             bool               train,       // true after training evaluation, false after validation evaluation | ||||
|             ggml_opt_context_t opt_ctx, | ||||
|             ggml_opt_dataset_t dataset, | ||||
|             ggml_opt_result_t  result,      // result associated with the dataset subsection | ||||
|             int64_t            ibatch,      // number of batches that have been evaluated so far | ||||
|             int64_t            ibatch_max,  // total number of batches in this dataset subsection | ||||
|             int64_t            t_start_us); // time at which the evaluation on the dataset subsection was started | ||||
|  | ||||
|     // do training on front of dataset, do evaluation only on back of dataset | ||||
|     GGML_API void ggml_opt_epoch( | ||||
|             ggml_opt_context_t      opt_ctx, | ||||
|             ggml_opt_dataset_t      dataset, | ||||
|             ggml_opt_result_t       result_train,   // result to increment during training, ignored if NULL | ||||
|             ggml_opt_result_t       result_eval,    // result to increment during evaluation, ignored if NULL | ||||
|             int64_t                 idata_split,    // data index at which to split training and evaluation | ||||
|             ggml_opt_epoch_callback callback_train, | ||||
|             ggml_opt_epoch_callback callback_eval); | ||||
|  | ||||
|     // callback that prints a progress bar on stderr | ||||
|     GGML_API void ggml_opt_epoch_callback_progress_bar( | ||||
|             bool               train, | ||||
|             ggml_opt_context_t opt_ctx, | ||||
|             ggml_opt_dataset_t dataset, | ||||
|             ggml_opt_result_t  result, | ||||
|             int64_t            ibatch, | ||||
|             int64_t            ibatch_max, | ||||
|             int64_t            t_start_us); | ||||
|  | ||||
|     // fit model defined by inputs and outputs to dataset | ||||
|     GGML_API void ggml_opt_fit( | ||||
|             ggml_backend_sched_t            backend_sched,  // backend scheduler for constructing the compute graphs | ||||
|             ggml_context                  * ctx_compute,    // context with temporarily allocated tensors to calculate the outputs | ||||
|             ggml_tensor                   * inputs,         // input tensor with shape [ne_datapoint, ndata_batch] | ||||
|             ggml_tensor                   * outputs,        // output tensor, must have shape [ne_label, ndata_batch] if labels are used | ||||
|             ggml_opt_dataset_t              dataset,        // dataset with data and optionally also labels | ||||
|             enum ggml_opt_loss_type         loss_type,      // loss to minimize | ||||
|             ggml_opt_get_optimizer_params   get_opt_pars,   // callback to get optimizer params, userdata is pointer to epoch (of type int64_t) | ||||
|             int64_t                         nepoch,         // how many times the dataset should be iterated over | ||||
|             int64_t                         nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs | ||||
|             float                           val_split,      // fraction of the dataset to use for validation, must be in [0.0f, 1.0f) | ||||
|             bool                            silent);        // whether or not info prints to stderr should be suppressed | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-rpc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-rpc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // upper bound on the number of RPC servers a single build can address | ||||
| #define GGML_RPC_MAX_SERVERS       16 | ||||
|  | ||||
| // backend API | ||||
| // create a backend that forwards work to the RPC server at `endpoint` | ||||
| // (endpoint format not defined here -- presumably "host:port"; confirm in the ggml-rpc implementation) | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint); | ||||
| // returns true if `backend` was created by ggml_backend_rpc_init | ||||
| GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend); | ||||
|  | ||||
| // buffer type whose allocations are backed by the server at `endpoint` -- TODO confirm | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint); | ||||
|  | ||||
| // query the remote server's device memory; `free` and `total` are out-parameters (bytes) | ||||
| GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total); | ||||
|  | ||||
| // serve the local `backend` over RPC on `endpoint`, advertising the given memory sizes | ||||
| // NOTE(review): likely blocks the calling thread -- verify against the implementation | ||||
| GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem); | ||||
|  | ||||
| // backend registry entry for the RPC backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); | ||||
|  | ||||
| // register the server at `endpoint` as a ggml device | ||||
| GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										49
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-sycl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-sycl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| // | ||||
| //  MIT license | ||||
| //  Copyright (C) 2024 Intel Corporation | ||||
| //  SPDX-License-Identifier: MIT | ||||
| // | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #define GGML_SYCL_NAME "SYCL" | ||||
| // upper bound on the number of SYCL devices the backend enumerates | ||||
| #define GGML_SYCL_MAX_DEVICES 48 | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // backend API | ||||
| // create a backend on SYCL device number `device` | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device); | ||||
|  | ||||
| // returns true if `backend` was created by ggml_backend_sycl_init | ||||
| GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend); | ||||
|  | ||||
| // device buffer | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device); | ||||
|  | ||||
| // split tensor buffer that splits matrices by rows across multiple devices | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split); | ||||
|  | ||||
| // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void); | ||||
|  | ||||
| GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void); | ||||
| // fill `id_list` with up to `max_len` SYCL GPU ids -- presumably unused slots get a sentinel; confirm in implementation | ||||
| GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len); | ||||
| // copy a human-readable device name into `description` (at most `description_size` bytes -- presumably truncating) | ||||
| GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device, | ||||
|                                                        char *description, | ||||
|                                                        size_t description_size); | ||||
| // NOTE(review): `()` declares an unspecified parameter list in C (pre-C23); prefer `(void)` for a true prototype | ||||
| GGML_BACKEND_API int  ggml_backend_sycl_get_device_count(); | ||||
| // query free/total memory (bytes) of `device` via the out-parameters | ||||
| GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total); | ||||
|  | ||||
| // SYCL doesn't support registering host memory, keep here for reference | ||||
| // GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size); | ||||
| // GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer); | ||||
|  | ||||
| // backend registry entry for the SYCL backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										31
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-vulkan.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml-vulkan.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-backend.h" | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define GGML_VK_NAME "Vulkan" | ||||
| // upper bound on the number of Vulkan devices the backend enumerates | ||||
| #define GGML_VK_MAX_DEVICES 16 | ||||
|  | ||||
| // initialize the global Vulkan instance -- TODO confirm whether repeated calls are safe | ||||
| GGML_BACKEND_API void ggml_vk_instance_init(void); | ||||
|  | ||||
| // backend API | ||||
| // create a backend on Vulkan device number `dev_num` | ||||
| GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num); | ||||
|  | ||||
| // returns true if `backend` was created by ggml_backend_vk_init | ||||
| GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend); | ||||
| GGML_BACKEND_API int  ggml_backend_vk_get_device_count(void); | ||||
| // copy a human-readable device name into `description` (at most `description_size` bytes -- presumably truncating) | ||||
| GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size); | ||||
| // query free/total memory (bytes) of `device` via the out-parameters | ||||
| GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total); | ||||
|  | ||||
| // buffer type for allocations on Vulkan device `dev_num` | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num); | ||||
| // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU | ||||
| GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void); | ||||
|  | ||||
| // backend registry entry for the Vulkan backend | ||||
| GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										2193
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2193
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/ggml.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										202
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/gguf.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/include/gguf.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | ||||
| // This file contains functionality related to "GGUF" files, the binary file format used by ggml. | ||||
| // GGUF files have the following structure: | ||||
| // | ||||
| // 1. File magic "GGUF" (4 bytes). | ||||
| // 2. File version (uint32_t). | ||||
| // 3. Number of ggml tensors in file (int64_t). | ||||
| // 4. Number of key-value-pairs in file (int64_t). | ||||
| // 5. For each KV pair: | ||||
| //   1. The key (string). | ||||
| //   2. The value type (gguf_type). | ||||
| //   3a. If the value type is GGUF_TYPE_ARRAY: | ||||
| //     1. The type of the array (gguf_type). | ||||
| //     2. The number of elements in the array (uint64_t). | ||||
| //     3. The binary representation of each element in the array. | ||||
| //   3b. Otherwise: | ||||
| //     1. The binary representation of the value. | ||||
| // 6. For each ggml tensor: | ||||
| //   1. The tensor name (string). | ||||
| //   2. The number of dimensions of the tensor (uint32_t). | ||||
| //   3. For each dimension: | ||||
| //     1. The size of the tensor in the dimension (int64_t). | ||||
| //   4. The tensor data type (ggml_type). | ||||
| //   5. The tensor data offset in the tensor data binary blob (uint64_t). | ||||
| // 7. The tensor data binary blob (optional, aligned). | ||||
| // | ||||
| // Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator. | ||||
| // All enums are stored as int32_t. | ||||
| // All bool values are stored as int8_t. | ||||
| // If the special key "general.alignment" (uint32_t) is defined it is used for alignment, | ||||
| //   otherwise GGUF_DEFAULT_ALIGNMENT is used. | ||||
| // | ||||
| // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
|  | ||||
| #include <stdbool.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| #define GGUF_MAGIC   "GGUF" | ||||
| #define GGUF_VERSION 3 | ||||
|  | ||||
| #define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment" | ||||
|  | ||||
| #define GGUF_DEFAULT_ALIGNMENT 32 | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|     // types that can be stored as GGUF KV data | ||||
|     enum gguf_type { | ||||
|         GGUF_TYPE_UINT8   = 0, | ||||
|         GGUF_TYPE_INT8    = 1, | ||||
|         GGUF_TYPE_UINT16  = 2, | ||||
|         GGUF_TYPE_INT16   = 3, | ||||
|         GGUF_TYPE_UINT32  = 4, | ||||
|         GGUF_TYPE_INT32   = 5, | ||||
|         GGUF_TYPE_FLOAT32 = 6, | ||||
|         GGUF_TYPE_BOOL    = 7, | ||||
|         GGUF_TYPE_STRING  = 8, | ||||
|         GGUF_TYPE_ARRAY   = 9, | ||||
|         GGUF_TYPE_UINT64  = 10, | ||||
|         GGUF_TYPE_INT64   = 11, | ||||
|         GGUF_TYPE_FLOAT64 = 12, | ||||
|         GGUF_TYPE_COUNT,       // marks the end of the enum | ||||
|     }; | ||||
|  | ||||
|     struct gguf_context; | ||||
|  | ||||
|     struct gguf_init_params { | ||||
|         bool no_alloc; | ||||
|  | ||||
|         // if not NULL, create a ggml_context and allocate the tensor data in it | ||||
|         struct ggml_context ** ctx; | ||||
|     }; | ||||
|  | ||||
|     GGML_API struct gguf_context * gguf_init_empty(void); | ||||
|     GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); | ||||
|     //GGML_API struct gguf_context * gguf_init_from_buffer(..); | ||||
|  | ||||
|     GGML_API void gguf_free(struct gguf_context * ctx); | ||||
|  | ||||
|     GGML_API const char * gguf_type_name(enum gguf_type type); | ||||
|  | ||||
|     GGML_API uint32_t gguf_get_version    (const struct gguf_context * ctx); | ||||
|     GGML_API size_t   gguf_get_alignment  (const struct gguf_context * ctx); | ||||
|     GGML_API size_t   gguf_get_data_offset(const struct gguf_context * ctx); | ||||
|  | ||||
|     GGML_API int64_t      gguf_get_n_kv(const struct gguf_context * ctx); | ||||
|     GGML_API int64_t      gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found | ||||
|     GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     // will abort if the wrong type is used for the key | ||||
|     GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id); | ||||
|     GGML_API size_t       gguf_get_arr_n   (const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     // get raw pointer to the first element of the array with the given key_id | ||||
|     // for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference) | ||||
|     GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id); | ||||
|  | ||||
|     // get ith C string from array with given key_id | ||||
|     GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i); | ||||
|  | ||||
|     GGML_API int64_t        gguf_get_n_tensors    (const struct gguf_context * ctx); | ||||
|     GGML_API int64_t        gguf_find_tensor      (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found | ||||
|     GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id); | ||||
|     GGML_API const char *   gguf_get_tensor_name  (const struct gguf_context * ctx, int64_t tensor_id); | ||||
|     GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int64_t tensor_id); | ||||
|     GGML_API size_t         gguf_get_tensor_size  (const struct gguf_context * ctx, int64_t tensor_id); | ||||
|  | ||||
|     // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist) | ||||
|     GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key); | ||||
|  | ||||
|     // overrides an existing KV pair or adds a new one, the new KV pair is always at the back | ||||
|     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t      val); | ||||
|     GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t       val); | ||||
|     GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t     val); | ||||
|     GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t      val); | ||||
|     GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t     val); | ||||
|     GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t      val); | ||||
|     GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float        val); | ||||
|     GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t     val); | ||||
|     GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t      val); | ||||
|     GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double       val); | ||||
|     GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool         val); | ||||
|     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); | ||||
|  | ||||
|     // creates a new array with n elements of the given type and copies the corresponding number of bytes from data | ||||
|     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n); | ||||
|  | ||||
|     // creates a new array with n strings and copies the corresponding strings from data | ||||
|     GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n); | ||||
|  | ||||
|     // set or add KV pairs from another context | ||||
|     GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src); | ||||
|  | ||||
|     // add tensor to GGUF context, tensor name must be unique | ||||
|     GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); | ||||
|  | ||||
|     // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated | ||||
|     //   in such a way that the tensor data remains as one contiguous block (except for padding) | ||||
|     GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type); | ||||
|  | ||||
|     // assumes that at least gguf_get_tensor_size bytes can be read from data | ||||
|     GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data); | ||||
|  | ||||
|     // writing gguf files can be done in 3 ways: | ||||
|     // | ||||
|     // - write the entire gguf_context to a binary file in a single pass: | ||||
|     // | ||||
|     //   gguf_write_to_file(ctx, fname, /*only_meta =*/ false); | ||||
|     // | ||||
|     // - write only the meta data to a file, then re-open the file and append the tensor data: | ||||
|     // | ||||
|     //   gguf_write_to_file(ctx, fname, /*only_meta =*/ true); | ||||
|     //   FILE * f = fopen(fname, "ab"); | ||||
|     //   fwrite(..., f); // write tensor data (fwrite takes the stream last) | ||||
|     //   fclose(f); | ||||
|     // | ||||
|     // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: | ||||
|     // | ||||
|     //   FILE * f = fopen(fname, "wb"); | ||||
|     //   const size_t size_meta = gguf_get_meta_size(ctx); | ||||
|     //   fseek(f, size_meta, SEEK_SET); | ||||
|     //   fwrite(..., f); // write tensor data (fwrite takes the stream last) | ||||
|     //   void * data = malloc(size_meta); | ||||
|     //   gguf_get_meta_data(ctx, data); | ||||
|     //   rewind(f); | ||||
|     //   fwrite(data, 1, size_meta, f); // fixed: was fwrite(data, 1, data, f) -- a pointer is not a valid element count | ||||
|     //   free(data); | ||||
|     //   fclose(f); | ||||
|     // | ||||
|  | ||||
|     // write the entire context to a binary file | ||||
|     GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); | ||||
|  | ||||
|     // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding | ||||
|     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); | ||||
|  | ||||
|     // writes the meta data to pointer "data" | ||||
|     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data); | ||||
|  | ||||
| #ifdef  __cplusplus | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										357
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										357
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,357 @@ | ||||
| include(CheckCXXCompilerFlag) | ||||
|  | ||||
| add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) | ||||
|  | ||||
| # enable libstdc++ assertions for debug builds | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|     add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) | ||||
| endif() | ||||
|  | ||||
| if (NOT MSVC) | ||||
|     if (GGML_SANITIZE_THREAD) | ||||
|         add_compile_options(-fsanitize=thread) | ||||
|         link_libraries     (-fsanitize=thread) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_SANITIZE_ADDRESS) | ||||
|         add_compile_options(-fsanitize=address -fno-omit-frame-pointer) | ||||
|         link_libraries     (-fsanitize=address) | ||||
|     endif() | ||||
|  | ||||
|     if (GGML_SANITIZE_UNDEFINED) | ||||
|         add_compile_options(-fsanitize=undefined) | ||||
|         link_libraries     (-fsanitize=undefined) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| # collect extra per-compiler warning flags for compiler id CCID at version CCVER; | ||||
| # results are handed back to the caller via GF_C_FLAGS / GF_CXX_FLAGS (PARENT_SCOPE) | ||||
| function(ggml_get_flags CCID CCVER) | ||||
|     set(C_FLAGS "") | ||||
|     set(CXX_FLAGS "") | ||||
|  | ||||
|     if (CCID MATCHES "Clang") | ||||
|         set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return) | ||||
|         set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) | ||||
|  | ||||
|         # -Wdouble-promotion only on sufficiently new Clang / AppleClang | ||||
|         if ( | ||||
|             (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR | ||||
|             (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) | ||||
|         ) | ||||
|             list(APPEND C_FLAGS -Wdouble-promotion) | ||||
|         endif() | ||||
|     elseif (CCID STREQUAL "GNU") | ||||
|         set(C_FLAGS   -Wdouble-promotion) | ||||
|         set(CXX_FLAGS -Wno-array-bounds) | ||||
|  | ||||
|         # -Wextra-semi for C++ arrived in GCC 8.1 | ||||
|         if (CCVER VERSION_GREATER_EQUAL 8.1.0) | ||||
|             list(APPEND CXX_FLAGS -Wextra-semi) | ||||
|         endif() | ||||
|     endif() | ||||
|  | ||||
|     set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE) | ||||
|     set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) | ||||
| endfunction() | ||||
|  | ||||
| if (GGML_FATAL_WARNINGS) | ||||
|     if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") | ||||
|         list(APPEND C_FLAGS   -Werror) | ||||
|         list(APPEND CXX_FLAGS -Werror) | ||||
|     elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") | ||||
|         add_compile_options(/WX) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (GGML_ALL_WARNINGS) | ||||
|     if (NOT MSVC) | ||||
|         list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) | ||||
|         list(APPEND C_FLAGS       -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes | ||||
|                                   -Werror=implicit-int -Werror=implicit-function-declaration) | ||||
|         list(APPEND CXX_FLAGS     -Wmissing-declarations -Wmissing-noreturn) | ||||
|  | ||||
|         list(APPEND C_FLAGS   ${WARNING_FLAGS}) | ||||
|         list(APPEND CXX_FLAGS ${WARNING_FLAGS}) | ||||
|  | ||||
|         ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) | ||||
|  | ||||
|         add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" | ||||
|                             "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") | ||||
|     else() | ||||
|         # todo : msvc | ||||
|         set(C_FLAGS   "") | ||||
|         set(CXX_FLAGS "") | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (GGML_LTO) | ||||
|     include(CheckIPOSupported) | ||||
|     check_ipo_supported(RESULT result OUTPUT output) | ||||
|     if (result) | ||||
|         set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) | ||||
|     else() | ||||
|         message(WARNING "IPO is not supported: ${output}") | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (GGML_CCACHE) | ||||
|     find_program(GGML_CCACHE_FOUND ccache) | ||||
|     find_program(GGML_SCCACHE_FOUND sccache) | ||||
|  | ||||
|     if (GGML_CCACHE_FOUND OR GGML_SCCACHE_FOUND) | ||||
|         if(GGML_CCACHE_FOUND) | ||||
|             set(GGML_CCACHE_VARIANT ccache) | ||||
|         else() | ||||
|             set(GGML_CCACHE_VARIANT sccache) | ||||
|         endif() | ||||
|         # TODO: should not be set globally | ||||
|         set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}") | ||||
|         set(ENV{CCACHE_SLOPPINESS} time_macros) | ||||
|         message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.") | ||||
|     else() | ||||
|         message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF") | ||||
|     endif () | ||||
| endif() | ||||
|  | ||||
| # this version of Apple ld64 is buggy | ||||
| execute_process( | ||||
|     COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v | ||||
|     ERROR_VARIABLE output | ||||
|     OUTPUT_QUIET | ||||
| ) | ||||
|  | ||||
| if (output MATCHES "dyld-1015\.7") | ||||
|     add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) | ||||
| endif() | ||||
|  | ||||
| # architecture specific | ||||
| # TODO: probably these flags need to be tweaked on some architectures | ||||
| #       feel free to update the Makefile for your architecture and send a pull request or issue | ||||
| message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") | ||||
| if (MSVC) | ||||
|     string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) | ||||
|     message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") | ||||
| else () | ||||
|     set(CMAKE_GENERATOR_PLATFORM_LWR "") | ||||
| endif () | ||||
|  | ||||
| if (NOT MSVC) | ||||
|     if (GGML_STATIC) | ||||
|         add_link_options(-static) | ||||
|         if (MINGW) | ||||
|             add_link_options(-static-libgcc -static-libstdc++) | ||||
|         endif() | ||||
|     endif() | ||||
|     if (GGML_GPROF) | ||||
|         add_compile_options(-pg) | ||||
|     endif() | ||||
| endif() | ||||
|  | ||||
| if (MINGW) | ||||
|     # Target Windows 8 for PrefetchVirtualMemory | ||||
|     add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER}) | ||||
| endif() | ||||
|  | ||||
| # | ||||
| # POSIX conformance | ||||
| # | ||||
|  | ||||
| # clock_gettime came in POSIX.1b (1993) | ||||
| # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional | ||||
| # posix_memalign came in POSIX.1-2001 / SUSv3 | ||||
| # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) | ||||
|  | ||||
| # Somehow in OpenBSD whenever POSIX conformance is specified | ||||
| # some string functions rely on locale_t availability, | ||||
| # which was introduced in POSIX.1-2008, forcing us to go higher | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") | ||||
|     add_compile_definitions(_XOPEN_SOURCE=700) | ||||
| else() | ||||
|     add_compile_definitions(_XOPEN_SOURCE=600) | ||||
| endif() | ||||
|  | ||||
| # Data types, macros and functions related to controlling CPU affinity and | ||||
| # some memory allocation are available on Linux through GNU extensions in libc | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android") | ||||
|     add_compile_definitions(_GNU_SOURCE) | ||||
| endif() | ||||
|  | ||||
| # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, | ||||
| # and on macOS its availability depends on enabling Darwin extensions | ||||
| # similarly on DragonFly, enabling BSD extensions is necessary | ||||
| if ( | ||||
|     CMAKE_SYSTEM_NAME MATCHES "Darwin" OR | ||||
|     CMAKE_SYSTEM_NAME MATCHES "iOS"    OR | ||||
|     CMAKE_SYSTEM_NAME MATCHES "tvOS"   OR | ||||
|     CMAKE_SYSTEM_NAME MATCHES "DragonFly" | ||||
| ) | ||||
|     add_compile_definitions(_DARWIN_C_SOURCE) | ||||
| endif() | ||||
|  | ||||
| # alloca is a non-standard interface that is not visible on BSDs when | ||||
| # POSIX conformance is specified, but not all of them provide a clean way | ||||
| # to enable it in such cases | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") | ||||
|     add_compile_definitions(__BSD_VISIBLE) | ||||
| endif() | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") | ||||
|     add_compile_definitions(_NETBSD_SOURCE) | ||||
| endif() | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") | ||||
|     add_compile_definitions(_BSD_SOURCE) | ||||
| endif() | ||||
|  | ||||
| if (WIN32) | ||||
|     add_compile_definitions(_CRT_SECURE_NO_WARNINGS) | ||||
| endif() | ||||
|  | ||||
| # ggml | ||||
|  | ||||
| if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS) | ||||
|     message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS") | ||||
| endif() | ||||
|  | ||||
| add_library(ggml-base | ||||
|             ../include/ggml.h | ||||
|             ../include/ggml-alloc.h | ||||
|             ../include/ggml-backend.h | ||||
|             ../include/ggml-cpp.h | ||||
|             ../include/ggml-opt.h | ||||
|             ../include/gguf.h | ||||
|             ggml.c | ||||
|             ggml-alloc.c | ||||
|             ggml-backend.cpp | ||||
|             ggml-opt.cpp | ||||
|             ggml-threading.cpp | ||||
|             ggml-threading.h | ||||
|             ggml-quants.c | ||||
|             ggml-quants.h | ||||
|             gguf.cpp) | ||||
|  | ||||
| target_include_directories(ggml-base PRIVATE .) | ||||
|  | ||||
| add_library(ggml | ||||
|             ggml-backend-reg.cpp) | ||||
|  | ||||
| target_link_libraries(ggml PUBLIC ggml-base) | ||||
|  | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|     target_link_libraries(ggml PRIVATE dl) | ||||
| endif() | ||||
|  | ||||
| # declare a backend target `backend` built from the remaining arguments (ARGN): | ||||
| # as a dynamically loadable MODULE when GGML_BACKEND_DL is set, otherwise as a | ||||
| # regular library linked into (and installed with) the `ggml` umbrella target | ||||
| function(ggml_add_backend_library backend) | ||||
|     if (GGML_BACKEND_DL) | ||||
|         add_library(${backend} MODULE ${ARGN}) | ||||
|         # write the shared library to the output directory | ||||
|         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) | ||||
|         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL) | ||||
|         add_dependencies(ggml ${backend}) | ||||
|     else() | ||||
|         add_library(${backend} ${ARGN}) | ||||
|         target_link_libraries(ggml PUBLIC ${backend}) | ||||
|         install(TARGETS ${backend} LIBRARY) | ||||
|     endif() | ||||
|  | ||||
|     target_link_libraries(${backend} PRIVATE ggml-base) | ||||
|     target_include_directories(${backend} PRIVATE ..) | ||||
|  | ||||
|     if (${BUILD_SHARED_LIBS}) | ||||
|         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD) | ||||
|         target_compile_definitions(${backend} PUBLIC  GGML_BACKEND_SHARED) | ||||
|     endif() | ||||
|  | ||||
|     # record the backend in the GGML_AVAILABLE_BACKENDS cache list (deduplicated) | ||||
|     if(NOT GGML_AVAILABLE_BACKENDS) | ||||
|         set(GGML_AVAILABLE_BACKENDS "${backend}" | ||||
|             CACHE INTERNAL "List of backends for cmake package") | ||||
|     else() | ||||
|         list(FIND GGML_AVAILABLE_BACKENDS "${backend}" has_backend) | ||||
|         if(has_backend EQUAL -1) | ||||
|             set(GGML_AVAILABLE_BACKENDS "${GGML_AVAILABLE_BACKENDS};${backend}" | ||||
|                 CACHE INTERNAL "List of backends for cmake package") | ||||
|         endif() | ||||
|     endif() | ||||
| endfunction() | ||||
|  | ||||
# Adds the subdirectory for ${backend} when its GGML_<BACKEND> option is ON.
# For statically linked builds (no GGML_BACKEND_DL) it also defines
# GGML_USE_<BACKEND> on the ggml target so the core registers the backend at
# compile time instead of loading it dynamically.
function(ggml_add_backend backend)
    string(TOUPPER "GGML_${backend}" backend_id)
    if (${backend_id})
        string(TOLOWER "ggml-${backend}" backend_target)
        add_subdirectory(${backend_target})
        message(STATUS "Including ${backend} backend")
        if (NOT GGML_BACKEND_DL)
            string(TOUPPER "GGML_USE_${backend}" backend_use)
            target_compile_definitions(ggml PUBLIC ${backend_use})
        endif()
    endif()
endfunction()
|  | ||||
# Builds one CPU backend variant named ${tag_name} with exactly the ISA
# feature flags listed in ${ARGN} enabled. All known feature flags are
# forced OFF first so a variant never inherits flags from the cache or from
# a previously configured variant.
function(ggml_add_cpu_backend_variant tag_name)
    set(GGML_CPU_TAG_NAME ${tag_name})
    # other: OPENMP LLAMAFILE CPU_HBM
    foreach (feat NATIVE
                  AVX AVX2 AVX_VNNI FMA F16C
                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
                  AMX_TILE AMX_INT8 AMX_BF16)
        set(GGML_${feat} OFF)
    endforeach()

    # Turn on only the features requested for this variant.
    foreach (feat ${ARGN})
        set(GGML_${feat} ON)
    endforeach()

    ggml_add_cpu_backend_variant_impl(${tag_name})
endfunction()
|  | ||||
# The CPU backend is always considered; the remaining backends are opt-in.
ggml_add_backend(CPU)

# GGML_CPU_ALL_VARIANTS builds one dynamically loadable CPU backend per x86
# microarchitecture level; the registry picks the best match at runtime.
if (GGML_CPU_ALL_VARIANTS)
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
    endif()
    ggml_add_cpu_backend_variant(sandybridge    AVX)
    ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 FMA)
    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
    ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
    if (NOT MSVC)
        # MSVC doesn't support AMX
        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
    endif()
elseif (GGML_CPU)
    ggml_add_cpu_backend_variant_impl("")
endif()

ggml_add_backend(BLAS)
ggml_add_backend(CANN)
ggml_add_backend(CUDA)
ggml_add_backend(HIP)
ggml_add_backend(Kompute)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(OpenCL)

# Public include path and language levels for the two core targets.
foreach (target ggml-base ggml)
    target_include_directories(${target} PUBLIC    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
    target_compile_features   (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
endforeach()

target_link_libraries(ggml-base PRIVATE Threads::Threads)

# Link libm where it exists as a separate library.
# NOTE(review): the ONEAPI_ROOT exception presumably avoids clashing with the
# Intel toolchain's own math library on Windows — confirm before changing.
find_library(MATH_LIBRARY m)
if (MATH_LIBRARY)
    if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
        target_link_libraries(ggml-base PRIVATE m)
    endif()
endif()

# Android needs an explicit libdl for dlopen().
if (CMAKE_SYSTEM_NAME MATCHES "Android")
    target_link_libraries(ggml-base PRIVATE dl)
endif()

if (BUILD_SHARED_LIBS)
    foreach (target ggml-base ggml)
        set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
        # GGML_BUILD selects dllexport while building; GGML_SHARED selects
        # dllimport in consumers.
        target_compile_definitions(${target} PRIVATE GGML_BUILD)
        target_compile_definitions(${target} PUBLIC  GGML_SHARED)
    endforeach()
endif()
							
								
								
									
										1042
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-alloc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1042
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-alloc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										107
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
# Build the AMX backend only for x86-64 targets compiled with GCC newer than
# 11 (older compilers lack the AMX intrinsics).
# NOTE(review): the OR/AND mix in this condition is not fully parenthesized,
# and it gates on CMAKE_COMPILER_IS_GNUCC (the C compiler) while comparing
# CMAKE_CXX_COMPILER_VERSION — verify the intended precedence and compiler
# check against upstream before relying on it.
if (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$") AND
        CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
    message(STATUS "Using AMX")

    # All backend headers/sources in this directory, plus the public header.
    file(GLOB   GGML_HEADERS_AMX "*.h")
    list(APPEND GGML_HEADERS_AMX "../../include/ggml-amx.h")

    file(GLOB   GGML_SOURCES_AMX "*.cpp")

    add_library(ggml-amx
                ${GGML_HEADERS_AMX}
                ${GGML_SOURCES_AMX})

    target_link_libraries(ggml-amx PRIVATE ggml-base)
    target_include_directories(ggml-amx PRIVATE . ..)

    # this is duplicated from the CPU backend, since the AMX backend also depends on the architecture flags
    # TODO: integrate AMX backend into the CPU backend
    if (MSVC)
        # instruction set detection for MSVC only
        if (GGML_NATIVE)
            # TODO: improve, should not reference files from the parent folder
            include(../ggml-cpu/cmake/FindSIMD.cmake)
        endif ()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS /arch:AVX512)
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
            # Do it manually.
            if (GGML_AVX512_VBMI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
            endif()
            if (GGML_AVX512_VNNI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
            endif()
            if (GGML_AVX512_BF16)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
            endif()
            if (GGML_AMX_TILE)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
            endif()
            if (GGML_AMX_INT8)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
            endif()
            if (GGML_AMX_BF16)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
            endif()
        elseif (GGML_AVX2)
            list(APPEND ARCH_FLAGS /arch:AVX2)
        elseif (GGML_AVX)
            list(APPEND ARCH_FLAGS /arch:AVX)
        endif()
    else()
        # GCC/Clang: translate each feature option into its -m flag.
        if (GGML_NATIVE)
            list(APPEND ARCH_FLAGS -march=native)
        endif()
        if (GGML_F16C)
            list(APPEND ARCH_FLAGS -mf16c)
        endif()
        if (GGML_FMA)
            list(APPEND ARCH_FLAGS -mfma)
        endif()
        if (GGML_AVX)
            list(APPEND ARCH_FLAGS -mavx)
        endif()
        if (GGML_AVX2)
            list(APPEND ARCH_FLAGS -mavx2)
        endif()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS -mavx512f)
            list(APPEND ARCH_FLAGS -mavx512dq)
            list(APPEND ARCH_FLAGS -mavx512bw)
        endif()
        if (GGML_AVX512_VBMI)
            list(APPEND ARCH_FLAGS -mavx512vbmi)
        endif()
        if (GGML_AVX512_VNNI)
            list(APPEND ARCH_FLAGS -mavx512vnni)
        endif()
        if (GGML_AVX512_BF16)
            list(APPEND ARCH_FLAGS -mavx512bf16)
        endif()
        if (GGML_AMX_TILE)
            list(APPEND ARCH_FLAGS -mamx-tile)
        endif()
        if (GGML_AMX_INT8)
            list(APPEND ARCH_FLAGS -mamx-int8)
        endif()
        if (GGML_AMX_BF16)
            list(APPEND ARCH_FLAGS -mamx-bf16)
        endif()
    endif()

    target_compile_options(ggml-amx PRIVATE ${ARCH_FLAGS})
else()
    # Unsupported platform/compiler: disable the option in the parent scope
    # so the rest of the build does not expect this backend.
    set(GGML_AMX OFF PARENT_SCOPE)
    message(WARNING "AMX requires x86 and gcc version > 11.0. Turning off GGML_AMX.")
endif()
							
								
								
									
										94
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| // hack until AMX is moved into the CPU backend | ||||
| #include "../ggml-cpu/ggml-cpu-impl.h" // <immintrin.h> | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <memory> | ||||
| #include <type_traits> | ||||
|  | ||||
| #if defined(_OPENMP) | ||||
| #include <omp.h> | ||||
| #endif | ||||
|  | ||||
// AMX tile dimensions used by the matmul kernels.
// NOTE(review): from this header alone, TILE_M/TILE_N/TILE_K appear to be
// the M/N/K blocking of a single tile multiply — confirm against mmq.cpp.
#define TILE_M 16
#define TILE_N 16
#define TILE_K 32
// VNNI packs 4 int8 values per 32-bit lane.
#define VNNI_BLK 4

#define AMX_BLK_SIZE 32

// Symbolic names for the eight AMX tile registers tmm0..tmm7.
#define TMM0 0
#define TMM1 1
#define TMM2 2
#define TMM3 3
#define TMM4 4
#define TMM5 5
#define TMM6 6
#define TMM7 7
|  | ||||
// parallel routines

// Ceiling division for integral types: smallest q such that q * den >= num
// (for non-negative operands).
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T div_up(T num, T den) {
    return (num + den - 1) / den;
}
|  | ||||
// Splits n work items across nth workers and returns worker ith's half-open
// range [n_start, n_end). The active branch uses the PyTorch ATen pattern:
// every worker gets a chunk of ceil(n / nth) items, so trailing workers may
// receive an empty (clamped) range. The disabled branch preserves the
// oneDNN partitioning scheme for reference.
template <typename T>
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
#if 0
    // onednn partition pattern
    T& n_my = n_end;
    if (nth <= 1 || n == 0) {
        n_start = 0;
        n_my = n;
    } else {
        T n1 = div_up(n, nth);
        T n2 = n1 - 1;
        T T1 = n - n2 * nth;
        n_my = ith < T1 ? n1 : n2;
        n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2;
    }
    n_end += n_start;
#else
    // pytorch aten partition pattern
    T n_my = div_up(n, nth);
    n_start = ith * n_my;
    n_end = std::min(n_start + n_my, n);
#endif
}
|  | ||||
// Runs f(begin, end) over the range [0, n). With OpenMP, the range is
// partitioned via balance211() across an nth-thread parallel region;
// without OpenMP the whole range is processed by a single serial call.
template <typename func_t>
inline void parallel_for(int nth, int n, const func_t& f) {
#if defined(_OPENMP)
#pragma omp parallel num_threads(nth)
{
    //int nth = omp_get_num_threads();
    int ith = omp_get_thread_num();
    int tbegin, tend;
    balance211(n, nth, ith, tbegin, tend);
    f(tbegin, tend);
}
#else
    f(0, n);

    GGML_UNUSED(nth);
#endif
}
|  | ||||
| // quantized types that have AMX support | ||||
| inline bool qtype_has_amx_kernels(const enum ggml_type type) { | ||||
|     // TODO: fix padding for vnni format | ||||
|     return (type == GGML_TYPE_Q4_0) || | ||||
|         (type == GGML_TYPE_Q4_1); | ||||
|         //(type == GGML_TYPE_Q8_0) || | ||||
|         //(type == GGML_TYPE_Q4_K) || | ||||
|         //(type == GGML_TYPE_Q5_K) || | ||||
|         //(type == GGML_TYPE_Q6_K) || | ||||
|         //(type == GGML_TYPE_IQ4_XS); | ||||
| } | ||||
|  | ||||
// ggml backend context
// Per-backend-instance state: thread count plus an owned scratch buffer.
struct ggml_backend_amx_context {
    int n_threads = GGML_DEFAULT_N_THREADS;  // threads used for compute
    std::unique_ptr<char[]> work_data;       // scratch buffer (owned)
    size_t work_size = 0;                    // capacity of work_data in bytes
};
							
								
								
									
										446
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										446
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,446 @@ | ||||
| #include "ggml-amx.h" | ||||
| #include "ggml-amx/common.h" | ||||
| #include "ggml-amx/mmq.h" | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-impl.h" | ||||
|  | ||||
| #if defined(__gnu_linux__) | ||||
| #include <sys/syscall.h> | ||||
| #include <unistd.h> | ||||
| #endif | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
|  | ||||
| #if defined(__AMX_INT8__) | ||||
|  | ||||
// AMX buffer interface
// The buffer context is simply the raw allocation returned by aligned_alloc.
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}

static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *)(buffer->context);
}

static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
    memset((char *)tensor->data + offset, value, size);

    GGML_UNUSED(buffer);
}

// Uploads host data into the tensor. Types with AMX kernels are repacked
// into the AMX weight layout on the way in; everything else is a plain copy.
static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
    if (qtype_has_amx_kernels(tensor->type)) {
        ggml_backend_amx_convert_weight(tensor, data, offset, size);
    } else {
        memcpy((char *)tensor->data + offset, data, size);
    }

    GGML_UNUSED(buffer);
}

// Downloads tensor data to the host. Repacked (AMX-kernel) types cannot be
// read back, since the stored layout is not the canonical one.
static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));
    memcpy(data, (const char *)tensor->data + offset, size);

    GGML_UNUSED(buffer);
}

// Copies src into dst when src lives in a host buffer (repacking if the
// type has AMX kernels); returns false to ask the caller to fall back
// otherwise.
static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
    if (ggml_backend_buffer_is_host(src->buffer)) {
        if (qtype_has_amx_kernels(src->type)) {
            ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_backend_amx_get_alloc_size(dst));
        } else {
            memcpy(dst->data, src->data, ggml_nbytes(src));
        }
        return true;
    }
    return false;

    GGML_UNUSED(buffer);
}

static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    memset(buffer->context, value, buffer->size);
}

// Buffer vtable wired into ggml_backend_buffer_init() below.
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer     = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base        = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor     = */ NULL, // no initialization required
    /* .memset_tensor   = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor      = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor      = */ ggml_backend_amx_buffer_get_tensor,
    /* .cpy_tensor      = */ ggml_backend_amx_buffer_cpy_tensor,
    /* .clear           = */ ggml_backend_amx_buffer_clear,
    /* .reset           = */ NULL,
};
|  | ||||
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "AMX";

    GGML_UNUSED(buft);
}

// Allocates a TENSOR_ALIGNMENT-aligned host buffer for this buffer type.
// NOTE(review): C11 aligned_alloc() requires size to be a multiple of the
// alignment on some libcs — confirm callers always pass padded sizes.
static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = aligned_alloc(TENSOR_ALIGNMENT, size);
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
}

static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    return TENSOR_ALIGNMENT;

    GGML_UNUSED(buft);
}

// Storage size for a tensor in this buffer type; may differ from
// ggml_nbytes() because AMX-kernel types are stored repacked.
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor* tensor) {
    return ggml_backend_amx_get_alloc_size(tensor);

    GGML_UNUSED(buft);
}

// Not a host buffer type: tensor data may be repacked, so generic host
// pointer access is not valid.
static bool ggml_backend_amx_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
    return false;

    GGML_UNUSED(buft);
}

// Singleton buffer type for the AMX backend.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
            /* .get_name         = */ ggml_backend_amx_buffer_type_get_name,
            /* .alloc_buffer     = */ ggml_backend_amx_buffer_type_alloc_buffer,
            /* .get_alignment    = */ ggml_backend_amx_buffer_type_get_alignment,
            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
            /* .get_alloc_size   = */ ggml_backend_amx_buffer_type_get_alloc_size,
            /* .is_host          = */ ggml_backend_amx_buffer_type_is_host,
        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
        /* .context = */ NULL,
    };

    return &ggml_backend_buffer_type_amx;
}
|  | ||||
// backend interface

static const char * ggml_backend_amx_name(ggml_backend_t backend) {
    return "AMX";

    GGML_UNUSED(backend);
}

// Destroys a backend instance created by ggml_backend_amx_init(); frees the
// context and the backend object itself.
static void ggml_backend_amx_free(ggml_backend_t backend) {
    ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;
    delete ctx;
    delete backend;
}
|  | ||||
// Executes the graph node by node. Only MUL_MAT does real work here;
// reshape/view/permute/transpose are metadata-only no-ops. Any other op
// aborts — supports_op() is expected to have filtered them out upstream.
static enum ggml_status ggml_backend_amx_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        switch (node->op) {
        case GGML_OP_MUL_MAT:
            ggml_backend_amx_mul_mat(ctx, node);
            break;

        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            break;

        default:
            fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
            GGML_ASSERT(false);
        }
    }

    return GGML_STATUS_SUCCESS;

    GGML_UNUSED(backend);
}
|  | ||||
// Backend vtable: only synchronous graph_compute is implemented; async
// transfers, events and graph plans are not supported by this backend.
static struct ggml_backend_i ggml_backend_amx_i = {
    /* .get_name                = */ ggml_backend_amx_name,
    /* .free                    = */ ggml_backend_amx_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ NULL,
    /* .graph_plan_free         = */ NULL,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_amx_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
};

// Stable unique identifier used by ggml_backend_is_amx() to recognize
// backend instances of this implementation.
static ggml_guid_t ggml_backend_amx_guid() {
    static ggml_guid guid = { 0x13, 0xb8, 0xa4, 0xc4, 0xba, 0xfe, 0x51, 0x67, 0x87, 0x44, 0x55, 0x15, 0xb2, 0x35, 0x62, 0x3e };
    return &guid;
}
|  | ||||
// arch_prctl() sub-commands and XSAVE feature numbers for AMX state
// (see the Linux arch_prctl(2) man page).
#define ARCH_GET_XCOMP_PERM     0x1022
#define ARCH_REQ_XCOMP_PERM     0x1023
#define XFEATURE_XTILECFG       17
#define XFEATURE_XTILEDATA      18

// Requests permission from the OS to use the AMX tile-data state.
// On Linux this must be done once per process via arch_prctl() before any
// tile instruction executes; Windows enables the state automatically.
// Returns true when AMX state is (assumed) usable.
static bool ggml_amx_init() {
#if defined(__gnu_linux__)
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
        fprintf(stderr, "AMX is not ready to be used!\n");
        return false;
    }
    return true;
#elif defined(_WIN32)
    return true;
#else
    // Fix: the original had no return on this path, so the function fell
    // off the end (undefined behavior) on platforms that are neither Linux
    // nor Windows. There is no known way to request AMX permission there,
    // so report it as unavailable.
    return false;
#endif
}
|  | ||||
// Creates a new AMX backend instance with a fresh context.
ggml_backend_t ggml_backend_amx_init() {

    // invoke a Linux system call to request access to AMX features
    // NOTE(review): the result of ggml_amx_init() is ignored, so a backend
    // is created even if the kernel denied AMX permission — confirm whether
    // callers rely on runtime checks elsewhere.
    ggml_amx_init();

    // backend context
    ggml_backend_amx_context * ctx = new ggml_backend_amx_context;

    // ggml amx backend
    ggml_backend_t backend = new ggml_backend {
        /* .guid      = */ ggml_backend_amx_guid(),
        /* .interface = */ ggml_backend_amx_i,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
        /* .context   = */ ctx,
    };

    return backend;
}

// True when the backend was produced by this implementation (guid match).
bool ggml_backend_is_amx(ggml_backend_t backend) {
    return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_amx_guid());
}

// Updates the thread count used by subsequent graph computations.
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
    GGML_ASSERT(ggml_backend_is_amx(backend_amx));

    ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend_amx->context;
    ctx->n_threads = n_threads;
}
|  | ||||
// device interface

static const char * ggml_backend_amx_device_get_name(ggml_backend_dev_t dev) {
    return "AMX";

    GGML_UNUSED(dev);
}

static const char * ggml_backend_amx_device_get_description(ggml_backend_dev_t dev) {
    return "Intel Advanced Matrix Extensions";

    GGML_UNUSED(dev);
}

// Memory is not tracked for this backend; both values are reported as 0.
static void ggml_backend_amx_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    // TODO
    *free = 0;
    *total = 0;

    GGML_UNUSED(dev);
}

// AMX is exposed as an accelerator attached to the host CPU rather than a
// discrete device.
static enum ggml_backend_dev_type ggml_backend_amx_device_get_type(ggml_backend_dev_t dev) {
    return GGML_BACKEND_DEVICE_TYPE_ACCEL;

    GGML_UNUSED(dev);
}

// Aggregates the getters above into the property struct used by the
// registry; no optional capabilities are advertised.
static void ggml_backend_amx_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_amx_device_get_name(dev);
    props->description = ggml_backend_amx_device_get_description(dev);
    props->type        = ggml_backend_amx_device_get_type(dev);
    ggml_backend_amx_device_get_memory(dev, &props->memory_free, &props->memory_total);

    // `buffer_from_host_ptr` is intended to be used in mmap, when memory layout unchanged
    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ false,
        /* .events                = */ false,
    };
}
|  | ||||
// Creates a backend instance for this device; params are unused.
static ggml_backend_t ggml_backend_amx_device_init(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_amx_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}

static ggml_backend_buffer_type_t ggml_backend_amx_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_amx_buffer_type();

    GGML_UNUSED(dev);
}

// Op filter consulted by the scheduler before assigning nodes to this
// backend; must stay consistent with what graph_compute() can execute.
static bool ggml_backend_amx_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {

    // handle only 2d gemm for now
    auto is_contiguous_2d = [](const struct ggml_tensor * t) {
        return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
    };

    switch (op->op) {
        // metadata-only ops are always accepted
        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            return true;

        case GGML_OP_MUL_MAT: {
            const struct ggml_tensor * src0 = op->src[0];
            const struct ggml_tensor * src1 = op->src[1];

            const enum ggml_type type = src0->type;
            const int64_t ne0 = op->ne[0];

            // amx kernels enables for Q4_0, Q4_1, Q8_0, F16
            // Q4_K, Q5_K, Q6_K, IQ4_XS enabled for QK_K = 256
            // NOTE(review): qtype_has_amx_kernels() currently accepts only
            // Q4_0/Q4_1, so the list above overstates the enabled types.
            bool has_amx_kernels = qtype_has_amx_kernels(type) || (type == GGML_TYPE_F16);

            bool can_use_amx =
                is_contiguous_2d(src0) &&       // src0 must be contiguous
                is_contiguous_2d(src1) &&       // src1 must be contiguous
                src1->type == GGML_TYPE_F32 &&  // src1 must be float32
                has_amx_kernels &&              // with amx kernel impls
                ne0 % (TILE_N * 2) == 0;        // out_features is 32x

            return can_use_amx;
        }
        default:
            return false;
    }

    GGML_UNUSED(dev);
}

// Only buffers of the AMX buffer type are usable by this backend
// (identified by comparing the get_name function pointer).
static bool ggml_backend_amx_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    return buft->iface.get_name == ggml_backend_amx_buffer_type_get_name;

    GGML_UNUSED(dev);
}
|  | ||||
// Device vtable: host-buffer helpers and event support are not provided.
static const struct ggml_backend_device_i ggml_backend_amx_device_i = {
    /* .get_name             = */ ggml_backend_amx_device_get_name,
    /* .get_description      = */ ggml_backend_amx_device_get_description,
    /* .get_memory           = */ ggml_backend_amx_device_get_memory,
    /* .get_type             = */ ggml_backend_amx_device_get_type,
    /* .get_props            = */ ggml_backend_amx_device_get_props,
    /* .init_backend         = */ ggml_backend_amx_device_init,
    /* .get_buffer_type      = */ ggml_backend_amx_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ NULL,
    /* .supports_op          = */ ggml_backend_amx_device_supports_op,
    /* .supports_buft        = */ ggml_backend_amx_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
|  | ||||
| // backend reg interface | ||||
|  | ||||
| static const char * ggml_backend_amx_reg_get_name(ggml_backend_reg_t reg) { | ||||
|     return "AMX"; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
| static size_t ggml_backend_amx_reg_get_device_count(ggml_backend_reg_t reg) { | ||||
|     return 1; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
| static ggml_backend_dev_t ggml_backend_amx_reg_get_device(ggml_backend_reg_t reg, size_t index) { | ||||
|     GGML_ASSERT(index == 0); | ||||
|  | ||||
|     static ggml_backend_device ggml_backend_amx_device = { | ||||
|         /* .iface   = */ ggml_backend_amx_device_i, | ||||
|         /* .reg     = */ reg, | ||||
|         /* .context = */ nullptr, | ||||
|     }; | ||||
|  | ||||
|     return &ggml_backend_amx_device; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
|     GGML_UNUSED(index); | ||||
| } | ||||
|  | ||||
| static void * ggml_backend_amx_get_proc_address(ggml_backend_reg_t reg, const char * name) { | ||||
|     if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) { | ||||
|         return (void *)ggml_backend_amx_set_n_threads; | ||||
|     } | ||||
|     return NULL; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
|     GGML_UNUSED(name); | ||||
| } | ||||
|  | ||||
// Registry interface for the AMX backend: fixed name, a single device,
// and one custom proc address (ggml_backend_set_n_threads).
static const struct ggml_backend_reg_i ggml_backend_amx_reg_i = {
    /* .get_name         = */ ggml_backend_amx_reg_get_name,
    /* .get_device_count = */ ggml_backend_amx_reg_get_device_count,
    /* .get_device       = */ ggml_backend_amx_reg_get_device,
    /* .get_proc_address = */ ggml_backend_amx_get_proc_address,
};
|  | ||||
| ggml_backend_reg_t ggml_backend_amx_reg(void) { | ||||
|     static struct ggml_backend_reg ggml_backend_amx_reg = { | ||||
|         /* .iface   = */ ggml_backend_amx_reg_i, | ||||
|         /* .context = */ NULL, | ||||
|     }; | ||||
|  | ||||
|     return &ggml_backend_amx_reg; | ||||
| } | ||||
|  | ||||
| #else // if defined(__AMX_INT8__) | ||||
|  | ||||
// Stub for builds without AMX support (__AMX_INT8__ not defined):
// there is no AMX buffer type, so report none.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void) {
    return nullptr;
}
|  | ||||
// Stub: when AMX support is compiled out, no backend can be the AMX backend.
bool ggml_backend_is_amx(ggml_backend_t backend) {
    GGML_UNUSED(backend);
    return false;
}
|  | ||||
// Stub: initialization always fails when AMX support is compiled out.
// Callers must handle the nullptr result; a diagnostic is written to stderr.
ggml_backend_t ggml_backend_amx_init(void) {
    fprintf(stderr, "GGML is not compiled with AMX support!\n");
    return nullptr;
}
|  | ||||
| void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) { | ||||
|     fprintf(stderr, "GGML is not compiled with AMX support!\n"); | ||||
|  | ||||
|     GGML_UNUSED(backend_amx); | ||||
|     GGML_UNUSED(n_threads); | ||||
| } | ||||
|  | ||||
// Stub: no registry entry is available when AMX support is compiled out.
ggml_backend_reg_t ggml_backend_amx_reg(void) {
    return nullptr;
}
|  | ||||
| #endif | ||||
							
								
								
									
										2510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2510
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										17
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,17 @@ | ||||
#pragma once
#include "common.h"
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Number of bytes the AMX backend needs to store `tensor` in its buffer
// (may differ from ggml_nbytes — see ggml_backend_buffer_type_i.get_alloc_size).
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);

// Converts `size` bytes of weight data `data`, starting at `offset`, into the
// AMX backend's internal storage for `tensor`.
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

// Runs the AMX matrix-multiplication kernel for `dst`, using context `ctx`.
// NOTE(review): `ggml_backend_amx_context` appears without a `struct` tag, so
// this declaration only compiles where that type name is already visible
// (e.g. when included from C++) — confirm plain-C callers are not expected.
void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);

#ifdef __cplusplus
}
#endif
							
								
								
									
										255
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										255
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,255 @@ | ||||
#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef  __cplusplus
extern "C" {
#endif

    // Version of the backend interface described by this header. Dynamically
    // loaded backends must report the same value, or loading is rejected
    // (see load_backend in ggml-backend-reg.cpp).
    #define GGML_BACKEND_API_VERSION 1

    //
    // Backend buffer type
    //

    // Virtual table for a buffer type ("allocator"): how buffers of this type
    // are created and which alignment/size constraints they have.
    struct ggml_backend_buffer_type_i {
        const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
        // allocate a buffer of this type
        ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
        // tensor alignment
        size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
        // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
        size_t                (*get_max_size)  (ggml_backend_buffer_type_t buft);
        // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
        size_t                (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
        // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
        bool                  (*is_host)       (ggml_backend_buffer_type_t buft);
    };

    struct ggml_backend_buffer_type {
        struct ggml_backend_buffer_type_i  iface;
        ggml_backend_dev_t device;   // device this buffer type belongs to
        void * context;              // backend-specific state
    };
|  | ||||
    //
    // Backend buffer
    //

    // Virtual table for one allocated buffer: tensor data access plus optional
    // lifetime/initialization hooks.
    struct ggml_backend_buffer_i {
        // (optional) free the buffer
        void         (*free_buffer)  (ggml_backend_buffer_t buffer);
        // base address of the buffer
        void *       (*get_base)     (ggml_backend_buffer_t buffer);
        // (optional) initialize a tensor in the buffer (eg. add tensor extras)
        void         (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
        // tensor data access
        void         (*memset_tensor)(ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size);
        void         (*set_tensor)   (ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
        void         (*get_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
        // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
        bool         (*cpy_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
        // clear the entire buffer
        void         (*clear)        (ggml_backend_buffer_t buffer, uint8_t value);
        // (optional) reset any internal state due to tensor initialization, such as tensor extras
        void         (*reset)        (ggml_backend_buffer_t buffer);
    };

    struct ggml_backend_buffer {
        struct ggml_backend_buffer_i  iface;
        ggml_backend_buffer_type_t    buft;  // type this buffer was allocated from
        void * context;                      // backend-specific state
        size_t size;                         // size passed to ggml_backend_buffer_init
        enum ggml_backend_buffer_usage usage;
    };

    GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
                   ggml_backend_buffer_type_t buft,
            struct ggml_backend_buffer_i      iface,
                   void *                     context,
                   size_t                     size);

    // do not use directly, use ggml_backend_tensor_copy instead
    GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);

    // multi-buffer
    // buffer that contains a collection of buffers
    GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
    GGML_API bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
    GGML_API void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|  | ||||
    //
    // Backend (stream)
    //

    // Virtual table for a backend execution stream: tensor transfers, graph
    // computation, and optional async/event support.
    struct ggml_backend_i {
        const char * (*get_name)(ggml_backend_t backend);

        void (*free)(ggml_backend_t backend);

        // (optional) asynchronous tensor data access
        void (*set_tensor_async)(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
        void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
        bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);

        // (optional) complete all pending operations (required if the backend supports async operations)
        void (*synchronize)(ggml_backend_t backend);

        // (optional) graph plans (not used currently)
        // compute graph with a plan
        ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
        void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
        // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
        void                      (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
        // compute the graph with the plan
        enum ggml_status          (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

        // compute graph (always async if supported by the backend)
        enum ggml_status          (*graph_compute)     (ggml_backend_t backend, struct ggml_cgraph * cgraph);

        // (optional) event synchronization
        // record an event on this stream
        void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
        // wait for an event on on a different stream
        void (*event_wait)  (ggml_backend_t backend, ggml_backend_event_t event);
    };

    struct ggml_backend {
        ggml_guid_t guid;            // unique identifier for this backend implementation
        struct ggml_backend_i iface;
        ggml_backend_dev_t device;   // device this stream runs on
        void * context;              // backend-specific state
    };

    struct ggml_backend_event {
        struct ggml_backend_device * device;
        void * context;
    };
|  | ||||
    //
    // Backend device
    //

    // Note: if additional properties are needed, we should add a struct with all of them
    //       the current functions to obtain the properties can remain, since they are more convenient for often used properties
    struct ggml_backend_device_i {
        // device name: short identifier for this device, such as "CPU" or "CUDA0"
        const char * (*get_name)(ggml_backend_dev_t dev);

        // device description: short informative description of the device, could be the model name
        const char * (*get_description)(ggml_backend_dev_t dev);

        // device memory in bytes
        void         (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);

        // device type
        enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);

        // device properties
        void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);

        // backend (stream) initialization
        ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);

        // preferred buffer type
        ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);

        // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
        ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);

        // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
        ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);

        // check if the backend can compute an operation
        bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

        // check if the backend can use tensors allocated in a buffer type
        bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);

        // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
        // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
        bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

        // (optional) event synchronization
        ggml_backend_event_t (*event_new)         (ggml_backend_dev_t dev);
        void                 (*event_free)        (ggml_backend_dev_t dev, ggml_backend_event_t event);
        void                 (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event);
    };

    struct ggml_backend_device {
        struct ggml_backend_device_i iface;
        ggml_backend_reg_t reg;   // registry that owns this device
        void * context;           // backend-specific state
    };
|  | ||||
    //
    // Backend (reg)
    //

    // Virtual table for a backend registry: enumerates the devices one backend
    // provides and optionally exposes extra entry points by name.
    struct ggml_backend_reg_i {
        const char * (*get_name)(ggml_backend_reg_t reg);

        // enumerate available devices
        size_t             (*get_device_count)(ggml_backend_reg_t reg);
        ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index);

        // (optional) get a pointer to a function in the backend
        // backends can add custom functions that are not part of the standard ggml-backend interface
        void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
    };

    struct ggml_backend_reg {
        int api_version; // initialize to GGML_BACKEND_API_VERSION
        struct ggml_backend_reg_i iface;
        void * context;
    };

    // Internal backend registry API
    GGML_API void ggml_backend_register(ggml_backend_reg_t reg);

    // Add backend dynamic loading support to the backend

    // Initialize the backend
    typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
    // Optional: obtain a score for the backend based on the system configuration
    // Higher scores are preferred, 0 means the backend is not supported in the current system
    typedef int                (*ggml_backend_score_t)(void);
|  | ||||
// GGML_BACKEND_DL_IMPL / GGML_BACKEND_DL_SCORE_IMPL define the exported entry
// points (ggml_backend_init / ggml_backend_score) that the registry resolves
// with dl_get_sym when a backend is built as a dynamic library. Without
// GGML_BACKEND_DL they expand to nothing. The C++ variant adds the extern "C"
// wrapper so the symbols are unmangled.
#ifdef GGML_BACKEND_DL
#    ifdef __cplusplus
#        define GGML_BACKEND_DL_IMPL(reg_fn)                             \
            extern "C" {                                                 \
            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
            }                                                            \
            ggml_backend_reg_t ggml_backend_init(void) {                 \
                return reg_fn();                                         \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)       \
            extern "C" {                                   \
            GGML_BACKEND_API int ggml_backend_score(void); \
            }                                              \
            int ggml_backend_score(void) {                 \
                return score_fn();                         \
            }
#    else
#        define GGML_BACKEND_DL_IMPL(reg_fn)                              \
            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);  \
            ggml_backend_reg_t                  ggml_backend_init(void) { \
                return reg_fn();                                          \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)        \
            GGML_BACKEND_API int ggml_backend_score(void);  \
            int                  ggml_backend_score(void) { \
                return score_fn();                          \
            }
#    endif
#else
#    define GGML_BACKEND_DL_IMPL(reg_fn)
#    define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
#endif

#ifdef  __cplusplus
}
#endif
							
								
								
									
										582
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										582
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,582 @@ | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-impl.h" | ||||
| #include <algorithm> | ||||
| #include <codecvt> | ||||
| #include <cstring> | ||||
| #include <filesystem> | ||||
| #include <locale> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <type_traits> | ||||
| #include <vector> | ||||
|  | ||||
| #ifdef _WIN32 | ||||
| #    define WIN32_LEAN_AND_MEAN | ||||
| #    ifndef NOMINMAX | ||||
| #        define NOMINMAX | ||||
| #    endif | ||||
| #    include <windows.h> | ||||
| #elif defined(__APPLE__) | ||||
| #    include <mach-o/dyld.h> | ||||
| #    include <dlfcn.h> | ||||
| #else | ||||
| #    include <dlfcn.h> | ||||
| #    include <unistd.h> | ||||
| #endif | ||||
|  | ||||
| // Backend registry | ||||
| #ifdef GGML_USE_CPU | ||||
| #include "ggml-cpu.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_CUDA | ||||
| #include "ggml-cuda.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_METAL | ||||
| #include "ggml-metal.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_SYCL | ||||
| #include "ggml-sycl.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_VULKAN | ||||
| #include "ggml-vulkan.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_OPENCL | ||||
| #include "ggml-opencl.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_BLAS | ||||
| #include "ggml-blas.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_RPC | ||||
| #include "ggml-rpc.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_CANN | ||||
| #include "ggml-cann.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GGML_USE_KOMPUTE | ||||
| #include "ggml-kompute.h" | ||||
| #endif | ||||
|  | ||||
| // disable C++17 deprecation warning for std::codecvt_utf8 | ||||
| #if defined(__clang__) | ||||
| #    pragma clang diagnostic push | ||||
| #    pragma clang diagnostic ignored "-Wdeprecated-declarations" | ||||
| #endif | ||||
|  | ||||
// Convert a UTF-8 string to a UTF-16 wide string.
// std::wstring_convert/std::codecvt_utf8_utf16 are deprecated since C++17;
// the pragmas surrounding these helpers silence that warning.
static std::wstring utf8_to_utf16(const std::string & str) {
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().from_bytes(str);
}
|  | ||||
// Convert a UTF-16 wide string back to UTF-8 (inverse of utf8_to_utf16).
static std::string utf16_to_utf8(const std::wstring & str) {
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(str);
}
|  | ||||
| #if defined(__clang__) | ||||
| #    pragma clang diagnostic pop | ||||
| #endif | ||||
|  | ||||
// Minimal cross-platform dynamic-library abstraction used by the registry.
// Paths are kept as wide strings so the Windows branch can call the W-APIs
// directly; the POSIX branch converts back to UTF-8.
#ifdef _WIN32

using dl_handle = std::remove_pointer_t<HMODULE>;

struct dl_handle_deleter {
    void operator()(HMODULE handle) {
        FreeLibrary(handle);
    }
};

static dl_handle * dl_load_library(const std::wstring & path) {
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    HMODULE handle = LoadLibraryW(path.c_str());

    // restore the previous error mode
    SetErrorMode(old_mode);

    return handle;
}

static void * dl_get_sym(dl_handle * handle, const char * name) {
    // same error-dialog suppression as dl_load_library
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    void * p = (void *) GetProcAddress(handle, name);

    SetErrorMode(old_mode);

    return p;
}

#else

using dl_handle = void;

struct dl_handle_deleter {
    void operator()(void * handle) {
        dlclose(handle);
    }
};

static void * dl_load_library(const std::wstring & path) {
    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);

    return handle;
}

static void * dl_get_sym(dl_handle * handle, const char * name) {
    return dlsym(handle, name);
}

#endif
|  | ||||
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;

// One registered backend. `handle` is non-null only for backends loaded from
// a dynamic library; it keeps the library mapped for the entry's lifetime.
struct ggml_backend_reg_entry {
    ggml_backend_reg_t reg;
    dl_handle_ptr handle;
};
|  | ||||
// Process-wide registry of backends and their devices (see get_reg()).
struct ggml_backend_registry {
    // Registered backends, in registration order.
    std::vector<ggml_backend_reg_entry> backends;
    // All devices from all registered backends, in registration order.
    std::vector<ggml_backend_dev_t> devices;

    // Registers every backend enabled at compile time. The CPU backend is
    // registered last, so its devices come after any accelerator devices.
    ggml_backend_registry() {
#ifdef GGML_USE_CUDA
        register_backend(ggml_backend_cuda_reg());
#endif
#ifdef GGML_USE_METAL
        register_backend(ggml_backend_metal_reg());
#endif
#ifdef GGML_USE_SYCL
        register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
        register_backend(ggml_backend_vk_reg());
#endif
#ifdef GGML_USE_OPENCL
        register_backend(ggml_backend_opencl_reg());
#endif
#ifdef GGML_USE_CANN
        register_backend(ggml_backend_cann_reg());
#endif
#ifdef GGML_USE_BLAS
        register_backend(ggml_backend_blas_reg());
#endif
#ifdef GGML_USE_RPC
        register_backend(ggml_backend_rpc_reg());
#endif
#ifdef GGML_USE_KOMPUTE
        register_backend(ggml_backend_kompute_reg());
#endif
#ifdef GGML_USE_CPU
        register_backend(ggml_backend_cpu_reg());
#endif
    }

    ~ggml_backend_registry() {
        // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
        // since backend threads may still be running and accessing resources from the dynamic library
        for (auto & entry : backends) {
            if (entry.handle) {
                entry.handle.release(); // NOLINT
            }
        }
    }

    // Adds `reg` and all of its devices to the registry; a NULL reg is ignored.
    // `handle` (if any) keeps the backing dynamic library loaded.
    void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
        if (!reg) {
            return;
        }

#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
            __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
#endif
        backends.push_back({ reg, std::move(handle) });
        for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
            register_device(ggml_backend_reg_dev_get(reg, i));
        }
    }

    // Appends a device to the device list (no deduplication).
    void register_device(ggml_backend_dev_t device) {
#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
        devices.push_back(device);
    }

    // Loads a backend shared library from `path` and registers it.
    // Returns NULL (logging unless `silent`) when the library cannot be opened,
    // reports a score of 0 for this system, lacks a ggml_backend_init symbol,
    // or was built against a different GGML_BACKEND_API_VERSION.
    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
        dl_handle_ptr handle { dl_load_library(path) };
        if (!handle) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }

        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
        if (score_fn && score_fn() == 0) {
            if (!silent) {
                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }

        auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
        if (!backend_init_fn) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }

        ggml_backend_reg_t reg = backend_init_fn();
        if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
            if (!silent) {
                if (!reg) {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
                } else {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
                }
            }
            return nullptr;
        }

        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());

        register_backend(reg, std::move(handle));

        return reg;
    }

    // Removes `reg` and all of its devices from the registry.
    // Note: dropping the entry destroys its dl_handle_ptr, which unloads the
    // library (subject to the FIXME in the destructor above).
    void unload_backend(ggml_backend_reg_t reg, bool silent) {
        auto it = std::find_if(backends.begin(), backends.end(),
                               [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });

        if (it == backends.end()) {
            if (!silent) {
                GGML_LOG_ERROR("%s: backend not found\n", __func__);
            }
            return;
        }

        if (!silent) {
            GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
        }

        // remove devices
        devices.erase(
            std::remove_if(devices.begin(), devices.end(),
                            [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
            devices.end());

        // remove backend
        backends.erase(it);
    }
};
|  | ||||
// Meyers singleton: the registry is constructed on first use and shared by
// every function below.
static ggml_backend_registry & get_reg() {
    static ggml_backend_registry reg;
    return reg;
}
|  | ||||
// Internal API
// Register a statically linked backend and all of its devices.
void ggml_backend_register(ggml_backend_reg_t reg) {
    get_reg().register_backend(reg);
}

// Register an additional device directly (without a backend registry entry).
void ggml_backend_device_register(ggml_backend_dev_t device) {
    get_reg().register_device(device);
}
|  | ||||
| // Backend (reg) enumeration | ||||
// Backend (reg) enumeration helper: case-insensitive ASCII string equality.
// Both arguments must be non-NULL, NUL-terminated strings.
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        // FIX: cast to unsigned char before std::tolower — passing a negative
        // value (possible for bytes >= 0x80 when char is signed) is undefined
        // behavior (CERT STR37-C).
        if (std::tolower((unsigned char) *a) != std::tolower((unsigned char) *b)) {
            return false;
        }
    }
    // Equal only if both strings ended at the same position.
    return *a == *b;
}
|  | ||||
// Number of registered backend registries.
size_t ggml_backend_reg_count() {
    return get_reg().backends.size();
}

// Registry at `index`; asserts the index is in range.
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_reg_count());
    return get_reg().backends[index].reg;
}

// Linear search for a registry by case-insensitive name; NULL if not found.
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        if (striequals(ggml_backend_reg_name(reg), name)) {
            return reg;
        }
    }
    return nullptr;
}
|  | ||||
// Device enumeration
// Number of devices across all registered backends.
size_t ggml_backend_dev_count() {
    return get_reg().devices.size();
}

// Device at `index`; asserts the index is in range.
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_dev_count());
    return get_reg().devices[index];
}

// Linear search for a device by case-insensitive name; NULL if not found.
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (striequals(ggml_backend_dev_name(dev), name)) {
            return dev;
        }
    }
    return nullptr;
}

// First device of the given type in registration order; NULL if none.
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev) == type) {
            return dev;
        }
    }
    return nullptr;
}
|  | ||||
| // Convenience functions | ||||
| ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) { | ||||
|     ggml_backend_dev_t dev = ggml_backend_dev_by_name(name); | ||||
|     if (!dev) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     return ggml_backend_dev_init(dev, params); | ||||
| } | ||||
|  | ||||
| ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) { | ||||
|     ggml_backend_dev_t dev = ggml_backend_dev_by_type(type); | ||||
|     if (!dev) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     return ggml_backend_dev_init(dev, params); | ||||
| } | ||||
|  | ||||
| ggml_backend_t ggml_backend_init_best(void) { | ||||
|     ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU); | ||||
|     if (!dev) { | ||||
|         dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); | ||||
|     } | ||||
|     if (!dev) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     return ggml_backend_dev_init(dev, nullptr); | ||||
| } | ||||
|  | ||||
| // Dynamic loading | ||||
| ggml_backend_reg_t ggml_backend_load(const char * path) { | ||||
|     return get_reg().load_backend(utf8_to_utf16(path), false); | ||||
| } | ||||
|  | ||||
// Unregisters a dynamically loaded backend; the `true` flag asks the registry
// to also release the library handle (see registry::unload_backend).
void ggml_backend_unload(ggml_backend_reg_t reg) {
    get_reg().unload_backend(reg, true);
}
|  | ||||
// Returns the directory containing the current executable, including a
// trailing path separator, as a wide string. Returns an empty string on
// unsupported platforms or on failure (Windows).
static std::wstring get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        // _NSGetExecutablePath returns non-zero when the buffer is too small
        // and stores the required size in `size`; retry with a bigger buffer
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return utf8_to_utf16(base_path + "/");
#elif defined(__linux__) || defined(__FreeBSD__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
#    if defined(__linux__)
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
#    elif defined(__FreeBSD__)
        ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
#    endif
        if (len == -1) {
            break;
        }
        // readlink does not NUL-terminate and truncates silently:
        // len == path.size() may mean truncation, so grow and retry
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return utf8_to_utf16(base_path + "/");
#elif defined(_WIN32)
    // NOTE(review): paths longer than MAX_PATH are truncated here
    // (GetModuleFileNameW returns len == buffer size on truncation) — confirm
    // whether long-path support is needed.
    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
    if (len == 0) {
        return {};
    }
    std::wstring base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + L"\\";
#else
    return {};
#endif
}
|  | ||||
// Filename prefix of backend shared libraries:
// "ggml-" on Windows, "libggml-" elsewhere.
static std::wstring backend_filename_prefix() {
#ifdef _WIN32
    return L"ggml-";
#else
    return L"libggml-";
#endif
}
|  | ||||
// Filename suffix of backend shared libraries.
// NOTE(review): ".so" is used for all non-Windows platforms, including macOS
// (no ".dylib" case) — confirm the backends are built with that suffix there.
static std::wstring backend_filename_suffix() {
#ifdef _WIN32
    return L".dll";
#else
    return L".so";
#endif
}
|  | ||||
// Platform path separator as a wide string.
static std::wstring path_separator() {
#ifdef _WIN32
    return L"\\";
#else
    return L"/";
#endif
}
|  | ||||
// Finds and loads the best dynamic library for backend `name`.
// Variant libraries named [lib]ggml-<name>-<variant>.[so|dll] export a
// ggml_backend_score() function; the highest positive score wins. If no
// variant scores above zero, the unsuffixed base library [lib]ggml-<name> is
// tried instead. Returns nullptr when nothing loadable is found.
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
    // TODO: search system paths
    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
    std::vector<std::wstring> search_paths;
    if (user_search_path == nullptr) {
        // default search order: current directory, then the executable's directory
        search_paths.push_back(L"." + path_separator());
        search_paths.push_back(get_executable_path());
    } else {
        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
    }

    int best_score = 0;
    std::wstring best_path;

    namespace fs = std::filesystem;
    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            continue;
        }
        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
        for (const auto & entry : dir_it) {
            if (entry.is_regular_file()) {
                std::wstring filename = entry.path().filename().wstring();
                std::wstring ext = entry.path().extension().wstring();
                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
                    dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
                    if (!handle && !silent) {
                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
                    }
                    if (handle) {
                        // candidate variants report their suitability for the
                        // host through an exported ggml_backend_score symbol
                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                        if (score_fn) {
                            int s = score_fn();
#ifndef NDEBUG
                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
#endif
                            if (s > best_score) {
                                best_score = s;
                                best_path = entry.path().wstring();
                            }
                        } else {
                            if (!silent) {
                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
                            }
                        }
                    }
                }
            }
        }
    }

    if (best_score == 0) {
        // no scored variant was usable - try to load the base backend instead
        for (const auto & search_path : search_paths) {
            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
            if (fs::exists(path)) {
                return get_reg().load_backend(path, silent);
            }
        }
        return nullptr;
    }

    return get_reg().load_backend(best_path, silent);
}
|  | ||||
// Loads the best available dynamic build of every known backend from the
// default search paths (current directory and the executable's directory).
void ggml_backend_load_all() {
    ggml_backend_load_all_from_path(nullptr);
}
|  | ||||
| void ggml_backend_load_all_from_path(const char * dir_path) { | ||||
| #ifdef NDEBUG | ||||
|     bool silent = true; | ||||
| #else | ||||
|     bool silent = false; | ||||
| #endif | ||||
|  | ||||
|     ggml_backend_load_best("blas", silent, dir_path); | ||||
|     ggml_backend_load_best("cann", silent, dir_path); | ||||
|     ggml_backend_load_best("cuda", silent, dir_path); | ||||
|     ggml_backend_load_best("hip", silent, dir_path); | ||||
|     ggml_backend_load_best("kompute", silent, dir_path); | ||||
|     ggml_backend_load_best("metal", silent, dir_path); | ||||
|     ggml_backend_load_best("rpc", silent, dir_path); | ||||
|     ggml_backend_load_best("sycl", silent, dir_path); | ||||
|     ggml_backend_load_best("vulkan", silent, dir_path); | ||||
|     ggml_backend_load_best("opencl", silent, dir_path); | ||||
|     ggml_backend_load_best("musa", silent, dir_path); | ||||
|     ggml_backend_load_best("cpu", silent, dir_path); | ||||
|     // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend | ||||
|     const char * backend_path = std::getenv("GGML_BACKEND_PATH"); | ||||
|     if (backend_path) { | ||||
|         ggml_backend_load(backend_path); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										2002
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2002
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										87
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
if (GGML_STATIC)
    set(BLA_STATIC ON)
endif()
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
#    set(BLA_SIZEOF_INTEGER 8)
#endif()

set(BLA_VENDOR ${GGML_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
    message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

    ggml_add_backend_library(ggml-blas
                             ggml-blas.cpp
                            )

    if (${GGML_BLAS_VENDOR} MATCHES "Apple")
        add_compile_definitions(ACCELERATE_NEW_LAPACK)
        add_compile_definitions(ACCELERATE_LAPACK_ILP64)
        add_compile_definitions(GGML_BLAS_USE_ACCELERATE)
    elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
        # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
        # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
        find_package(PkgConfig REQUIRED)
        if (${GGML_BLAS_VENDOR} MATCHES "Generic")
            pkg_check_modules(DepBLAS blas)
        elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
            # As of openblas v0.3.22, the 64-bit is named openblas64.pc
            pkg_check_modules(DepBLAS openblas64)
            if (NOT DepBLAS_FOUND)
                pkg_check_modules(DepBLAS openblas)
            endif()
        elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
            add_compile_definitions(GGML_BLAS_USE_BLIS)
            pkg_check_modules(DepBLAS blis)
        elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
            pkg_check_modules(DepBLAS blas-atlas)
        elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
            pkg_check_modules(DepBLAS flexiblas_api)
        elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
            add_compile_definitions(GGML_BLAS_USE_MKL)
            # all Intel* libraries share the same include path
            pkg_check_modules(DepBLAS mkl-sdl)
        elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
            # this doesn't provide pkg-config
            # suggest to assign BLAS_INCLUDE_DIRS on your own
            if ("${NVHPC_VERSION}" STREQUAL "")
                message(WARNING "Better to set NVHPC_VERSION")
            else()
                set(DepBLAS_FOUND ON)
                set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
            endif()
        endif()
        if (DepBLAS_FOUND)
            set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
        else()
            message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
            " detected by pkgconfig, trying to find cblas.h from possible paths...")
            find_path(BLAS_INCLUDE_DIRS
                NAMES cblas.h
                HINTS
                    /usr/include
                    /usr/local/include
                    /usr/include/openblas
                    /opt/homebrew/opt/openblas/include
                    /usr/local/opt/openblas/include
                    /usr/include/x86_64-linux-gnu/openblas/include
            )
        endif()
    endif()

    message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

    target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})

    if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
        add_compile_definitions(GGML_BLAS_USE_MKL)
    endif()

    target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
    target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
else()
    # FATAL_ERROR stops configuration here; "ERROR" is not a valid message()
    # mode and would have been printed as plain text while configuration
    # continued, leading to a confusing failure at build time instead.
    message(FATAL_ERROR "BLAS not found, please refer to "
                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
                        " to set correct GGML_BLAS_VENDOR")
endif()
							
								
								
									
										517
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										517
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,517 @@ | ||||
| #include "ggml-impl.h" | ||||
| #include "ggml-blas.h" | ||||
| #include "ggml-backend-impl.h" | ||||
|  | ||||
| #include <future> | ||||
| #include <vector> | ||||
| #include <cstring> | ||||
|  | ||||
| #if defined(GGML_BLAS_USE_ACCELERATE) | ||||
| #   include <Accelerate/Accelerate.h> | ||||
| #elif defined(GGML_BLAS_USE_MKL) | ||||
| #   include <mkl.h> | ||||
| #elif defined(GGML_BLAS_USE_BLIS) | ||||
| #   include <blis.h> | ||||
| #elif defined(GGML_BLAS_USE_NVPL) | ||||
| #   include <nvpl_blas.h> | ||||
| #else | ||||
| #   include <cblas.h> | ||||
| #endif | ||||
|  | ||||
// Per-instance state of the BLAS backend.
struct ggml_backend_blas_context {
    int n_threads = GGML_DEFAULT_N_THREADS;  // threads used for src0 f32 conversion and BLAS libraries that expose a thread setter
    std::unique_ptr<char[]> work_data;       // scratch buffer holding src0 converted to f32
    size_t work_size = 0;                    // current capacity of work_data, in bytes
#ifndef GGML_USE_OPENMP
    std::vector<std::future<void>> tasks;    // pending std::async conversion tasks (fallback when OpenMP is unavailable)
#endif
};
|  | ||||
// Matrix multiplication via cblas_sgemm: dst = src0 * src1 (ggml semantics).
// Non-f32 src0 is first converted to f32 into ctx->work_data (parallelized
// with OpenMP or std::async); src0 is broadcast over the outer batch
// dimensions of src1 using factors r2/r3, one sgemm call per batch plane.
static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];
    const struct ggml_tensor * src1 = dst->src[1];

    GGML_TENSOR_BINARY_OP_LOCALS

    const enum ggml_type type = src0->type;

    GGML_ASSERT(ne0 == ne01);
    GGML_ASSERT(ne1 == ne11);
    GGML_ASSERT(ne2 == ne12);
    GGML_ASSERT(ne3 == ne13);

    // we don't support permuted src0 or src1
    GGML_ASSERT(nb00 == ggml_type_size(type));
    GGML_ASSERT(nb10 == ggml_type_size(src1->type));

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 == sizeof(float));
    GGML_ASSERT(nb0 <= nb1);
    GGML_ASSERT(nb1 <= nb2);
    GGML_ASSERT(nb2 <= nb3);

    // broadcast factors
    const int64_t r2 = ne12/ne02;
    const int64_t r3 = ne13/ne03;

    // one f32 plane of src0; conversion scratch is only needed for non-f32 src0
    const int64_t ne_plane      = ne01*ne00;
    const size_t  desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);

    // grow (never shrink) the scratch buffer
    if (ctx->work_size < desired_wsize) {
        ctx->work_data.reset(new char[desired_wsize]);
        ctx->work_size = desired_wsize;
    }
    void * wdata = ctx->work_data.get();

    // convert src0 to float
    if (type != GGML_TYPE_F32) {
        const auto * type_traits = ggml_get_type_traits(type);
        ggml_to_float_t const to_float = type_traits->to_float;

        for (int64_t i03 = 0; i03 < ne03; i03++) {
            for (int64_t i02 = 0; i02 < ne02; i02++) {
                const void  *       x      = (char *)  src0->data + i02*nb02          + i03*nb03;
                      float * const wplane = (float *) wdata      + i02*ne_plane      + i03*ne02*ne_plane;

                // cap the thread count so each thread converts a meaningful
                // amount of data (at least ~min_cols_per_thread elements)
                const int min_cols_per_thread = 4096;
                const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1);
                const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1);

#ifdef GGML_USE_OPENMP
                #pragma omp parallel for num_threads(n_threads)
                for (int64_t i01 = 0; i01 < ne01; i01++) {
                    to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                }
#else
                // split rows evenly across n_threads; tasks 1..n-1 run async
                for (int i = 1; i < n_threads; i++) {
                    const int64_t start =       i*ne01/n_threads;
                    const int64_t end   = (i + 1)*ne01/n_threads;
                    if (start < end) {
                        ctx->tasks.push_back(std::async(std::launch::async, [=]() {
                            for (int64_t i01 = start; i01 < end; i01++) {
                                to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                            }
                        }));
                    }
                }
                {
                    // reuse the current thread for the first task
                    const int64_t start = 0;
                    const int64_t end   = ne01/n_threads;
                    for (int64_t i01 = start; i01 < end; i01++) {
                        to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
                    }
                }
#endif
            }
        }

#ifndef GGML_USE_OPENMP
        // wait for all tasks to finish
        for (auto & task : ctx->tasks) {
            task.get();
        }
        ctx->tasks.clear();
#endif
    }

#if defined(OPENBLAS_VERSION)
    openblas_set_num_threads(ctx->n_threads);
#endif

#if defined(GGML_BLAS_USE_BLIS)
    bli_thread_set_num_threads(ctx->n_threads);
#endif

#if defined(GGML_BLAS_USE_NVPL)
    nvpl_blas_set_num_threads(ctx->n_threads);
#endif

    // one sgemm per (i12, i13) batch plane; src0 planes repeat via r2/r3
    for (int64_t i13 = 0; i13 < ne13; i13++) {
        for (int64_t i12 = 0; i12 < ne12; i12++) {
            const int64_t i03 = i13/r3;
            const int64_t i02 = i12/r2;

            const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
            const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
                  float * d = (float *) ((char *)  dst->data + i12*nb2  + i13*nb3);

            if (type != GGML_TYPE_F32) {
                // use the converted f32 copy instead of the raw src0 data
                x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
            }

            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                        ne1, ne01, ne10,
                        1.0f,   y, ne10,
                                x, ne00,
                        0.0f,   d, ne01);
        }
    }
}
|  | ||||
// Outer product of two matrices via a single cblas_sgemm call.
// See the comment block below for the mapping between ggml operands and the
// sgemm a/b/c arguments; a transposed src1 is handled by flipping transposeA.
static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];
    const struct ggml_tensor * src1 = dst->src[1];

    GGML_TENSOR_BINARY_OP_LOCALS

    GGML_ASSERT(ne0  == ne00);
    GGML_ASSERT(ne1  == ne10);
    GGML_ASSERT(ne2  == ne02);
    GGML_ASSERT(ne02 == ne12);
    GGML_ASSERT(ne3  == ne13);
    GGML_ASSERT(ne03 == ne13);

    // we don't support permuted src0 or src1
    GGML_ASSERT(nb00 == sizeof(float));

    // dst cannot be transposed or permuted
    GGML_ASSERT(nb0 == sizeof(float));
    // GGML_ASSERT(nb0 <= nb1);
    // GGML_ASSERT(nb1 <= nb2);
    // GGML_ASSERT(nb2 <= nb3);

    // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
    // src0: (k,n)
    // src1: (k,m)
    // dst:  (m,n)
    //
    // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
    // Also expressed as (major,minor)
    // a: (m,k): so src1 transposed
    // b: (k,n): so src0
    // c: (m,n)
    //
    // However, if ggml_is_transposed(src1) is true, then
    // src1->data already contains a transposed version, so sgemm mustn't
    // transpose it further.

    int n = src0->ne[0];
    int k = src0->ne[1];
    int m = src1->ne[0];

    CBLAS_TRANSPOSE transposeA;
    int lda;

    if (!ggml_is_transposed(src1)) {
        transposeA = CblasTrans;
        lda = m;
    } else {
        transposeA = CblasNoTrans;
        lda = k;
    }

    float * a = (float *) ((char *) src1->data);
    float * b = (float *) ((char *) src0->data);
    float * c = (float *) ((char *) dst->data);

    cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);

    GGML_UNUSED(ctx);
}
|  | ||||
| // backend interface | ||||
|  | ||||
| static const char * ggml_backend_blas_get_name(ggml_backend_t backend) { | ||||
|     return "BLAS"; | ||||
|  | ||||
|     GGML_UNUSED(backend); | ||||
| } | ||||
|  | ||||
| static void ggml_backend_blas_free(ggml_backend_t backend) { | ||||
|     ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context; | ||||
|     delete ctx; | ||||
|     delete backend; | ||||
| } | ||||
|  | ||||
// Executes a compute graph node by node. Only MUL_MAT and OUT_PROD perform
// work in this backend; view-like ops are no-ops. Aborts on any other op
// (the scheduler should have filtered those out via supports_op).
static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        switch (node->op) {
            case GGML_OP_MUL_MAT:
                ggml_backend_blas_mul_mat(ctx, node);
                break;

            case GGML_OP_OUT_PROD:
                ggml_backend_blas_out_prod(ctx, node);
                break;

            // view-like ops: no data movement or computation required
            case GGML_OP_NONE:
            case GGML_OP_RESHAPE:
            case GGML_OP_VIEW:
            case GGML_OP_PERMUTE:
            case GGML_OP_TRANSPOSE:
                break;

            default:
                GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
        }
    }

    return GGML_STATUS_SUCCESS;

    GGML_UNUSED(backend);
}
|  | ||||
// Backend vtable: the BLAS backend computes synchronously and has no graph
// plans or events, so only name/free/graph_compute are implemented.
static struct ggml_backend_i blas_backend_i = {
    /* .get_name                = */ ggml_backend_blas_get_name,
    /* .free                    = */ ggml_backend_blas_free,
    /* .set_tensor_async        = */ NULL,
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL,
    /* .graph_plan_create       = */ NULL,
    /* .graph_plan_free         = */ NULL,
    /* .graph_plan_update       = */ NULL,
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_blas_graph_compute,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
};
|  | ||||
// Stable unique identifier of the BLAS backend; used by ggml_backend_is_blas
// to recognize instances. Must never change between builds.
static ggml_guid_t ggml_backend_blas_guid(void) {
    static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
    return &guid;
}
|  | ||||
// Creates a new BLAS backend instance. The returned backend (and its context)
// is freed by ggml_backend_blas_free via the vtable.
ggml_backend_t ggml_backend_blas_init(void) {
    ggml_backend_blas_context * ctx = new ggml_backend_blas_context;

    ggml_backend_t backend = new ggml_backend {
        /* .guid      = */ ggml_backend_blas_guid(),
        /* .interface = */ blas_backend_i,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
        /* .context   = */ ctx,
    };

    // warn when the BLAS library's threading model does not match ggml's
    // OpenMP usage (the two thread pools are configured independently)
#if defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP)
    if (openblas_get_parallel() != OPENBLAS_OPENMP) {
        GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
    }
#endif

#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
    GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
#endif

    return backend;
}
|  | ||||
| bool ggml_backend_is_blas(ggml_backend_t backend) { | ||||
|     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid()); | ||||
| } | ||||
|  | ||||
| void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) { | ||||
|     GGML_ASSERT(ggml_backend_is_blas(backend_blas)); | ||||
|  | ||||
|     ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context; | ||||
|     ctx->n_threads = n_threads; | ||||
| } | ||||
|  | ||||
| // device interface | ||||
|  | ||||
| static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) { | ||||
|     return "BLAS"; | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
// Human-readable description: the name of the BLAS implementation selected at
// compile time (falls back to the generic "BLAS").
static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
    #if defined(GGML_BLAS_USE_ACCELERATE)
        return "Accelerate";
    #elif defined(GGML_BLAS_USE_MKL)
        return "MKL";
    #elif defined(GGML_BLAS_USE_BLIS)
        return "BLIS";
    #elif defined(GGML_BLAS_USE_NVPL)
        return "NVPL";
    #elif defined(OPENBLAS_VERSION)
        return "OpenBLAS";
    #else
        return "BLAS";
    #endif

    GGML_UNUSED(dev);
}
|  | ||||
| static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { | ||||
|     // TODO | ||||
|     *free = 0; | ||||
|     *total = 0; | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
// BLAS is reported as an accelerator device; it operates on CPU buffers
// (see ggml_backend_blas_device_get_buffer_type below).
static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
    return GGML_BACKEND_DEVICE_TYPE_ACCEL;

    GGML_UNUSED(dev);
}
|  | ||||
// Fills in the device property struct: name/description/type/memory plus
// capability flags. Only buffer_from_host_ptr is supported (the backend can
// wrap existing host memory); there is no async, host-buffer, or event support.
static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_blas_device_get_name(dev);
    props->description = ggml_backend_blas_device_get_description(dev);
    props->type        = ggml_backend_blas_device_get_type(dev);
    ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = {
        /* .async                 = */ false,
        /* .host_buffer           = */ false,
        /* .buffer_from_host_ptr  = */ true,
        /* .events                = */ false,
    };
}
|  | ||||
// Device hook: creates a backend instance; `params` is unused.
static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_blas_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}
|  | ||||
// The BLAS backend computes directly on CPU buffers.
static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_cpu_buffer_type();

    GGML_UNUSED(dev);
}
|  | ||||
// Wraps existing host memory as a CPU buffer without copying.
static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
    return ggml_backend_cpu_buffer_from_ptr(ptr, size);

    GGML_UNUSED(dev);
    GGML_UNUSED(max_tensor_size);
}
|  | ||||
| static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { | ||||
|     const struct ggml_tensor * src0 = op->src[0]; | ||||
|     const struct ggml_tensor * src1 = op->src[1]; | ||||
|  | ||||
|     switch (op->op) { | ||||
|         case GGML_OP_NONE: | ||||
|         case GGML_OP_RESHAPE: | ||||
|         case GGML_OP_VIEW: | ||||
|         case GGML_OP_PERMUTE: | ||||
|         case GGML_OP_TRANSPOSE: | ||||
|             return true; | ||||
|  | ||||
|         case GGML_OP_MUL_MAT: | ||||
|         { | ||||
|             // BLAS usually is only faster for large matrices | ||||
|             const struct ggml_tensor * src0 = op->src[0]; | ||||
|             const struct ggml_tensor * src1 = op->src[1]; | ||||
|  | ||||
|             const int64_t ne10 = src1->ne[0]; | ||||
|  | ||||
|             const int64_t ne0 = op->ne[0]; | ||||
|             const int64_t ne1 = op->ne[1]; | ||||
|  | ||||
|             // TODO: find the optimal value | ||||
|             const int64_t min_batch = 32; | ||||
|  | ||||
|             return ggml_is_contiguous(src0) && | ||||
|                    ggml_is_contiguous(src1) && | ||||
|                    src1->type == GGML_TYPE_F32 && | ||||
|                    (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && | ||||
|                    (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); | ||||
|         } | ||||
|  | ||||
|         case GGML_OP_OUT_PROD: | ||||
|             return op->src[0]->type == GGML_TYPE_F32 && | ||||
|                    op->src[1]->type == GGML_TYPE_F32 && | ||||
|                    ggml_is_matrix(src0) && | ||||
|                    ggml_is_matrix(src1) && | ||||
|                    ggml_is_contiguous(src0) && | ||||
|                    (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) && | ||||
|                    (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); | ||||
|  | ||||
|         default: | ||||
|             return false; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
| static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { | ||||
|     return ggml_backend_buft_is_host(buft); | ||||
|  | ||||
|     GGML_UNUSED(dev); | ||||
| } | ||||
|  | ||||
// Device vtable for the BLAS backend. Entries left NULL (host buffer type,
// offload_op, the event callbacks) are features this backend does not provide.
static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
    /* .get_name             = */ ggml_backend_blas_device_get_name,
    /* .get_description      = */ ggml_backend_blas_device_get_description,
    /* .get_memory           = */ ggml_backend_blas_device_get_memory,
    /* .get_type             = */ ggml_backend_blas_device_get_type,
    /* .get_props            = */ ggml_backend_blas_device_get_props,
    /* .init_backend         = */ ggml_backend_blas_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_blas_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
    /* .supports_op          = */ ggml_backend_blas_device_supports_op,
    /* .supports_buft        = */ ggml_backend_blas_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
|  | ||||
| // backend reg interface | ||||
|  | ||||
| static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) { | ||||
|     return "BLAS"; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
| static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) { | ||||
|     return 1; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
| } | ||||
|  | ||||
// Return the backend's single device object for index 0.
// NOTE(review): the device is a function-local static, so it is initialized
// exactly once — the `reg` captured in it is the one from the FIRST call.
// Subsequent calls with a different registry would still see the original;
// confirm callers always pass the same registry.
static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    static ggml_backend_device ggml_backend_blas_device = {
        /* .iface   = */ ggml_backend_blas_device_i,
        /* .reg     = */ reg,
        /* .context = */ nullptr,
    };

    return &ggml_backend_blas_device;

    GGML_UNUSED(reg);
    GGML_UNUSED(index);
}
|  | ||||
| static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) { | ||||
|     if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) { | ||||
|         return (void *)ggml_backend_blas_set_n_threads; | ||||
|     } | ||||
|     return NULL; | ||||
|  | ||||
|     GGML_UNUSED(reg); | ||||
|     GGML_UNUSED(name); | ||||
| } | ||||
|  | ||||
// Registry vtable for the BLAS backend.
static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
    /* .get_name         = */ ggml_backend_blas_reg_get_name,
    /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
    /* .get_device       = */ ggml_backend_blas_reg_get_device,
    /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
};
|  | ||||
// Public entry point: returns the process-wide registry entry for the BLAS
// backend. The struct is a function-local static, so it is built only once.
ggml_backend_reg_t ggml_backend_blas_reg(void) {
    static struct ggml_backend_reg ggml_backend_blas_reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface       = */ ggml_backend_blas_reg_i,
        /* .context     = */ NULL,
    };

    return &ggml_backend_blas_reg;
}
|  | ||||
// NOTE(review): presumably expands to the exported entry point used when ggml
// backends are built as dynamically loaded libraries — confirm against the
// GGML_BACKEND_DL_IMPL definition in ggml-backend-impl.h.
GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
							
								
								
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
# Locate the CANN toolkit from the environment when no install dir was given.
if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
    set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
    message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
endif()

# Auto-detect SoC type and SoC version; if detection fails, the build aborts.
# (Fix: corrected "Auto-detech" typo in comment and user-facing error message.)
set(SOC_VERSION "")
function(detect_ascend_soc_type SOC_VERSION)
    execute_process(
        COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'"
        OUTPUT_VARIABLE npu_info
        RESULT_VARIABLE npu_result
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    if("${npu_info}" STREQUAL "" OR ${npu_result})
        message(FATAL_ERROR "Auto-detect ascend soc type failed, please specify manually or check ascend device working normally.")
    endif()
    set(${SOC_VERSION} "Ascend${npu_info}" PARENT_SCOPE)
endfunction()

if(NOT SOC_TYPE)
    detect_ascend_soc_type(SOC_VERSION)
    set(SOC_TYPE "${SOC_VERSION}")
    message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
endif()

string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower

# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND_310P.
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)

if (CANN_INSTALL_DIR)
    # Only Support Linux.
    if (NOT UNIX)
        message(FATAL_ERROR "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}")
    endif()

    # Supported platforms: x86-64, arm64
    if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
    else()
        message(FATAL_ERROR "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}")
    endif()

    # Set header and libs
    set(CANN_INCLUDE_DIRS
        ${CANN_INSTALL_DIR}/include
        ${CANN_INSTALL_DIR}/include/aclnn
        ${CANN_INSTALL_DIR}/acllib/include
    )

    add_subdirectory(kernels)
    list(APPEND CANN_LIBRARIES
        ascendcl
        nnopbase
        opapi
        acl_op_compiler
        ascendc_kernels
    )

    file(GLOB GGML_SOURCES_CANN "*.cpp")

    ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
    target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
    target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
    target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)

    target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")

    message(STATUS "CANN: CANN_INCLUDE_DIRS =  ${CANN_INCLUDE_DIRS}")
    message(STATUS "CANN: CANN_LIBRARIES =  ${CANN_LIBRARIES}")
else()
    message(FATAL_ERROR "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh?")
endif()
							
								
								
									
										2579
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2579
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										175
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,175 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #include "acl_tensor.h" | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
|  | ||||
| aclDataType ggml_cann_type_mapping(ggml_type type) { | ||||
|     switch (type) { | ||||
|         case GGML_TYPE_F32: | ||||
|             return ACL_FLOAT; | ||||
|         case GGML_TYPE_F16: | ||||
|             return ACL_FLOAT16; | ||||
|         case GGML_TYPE_I8: | ||||
|             return ACL_INT8; | ||||
|         case GGML_TYPE_I16: | ||||
|             return ACL_INT16; | ||||
|         case GGML_TYPE_I32: | ||||
|             return ACL_INT32; | ||||
|         case GGML_TYPE_Q4_0: | ||||
|             return ACL_INT4; | ||||
|         case GGML_TYPE_Q8_0: | ||||
|             return ACL_INT8; | ||||
|         default: | ||||
|             return ACL_DT_UNDEFINED; | ||||
|     } | ||||
|     return ACL_DT_UNDEFINED; | ||||
| } | ||||
|  | ||||
// Create an aclTensor view over a ggml_tensor's data.
//
// When `ne`/`nb` are null, the tensor's own shape and strides are mirrored and
// the storage length is the tensor's full byte size. Otherwise the caller's
// (possibly broadcast-expanded) `dims`-dimensional shape is used. ACL expects
// dimensions ordered outermost-first and strides in elements, so both are
// converted before the aclCreateTensor call.
//
// NOTE(review): in the custom-shape branch acl_storage_len sums (ne[i]-1)*nb[i],
// i.e. the byte offset of the last element rather than the full byte extent —
// confirm this is what aclCreateTensor expects for its storage dims.
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
                                   size_t* nb, int64_t dims, aclFormat format,
                                   size_t offset) {
    // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
    // added.
    int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];

    int64_t acl_storage_len = 0;
    if (ne == nullptr) {
        // No custom shape: mirror the ggml tensor exactly.
        acl_storage_len = ggml_nbytes(tensor);
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            acl_ne[i] = tensor->ne[i];
            // The step size of acl is in elements.
            acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
        }
    } else {
        // With bcast
        for (int i = 0; i < dims; i++) {
            acl_storage_len += (ne[i] - 1) * nb[i];
            acl_ne[i] = ne[i];
            acl_stride[i] = nb[i] / ggml_element_size(tensor);
        }
    }

    // Reverse ne and stride.
    // ACL orders dimensions from outermost to innermost (opposite of ggml).
    int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
    std::reverse(acl_ne, acl_ne + final_dims);
    std::reverse(acl_stride, acl_stride + final_dims);

    // `offset` is given in bytes; ACL expects it in elements.
    aclTensor* acl_tensor = aclCreateTensor(
        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
        offset / ggml_element_size(tensor), format, &acl_storage_len, 1,
        tensor->data);

    return acl_tensor;
}
|  | ||||
| bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) { | ||||
|     for (int i = 0; i < GGML_MAX_DIMS; i++) { | ||||
|         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) { | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
|  | ||||
// Compute numpy-style broadcast shapes/strides so src1 can be repeated to
// match src0. For each dim where src0's extent is a multiple (nr > 1) of
// src1's, an extra dimension of size nr (for src0) / 1 (for src1) is inserted
// so the expanded views share the same rank. Returns the number of output
// dims; output arrays must hold up to GGML_MAX_DIMS * 2 entries.
int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
                                  const ggml_tensor* src1,
                                  int64_t* bcast_src0_ne,
                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
                                  size_t* bcast_src1_nb) {
    GGML_ASSERT(ggml_can_repeat(src1, src0));
    int bcast_dim_cnt = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        // nr = repetition factor needed for src1's dim i to cover src0's.
        int64_t nr = src0->ne[i] / src1->ne[i];
        bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
        bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
        bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
        bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
        bcast_dim_cnt++;
        if (nr != 1) {
            // Need to add an extra dim.
            // Its stride is contiguous w.r.t. the dim just emitted above.
            bcast_src0_ne[bcast_dim_cnt] = nr;
            bcast_src1_ne[bcast_dim_cnt] = 1;
            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
                                           bcast_src0_ne[bcast_dim_cnt - 1];
            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
                                           bcast_src1_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
|  | ||||
// Compute broadcast shapes/strides for batched matrix multiplication.
// Batch dims (i >= 2) where input is an exact multiple nr of weight are split
// into (input_ne[i]/nr, nr) with a 1-sized dim inserted into weight, so the
// matmul batch-broadcast rules are satisfied; the first two (matrix) dims are
// copied unchanged. Returns the resulting dim count (<= GGML_MAX_DIMS * 2).
int64_t ggml_cann_get_mulmat_bcast_shape(
    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
    // input and dst should have the same shape, except for the first two dims.
    GGML_ASSERT(input_ne[2] == dst_ne[2]);
    GGML_ASSERT(input_ne[3] == dst_ne[3]);

    int bcast_dim_cnt = 0;

    // For mul_mat, a dimension needs to be added before the dimension that
    // weight needs to be expanded to satisfy the bcast rule of matrix
    // multiplication.
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = input_ne[i] / weight_ne[i];
        // Do not use bcast in the first two dimensions because we only support
        // the bcast batch dimension. Just copy them.
        if (i < 2 || nr == 1) {
            bcast_input_ne[bcast_dim_cnt] = input_ne[i];
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];

            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_dim_cnt++;
        } else {
            // Need to add an extra dim.
            // First emit the repetition-count dim (size nr for input/dst,
            // size 1 for weight so it broadcasts)...
            bcast_input_ne[bcast_dim_cnt] = nr;
            bcast_dst_ne[bcast_dim_cnt] = nr;
            bcast_weight_ne[bcast_dim_cnt] = 1;
            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dim_cnt++;

            // ...then the reduced original dim, with strides made contiguous
            // relative to the dim just emitted.
            bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
                                            bcast_input_ne[bcast_dim_cnt - 1];
            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
                                          bcast_dst_ne[bcast_dim_cnt - 1];
            bcast_weight_nb[bcast_dim_cnt] =
                bcast_weight_nb[bcast_dim_cnt - 1] *
                bcast_weight_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
							
								
								
									
										258
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,258 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #ifndef CANN_ACL_TENSOR_H | ||||
| #define CANN_ACL_TENSOR_H | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
|  | ||||
| #include <aclnn/aclnn_base.h> | ||||
| #include "common.h" | ||||
|  | ||||
| /** | ||||
|  * @brief	Maps a ggml_type to its corresponding aclDataType. | ||||
|  * | ||||
|  * @details	This function takes a ggml_type as input and returns the corresponding | ||||
|  *			aclDataType. It supports mapping for various ggml_types. If the input type | ||||
|  *			does not match any of the predefined ggml_types, the function returns | ||||
|  *          ACL_DT_UNDEFINED. | ||||
|  * | ||||
|  * @param	type    The ggml_type to be mapped. | ||||
|  * @return	The corresponding aclDataType. If the input type is not recognized, | ||||
|  *			ACL_DT_UNDEFINED is returned. | ||||
|  */ | ||||
| aclDataType ggml_cann_type_mapping(ggml_type type); | ||||
|  | ||||
| /** | ||||
|  * @brief   Creates an ACL tensor from a ggml_tensor with optional shape. | ||||
|  * | ||||
|  * @details This function creates an ACL tensor based on the properties of the | ||||
|  *          provided ggml_tensor. It supports customer shape by adjusting dimensions | ||||
|  *          and strides accordingly. If customer shape is applied, additional | ||||
|  *          dimensions and strides are calculated based on the provided parameters. | ||||
|  * | ||||
|  * @param   tensor      Pointer to the ggml_tensor to be converted to ACL tensor. | ||||
|  * @param   ne          Pointer to an array containing dimensions. Defaults to nullptr | ||||
|  *                      if no customer shape is applied. | ||||
|  * @param   nb          Pointer to an array containing strides. Defaults to nullptr | ||||
|  *                      if no customer shape is applied. | ||||
|  * @param   dims        Number of dimensions in the tensor. Defaults to 0 if no customer | ||||
|  *                      shape is applied. | ||||
|  * @param   format      ACL tensor format. Defaults to ACL_FORMAT_ND. | ||||
|  * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0. | ||||
|  * @return  Pointer to the created ACL tensor. | ||||
|  */ | ||||
| aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr, | ||||
|                              size_t* nb = nullptr, int64_t dims = 0, | ||||
|                              aclFormat format = ACL_FORMAT_ND, | ||||
|                              size_t offset = 0); | ||||
|  | ||||
| /** | ||||
|  * @brief   Template for creating an ACL tensor from provided parameters. typename TYPE | ||||
|  *          should be size_t or float. | ||||
|  * | ||||
|  * @details This function creates an ACL tensor using the provided data pointer, | ||||
|  *          data type, dimensions, strides, format, offset, and additional parameters. | ||||
|  *          It calculates necessary dimensions and strides based on the provided ne and nb | ||||
|  *          arrays, adjusting them for the ACL tensor creation. The ACL storage length | ||||
|  *          is also calculated based on the provided dimensions and strides. | ||||
|  * | ||||
|  * @param   data_ptr    Pointer to the data buffer for the ACL tensor. | ||||
|  * @param   dtype       ACL data type of the tensor. | ||||
|  * @param   type_size   Size of each element in the tensor data buffer. | ||||
|  * @param   ne          Pointer to an array containing tensor dimensions. | ||||
|  * @param   nb          Pointer to an array containing tensor strides. | ||||
|  * @param   dims        Number of dimensions of the tensor. | ||||
|  * @param   format      ACL tensor format. Defaults to ACL_FORMAT_ND. | ||||
|  * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0. | ||||
|  * @return  Pointer to the created ACL tensor. | ||||
|  */ | ||||
| template<typename TYPE> | ||||
| aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype, | ||||
|                                    TYPE type_size, int64_t* ne, TYPE* nb, | ||||
|                                    int64_t dims, | ||||
|                                    aclFormat format = ACL_FORMAT_ND, | ||||
|                                    size_t offset = 0) { | ||||
|     int64_t tmp_ne[GGML_MAX_DIMS * 2]; | ||||
|     int64_t tmp_stride[GGML_MAX_DIMS * 2]; | ||||
|  | ||||
|     memcpy(tmp_ne, ne, dims * sizeof(int64_t)); | ||||
|     for (int i = 0; i < dims; i++) { | ||||
|         tmp_stride[i] = nb[i] / type_size; | ||||
|     } | ||||
|  | ||||
|     std::reverse(tmp_ne, tmp_ne + dims); | ||||
|     std::reverse(tmp_stride, tmp_stride + dims); | ||||
|  | ||||
|     int64_t acl_storage_len = 0; | ||||
|     for (int i = 0; i < dims; i++) { | ||||
|         acl_storage_len += (ne[i] - 1) * nb[i]; | ||||
|     } | ||||
|  | ||||
|     aclTensor* acl_tensor = | ||||
|         aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, | ||||
|                         format, &acl_storage_len, 1, data_ptr); | ||||
|  | ||||
|     return acl_tensor; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * @brief   Checks if tensors require broadcasting based on their shapes. | ||||
|  * | ||||
|  * @details This function determines if two ggml_tensors need to be broadcasted for | ||||
|  *          element-wise operations. Broadcasting is necessary if the shapes of the | ||||
|  *          tensors are not identical and no dimension in either tensor equals 1. | ||||
|  * | ||||
|  * @param   t0      Pointer to the first ggml_tensor. | ||||
|  * @param   t1      Pointer to the second ggml_tensor. | ||||
|  * @return  True if broadcasting is needed, False otherwise. | ||||
|  * | ||||
|  * @remarks This function iterates over the dimensions of t0 and t1. It checks if each | ||||
|  *          dimension in t1 differs from t0's corresponding dimension and is not equal | ||||
|  *          to 1. If such a dimension is found, broadcasting is required to align t1 | ||||
|  *          with t0 for element-wise operations. | ||||
|  */ | ||||
| bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes broadcast shapes and strides for two ggml_tensors. | ||||
|  * | ||||
|  * @details This function calculates the broadcast shapes and strides for two ggml_tensors, | ||||
|  *          following the broadcasting rules similar to numpy. It adjusts dimensions and | ||||
|  *          strides to ensure compatibility for element-wise operations where one tensor | ||||
|  *          can be broadcasted to match the shape of another tensor. | ||||
|  * | ||||
|  * @param   src0                Pointer to the first ggml_tensor. | ||||
|  * @param   src1                Pointer to the second ggml_tensor. | ||||
|  * @param   bcast_ne_src0       Output array to store broadcasted dimensions for src0. | ||||
|  * @param   bcast_ne_src1       Output array to store broadcasted dimensions for src1. | ||||
|  * @param   bcast_nb_src0       Output array to store broadcasted strides for src0. | ||||
|  * @param   bcast_nb_src1       Output array to store broadcasted strides for src1. | ||||
|  * @return  Number of dimensions in the broadcasted shape. | ||||
|  * | ||||
|  * @pre     ggml_can_repeat(src1, src0) must return true, indicating src1 can be broadcasted | ||||
|  *          to match src0. | ||||
|  * | ||||
|  * @remarks This function iterates over the dimensions of src0 and src1, calculating the | ||||
|  *          necessary broadcast dimensions and strides. If a dimension requires broadcasting | ||||
|  *          (i.e., its size in src1 is smaller than in src0), an additional dimension is | ||||
|  *          added with size calculated to match src0's dimension. This adjustment ensures | ||||
|  *          that src1 can be element-wise broadcasted to src0's shape. | ||||
|  * | ||||
|  *  How it works: | ||||
 *
 *  \code
 *  if dim0 has padding.
|  *  a -> (2, 2) padding = 2 | ||||
|  *   a: [[1, 2, *, *] | ||||
|  *       [2, 3, *, *]] | ||||
|  *  nb = (8, 4, 2) | ||||
|  * | ||||
|  *  if a should bcast with b -> (2, 4) | ||||
|  *  b' -> (2, 2, 2) | ||||
|  *  b : [[1, 2, 3, 4, *, *] | ||||
|  *       [5, 6, 7, 8, *, *]] | ||||
|  *  nb = (12, 6, 1) | ||||
|  * | ||||
|  *  after bcast: | ||||
|  *  a' -> (2, 1, 2) | ||||
|  *  a': [[[1, 2], *, *] | ||||
|  *       [[2, 3], *, *]] | ||||
|  *  nb = (8, 4, 2, 1) | ||||
|  * | ||||
|  *  b' : [[[1, 2], [3, 4], *, *] | ||||
|  *        [[5, 6], [7, 8], *, *]] | ||||
|  *  nb = (12, 6, 2, 1) | ||||
|  *  \endcode | ||||
|  * | ||||
|  *  dim1 in a inserted dim, should add nb for dim1, | ||||
|  *  and all other nb moves to next in order. | ||||
|  */ | ||||
| int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1, | ||||
|                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1, | ||||
|                         size_t* bcast_nb_src0, size_t* bcast_nb_src1); | ||||
|  | ||||
| // Bcast macro to avoid duplicate code. | ||||
| #define BCAST_SHAPE(src0, src1)                                              \ | ||||
|     int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2];                            \ | ||||
|     int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2];                            \ | ||||
|     size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2];                             \ | ||||
|     size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2];                             \ | ||||
|     int64_t bcast_dims = ggml_cann_get_bcast_shape(                          \ | ||||
|         src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \ | ||||
|         bcast_##src1##_nb); | ||||
|  | ||||
| #define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims | ||||
|  | ||||
| /** | ||||
|  * @brief Calculates broadcast shapes for matrix multiplication. | ||||
|  * | ||||
|  * @details This function computes the broadcast shapes required for matrix multiplication | ||||
|  *          based on the input, weight, and destination tensor shapes. It ensures that the | ||||
|  *          dimensions of weight tensors are expanded appropriately to satisfy matrix | ||||
|  *          multiplication broadcast rules. | ||||
|  * | ||||
|  * @param input_ne      Array containing the dimensions of the input tensor. | ||||
|  * @param weight_ne     Array containing the dimensions of the weight tensor. | ||||
|  * @param dst_ne        Array containing the dimensions of the destination tensor. | ||||
|  * @param input_nb      Array containing the strides of the input tensor. | ||||
|  * @param weight_nb     Array containing the strides of the weight tensor. | ||||
|  * @param dst_nb        Array containing the strides of the destination tensor. | ||||
|  * @param bcast_input_ne    Output array for broadcasted input tensor dimensions. | ||||
|  * @param bcast_weight_ne   Output array for broadcasted weight tensor dimensions. | ||||
|  * @param bcast_dst_ne      Output array for broadcasted destination tensor dimensions. | ||||
|  * @param bcast_input_nb    Output array for broadcasted input tensor strides. | ||||
|  * @param bcast_weight_nb   Output array for broadcasted weight tensor strides. | ||||
|  * @param bcast_dst_nb      Output array for broadcasted destination tensor strides. | ||||
|  * @return The number of dimensions in the broadcasted tensors. | ||||
|  * | ||||
|  * @remarks This function iterates over the tensor dimensions and calculates the broadcast | ||||
|  *          shapes needed for matrix multiplication. It ensures that dimensions where | ||||
|  *          weight tensor requires expansion are appropriately handled to conform with | ||||
|  *          broadcasting rules. | ||||
|  * @note compare with ggml_cann_get_bcast_shape, mul_mat broadcast need add this new dim | ||||
|  *       before cast dim. | ||||
|  * @sa ggml_cann_get_bcast_shape | ||||
|  */ | ||||
| int64_t ggml_cann_get_mulmat_bcast_shape( | ||||
|     const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne, | ||||
|     const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb, | ||||
|     int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne, | ||||
|     size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb); | ||||
|  | ||||
// Broadcast helper for matrix multiplication, to avoid duplicate code at
// call sites. Declares ne/nb arrays (sized GGML_MAX_DIMS * 2) for `input`,
// `weight` and `dst`, plus `bcast_dims`, and fills them via
// ggml_cann_get_mulmat_bcast_shape(). Unlike BCAST_SHAPE, the arguments here
// are ggml_tensor pointers (the macro dereferences ->ne / ->nb itself).
// Pass the results onward with BCAST_MUL_MAT_PARAM(tensor).
#define BCAST_MUL_MAT_SHAPE(input, weight, dst)                         \
    int64_t bcast_##input##_ne[GGML_MAX_DIMS * 2];                      \
    int64_t bcast_##weight##_ne[GGML_MAX_DIMS * 2];                     \
    int64_t bcast_##dst##_ne[GGML_MAX_DIMS * 2];                        \
    size_t bcast_##input##_nb[GGML_MAX_DIMS * 2];                       \
    size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2];                      \
    size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2];                         \
    int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape(              \
        input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
        bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne,      \
        bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
|  | ||||
// Expands to the ne/nb arrays and dimension count declared by a preceding
// BCAST_MUL_MAT_SHAPE() in the same scope.
#define BCAST_MUL_MAT_PARAM(tensor) \
    bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
|  | ||||
| #endif  // CANN_ACL_TENSOR_H | ||||
							
								
								
									
										3427
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										3427
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										592
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										592
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,592 @@ | ||||
| #ifndef CANN_ACLNN_OPS | ||||
| #define CANN_ACLNN_OPS | ||||
|  | ||||
| /** | ||||
 * @file    aclnn_ops
 * @brief   Declarations of CANN (aclnn) operator wrappers for ggml tensors,
 *          plus shared templates for element-wise binary and activation
 *          operators. (The @file name previously said acl_tensor — copy-paste.)
|  * @author  hipudding <huafengchun@gmail.com> | ||||
|  * @author  wangshuai09 <391746016@qq.com> | ||||
|  * @date    July 15, 2024 | ||||
|  * | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #include <aclnnop/aclnn_add.h> | ||||
| #include <aclnnop/aclnn_arange.h> | ||||
| #include <aclnnop/aclnn_argsort.h> | ||||
| #include <aclnnop/aclnn_cat.h> | ||||
| #include <aclnnop/aclnn_clamp.h> | ||||
| #include <aclnnop/aclnn_div.h> | ||||
| #include <aclnnop/aclnn_gelu.h> | ||||
| #include <aclnnop/aclnn_hardsigmoid.h> | ||||
| #include <aclnnop/aclnn_hardswish.h> | ||||
| #include <aclnnop/aclnn_leaky_relu.h> | ||||
| #include <aclnnop/aclnn_mul.h> | ||||
| #include <aclnnop/aclnn_relu.h> | ||||
| #include <aclnnop/aclnn_silu.h> | ||||
| #include <aclnnop/aclnn_tanh.h> | ||||
| #include "acl_tensor.h" | ||||
| #include "common.h" | ||||
|  | ||||
| /** | ||||
|  * @brief   Repeats a ggml tensor along each dimension to match the dimensions | ||||
|  *          of another tensor. | ||||
|  * | ||||
|  * @details This function repeats the elements of a source ggml tensor along | ||||
|  *          each dimension to create a destination tensor with the specified | ||||
|  *          dimensions. The operation is performed using the ACL backend and | ||||
|  *          executed asynchronously on the device. | ||||
|  * | ||||
|  * @param   ctx The CANN context used for operations. | ||||
|  * @param   dst The ggml tensor representing the destination, which op is | ||||
|  *              GGML_OP_REPEAT and specifies the desired dimensions. | ||||
|  */ | ||||
| void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Adds two ggml tensors using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs an element-wise addition of two tensors. In | ||||
|  *          case the tensors do not have the same shape, one or both tensors | ||||
|  *          will be broadcasted to match the shape of the other before the | ||||
 *          addition is performed. The formula for the operation is given by:
|  *          \f[ | ||||
|  *              \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1} | ||||
|  *          \f] | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The ggml tensor representing the destination, result of the | ||||
|  *            addition is stored at dst->data, and dst->op is `GGML_OP_ADD` | ||||
|  */ | ||||
| void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies the Leaky ReLU activation function to a tensor using the CANN | ||||
|  *          backend. | ||||
|  * | ||||
|  * @details This function computes the Leaky ReLU activation for each element of | ||||
|  *          the input tensor. The Leaky ReLU function allows a small gradient | ||||
|  *          when the unit is not active (i.e., when the input is negative). The | ||||
|  *          Leaky ReLU function is defined as: | ||||
|  *          \f[ | ||||
|  *              \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0, | ||||
|  *               src) | ||||
|  *          \f] | ||||
|  *          `negativeSlope` is in dst->params. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the result of the Leaky ReLU | ||||
|  *            activation is stored, which op is `GGML_OP_LEAKY_RELU` | ||||
|  */ | ||||
| void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief    Concatenates multiple tensors along a specified dimension using the | ||||
|  *           CANN backend. | ||||
|  * | ||||
|  * @param ctx        The CANN context used for operations. | ||||
|  * @param tensorList A pointer to the list of tensors to be concatenated. | ||||
|  * @param dst        The destination tensor where the result of the | ||||
|  *                   concatenation is stored. dst->op is `GGML_OP_CONCAT`. | ||||
|  * @param concat_dim The dimension along which the tensors are concatenated. | ||||
|  * | ||||
|  * @attention tensorList length should be 2 and the dimension using for concat | ||||
|  *            default to 1. | ||||
|  */ | ||||
| void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Generates a sequence of evenly spaced values within a specified | ||||
|  *          interval for a ggml tensor using the CANN backend. | ||||
|  * | ||||
 * @details This function creates a sequence of numbers over a specified
 *          interval, starting from `start`, ending before `stop`, and
|  *          incrementing by `step`. The sequence is stored in the destination | ||||
|  *          tensor `dst`. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the generated sequence will be stored. | ||||
|  *            `start`, 'stop' and 'step' are in dst->op_params and dst->op is | ||||
|  *            `GGML_OP_ARANGE`. | ||||
|  */ | ||||
| void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the square of the elements of a ggml tensor using the CANN | ||||
|  *          backend. | ||||
|  * @details The function sets the second source tensor of the destination | ||||
|  *          tensor `dst` to be equal to the first source tensor. This is | ||||
|  *          effectively squaring the elements since the multiplication becomes | ||||
|  *          `element * element`. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the squared values will be stored, | ||||
|  *            which dst->op is `GGML_OP_SQR`. | ||||
|  */ | ||||
| void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies a clamp operation to the elements of a ggml tensor using the | ||||
|  *          CANN backend. | ||||
|  * | ||||
|  * @details This function clamps the elements of the input tensor `src` to a | ||||
|  *          specified range defined by `min` and `max` values. The result is | ||||
|  *          stored in the destination tensor `dst`. The operation is defined as: | ||||
|  *          \f[ | ||||
|  *              y = \max(\min(x, max\_value), min\_value) | ||||
|  *           \f] | ||||
|  *          where `x` is an element of the input tensor, and `y` is the | ||||
|  *          corresponding element in the output tensor. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the clamped values will be stored. | ||||
|  *            dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params. | ||||
|  */ | ||||
| void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Scales the elements of a ggml tensor by a constant factor using the | ||||
|  *          CANN backend. | ||||
|  * | ||||
|  * @details This function multiplies each element of the input tensor `src` by | ||||
|  *          a scaling factor `scale`, storing the result in the destination | ||||
|  *          tensor `dst`. The operation is defined as: | ||||
|  *          \f[ | ||||
|  *             dst = src \times scale | ||||
|  *          \f] | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the scaled values will be stored. | ||||
|  *            dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params. | ||||
|  */ | ||||
| void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Sorts the elements of a ggml tensor and returns the indices that | ||||
|  *          would sort the tensor using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs an argsort operation on the input tensor | ||||
|  *          `src`. It sorts the elements of `src` in either ascending or | ||||
|  *          descending order, depending on the `GGML_SORT_ORDER_DESC`, | ||||
|  *          and returns the indices that would sort the original tensor. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the sorted indices will be stored. | ||||
|  *            dst->op is `GGML_OP_ARGSORT`. | ||||
|  */ | ||||
| void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the Layer Normalization for a ggml tensor using the CANN | ||||
|  *          backend. | ||||
|  * | ||||
|  * @details This function applies the Layer Normalization operation on the | ||||
|  *          input tensor `src` and stores the result in the destination tensor | ||||
|  *          `dst`. Layer Normalization normalizes the features at each sample in | ||||
|  *          a mini-batch independently. It is commonly used in neural networks | ||||
|  *          to normalize the activations of a layer by adjusting and scaling | ||||
|  *          the outputs. | ||||
|  *          The operation is defined as: | ||||
|  *          \f[ | ||||
|  *              \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}} | ||||
|  *          \f] | ||||
|  *          `Var` defaults dst->ne[0]. `eps` is in dst->params. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  * @attention `Var` defaults to dst->ne[0]. | ||||
|  */ | ||||
| void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief  Computes the Group Normalization for a ggml tensor using the CANN | ||||
|  *         backend. | ||||
|  * | ||||
 * @details This function applies the Group Normalization operation on the input
|  *         tensor `src` and stores the result in the destination tensor `dst`. | ||||
|  *         Group Normalization divides the channels into groups and normalizes | ||||
|  *         the features within each group across spatial locations. | ||||
|  *         It is commonly used in convolutional neural networks to improve | ||||
|  *         training stability and performance. | ||||
|  *         The operation is defined as: | ||||
|  *         \f[ | ||||
|  *             \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}} | ||||
|  *         \f] | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  *            `n_groups` is in dst->params, which split C channel to `n_groups`. | ||||
|  *            dst->op is `GGML_OP_GROUP_NORM`. | ||||
|  * | ||||
|  * @attention eps defaults to 1e-6f. | ||||
|  */ | ||||
| void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the accumulation of tensors using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs an accumulation operation on two tensors. | ||||
|  *          Depending on the `inplace` flag, it either updates the destination | ||||
|  *          tensor `dst` in place by adding `alpha * src1` to it, or it creates | ||||
|  *          a new tensor as the result of `src0 + alpha * src1` and stores it in | ||||
|  *          `dst`. | ||||
|  *          The operation is defined as: | ||||
|  *          \f[ | ||||
|  *               dst = src0 + alpha \times src1 | ||||
|  *          \f] | ||||
|  *          if `inplace` is `true`, `src0` is equal to 'dst'. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the accumulated values will be stored. | ||||
|  *            `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`. | ||||
|  */ | ||||
| void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the sum of elements along the last dimension of a ggml tensor | ||||
|  *          using the CANN backend. | ||||
|  * | ||||
|  * @details This function performs a reduction sum operation along the last | ||||
|  *          dimension of the input tensor `src`. The result of the sum is stored | ||||
|  *          in the destination tensor `dst`. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
 * @param dst The destination tensor where the reduced values will be stored.
|  *            dst->op is `GGML_OP_SUM_ROWS`. | ||||
|  * | ||||
|  * @attention `reduce_dims` defaults to 3, which means the last dimension. | ||||
|  */ | ||||
| void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Upsamples a ggml tensor using nearest neighbor interpolation using | ||||
|  *          the CANN backend. | ||||
|  * | ||||
|  * @details This function performs upsampling of the input tensor `src` using | ||||
|  *          nearest neighbor interpolation. The upsampling is applied to the | ||||
|  *          height and width dimensions (last two dimensions) of the tensor. The | ||||
|  *          result is stored in the destination tensor `dst`, which must have | ||||
|  *          the appropriate dimensions for the upsampled output. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the upsampled values will be stored. | ||||
|  *            dst->op is `GGML_OP_UPSCALE`. | ||||
|  */ | ||||
| void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, | ||||
|                                   ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Pads a ggml tensor to match the dimensions of the destination tensor | ||||
|  *          using the CANN backend. | ||||
|  * | ||||
|  * @details This function pads the input tensor `src` so that it matches the | ||||
|  *          dimensions of the destination tensor `dst`. The amount of padding | ||||
|  *          is calculated based on the difference in sizes between `src` and | ||||
|  *          `dst` along each dimension. The padded tensor is stored in `dst`. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor, which specifies the target dimensions for | ||||
|  *            padding. dst->op is `GGML_OP_PAD`. | ||||
|  */ | ||||
| void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Executes a 2D pooling operation on a ggml tensor using the CANN | ||||
|  *          backend. | ||||
|  * | ||||
|  * @details This function dispatches the execution of a 2D pooling operation on | ||||
|  *          the input tensor `dst`. The type of pooling (average or max) is | ||||
|  *          determined by the `op` parameter, which is read from the operation | ||||
|  *          parameters of `dst`. The function supports average pooling | ||||
|  *          (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an | ||||
|  *          invalid operation is encountered, the function asserts a failure. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor on which the pooling operation is to be | ||||
|  *            performed. dst->op is `GGML_OP_POOL_2D`. | ||||
|  */ | ||||
| void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Duplicates a ggml tensor using the CANN backend. | ||||
|  * | ||||
|  * @details This function duplicates the contents of the source tensor `src` to | ||||
|  *          the destination tensor `dst`. The function supports various tensor | ||||
|  *          types and configurations, including handling of extra data, type | ||||
|  *          conversions, and special cases for contiguous and non-contiguous | ||||
|  *          tensors. | ||||
|  * | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the duplicated data will be stored. | ||||
|  *            dst->op is `GGML_OP_DUP` | ||||
|  * | ||||
|  * @attention Only support Fp16/FP32. Not support when src and dst have | ||||
|  *            different shape and dst is no-contiguous. | ||||
 * @note      This function needs to be simplified.
|  */ | ||||
| void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the Root Mean Square (RMS) normalization of a ggml tensor | ||||
|  *          using the CANN backend. | ||||
|  * | ||||
|  * @details This function applies RMS normalization to the input tensor `src` | ||||
|  *          and stores the result in the destination tensor `dst`. RMS | ||||
|  *          normalization involves computing the root mean square of the input | ||||
|  *          tensor along a specified dimension and then dividing each element of | ||||
|  *          the tensor by this value, adjusted by a small epsilon value to | ||||
|  *          prevent division by zero. | ||||
|  *          The operation is defined as: | ||||
|  *          \f[ | ||||
|  *               \text{RmsNorm}\left(x_i\right)=\frac{x_i}{\text{Rms}(\mathbf{x})} g_i, | ||||
|  *               \quad \text { where } \text{Rms}(\mathbf{x})=\sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2+e p s} | ||||
|  *          \f] | ||||
|  *          `eps` is in dst->op_params. | ||||
|  * @param ctx The CANN context used for operations. | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  *            dst->op is `GGML_OP_RMS_NORM`. | ||||
|  */ | ||||
| void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies a diagonal mask to the tensor with a specified value. | ||||
|  * | ||||
|  * @details This function creates a mask tensor filled with ones, then applies | ||||
|  *          an upper triangular and lower triangular operation to it based on | ||||
|  *          the number of past elements specified. Afterward, it adds the masked | ||||
|  *          tensor to the destination tensor in-place. | ||||
|  * | ||||
|  * @param ctx The backend CANN context used for operations. | ||||
|  * @param dst The destination tensor where the result will be stored. dst->op is | ||||
|  *            `GGML_OP_DIAG_MASK` | ||||
|  * @param value The value to use for masking. | ||||
|  */ | ||||
| void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value); | ||||
|  | ||||
| /** | ||||
|  * @brief   Performs an image-to-column transformation on the input tensor. | ||||
|  * | ||||
|  * @details This function takes an input tensor and applies an image-to-column | ||||
|  *          operation, converting spatial dimensions into column-like | ||||
|  *          structures suitable for convolutional operations. It supports both | ||||
|  *          half-precision (F16) and single-precision (F32) floating-point data | ||||
|  *          types. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor that stores the result of the operation. | ||||
|  *            dst->op is `GGML_OP_IM2COL`. | ||||
|  */ | ||||
| void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes time step embeddings using sine and cosine functions. | ||||
|  * | ||||
|  * @details This function calculates time step embeddings by applying sine and | ||||
|  *          cosine transformations to a given input tensor, which is typically | ||||
|  *          used in temporal models like diffusion models or transformers to | ||||
|  *          encode time information effectively. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the result of the embedding operation | ||||
|  *            will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`. | ||||
|  */ | ||||
| void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| // @see ggml_cann_dup. | ||||
| void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the softmax activation with optional masking. | ||||
|  * | ||||
|  * @details This function computes the softmax activation over the input tensor, | ||||
|  *          optionally applying a mask and scaling factor. It supports both FP16 | ||||
|  *          and FP32 data types and can handle masking by broadcasting the mask | ||||
|  *          across rows if necessary. | ||||
|  *          The function performs the following steps: | ||||
|  *          1. Multiplies the input tensor by a scale factor. | ||||
|  *          2. Optionally casts the mask tensor to FP32 if it is in FP16 format. | ||||
|  *          3. Broadcasts the mask tensor if its dimensions do not match the | ||||
|  *             input tensor's dimensions. | ||||
|  *          4. Adds the mask to the scaled input tensor. | ||||
|  *          5. Applies the softmax activation function along the specified | ||||
|  *             dimension. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the result will be stored. dst->op is | ||||
|  *            `GGML_OP_SOFTMAX`. | ||||
|  */ | ||||
| void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Extracts specific rows from a tensor based on indices. | ||||
|  * | ||||
|  * @details This function retrieves rows from a source tensor src0 according to | ||||
|  *          the indices provided in another tensor src1 and stores the result in | ||||
|  *          a destination tensor (\p dst). It supports different data types | ||||
|  *          including F32, F16, Q4_0, and Q8_0. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the extracted rows will be stored. | ||||
|  *            dst->op is `GGML_OP_GET_ROWS`. | ||||
|  */ | ||||
| void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Executes matrix multiplication for the given tensor. | ||||
|  * | ||||
|  * @details This function performs matrix multiplication on the source tensors | ||||
|  *          associated with the destination tensor. It supports matrix | ||||
|  *          multiplication F32, F16, and Q8_0. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor for storing the result of the matrix | ||||
|  *            multiplication. dst->op is `GGML_OP_MUL_MAT`. | ||||
|  */ | ||||
| void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor. | ||||
|  * | ||||
|  * @details This function implements the RoPE mechanism, which is a method to | ||||
|  *          encode positional information into sequence data, particularly | ||||
|  *          useful in transformer models. It supports both F32 and F16 data | ||||
|  *          types. | ||||
|  * | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the RoPE-transformed data will be | ||||
|  *            stored. dst->op is `GGML_OP_ROPE`. | ||||
|  * | ||||
|  * @note The function currently does not support cases where the n_dims is less | ||||
|  *       than the input tensor's first dimension. | ||||
|  * @note The function currently does not support cases where the freq_factors is | ||||
|  *       not NULL. | ||||
|  * @note The function currently does not support cases where the ext_factor is | ||||
|  *       not equal 0. | ||||
|  * @note The function currently does not support cases where the freq_scale is | ||||
|  *       not equal 1. | ||||
|  */ | ||||
| void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  | ||||
/**
 * @brief   Generic element-wise binary operator template (e.g. mul / div).
 *
 * @details Instantiated with a matching aclnn two-phase pair: a
 *          `getWorkspaceSize` function that sizes the scratch buffer and
 *          creates the executor, and an `execute` function that launches the
 *          kernel. Operands are dst->src[0] and dst->src[1]; when their shapes
 *          differ and broadcasting is required, both sources (and dst) are
 *          described with broadcast-expanded ne/nb via BCAST_SHAPE /
 *          BCAST_PARAM.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor; the result of `src0 <op> src1` is
 *            stored at dst->data.
 */
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                       aclTensor*, uint64_t*, aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src0 = dst->src[0];
    ggml_tensor* src1 = dst->src[1];
    // src1 must be repeatable (broadcastable) to src0's shape, and dst must
    // have src0's shape.
    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

    aclTensor* acl_src0;
    aclTensor* acl_src1;
    aclTensor* acl_dst;

    // Need bcast
    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
        BCAST_SHAPE(src0, src1)
        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
        // dst shares src0's broadcast layout (asserted same shape above).
        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
    } else {
        acl_src0 = ggml_cann_create_tensor(src0);
        acl_src1 = ggml_cann_create_tensor(src1);
        acl_dst = ggml_cann_create_tensor(dst);
    }

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    // Phase 1: query required workspace size and obtain the executor.
    ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
                               &executor));
    if (workspaceSize > 0) {
        // NOTE(review): workspace_allocator is destroyed at the end of this
        // block, i.e. before execute() consumes workspaceAddr. This
        // presumably relies on the pool being stream-ordered so the buffer
        // cannot be reused before the kernel runs — confirm against
        // ggml_cann_pool_alloc's semantics.
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    // Phase 2: launch the operator asynchronously on the context stream.
    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    ACL_CHECK(aclDestroyTensor(acl_src0));
    ACL_CHECK(aclDestroyTensor(acl_src1));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}
|  | ||||
| // Activation functions template. | ||||
| template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*, | ||||
|                                        aclOpExecutor**), | ||||
|           aclnnStatus execute(void*, uint64_t, aclOpExecutor*, | ||||
|                               const aclrtStream)> | ||||
| void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_tensor* src = dst->src[0]; | ||||
|  | ||||
|     GGML_ASSERT(src->type == GGML_TYPE_F32); | ||||
|     GGML_ASSERT(dst->type == GGML_TYPE_F32); | ||||
|  | ||||
|     aclTensor* acl_src = ggml_cann_create_tensor(src); | ||||
|     aclTensor* acl_dst = ggml_cann_create_tensor(dst); | ||||
|  | ||||
|     uint64_t workspaceSize = 0; | ||||
|     aclOpExecutor* executor; | ||||
|     void* workspaceAddr = nullptr; | ||||
|  | ||||
|     ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor)); | ||||
|     if (workspaceSize > 0) { | ||||
|         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); | ||||
|         workspaceAddr = workspace_allocator.get(); | ||||
|     } | ||||
|  | ||||
|     aclrtStream main_stream = ctx.stream(); | ||||
|     ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream)); | ||||
|  | ||||
|     ACL_CHECK(aclDestroyTensor(acl_src)); | ||||
|     ACL_CHECK(aclDestroyTensor(acl_dst)); | ||||
| } | ||||
|  | ||||
| // Activation functions template for const aclTensors. | ||||
| template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*, | ||||
|                                        uint64_t*, aclOpExecutor**), | ||||
|           aclnnStatus execute(void*, uint64_t, aclOpExecutor*, | ||||
|                               const aclrtStream)> | ||||
| void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_tensor* src = dst->src[0]; | ||||
|  | ||||
|     GGML_ASSERT(src->type == GGML_TYPE_F32); | ||||
|     GGML_ASSERT(dst->type == GGML_TYPE_F32); | ||||
|  | ||||
|     aclTensor* acl_src = ggml_cann_create_tensor(src); | ||||
|     aclTensor* acl_dst = ggml_cann_create_tensor(dst); | ||||
|  | ||||
|     uint64_t workspaceSize = 0; | ||||
|     aclOpExecutor* executor; | ||||
|     void* workspaceAddr = nullptr; | ||||
|  | ||||
|     ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor)); | ||||
|     if (workspaceSize > 0) { | ||||
|         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); | ||||
|         workspaceAddr = workspace_allocator.get(); | ||||
|     } | ||||
|  | ||||
|     aclrtStream main_stream = ctx.stream(); | ||||
|     ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream)); | ||||
|  | ||||
|     ACL_CHECK(aclDestroyTensor(acl_src)); | ||||
|     ACL_CHECK(aclDestroyTensor(acl_dst)); | ||||
| } | ||||
|  | ||||
| #endif  // CANN_ACLNN_OPS | ||||
							
								
								
									
										286
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023-2024 The ggml authors | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
|  | ||||
| #ifndef CANN_COMMON_H | ||||
| #define CANN_COMMON_H | ||||
|  | ||||
| #include <acl/acl.h> | ||||
|  | ||||
| #include <cstdio> | ||||
| #include <iostream> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <vector> | ||||
|  | ||||
| #include "../include/ggml-cann.h" | ||||
| #include "../include/ggml.h" | ||||
|  | ||||
| #define MATRIX_ROW_PADDING 512 | ||||
| #define GGML_CANN_MAX_STREAMS 8 | ||||
|  | ||||
| /** | ||||
|  * @brief Handles CANN-related errors by printing an error message and | ||||
|  *        terminating the program. | ||||
|  * @param stmt The statement that caused the error. | ||||
|  * @param func The function in which the error occurred. | ||||
|  * @param file The file in which the error occurred. | ||||
|  * @param line The line number at which the error occurred. | ||||
|  * @param msg The error message. | ||||
|  */ | ||||
| [[noreturn]] void ggml_cann_error(const char* stmt, const char* func, | ||||
|                                   const char* file, int line, const char* msg); | ||||
|  | ||||
| /** | ||||
|  * @brief Checks the result of a CANN function call and invokes the error | ||||
|  *        handler if the call fails. | ||||
|  * @param stmt The CANN function call to check. | ||||
|  * @param success The success code that indicates the call was successful. | ||||
|  * @param error_fn The function to call to retrieve the error message. | ||||
|  */ | ||||
// Note: no trailing semicolon after `while (0)` — that is the point of the
// do/while(0) idiom, letting `if (x) ACL_CHECK(y); else ...` parse correctly.
#define ACL_CHECK_GEN(stmt, success, error_fn)                                \
    do {                                                                      \
        int err_code = (stmt);                                                \
        if (err_code != (success)) {                                          \
            ggml_cann_error(#stmt, __func__, __FILE__, __LINE__, error_fn()); \
        }                                                                     \
    } while (0)

#define ACL_CHECK(stmt) ACL_CHECK_GEN(stmt, 0, aclGetRecentErrMsg)
|  | ||||
| /** | ||||
|  * @brief Contains information about CANN devices. | ||||
|  */ | ||||
| struct ggml_cann_device_info { | ||||
|     /** | ||||
|      * @brief Number of CANN devices available. | ||||
|      */ | ||||
|     int32_t device_count; | ||||
|  | ||||
|     /** | ||||
|      * @brief Information about a single CANN device. | ||||
|      */ | ||||
|     struct cann_device_info { | ||||
|         int cc;                 /**< Compute capability.                   */ | ||||
|         size_t smpb;            /**< Maximum shared memory per block.      */ | ||||
|         bool vmm;               /**< Virtual memory support.               */ | ||||
|         size_t vmm_granularity; /**< Granularity of virtual memory.        */ | ||||
|         size_t total_vram;      /**< Total video RAM available on the device. */ | ||||
|     }; | ||||
|  | ||||
|     cann_device_info devices[GGML_CANN_MAX_DEVICES] = | ||||
|         {}; /**< Array of CANN device information. */ | ||||
| }; | ||||
|  | ||||
| const ggml_cann_device_info& ggml_cann_info(); | ||||
|  | ||||
| void ggml_cann_set_device(int32_t device); | ||||
| int32_t ggml_cann_get_device(); | ||||
|  | ||||
| /** | ||||
|  * @brief Abstract base class for memory pools used by CANN. | ||||
|  */ | ||||
| struct ggml_cann_pool { | ||||
|     /** | ||||
|      * @brief Virtual destructor for the memory pool. | ||||
|      */ | ||||
|     virtual ~ggml_cann_pool() = default; | ||||
|  | ||||
|     /** | ||||
|      * @brief Allocates memory from the pool. | ||||
|      * | ||||
|      * @param size         The size of the memory block to allocate. | ||||
|      * @param actual_size  Pointer to a variable where the actual allocated size | ||||
|      *                     will be stored. | ||||
|      * @return             Pointer to the allocated memory block. | ||||
|      */ | ||||
|     virtual void* alloc(size_t size, size_t* actual_size) = 0; | ||||
|  | ||||
|     /** | ||||
|      * @brief Frees a previously allocated memory block. | ||||
|      * | ||||
|      * @param ptr   Pointer to the memory block to free. | ||||
|      * @param size  Size of the memory block to free. | ||||
|      * @note Note that all CANN opertors are running async. Make sure memory is | ||||
|      *       still avaiable before this operator finished. | ||||
|      */ | ||||
|     virtual void free(void* ptr, size_t size) = 0; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief RAII wrapper for managing memory allocations from a CANN memory pool. | ||||
|  */ | ||||
| struct ggml_cann_pool_alloc { | ||||
|     ggml_cann_pool* pool = nullptr; /**< Pointer to the memory pool. */ | ||||
|     void* ptr = nullptr;    /**< Pointer to the allocated memory block. */ | ||||
|     size_t actual_size = 0; /**< Actual size of the allocated memory block. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Default constructor. | ||||
|      */ | ||||
|     ggml_cann_pool_alloc() = default; | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor that initializes the memory pool. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      */ | ||||
|     explicit ggml_cann_pool_alloc(ggml_cann_pool& pool) : pool(&pool) {} | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor that initializes the memory pool and allocates memory. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      */ | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) { | ||||
|         alloc(size); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Destructor that frees the allocated memory block. | ||||
|      */ | ||||
|     ~ggml_cann_pool_alloc() { | ||||
|         if (ptr != nullptr) { | ||||
|             pool->free(ptr, actual_size); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Allocates memory from the pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* alloc(size_t size) { | ||||
|         GGML_ASSERT(pool != nullptr); | ||||
|         GGML_ASSERT(ptr == nullptr); | ||||
|         ptr = pool->alloc(size, &this->actual_size); | ||||
|         return ptr; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Allocates memory from a specific memory pool. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* alloc(ggml_cann_pool& pool, size_t size) { | ||||
|         this->pool = &pool; | ||||
|         return alloc(size); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Gets the pointer to the allocated memory block. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* get() { return ptr; } | ||||
|  | ||||
|     // Deleted copy constructor | ||||
|     ggml_cann_pool_alloc(const ggml_cann_pool_alloc&) = delete; | ||||
|  | ||||
|     // Deleted move constructor | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool_alloc&&) = delete; | ||||
|  | ||||
|     // Deleted copy assignment operator | ||||
|     ggml_cann_pool_alloc& operator=(const ggml_cann_pool_alloc&) = delete; | ||||
|  | ||||
|     // Deleted move assignment operator | ||||
|     ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Context for managing CANN backend operations. | ||||
|  */ | ||||
| struct ggml_backend_cann_context { | ||||
|     int32_t device;                  /**< Device ID. */ | ||||
|     std::string name;                /**< Name of the device. */ | ||||
|     std::string description;         /**< Description of the device. */ | ||||
|     aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */ | ||||
|  | ||||
|     aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor for initializing the context with a given device. | ||||
|      * @param device Device ID. | ||||
|      */ | ||||
|     explicit ggml_backend_cann_context(int device) | ||||
|         : device(device), name("CANN" + std::to_string(device)) { | ||||
|         ggml_cann_set_device(device); | ||||
|         description = aclrtGetSocName(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Destructor for cleaning up resources. | ||||
|      */ | ||||
|     ~ggml_backend_cann_context() { | ||||
|         ggml_cann_set_device(device); | ||||
|         if (copy_event != nullptr) { | ||||
|             ACL_CHECK(aclrtDestroyEvent(copy_event)); | ||||
|         } | ||||
|         for (int i = 0; i < GGML_CANN_MAX_STREAMS; ++i) { | ||||
|             if (streams[i] != nullptr) { | ||||
|                 ACL_CHECK(aclrtDestroyStream(streams[i])); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Get or create a stream for a given index. | ||||
|      * @param stream Index of the stream. | ||||
|      * @return The stream corresponding to the given index. | ||||
|      */ | ||||
|     aclrtStream stream(int stream) { | ||||
|         if (streams[stream] == nullptr) { | ||||
|             ggml_cann_set_device(device); | ||||
|             ACL_CHECK(aclrtCreateStream(&streams[stream])); | ||||
|         } | ||||
|         return streams[stream]; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Get or create the default stream (index 0). | ||||
|      * @return The default stream. | ||||
|      */ | ||||
|     aclrtStream stream() { return stream(0); } | ||||
|  | ||||
|     // TODO: each stream should have a memory pool. | ||||
|     std::unique_ptr<ggml_cann_pool> | ||||
|         mem_pool; /**< Memory pool for the device. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Create a new memory pool for a given device. | ||||
|      * @param device Device ID. | ||||
|      * @return A unique pointer to the new memory pool. | ||||
|      */ | ||||
|     static std::unique_ptr<ggml_cann_pool> new_pool_for_device(int device); | ||||
|  | ||||
|     /** | ||||
|      * @brief Get or create the memory pool for the context. | ||||
|      * @return Reference to the memory pool. | ||||
|      */ | ||||
|     ggml_cann_pool& pool() { | ||||
|         if (mem_pool == nullptr) { | ||||
|             mem_pool = new_pool_for_device(device); | ||||
|         } | ||||
|         return *mem_pool; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| #endif  // CANN_COMMON_H | ||||
							
								
								
									
										2188
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2188
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										30
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
# AscendC kernel sources compiled into the static ascendc_kernels library.
file(GLOB SRC_FILES
    get_row_f32.cpp
    get_row_f16.cpp
    get_row_q4_0.cpp
    get_row_q8_0.cpp
    quantize_f32_q8_0.cpp
    quantize_f16_q8_0.cpp
    quantize_float_to_q4_0.cpp
    dup.cpp
)

set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")

# Locate the AscendC kernel cmake helpers shipped with the CANN toolkit.
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
else()
    message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
endif()
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)

ascendc_library(ascendc_kernels STATIC
    ${SRC_FILES}
)

message(STATUS "CANN: compile ascend kernels with SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
# ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
							
								
								
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| #ifndef ASCENDC_KERNELS_H | ||||
| #define ASCENDC_KERNELS_H | ||||
|  | ||||
| #include "aclrtlaunch_ascendc_get_row_f32.h" | ||||
| #include "aclrtlaunch_ascendc_get_row_f16.h" | ||||
| #include "aclrtlaunch_ascendc_get_row_q8_0.h" | ||||
| #include "aclrtlaunch_ascendc_get_row_q4_0.h" | ||||
|  | ||||
| #include "aclrtlaunch_ascendc_quantize_f32_q8_0.h" | ||||
| #include "aclrtlaunch_ascendc_quantize_f16_q8_0.h" | ||||
| #include "aclrtlaunch_ascendc_quantize_f16_to_q4_0.h" | ||||
| #include "aclrtlaunch_ascendc_quantize_f32_to_q4_0.h" | ||||
|  | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp16.h" | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp32.h" | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp32_to_fp16.h" | ||||
| #include "aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32.h" | ||||
|  | ||||
| #endif  // ASCENDC_KERNELS_H | ||||
							
								
								
									
										236
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										236
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,236 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| #include <cmath> | ||||
|  | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| const int64_t SUPPORTED_MAX_DIM = 65535;  // currently the limit of max block dim supported by dup kernel is 65535 | ||||
|  | ||||
// Copies rows from src to dst, one row per kernel block, optionally casting
// elements from SRC_T to DST_T. src rows must be contiguous along dim 0;
// dst must be fully contiguous.
template <typename SRC_T, typename DST_T>
class DupByRows {
   public:
    __aicore__ inline DupByRows() {}
    // Derives this block's source/destination byte offsets from the tensor
    // shape (input_ne_ub) and strides (input_nb_ub), then sets up the
    // input/output queues sized for one row (rounded up to 32-byte blocks).
    __aicore__ inline void init(GM_ADDR src, GM_ADDR dst, int64_t *input_ne_ub,
                                size_t *input_nb_ub) {
        /* Dup by rows when src is contiguous on first dimension and dst is
        contiguous, each kernel process one row.
        */

        // Input has four dims.
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        // param
        num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];
        num_elem = input_ne_ub[0];

        // index for (ne[1], ne[2], ne[3]): (idx_ne1, idx_ne2, idx_ne3)
        idx_ne3 = op_block_idx / (input_ne_ub[1] * input_ne_ub[2]);
        idx_ne2 = (op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2]))
                  / (input_ne_ub[1]);
        idx_ne1 = op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2])
                - idx_ne2 * input_ne_ub[1];

        // src may not be contiguous in dims [1,2,3], so the byte offset is
        // decided by ne & nb
        src_stride = input_nb_ub[3] * idx_ne3 + input_nb_ub[2] * idx_ne2
                     + input_nb_ub[1] * idx_ne1;

        // dst is contiguous
        dst_stride = op_block_idx * (input_ne_ub[0] * sizeof(DST_T));

        src_gm.SetGlobalBuffer(reinterpret_cast<__gm__ SRC_T *>(src +
                                                                src_stride));
        dst_gm.SetGlobalBuffer(reinterpret_cast<__gm__ DST_T *>(dst +
                                                                dst_stride));

        // Queue buffers are rounded up to a multiple of 32 bytes (the
        // DataCopy granularity).
        pipe.InitBuffer(src_queue, BUFFER_NUM, (sizeof(SRC_T) * num_elem +
                                                32 - 1) / 32 * 32);
        pipe.InitBuffer(dst_queue, BUFFER_NUM, (sizeof(DST_T) * num_elem +
                                                32 - 1) / 32 * 32);
    }

    // Stages one row from global memory into the input queue. When the row
    // length is not a multiple of the 32-byte block, one extra element is
    // copied so the trailing partial block is covered.
    __aicore__ inline void copy_in() {
        LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
        const size_t elem_per_block = 32 / sizeof(SRC_T);
        size_t tail = num_elem % elem_per_block;
        size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
        DataCopy(src_local, src_gm, cpy_elements_len);
        src_queue.EnQue(src_local);
    }

    // Writes one row from the output queue back to global memory.
    __aicore__ inline void copy_out() {
        LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
#ifdef ASCEND_310P
        // 310P has no DataCopyPad: copy the aligned prefix, then handle the
        // tail by zero-filling the pad elements and using an atomic add so
        // the partial block does not clobber neighbouring data.
        const size_t elem_per_block = 32 / sizeof(DST_T);
        size_t tail = num_elem % elem_per_block;
        size_t len = num_elem & ~(elem_per_block - 1);
        if (len > 0) {
            DataCopy(dst_gm, dst_local, len);
        }
        if(tail != 0) {
            for (size_t i = tail; i < elem_per_block; i++) {
                dst_local[len + i].SetValue(0, 0);
            }
            SetAtomicAdd<float>();
            DataCopy(dst_gm[len], dst_local[len], elem_per_block);
            SetAtomicNone();
        }
#else
        // DataCopyPad handles the unaligned tail directly.
        DataCopyExtParams dataCopyParams;
        dataCopyParams.blockCount = 1;
        dataCopyParams.blockLen = num_elem * sizeof(DST_T);
        DataCopyPad(dst_gm, dst_local, dataCopyParams);
#endif
        dst_queue.FreeTensor(dst_local);
    }

    __aicore__ inline void dup() {
        // main process, copy one row data from src to dst.
        copy_in();

        LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
        LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();

        int32_t BLOCK_NUM = 32 / sizeof(DST_T);
        DataCopy(dst_local, src_local, (num_elem + BLOCK_NUM - 1)
                                        / BLOCK_NUM * BLOCK_NUM);
        dst_queue.EnQue<DST_T>(dst_local);

        src_queue.FreeTensor(src_local);
        copy_out();
    }

    __aicore__ inline void dup_with_cast() {
        // main process, copy one row data from src to dst.
        // cast dtype from src to dst.
        copy_in();

        LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
        LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();

        Cast(dst_local, src_local, RoundMode::CAST_NONE, num_elem);
        dst_queue.EnQue<DST_T>(dst_local);

        src_queue.FreeTensor(src_local);
        copy_out();
    }

   private:

    TPipe pipe;
    GlobalTensor<SRC_T> src_gm;  // this block's source row in global memory
    GlobalTensor<DST_T> dst_gm;  // this block's destination row in global memory

    int64_t num_rows;    // total number of rows (ne1*ne2*ne3)
    int64_t num_elem;    // elements per row (ne0)
    int64_t idx_ne3;     // this block's index along dim 3
    int64_t idx_ne2;     // this block's index along dim 2
    int64_t idx_ne1;     // this block's index along dim 1
    int64_t src_stride;  // source byte offset of this block's row
    int64_t dst_stride;  // destination byte offset of this block's row

    TQue<QuePosition::VECIN, BUFFER_NUM> src_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> dst_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: row-wise copy of an fp16 tensor to an fp16 destination.
// Each kernel block handles one row; the ne/nb parameter arrays (4 entries,
// 32 bytes each) are first staged from global memory into local buffers.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {

    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused:
    // DupByRows assumes the destination is contiguous.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<half, half> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup();
}
|  | ||||
// Kernel entry: row-wise copy of an fp32 tensor to an fp32 destination.
// NOTE(review): float_t (from <cmath>) is only guaranteed to be at least as
// wide as float; the GM data is fp32, so plain `float` may be the safer
// spelling — confirm against the AscendC toolchain.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<float_t, float_t> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup();
}
|  | ||||
// Kernel entry: row-wise copy with cast from fp32 source to fp16 destination.
// NOTE(review): float_t (from <cmath>) is only guaranteed to be at least as
// wide as float — confirm it is fp32 on the AscendC toolchain.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32_to_fp16(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {

    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<float_t, half> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup_with_cast();
}
|  | ||||
// Kernel entry: row-wise copy with cast from fp16 source to fp32 destination.
// NOTE(review): float_t (from <cmath>) is only guaranteed to be at least as
// wide as float — confirm it is fp32 on the AscendC toolchain.
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16_to_fp32(
                                                        GM_ADDR src_gm,
                                                        GM_ADDR dst_gm,
                                                        GM_ADDR input_ne_gm,
                                                        GM_ADDR input_nb_gm,
                                                        GM_ADDR output_ne_gm,
                                                        GM_ADDR output_nb_gm) {

    // copy params from gm to ub.
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    // Output shape/stride are staged for interface symmetry but unused.
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    DupByRows<half, float_t> op;
    op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
    op.dup_with_cast();
}
							
								
								
									
										197
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,197 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
| class GET_ROW_F16 { | ||||
|    public: | ||||
|     __aicore__ inline GET_ROW_F16() {} | ||||
|     __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output, | ||||
|                                 int64_t *input_ne_ub, size_t *input_nb_ub, | ||||
|                                 int64_t *indices_ne_ub, size_t *indices_nb_ub, | ||||
|                                 int64_t *output_ne_ub, size_t *output_nb_ub) { | ||||
|         // TODO, use template for F16/f32 | ||||
|         int64_t op_block_num = GetBlockNum(); | ||||
|         op_block_idx = GetBlockIdx(); | ||||
|  | ||||
|         for (int i = 0; i < 4; i++) { | ||||
|             input_ne[i] = input_ne_ub[i]; | ||||
|             input_stride[i] = input_nb_ub[i] / input_nb_ub[0]; | ||||
|  | ||||
|             indices_ne[i] = indices_ne_ub[i]; | ||||
|             indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0]; | ||||
|  | ||||
|             output_ne[i] = output_ne_ub[i]; | ||||
|             output_stride[i] = output_nb_ub[i] / output_nb_ub[0]; | ||||
|         } | ||||
|  | ||||
|         // Indices has two dims. n_elements = all rows should get. | ||||
|         // dr, all rows should this thread get. | ||||
|         uint64_t n_elements = | ||||
|             indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3]; | ||||
|         dr = n_elements / op_block_num; | ||||
|  | ||||
|         uint64_t tails = n_elements % op_block_num; | ||||
|         if (op_block_idx < tails) { | ||||
|             dr += 1; | ||||
|             ir = dr * op_block_idx; | ||||
|         } else { | ||||
|             ir = dr * op_block_idx + tails; | ||||
|         } | ||||
|  | ||||
|         input_gm.SetGlobalBuffer((__gm__ half *)input); | ||||
|         indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices); | ||||
|         output_gm.SetGlobalBuffer((__gm__ float *)output); | ||||
|  | ||||
|         uint64_t input_local_buffer_size = ((input_ne[0] * sizeof(half) + 31) | ||||
|                                              & ~31); | ||||
|         uint64_t output_local_buffer_size = ((input_ne[0] * sizeof(float) + 31) | ||||
|                                               & ~31); | ||||
|  | ||||
|         local_buffer_elems = input_local_buffer_size / sizeof(half); | ||||
|  | ||||
|         // TODO, consider long row that can't put in UB. | ||||
|         // All data should asign to 32. It's ok because all data is align to 32. | ||||
|         pipe.InitBuffer(input_queue, BUFFER_NUM, input_local_buffer_size); | ||||
|         pipe.InitBuffer(output_queue, BUFFER_NUM, output_local_buffer_size); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_in(uint32_t offset, size_t len) { | ||||
|         size_t origin_len = len; | ||||
|         LocalTensor<half> input_local = input_queue.AllocTensor<half>(); | ||||
|         const size_t elem_per_block = 32 / sizeof(half); | ||||
|         size_t tail = len % elem_per_block; | ||||
|         len = len & ~(elem_per_block - 1); | ||||
|         if(tail != 0) { | ||||
|             len += elem_per_block; | ||||
|         } | ||||
|         DataCopy(input_local, input_gm[offset], len); | ||||
|         input_queue.EnQue(input_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_out(uint32_t offset, size_t len) { | ||||
|         LocalTensor<float> output_local = output_queue.DeQue<float>(); | ||||
|         const size_t elem_per_block = 32 / sizeof(float); | ||||
|         size_t tail = len % elem_per_block; | ||||
|         len = len & ~(elem_per_block - 1); | ||||
|         if (len > 0) { | ||||
|             DataCopy(output_gm[offset], output_local, len); | ||||
|         } | ||||
|  | ||||
|         if(tail != 0) { | ||||
| #ifdef ASCEND_310P | ||||
|             for (size_t i = tail; i < elem_per_block; i++) { | ||||
|                 output_local[len + i].SetValue(0, 0); | ||||
|             } | ||||
|             SetAtomicAdd<float>(); | ||||
|             DataCopy(output_gm[offset + len], output_local[len], elem_per_block); | ||||
|             SetAtomicNone(); | ||||
| #else | ||||
|             DataCopyExtParams dataCopyParams; | ||||
|             dataCopyParams.blockCount = 1; | ||||
|             dataCopyParams.blockLen = tail * sizeof(float); | ||||
|             DataCopyPad(output_gm[offset + len], output_local[len], | ||||
|                         dataCopyParams); | ||||
| #endif | ||||
|         } | ||||
|         output_queue.FreeTensor(output_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void calculate_row(int64_t idx) { | ||||
|         const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]); | ||||
|         const int64_t indices_ne1_idx = | ||||
|             (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) / | ||||
|             indices_ne[0]; | ||||
|         const int64_t indices_ne0_idx = | ||||
|             (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] - | ||||
|              indices_ne1_idx * indices_ne[0]); | ||||
|  | ||||
|         const int64_t indices_offset = indices_ne0_idx * indices_stride[0] + | ||||
|                                        indices_ne1_idx * indices_stride[1] + | ||||
|                                        indices_ne2_idx * indices_stride[2]; | ||||
|         const int32_t selected_row_idx = indices_gm.GetValue(indices_offset); | ||||
|  | ||||
|         const int64_t input_offset = selected_row_idx * input_stride[1] + | ||||
|                                      indices_ne1_idx * input_stride[2] + | ||||
|                                      indices_ne2_idx * input_stride[3]; | ||||
|  | ||||
|         const int64_t output_offset = indices_ne0_idx * output_stride[1] + | ||||
|                                       indices_ne1_idx * output_stride[2] + | ||||
|                                       indices_ne2_idx * output_stride[3]; | ||||
|  | ||||
|         copy_in(input_offset, input_ne[0]); | ||||
|         LocalTensor<half> input_local = input_queue.DeQue<half>(); | ||||
|         LocalTensor<float> output_local = output_queue.AllocTensor<float>(); | ||||
|  | ||||
|         Cast(output_local, input_local, RoundMode::CAST_NONE, | ||||
|              local_buffer_elems); | ||||
|         output_queue.EnQue(output_local); | ||||
|         copy_out(output_offset, input_ne[0]); | ||||
|  | ||||
|         input_queue.FreeTensor(input_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void calculate() { | ||||
|         for (int64_t i = ir; i < ir + dr; i++) { | ||||
|             calculate_row(i); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|    private: | ||||
|     int64_t input_ne[4]; | ||||
|     size_t input_stride[4]; | ||||
|  | ||||
|     int64_t indices_ne[4]; | ||||
|     size_t indices_stride[4]; | ||||
|  | ||||
|     int64_t output_ne[4]; | ||||
|     size_t output_stride[4]; | ||||
|  | ||||
|     size_t local_buffer_elems; | ||||
|  | ||||
|     int64_t ir; | ||||
|     int64_t dr; | ||||
|  | ||||
|     TPipe pipe; | ||||
|     GlobalTensor<half> input_gm; | ||||
|     GlobalTensor<int32_t> indices_gm; | ||||
|     GlobalTensor<float> output_gm; | ||||
|     TQue<QuePosition::VECIN, BUFFER_NUM> input_queue; | ||||
|     TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue; | ||||
|     int64_t op_block_idx; | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_get_row_f16( | ||||
|     GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, | ||||
|     GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm, | ||||
|     GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t indices_ne_ub[4]; | ||||
|     size_t indices_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|     size_t output_nb_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(indices_ne_gm, indices_ne_ub, 32); | ||||
|     copy_to_ub(indices_nb_gm, indices_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|     copy_to_ub(output_nb_gm, output_nb_ub, 32); | ||||
|  | ||||
|     GET_ROW_F16 op; | ||||
|     op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub, | ||||
|             indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
							
								
								
									
										190
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										190
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,190 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
// Gathers rows of an fp32 source tensor selected by an int32 index tensor
// and writes them unchanged to an fp32 output (GET_ROWS for the F32 source
// type). No cast is needed, so rows are staged through UB with a DataCopy.
class GET_ROW_F32 {
   public:
    __aicore__ inline GET_ROW_F32() {}
    // `*_ne_ub` hold the 4-dim element counts and `*_nb_ub` the 4-dim byte
    // strides of the input, indices and output tensors (already staged in UB).
    __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
                                int64_t *input_ne_ub, size_t *input_nb_ub,
                                int64_t *indices_ne_ub, size_t *indices_nb_ub,
                                int64_t *output_ne_ub, size_t *output_nb_ub) {
        int64_t op_block_num = GetBlockNum();
        op_block_idx = GetBlockIdx();

        // Convert byte strides to element strides; nb[0] is the element size.
        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            input_stride[i] = input_nb_ub[i] / input_nb_ub[0];

            indices_ne[i] = indices_ne_ub[i];
            indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];

            output_ne[i] = output_ne_ub[i];
            output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
        }

        // Indices has two dims. n_elements = all rows should get.
        // dr = number of rows this block handles; ir = its first row index.
        uint64_t n_elements =
            indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
        dr = n_elements / op_block_num;

        // The first `tails` blocks each take one extra row of the remainder.
        uint64_t tails = n_elements % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        input_gm.SetGlobalBuffer((__gm__ float *)input);
        indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
        output_gm.SetGlobalBuffer((__gm__ float *)output);

        // Round one row's byte size up to a 32-byte multiple, the DataCopy
        // transfer granularity used below.
        uint64_t local_buffer_size = ((input_ne[0] * sizeof(float) + 31) & ~31);
        local_buffer_elems = local_buffer_size / sizeof(float);

        // TODO, consider long row that can't put in UB.
        // All transfers are in 32-byte units; OK because the buffer above is
        // sized to a 32-byte multiple.
        pipe.InitBuffer(input_queue, BUFFER_NUM, local_buffer_size);
        pipe.InitBuffer(output_queue, BUFFER_NUM, local_buffer_size);
    }

    // Copies `len` fp32 elements from GM offset `offset` into a local tensor.
    // The transfer length is rounded UP to whole 32-byte blocks; the local
    // buffer was sized in init() to absorb this over-read.
    __aicore__ inline void copy_in(uint32_t offset, size_t len) {
        LocalTensor<float> input_local = input_queue.AllocTensor<float>();
        const size_t elem_per_block = 32 / sizeof(float);
        size_t tail = len % elem_per_block;
        len = len & ~(elem_per_block - 1);
        if(tail != 0) {
            len += elem_per_block;
        }
        DataCopy(input_local, input_gm[offset], len);
        input_queue.EnQue(input_local);
    }

    // Writes `len` fp32 elements back to GM. Whole 32-byte blocks go out via
    // DataCopy; a partial tail block is written with DataCopyPad, or on 310P
    // (which lacks DataCopyPad here) by zeroing the pad lanes and atomically
    // ADDing a full block so neighbouring output bytes are not clobbered.
    __aicore__ inline void copy_out(uint32_t offset, size_t len) {
        LocalTensor<float> output_local = output_queue.DeQue<float>();
        const size_t elem_per_block = 32 / sizeof(float);
        size_t tail = len % elem_per_block;
        len = len & ~(elem_per_block - 1);
        if (len > 0) {
            DataCopy(output_gm[offset], output_local, len);
        }

        if(tail != 0) {
#ifdef ASCEND_310P
            // Zero the padding lanes so the atomic add leaves them unchanged.
            for (size_t i = tail; i < elem_per_block; i++) {
                output_local[len + i].SetValue(0, 0);
            }
            SetAtomicAdd<float>();
            DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
            SetAtomicNone();
#else
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = tail * sizeof(float);
            DataCopyPad(output_gm[offset + len], output_local[len],
                        dataCopyParams);
#endif
        }
        output_queue.FreeTensor(output_local);
    }

    // Gathers one row: decompose flat row index `idx` into indices-tensor
    // coordinates, read the selected source row number, then copy the row
    // in, duplicate it locally, and copy it out to the output position.
    __aicore__ inline void calculate_row(int64_t idx) {
        const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
        const int64_t indices_ne1_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
            indices_ne[0];
        const int64_t indices_ne0_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
             indices_ne1_idx * indices_ne[0]);

        const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
                                       indices_ne1_idx * indices_stride[1] +
                                       indices_ne2_idx * indices_stride[2];
        const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);

        const int64_t input_offset = selected_row_idx * input_stride[1] +
                                     indices_ne1_idx * input_stride[2] +
                                     indices_ne2_idx * input_stride[3];

        const int64_t output_offset = indices_ne0_idx * output_stride[1] +
                                      indices_ne1_idx * output_stride[2] +
                                      indices_ne2_idx * output_stride[3];

        copy_in(input_offset, input_ne[0]);
        LocalTensor<float> input_local = input_queue.DeQue<float>();
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        DataCopy(output_local, input_local, local_buffer_elems);
        output_queue.EnQue(output_local);
        copy_out(output_offset, input_ne[0]);

        input_queue.FreeTensor(input_local);
    }

    // Processes this block's assigned row range [ir, ir + dr).
    __aicore__ inline void calculate() {
        for (int64_t i = ir; i < ir + dr; i++) {
            calculate_row(i);
        }
    }

   private:
    int64_t input_ne[4];       // input element counts per dim
    size_t input_stride[4];    // input element strides per dim

    int64_t indices_ne[4];     // indices element counts per dim
    size_t indices_stride[4];  // indices element strides per dim

    int64_t output_ne[4];      // output element counts per dim
    size_t output_stride[4];   // output element strides per dim

    size_t local_buffer_elems; // fp32 elements per staged (32B-padded) row

    int64_t ir;                // first row assigned to this block
    int64_t dr;                // number of rows assigned to this block

    TPipe pipe;
    GlobalTensor<float> input_gm;
    GlobalTensor<int32_t> indices_gm;
    GlobalTensor<float> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    int64_t op_block_idx;      // this AI core's block index
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_get_row_f32( | ||||
|     GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, | ||||
|     GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm, | ||||
|     GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t indices_ne_ub[4]; | ||||
|     size_t indices_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|     size_t output_nb_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(indices_ne_gm, indices_ne_ub, 32); | ||||
|     copy_to_ub(indices_nb_gm, indices_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|     copy_to_ub(output_nb_gm, output_nb_ub, 32); | ||||
|  | ||||
|     GET_ROW_F32 op; | ||||
|     op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub, | ||||
|             indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
							
								
								
									
										204
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										204
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,204 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P // 310P not support 4bit get row | ||||
    // Stub for Ascend 310P, which has no 4-bit get-row support. Prints an
    // error instead of aborting so that subsequent test cases can keep
    // running; any test case that calls this operator will of course fail.
    extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
        GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
        GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
        GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
        printf("Ascend310P not support 4bit get row.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
| #define QK4_0 32 | ||||
|  | ||||
// Gathers rows of a Q4_0-quantized source tensor selected by an int32 index
// tensor, dequantizing to fp32 on output. The source buffer holds the int4
// payload first, followed by one fp16 scale per QK4_0-element group (the
// scale block starts at byte offset total_elements / 2, two int4 per byte).
class GET_ROW_Q4_0 {
   public:
    __aicore__ inline GET_ROW_Q4_0() {}
    // `*_ne_ub` hold 4-dim element counts, `*_nb_ub` 4-dim byte strides.
    // Note: no input_nb is passed; input strides are derived from the shape
    // below because the quantized layout is contiguous.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
                                int64_t *input_ne_ub, int64_t *indices_ne_ub,
                                size_t *indices_nb_ub, int64_t *output_ne_ub,
                                size_t *output_nb_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        // Convert byte strides to element strides; nb[0] is the element size.
        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            indices_ne[i] = indices_ne_ub[i];
            indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
            scale_ne[i] = input_ne_ub[i];
            output_ne[i] = output_ne_ub[i];
            output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
        }

        // one scale for a group.
        scale_ne[0] /= QK4_0;

        // Derive contiguous element strides for the input and scale layouts.
        input_stride[0] = 1;
        scale_stride[0] = 1;
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            input_stride[i] = input_stride[i - 1] * input_ne[i - 1];
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        // Byte offset of the fp16 scale block: the int4 payload packs two
        // elements per byte, so it spans total_elements / 2 bytes.
        group_size_in_row = input_ne[0] / QK4_0;
        int64_t scale_offset = input_ne[0] * input_ne[1] * input_ne[2] *
                               input_ne[3] / 2;

        // Indices has two dims. n_elements = all rows should get.
        // dr = number of rows this block handles; ir = its first row index.
        uint64_t n_elements =
            indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
        dr = n_elements / op_block_num;

        // The first `tails` blocks each take one extra row of the remainder.
        uint64_t tails = n_elements % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        input_gm.SetGlobalBuffer((__gm__ int4b_t *)input);
        scale_gm.SetGlobalBuffer((__gm__ half *)(input + scale_offset));
        indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
        output_gm.SetGlobalBuffer((__gm__ float *)output);

        // Buffers hold one quantization group (QK4_0 elements) at a time.
        pipe.InitBuffer(input_queue, BUFFER_NUM, QK4_0 * sizeof(int4b_t));
        pipe.InitBuffer(cast_queue, BUFFER_NUM, QK4_0 * sizeof(half));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK4_0 * sizeof(float));
    }

    // Copies one QK4_0-element int4 group from GM into a local tensor.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<int4b_t> input_local = input_queue.AllocTensor<int4b_t>();
        // 32 * sizeof(int4b_t) = 16, which is not aligned to 32, why no error?
        DataCopy(input_local, input_gm[offset], QK4_0);
        input_queue.EnQue(input_local);
    }

    // Writes one dequantized QK4_0-element fp32 group back to GM.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<float> output_local = output_queue.DeQue<float>();
        DataCopy(output_gm[offset], output_local, QK4_0);
        output_queue.FreeTensor(output_local);
    }

    // Dequantizes one group of one gathered row: decompose flat row index
    // `idx` into indices-tensor coordinates, read the selected source row,
    // then cast int4 -> half -> float and multiply by the group's scale.
    __aicore__ inline void calculate_group(int64_t idx, int64_t group) {
        const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
        const int64_t indices_ne1_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
            indices_ne[0];
        const int64_t indices_ne0_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
             indices_ne1_idx * indices_ne[0]);

        const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
                                       indices_ne1_idx * indices_stride[1] +
                                       indices_ne2_idx * indices_stride[2];
        const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);

        const int64_t input_offset = selected_row_idx * input_stride[1] +
                                     indices_ne1_idx * input_stride[2] +
                                     indices_ne2_idx * input_stride[3] +
                                     group * QK4_0;
        const int64_t scale_offset = selected_row_idx * scale_stride[1] +
                                     indices_ne1_idx * scale_stride[2] +
                                     indices_ne2_idx * scale_stride[3] + group;
        const int64_t output_offset = indices_ne0_idx * output_stride[1] +
                                      indices_ne1_idx * output_stride[2] +
                                      indices_ne2_idx * output_stride[3] +
                                      group * QK4_0;

        copy_in(input_offset);
        LocalTensor<int4b_t> input_local = input_queue.DeQue<int4b_t>();
        LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        // TODO: cast more data to speed up.
        // Two-step cast: int4 -> half, then half -> float.
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
        Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);

        // Only mul need compile by group.
        half scale = scale_gm.GetValue(scale_offset);

        Muls(output_local, output_local, (float)scale, QK4_0);

        input_queue.FreeTensor(input_local);
        cast_queue.FreeTensor(cast_local);
        output_queue.EnQue(output_local);

        copy_out(output_offset);
    }

    // Processes every quantization group of every row in [ir, ir + dr).
    __aicore__ inline void calculate() {
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                calculate_group(i, j);
            }
        }
    }

   private:
    int64_t input_ne[4];       // input element counts per dim
    size_t input_stride[4];    // input element strides per dim

    int64_t scale_ne[4];       // scale counts per dim (ne[0] / QK4_0)
    size_t scale_stride[4];    // scale element strides per dim

    int64_t indices_ne[4];     // indices element counts per dim
    size_t indices_stride[4];  // indices element strides per dim

    int64_t output_ne[4];      // output element counts per dim
    size_t output_stride[4];   // output element strides per dim

    int64_t ir;                // first row assigned to this block
    int64_t dr;                // number of rows assigned to this block

    int64_t group_size_in_row; // quantization groups per row

    TPipe pipe;
    GlobalTensor<int4b_t> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int32_t> indices_gm;
    GlobalTensor<float> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_get_row_q4_0( | ||||
|     GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, | ||||
|     GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm, | ||||
|     GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     int64_t indices_ne_ub[4]; | ||||
|     size_t indices_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|     size_t output_nb_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(indices_ne_gm, indices_ne_ub, 32); | ||||
|     copy_to_ub(indices_nb_gm, indices_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|     copy_to_ub(output_nb_gm, output_nb_ub, 32); | ||||
|  | ||||
|     GET_ROW_Q4_0 op; | ||||
|     op.init(input_gm, indices_gm, output_gm, input_ne_ub, indices_ne_ub, | ||||
|             indices_nb_ub, output_ne_ub, output_nb_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										191
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										191
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,191 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| // optimize me. Use template to avoid copy code. | ||||
| using namespace AscendC; | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
|  | ||||
| #define QK8_0 32 | ||||
|  | ||||
// Dequantizes rows selected by an index tensor from a Q8_0-quantized input
// into a float output (ggml GET_ROWS for Q8_0 on Ascend NPU).
// Q8_0 layout: each group of QK8_0 int8 values shares one half-precision
// scale; the scales are stored contiguously after the int8 payload
// (see scale_offset below).
class GET_ROW_Q8_0 {
   public:
    __aicore__ inline GET_ROW_Q8_0() {}

    // Caches shapes/strides, splits the selected rows across AI cores, and
    // binds the global-memory buffers and local staging queues.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
                                int64_t *input_ne_ub, int64_t *indices_ne_ub,
                                size_t *indices_nb_ub, int64_t *output_ne_ub,
                                size_t *output_nb_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            indices_ne[i] = indices_ne_ub[i];
            // Byte strides (nb) become element strides after dividing by the
            // element size (nb[0]).
            indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
            scale_ne[i] = input_ne_ub[i];
            output_ne[i] = output_ne_ub[i];
            output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
        }

        // one scale for a group.
        scale_ne[0] /= QK8_0;

        // Input and scale tensors are treated as contiguous: strides are
        // derived from the shapes.
        input_stride[0] = 1;
        scale_stride[0] = 1;
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            input_stride[i] = input_stride[i - 1] * input_ne[i - 1];
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        group_size_in_row = input_ne[0] / QK8_0;
        // The half scales start right after the int8 payload in global memory.
        int64_t scale_offset = input_ne[0] * input_ne[1] * input_ne[2] *
                               input_ne[3] * sizeof(int8_t);

        // Indices has two dims. n_elements = all rows should get.
        // dr, all rows should this thread get.
        uint64_t n_elements =
            indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
        dr = n_elements / op_block_num;

        // The first `tails` cores each take one extra row; `ir` is this
        // core's starting flat index into the indices tensor.
        uint64_t tails = n_elements % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        input_gm.SetGlobalBuffer((__gm__ int8_t *)input);
        scale_gm.SetGlobalBuffer((__gm__ half *)(input + scale_offset));
        indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
        output_gm.SetGlobalBuffer((__gm__ float *)output);

        // One group (QK8_0 elements) is staged per queue slot.
        pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
        pipe.InitBuffer(cast_queue, BUFFER_NUM, QK8_0 * sizeof(half));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(float));
    }

    // Stages one quantized group from global memory into the input queue.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<int8_t> input_local = input_queue.AllocTensor<int8_t>();
        DataCopy(input_local, input_gm[offset], QK8_0);
        input_queue.EnQue(input_local);
    }

    // Writes one dequantized float group back to global memory.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<float> output_local = output_queue.DeQue<float>();
        DataCopy(output_gm[offset], output_local, QK8_0);
        output_queue.FreeTensor(output_local);
    }

    // Dequantizes one group of the row selected by flat index `idx`.
    __aicore__ inline void calculate_group(int64_t idx, int64_t group) {
        // Decompose the flat index into 3D indices coordinates.
        const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
        const int64_t indices_ne1_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
            indices_ne[0];
        const int64_t indices_ne0_idx =
            (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
             indices_ne1_idx * indices_ne[0]);

        const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
                                       indices_ne1_idx * indices_stride[1] +
                                       indices_ne2_idx * indices_stride[2];
        // The index tensor holds the source row to gather.
        const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);

        const int64_t input_offset = selected_row_idx * input_stride[1] +
                                     indices_ne1_idx * input_stride[2] +
                                     indices_ne2_idx * input_stride[3] +
                                     group * QK8_0;
        const int64_t scale_offset = selected_row_idx * scale_stride[1] +
                                     indices_ne1_idx * scale_stride[2] +
                                     indices_ne2_idx * scale_stride[3] + group;
        const int64_t output_offset = indices_ne0_idx * output_stride[1] +
                                      indices_ne1_idx * output_stride[2] +
                                      indices_ne2_idx * output_stride[3] +
                                      group * QK8_0;

        copy_in(input_offset);
        LocalTensor<int8_t> input_local = input_queue.DeQue<int8_t>();
        LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        // TODO: cast more data to speed up.
        // Widen int8 -> half -> float, then apply the group's scale.
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK8_0);
        Cast(output_local, cast_local, RoundMode::CAST_NONE, QK8_0);

        // Only mul need compile by group.
        half scale = scale_gm.GetValue(scale_offset);
        Muls(output_local, output_local, (float)scale, QK8_0);

        input_queue.FreeTensor(input_local);
        cast_queue.FreeTensor(cast_local);
        output_queue.EnQue(output_local);

        copy_out(output_offset);
    }

    // Processes every group of every row assigned to this core.
    __aicore__ inline void calculate() {
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                calculate_group(i, j);
            }
        }
    }

   private:
    int64_t input_ne[4];
    size_t input_stride[4];

    int64_t scale_ne[4];
    size_t scale_stride[4];

    int64_t indices_ne[4];
    size_t indices_stride[4];

    int64_t output_ne[4];
    size_t output_stride[4];

    int64_t ir;  // first flat index handled by this core
    int64_t dr;  // number of indices handled by this core

    int64_t group_size_in_row;

    TPipe pipe;
    GlobalTensor<int8_t> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int32_t> indices_gm;
    GlobalTensor<float> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: copies the shape/stride parameter arrays (4 elements of
// 8 bytes each = 32 bytes per array) from global memory into stack buffers,
// then runs the GET_ROW_Q8_0 operator on this core's share of the rows.
extern "C" __global__ __aicore__ void ascendc_get_row_q8_0(
    GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
    GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
    GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
    int64_t input_ne_ub[4];
    int64_t indices_ne_ub[4];
    size_t indices_nb_ub[4];
    int64_t output_ne_ub[4];
    size_t output_nb_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
    copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);
    copy_to_ub(output_nb_gm, output_nb_ub, 32);

    GET_ROW_Q8_0 op;
    op.init(input_gm, indices_gm, output_gm, input_ne_ub, indices_ne_ub,
            indices_nb_ub, output_ne_ub, output_nb_ub);
    op.calculate();
}
							
								
								
									
										218
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,218 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P | ||||
    // Stub for Ascend 310P, which lacks f16->8bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f16->8bit quantization.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| #define QK8_0 32 | ||||
|  | ||||
// Quantizes a half-precision tensor to Q8_0: each group of QK8_0 values is
// stored as int8 with one shared half scale. The int8 payload fills the
// output tensor; the per-group scales are appended after it in global memory.
class QUANTIZE_F16_Q8_0 {
   public:
    __aicore__ inline QUANTIZE_F16_Q8_0() {}

    // Caches shapes/strides, splits the input rows across AI cores, and
    // binds the global-memory buffers and local staging queues.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR output,
                                int64_t *input_ne_ub, size_t *input_nb_ub,
                                int64_t *output_ne_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            // Byte strides (nb) become element strides after dividing by the
            // element size (nb[0]).
            input_stride[i] = input_nb_ub[i] / input_nb_ub[0];

            output_ne[i] = output_ne_ub[i];
        }

        // Output strides are derived from the shape (contiguous layout).
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
        }

        // One scale per QK8_0-sized group along dim 0.
        scale_ne = input_ne;
        scale_stride[0] = 1;
        scale_stride[1] = input_ne[0] / QK8_0;
        for (int i = 2; i < 4; i++) {
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        // split input tensor by rows.
        uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
        dr = nr / op_block_num;

        // The first `tails` cores take one extra row each; `ir` is this
        // core's first row.
        uint64_t tails = nr % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        group_size_in_row = scale_stride[1];
        int64_t output_size = output_ne[0] * output_ne[1] * output_ne[2] *
                              output_ne[3] * sizeof(uint8_t);

        input_gm.SetGlobalBuffer((__gm__ half *)input);
        output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
        // Scales live right after the int8 payload; each core writes its own
        // slice starting at its first row's group.
        scale_gm.SetGlobalBuffer((__gm__ half *)(output + output_size + ir *
                                                 group_size_in_row *
                                                 sizeof(half)));

        pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(half));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
        pipe.InitBuffer(work_queue, 1, 32);
        pipe.InitBuffer(max_queue, 1, 32);
        pipe.InitBuffer(abs_queue, 1, QK8_0 * sizeof(float));
        pipe.InitBuffer(scale_queue, 1, 32);
        pipe.InitBuffer(cast_queue ,1 ,QK8_0 * sizeof(float));
    }

    // Stages one group of half values from global memory.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<half> input_local = input_queue.AllocTensor<half>();
        DataCopy(input_local, input_gm[offset], QK8_0);
        input_queue.EnQue(input_local);
    }

    // Writes one quantized int8 group back to global memory.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<int8_t> output_local = output_queue.DeQue<int8_t>();
        DataCopy(output_gm[offset], output_local, QK8_0);
        output_queue.FreeTensor(output_local);
    }

    // Quantizes group `group` of row `row`; returns the group's scale.
    __aicore__ inline half calculate_group(int64_t row, int64_t group) {
        // Decompose the flat row index into (i1, i2, i3) coordinates.
        const int64_t i3 = row / (input_ne[1] * input_ne[2]);
        const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
        const int64_t i1 =
            row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];

        const int64_t input_offset = i1 * input_stride[1] +
                                     i2 * input_stride[2] +
                                     i3 * input_stride[3] + QK8_0 * group;

        const int64_t output_offset = i1 * output_stride[1] +
                                      i2 * output_stride[2] +
                                      i3 * output_stride[3] + QK8_0 * group;

        copy_in(input_offset);
        LocalTensor<half> input_local = input_queue.DeQue<half>();
        LocalTensor<int8_t> output_local = output_queue.AllocTensor<int8_t>();
        LocalTensor<float> work_local = work_queue.AllocTensor<float>();
        LocalTensor<float> abs_local = abs_queue.AllocTensor<float>();
        LocalTensor<float> max_local = max_queue.AllocTensor<float>();
        LocalTensor<float> cast_local = cast_queue.AllocTensor<float>();

        // Compute d = max(|x|) / 127 in float precision.
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK8_0);
        Abs(abs_local, cast_local, QK8_0);
        ReduceMax(max_local, abs_local, work_local, QK8_0);

        pipe_barrier(PIPE_ALL);
        float d = max_local.GetValue(0);
        d = d / ((1 << 7) - 1);
        if (d != 0) {
            // Scale values into [-127, 127]; skip if the group is all zeros.
            Muls(cast_local, cast_local, 1.0f / d, QK8_0);
        }

        // Round in float, then narrow float -> half -> int8 (no direct
        // float->int8 cast is used here).
        Cast(cast_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(input_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(output_local, input_local, RoundMode::CAST_ROUND, QK8_0);
        output_queue.EnQue(output_local);
        copy_out(output_offset);

        input_queue.FreeTensor(input_local);
        work_queue.FreeTensor(work_local);
        abs_queue.FreeTensor(abs_local);
        max_queue.FreeTensor(max_local);
        cast_queue.FreeTensor(cast_local);
        return (half)d;
    }

    // Quantizes this core's rows. Scales are buffered 16 at a time (one
    // 32-byte block) and flushed to scale_gm; the tail (<16) is flushed with
    // a byte-exact DataCopyPad.
    __aicore__ inline void calculate() {
        LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
        uint32_t scale_local_offset = 0;
        uint32_t scale_global_offset = 0;
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                half scale = calculate_group(i, j);
                scale_local.SetValue(scale_local_offset++, scale);
                if (scale_local_offset == 16) {
                    scale_local_offset = 0;
                    // TODO: OPTIMIZE ME
                    pipe_barrier(PIPE_ALL);
                    DataCopy(scale_gm[scale_global_offset], scale_local, 16);
                    pipe_barrier(PIPE_ALL);
                    scale_global_offset += 16;
                }
            }
        }

        if (scale_local_offset != 0) {
            pipe_barrier(PIPE_ALL);
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = scale_local_offset * sizeof(half);
            DataCopyPad(scale_gm[scale_global_offset], scale_local,
                        dataCopyParams);
            pipe_barrier(PIPE_ALL);
        }
    }

   private:
    int64_t input_ne[4];
    size_t input_stride[4];

    // Alias of input_ne (the scale tensor shares the input's outer dims).
    int64_t *scale_ne;
    size_t scale_stride[4];

    int64_t output_ne[4];
    size_t output_stride[4];

    int64_t group_size_in_row;

    int64_t ir;  // first row handled by this core
    int64_t dr;  // number of rows handled by this core

    TPipe pipe;
    GlobalTensor<half> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int8_t> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, 1> work_queue;
    TQue<QuePosition::VECOUT, 1> max_queue;
    TQue<QuePosition::VECIN, 1> abs_queue;
    TQue<QuePosition::VECOUT, 1> scale_queue;
    TQue<QuePosition::VECOUT, 1> cast_queue;

};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: copies the shape/stride parameter arrays (4 elements of
// 8 bytes each = 32 bytes per array) into stack buffers, then runs the
// f16 -> Q8_0 quantization operator on this core's share of the rows.
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
    GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
    GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    int64_t output_ne_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);

    QUANTIZE_F16_Q8_0 op;
    op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
    op.calculate();
}
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										216
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,216 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P // 310P not support f32->8bit quantization | ||||
    // Stub for Ascend 310P, which lacks f32->8bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f32->8bit quantization.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| #define QK8_0 32 | ||||
|  | ||||
// Quantizes a float tensor to Q8_0: each group of QK8_0 values is stored as
// int8 with one shared half scale. The int8 payload fills the output tensor;
// the per-group scales are appended after it in global memory.
class QUANTIZE_F32_Q8_0 {
   public:
    __aicore__ inline QUANTIZE_F32_Q8_0() {}

    // Caches shapes/strides, splits the input rows across AI cores, and
    // binds the global-memory buffers and local staging queues.
    __aicore__ inline void init(GM_ADDR input, GM_ADDR output,
                                int64_t *input_ne_ub, size_t *input_nb_ub,
                                int64_t *output_ne_ub) {
        int64_t op_block_num = GetBlockNum();
        int64_t op_block_idx = GetBlockIdx();

        for (int i = 0; i < 4; i++) {
            input_ne[i] = input_ne_ub[i];
            // Byte strides (nb) become element strides after dividing by the
            // element size (nb[0]).
            input_stride[i] = input_nb_ub[i] / input_nb_ub[0];

            output_ne[i] = output_ne_ub[i];
        }

        // Output strides are derived from the shape (contiguous layout).
        output_stride[0] = 1;
        for (int i = 1; i < 4; i++) {
            output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
        }

        // One scale per QK8_0-sized group along dim 0.
        scale_ne = input_ne;
        scale_stride[0] = 1;
        scale_stride[1] = input_ne[0] / QK8_0;
        for (int i = 2; i < 4; i++) {
            scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
        }

        // split input tensor by rows.
        uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
        dr = nr / op_block_num;

        // The first `tails` cores take one extra row each; `ir` is this
        // core's first row.
        uint64_t tails = nr % op_block_num;
        if (op_block_idx < tails) {
            dr += 1;
            ir = dr * op_block_idx;
        } else {
            ir = dr * op_block_idx + tails;
        }

        group_size_in_row = scale_stride[1];
        int64_t output_size = output_ne[0] * output_ne[1] * output_ne[2] *
                              output_ne[3] * sizeof(uint8_t);

        input_gm.SetGlobalBuffer((__gm__ float *)input);
        output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
        // Scales live right after the int8 payload; each core writes its own
        // slice starting at its first row's group.
        scale_gm.SetGlobalBuffer((__gm__ half *)(output + output_size +
                                                 ir * group_size_in_row *
                                                 sizeof(half)));

        pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(float));
        pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
        pipe.InitBuffer(work_queue, 1, 32);
        pipe.InitBuffer(max_queue, 1, 32);
        pipe.InitBuffer(abs_queue, 1, QK8_0 * sizeof(float));
        pipe.InitBuffer(cast_queue, 1, QK8_0 * sizeof(half));
        pipe.InitBuffer(scale_queue, 1, 32);
    }

    // Stages one group of float values from global memory.
    __aicore__ inline void copy_in(uint32_t offset) {
        LocalTensor<float> input_local = input_queue.AllocTensor<float>();
        DataCopy(input_local, input_gm[offset], QK8_0);
        input_queue.EnQue(input_local);
    }

    // Writes one quantized int8 group back to global memory.
    __aicore__ inline void copy_out(uint32_t offset) {
        LocalTensor<int8_t> output_local = output_queue.DeQue<int8_t>();
        DataCopy(output_gm[offset], output_local, QK8_0);
        output_queue.FreeTensor(output_local);
    }

    // Quantizes group `group` of row `row`; returns the group's scale.
    __aicore__ inline half calculate_group(int64_t row, int64_t group) {
        // Decompose the flat row index into (i1, i2, i3) coordinates.
        const int64_t i3 = row / (input_ne[1] * input_ne[2]);
        const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
        const int64_t i1 =
            row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];

        const int64_t input_offset = i1 * input_stride[1] +
                                     i2 * input_stride[2] +
                                     i3 * input_stride[3] + QK8_0 * group;

        const int64_t output_offset = i1 * output_stride[1] +
                                      i2 * output_stride[2] +
                                      i3 * output_stride[3] + QK8_0 * group;

        copy_in(input_offset);
        LocalTensor<float> input_local = input_queue.DeQue<float>();
        LocalTensor<int8_t> output_local = output_queue.AllocTensor<int8_t>();
        LocalTensor<float> work_local = work_queue.AllocTensor<float>();
        LocalTensor<float> abs_local = abs_queue.AllocTensor<float>();
        LocalTensor<float> max_local = max_queue.AllocTensor<float>();
        LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();

        // Compute d = max(|x|) / 127.
        Abs(abs_local, input_local, QK8_0);
        ReduceMax(max_local, abs_local, work_local, QK8_0);
        pipe_barrier(PIPE_ALL);
        float d = max_local.GetValue(0);
        d = d / ((1 << 7) - 1);
        if (d != 0) {
            // Scale values into [-127, 127]; skip if the group is all zeros.
            Muls(input_local, input_local, 1.0f / d, QK8_0);
        }

        // Round in float, then narrow float -> half -> int8 (no direct
        // float->int8 cast is used here).
        Cast(input_local, input_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(cast_local, input_local, RoundMode::CAST_ROUND, QK8_0);
        Cast(output_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
        output_queue.EnQue(output_local);
        copy_out(output_offset);

        input_queue.FreeTensor(input_local);
        work_queue.FreeTensor(work_local);
        abs_queue.FreeTensor(abs_local);
        max_queue.FreeTensor(max_local);
        cast_queue.FreeTensor(cast_local);

        return (half)d;
    }

    // Quantizes this core's rows. Scales are buffered 16 at a time (one
    // 32-byte block) and flushed to scale_gm; the tail (<16) is flushed with
    // a byte-exact DataCopyPad.
    __aicore__ inline void calculate() {
        LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
        uint32_t scale_local_offset = 0;
        uint32_t scale_global_offset = 0;
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                half scale = calculate_group(i, j);
                scale_local.SetValue(scale_local_offset++, scale);
                if (scale_local_offset == 16) {
                    scale_local_offset = 0;
                    // TODO: OPTIMIZE ME
                    pipe_barrier(PIPE_ALL);
                    DataCopy(scale_gm[scale_global_offset], scale_local, 16);
                    pipe_barrier(PIPE_ALL);
                    scale_global_offset += 16;
                }
            }
        }

        if (scale_local_offset != 0) {
            pipe_barrier(PIPE_ALL);
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = scale_local_offset * sizeof(half);
            DataCopyPad(scale_gm[scale_global_offset], scale_local,
                        dataCopyParams);
            pipe_barrier(PIPE_ALL);
        }
    }

   private:
    int64_t input_ne[4];
    size_t input_stride[4];

    // Alias of input_ne (the scale tensor shares the input's outer dims).
    int64_t *scale_ne;
    size_t scale_stride[4];

    int64_t output_ne[4];
    size_t output_stride[4];

    int64_t group_size_in_row;

    int64_t ir;  // first row handled by this core
    int64_t dr;  // number of rows handled by this core

    TPipe pipe;
    GlobalTensor<float> input_gm;
    GlobalTensor<half> scale_gm;
    GlobalTensor<int8_t> output_gm;
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, 1> work_queue;
    TQue<QuePosition::VECOUT, 1> max_queue;
    TQue<QuePosition::VECIN, 1> abs_queue;
    TQue<QuePosition::VECIN, 1> cast_queue;
    TQue<QuePosition::VECOUT, 1> scale_queue;
};
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
// Kernel entry: copies the shape/stride parameter arrays (4 elements of
// 8 bytes each = 32 bytes per array) into stack buffers, then runs the
// f32 -> Q8_0 quantization operator on this core's share of the rows.
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
    GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
    GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
    int64_t input_ne_ub[4];
    size_t input_nb_ub[4];
    int64_t output_ne_ub[4];

    copy_to_ub(input_ne_gm, input_ne_ub, 32);
    copy_to_ub(input_nb_gm, input_nb_ub, 32);
    copy_to_ub(output_ne_gm, output_ne_ub, 32);

    QUANTIZE_F32_Q8_0 op;
    op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
    op.calculate();
}
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										295
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										295
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,295 @@ | ||||
| #include "kernel_operator.h" | ||||
|  | ||||
| using namespace AscendC; | ||||
| #ifdef ASCEND_310P // 310P not support float->4bit quantization | ||||
    // Stub for Ascend 310P, which lacks f32->4bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f32->4bit quantization.\n");
    }
|  | ||||
    // Stub for Ascend 310P, which lacks f16->4bit quantization support.
    // Keeps subsequent test cases runnable by only reporting the error; of
    // course any test case that calls this operator fails.
    extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
        GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
        GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
        printf("Ascend310P not support f16->4bit quantization.\n");
    }
| #else | ||||
|  | ||||
| #define BUFFER_NUM 2 | ||||
| #define Group_Size 32 | ||||
|  | ||||
| template <typename SRC_T> | ||||
| class QUANTIZE_FLOAT_TO_Q4_0 { | ||||
|    public: | ||||
|     __aicore__ inline QUANTIZE_FLOAT_TO_Q4_0() {} | ||||
|     __aicore__ inline void init(GM_ADDR input, GM_ADDR output, | ||||
|                                 int64_t *input_ne_ub, size_t *input_nb_ub, | ||||
|                                 int64_t *output_ne_ub) { | ||||
|         // TODO: fix test_case CPY(type_src=f16,type_dst=q4_0,ne=[256,4,4,4], | ||||
|         //                         permute=[0,0,0,0]): | ||||
|         // [CPY] NMSE = 0.000008343 > 0.000001000 FAIL | ||||
|         int64_t op_block_num = GetBlockNum(); | ||||
|         int64_t op_block_idx = GetBlockIdx(); | ||||
|  | ||||
|         // input stride of data elements | ||||
|         for (int i = 0; i < 4; i++) { | ||||
|             input_ne[i] = input_ne_ub[i]; | ||||
|             input_stride[i] = input_nb_ub[i] / input_nb_ub[0]; | ||||
|             output_ne[i] = output_ne_ub[i]; | ||||
|         } | ||||
|  | ||||
|         // output stride of data elements | ||||
|         output_stride[0] = 1; | ||||
|         for (int i = 1; i < 4; i++) { | ||||
|             output_stride[i] = output_stride[i - 1] * output_ne[i - 1]; | ||||
|         } | ||||
|  | ||||
|         // scale saved one by one after data:. [group1_scale, group2_scale, ...] | ||||
|         scale_ne = input_ne; | ||||
|         scale_stride[0] = 1; | ||||
|         scale_stride[1] = input_ne[0] / Group_Size; | ||||
|         for (int i = 2; i < 4; i++) { | ||||
|             scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1]; | ||||
|         } | ||||
|  | ||||
|         // split input tensor by rows. | ||||
|         uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3]; | ||||
|         dr = nr / op_block_num; | ||||
|  | ||||
|         uint64_t tails = nr % op_block_num; | ||||
|         if (op_block_idx < tails) { | ||||
|             dr += 1; | ||||
|             ir = dr * op_block_idx; | ||||
|         } else { | ||||
|             ir = dr * op_block_idx + tails; | ||||
|         } | ||||
|  | ||||
|         group_size_in_row = scale_stride[1]; | ||||
|         int64_t scale_offset = output_ne[0] * output_ne[1] * output_ne[2] * | ||||
|                               output_ne[3] * sizeof(uint8_t) / 2; | ||||
|  | ||||
|         input_gm.SetGlobalBuffer((__gm__ SRC_T *)input); | ||||
|         output_gm.SetGlobalBuffer((__gm__ int8_t *)output); | ||||
|         scale_gm.SetGlobalBuffer((__gm__ half *)(output + scale_offset + ir * | ||||
|                                                  group_size_in_row * | ||||
|                                                  sizeof(half))); | ||||
|  | ||||
|         pipe.InitBuffer(input_queue, BUFFER_NUM, Group_Size * sizeof(SRC_T)); | ||||
|         pipe.InitBuffer(output_queue, BUFFER_NUM, | ||||
|                             Group_Size * sizeof(int8_t) / 2); | ||||
|         pipe.InitBuffer(cast_queue , 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(work_queue, 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(max_queue, 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(min_queue, 1, Group_Size * sizeof(float)); | ||||
|         pipe.InitBuffer(scale_queue, 1, Group_Size / 2 * sizeof(half)); | ||||
|         pipe.InitBuffer(int8_queue, 1, Group_Size * sizeof(int8_t)); | ||||
|         pipe.InitBuffer(half_queue, 1, Group_Size * sizeof(half)); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_in(uint32_t offset) { | ||||
|         LocalTensor<SRC_T> input_local = input_queue.AllocTensor<SRC_T>(); | ||||
|         DataCopy(input_local, input_gm[offset], Group_Size); | ||||
|         input_queue.EnQue(input_local); | ||||
|     } | ||||
|  | ||||
|     __aicore__ inline void copy_out(uint32_t offset) { | ||||
|         // reinterpretcast Group_Size(32) * int4b_t to Group_Size / 2 * int8_t, | ||||
|         // and using DataCopyPad to avoid 32 bits align. | ||||
|         LocalTensor<int4b_t> output_local = output_queue.DeQue<int4b_t>(); | ||||
|         LocalTensor<int8_t> output_int8_local = | ||||
|                                     output_local.ReinterpretCast<int8_t>(); | ||||
|  | ||||
|         DataCopyExtParams dataCopyParams; | ||||
|         dataCopyParams.blockCount = 1; | ||||
|         dataCopyParams.blockLen = Group_Size / 2  * sizeof(int8_t); | ||||
|         DataCopyPad(output_gm[offset], output_int8_local, dataCopyParams); | ||||
|  | ||||
|         output_queue.FreeTensor(output_local); | ||||
|     } | ||||
|  | ||||
    // float source: the data is already float, so a plain copy suffices.
    __aicore__ inline void input_to_cast(LocalTensor<float> cast_local,
                                         LocalTensor<float> input_local) {
        DataCopy(cast_local, input_local, Group_Size);
    }
|  | ||||
    // half source: widen each element to float before further arithmetic.
    __aicore__ inline void input_to_cast(LocalTensor<float> cast_local,
                                         LocalTensor<half> input_local) {
        Cast(cast_local, input_local, RoundMode::CAST_NONE, Group_Size);
    }
|  | ||||
|     __aicore__ inline half calculate_group(int64_t row, int64_t group) { | ||||
|         const int64_t i3 = row / (input_ne[1] * input_ne[2]); | ||||
|         const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1]; | ||||
|         const int64_t i1 = | ||||
|             row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1]; | ||||
|  | ||||
|         const int64_t input_offset = i1 * input_stride[1] + | ||||
|                                      i2 * input_stride[2] + | ||||
|                                      i3 * input_stride[3] + Group_Size * group; | ||||
|  | ||||
|         // output_offset is stride for output_gm which datatype is int8_t and | ||||
|         // divided by 2 is needed for int4b_t. | ||||
|         const int64_t output_offset = (i1 * output_stride[1] + | ||||
|                                        i2 * output_stride[2] + | ||||
|                                        i3 * output_stride[3] + | ||||
|                                        Group_Size * group) / 2; | ||||
|         copy_in(input_offset); | ||||
|  | ||||
|         LocalTensor<SRC_T> input_local = input_queue.DeQue<SRC_T>(); | ||||
|         LocalTensor<int4b_t> output_local = output_queue.AllocTensor<int4b_t>(); | ||||
|         LocalTensor<float> cast_local = cast_queue.AllocTensor<float>(); | ||||
|         LocalTensor<float> work_local = work_queue.AllocTensor<float>(); | ||||
|         LocalTensor<float> max_local = max_queue.AllocTensor<float>(); | ||||
|         LocalTensor<float> min_local = min_queue.AllocTensor<float>(); | ||||
|         LocalTensor<int8_t> int8_local = int8_queue.AllocTensor<int8_t>(); | ||||
|         LocalTensor<half> half_local = half_queue.AllocTensor<half>(); | ||||
|  | ||||
|         input_to_cast(cast_local, input_local); | ||||
|  | ||||
|         ReduceMax(max_local, cast_local, work_local, Group_Size); | ||||
|         ReduceMin(min_local, cast_local, work_local, Group_Size); | ||||
|         const float max_value = max_local.GetValue(0); | ||||
|         const float min_value = min_local.GetValue(0); | ||||
|         float d = max_value; | ||||
|         if (min_value < 0 && (-1 * min_value) > max_value) { | ||||
|             d = min_value; | ||||
|         } | ||||
|  | ||||
|         d = d / (-8); | ||||
|         if (d != 0) { | ||||
|             Muls(cast_local, cast_local, 1.0f / d, Group_Size); | ||||
|         } | ||||
|  | ||||
|         // range: [-8,8] -> [0.5,16.5] -> [0,16] -> [0,15] -> [-8,7] | ||||
|         float scalar = 8.5f; | ||||
|         Adds(cast_local, cast_local, scalar, Group_Size); | ||||
|         Cast(cast_local, cast_local, RoundMode::CAST_FLOOR, Group_Size); | ||||
|         scalar = 15.0f; | ||||
|         Mins(cast_local, cast_local, scalar, Group_Size); | ||||
|         scalar = -8.0f; | ||||
|         Adds(cast_local, cast_local, scalar, Group_Size); | ||||
|  | ||||
|         // float->half->int4b | ||||
|         Cast(half_local, cast_local, RoundMode::CAST_NONE, Group_Size); | ||||
|         Cast(output_local, half_local, RoundMode::CAST_NONE, Group_Size); | ||||
|  | ||||
|         output_queue.EnQue(output_local); | ||||
|         copy_out(output_offset); | ||||
|  | ||||
|         input_queue.FreeTensor(input_local); | ||||
|         work_queue.FreeTensor(work_local); | ||||
|         max_queue.FreeTensor(max_local); | ||||
|         min_queue.FreeTensor(min_local); | ||||
|         int8_queue.FreeTensor(int8_local); | ||||
|         half_queue.FreeTensor(half_local); | ||||
|         cast_queue.FreeTensor(cast_local); | ||||
|         return (half)d; | ||||
|     } | ||||
|  | ||||
    // Quantizes every group in this core's row range [ir, ir + dr) and
    // writes the per-group scales to scale_gm.
    //
    // Scales are accumulated into a local tensor and flushed to global
    // memory Group_Size / 2 entries at a time; a partially filled batch is
    // flushed with DataCopyPad at the end.
    __aicore__ inline void calculate() {
        LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
        uint32_t scale_local_offset = 0;
        uint32_t scale_global_offset = 0;
        for (int64_t i = ir; i < ir + dr; i++) {
            for (int64_t j = 0; j < group_size_in_row; j++) {
                half scale = calculate_group(i, j);
                scale_local.SetValue(scale_local_offset++, scale);
                // Copy Group_Size/2 length data each time.
                if (scale_local_offset == Group_Size / 2) {
                    scale_local_offset = 0;
                    // TODO: OPTIMIZE ME
                    // Full barriers bracket the copy so SetValue writes are
                    // visible before the transfer and the buffer is safe to
                    // reuse afterwards.
                    pipe_barrier(PIPE_ALL);
                    DataCopy(scale_gm[scale_global_offset], scale_local,
                                      Group_Size / 2);
                    pipe_barrier(PIPE_ALL);
                    scale_global_offset += Group_Size / 2;
                }
            }
        }

        // Flush the tail that did not fill a whole Group_Size / 2 batch;
        // DataCopyPad handles the arbitrary (non-32-byte-aligned) length.
        if (scale_local_offset != 0) {
            pipe_barrier(PIPE_ALL);
            DataCopyExtParams dataCopyParams;
            dataCopyParams.blockCount = 1;
            dataCopyParams.blockLen = scale_local_offset * sizeof(half);
            DataCopyPad(scale_gm[scale_global_offset], scale_local,
                        dataCopyParams);
            pipe_barrier(PIPE_ALL);
        }
        scale_queue.FreeTensor(scale_local);
    }
|  | ||||
   private:
    int64_t input_ne[4];     // input extents per dimension (elements)
    size_t input_stride[4];  // input strides in elements (derived in init())

    int64_t *scale_ne;       // aliases input_ne (assigned in init())
    size_t scale_stride[4];  // strides of the per-group scale array

    int64_t output_ne[4];    // output extents per dimension (elements)
    size_t output_stride[4]; // contiguous output strides (computed in init())

    int64_t group_size_in_row; // number of Group_Size groups per row

    int64_t ir; // first row handled by this core
    int64_t dr; // number of rows handled by this core

    TPipe pipe;
    GlobalTensor<SRC_T> input_gm;   // source data in global memory
    GlobalTensor<half> scale_gm;    // per-group scales, placed after the quantized data
    GlobalTensor<int8_t> output_gm; // packed int4 output viewed as bytes
    TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
    TQue<QuePosition::VECIN, BUFFER_NUM> work_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> max_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> min_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> scale_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> cast_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> int8_queue;
    TQue<QuePosition::VECOUT, BUFFER_NUM> half_queue;
| }; | ||||
|  | ||||
| template <typename T> | ||||
| __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) { | ||||
|     auto gm_ptr = (__gm__ uint8_t *)gm; | ||||
|     auto ub_ptr = (uint8_t *)(ub); | ||||
|     for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) { | ||||
|         *ub_ptr = *gm_ptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0( | ||||
|     GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, | ||||
|     GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|  | ||||
|     QUANTIZE_FLOAT_TO_Q4_0<half> op; | ||||
|     op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
|  | ||||
| extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0( | ||||
|     GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, | ||||
|     GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { | ||||
|     int64_t input_ne_ub[4]; | ||||
|     size_t input_nb_ub[4]; | ||||
|     int64_t output_ne_ub[4]; | ||||
|  | ||||
|     copy_to_ub(input_ne_gm, input_ne_ub, 32); | ||||
|     copy_to_ub(input_nb_gm, input_nb_ub, 32); | ||||
|     copy_to_ub(output_ne_gm, output_ne_ub, 32); | ||||
|  | ||||
|     QUANTIZE_FLOAT_TO_Q4_0<float> op; | ||||
|     op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); | ||||
|     op.calculate(); | ||||
| } | ||||
|  | ||||
| #endif // #ifdef ASCEND_310P | ||||
							
								
								
									
										1853
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1853
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										342
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										342
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,342 @@ | ||||
# Builds one CPU backend library variant.
#
# When tag_name is non-empty the library is named ggml-cpu-<tag_name>
# (used when several ISA-specific variants are built); otherwise it is the
# plain ggml-cpu library. Architecture-specific compiler flags and macro
# definitions are accumulated into ARCH_FLAGS / ARCH_DEFINITIONS and applied
# to the target at the end of the function.
function(ggml_add_cpu_backend_variant_impl tag_name)
    if (tag_name)
        set(GGML_CPU_NAME ggml-cpu-${tag_name})
    else()
        set(GGML_CPU_NAME ggml-cpu)
    endif()

    ggml_add_backend_library(${GGML_CPU_NAME})

    list (APPEND GGML_CPU_SOURCES
        ggml-cpu/ggml-cpu.c
        ggml-cpu/ggml-cpu.cpp
        ggml-cpu/ggml-cpu-aarch64.cpp
        ggml-cpu/ggml-cpu-aarch64.h
        ggml-cpu/ggml-cpu-hbm.cpp
        ggml-cpu/ggml-cpu-hbm.h
        ggml-cpu/ggml-cpu-quants.c
        ggml-cpu/ggml-cpu-quants.h
        ggml-cpu/ggml-cpu-traits.cpp
        ggml-cpu/ggml-cpu-traits.h
        ggml-cpu/amx/amx.cpp
        ggml-cpu/amx/amx.h
        ggml-cpu/amx/mmq.cpp
        ggml-cpu/amx/mmq.h
        ggml-cpu/ggml-cpu-impl.h
        )

    target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
    target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

    # Optional acceleration / threading libraries.
    if (APPLE AND GGML_ACCELERATE)
        find_library(ACCELERATE_FRAMEWORK Accelerate)
        if (ACCELERATE_FRAMEWORK)
            message(STATUS "Accelerate framework found")

            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
        else()
            message(WARNING "Accelerate framework not found")
        endif()
    endif()

    if (GGML_OPENMP)
        find_package(OpenMP)
        if (OpenMP_FOUND)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
        else()
            message(WARNING "OpenMP not found")
        endif()
    endif()

    if (GGML_LLAMAFILE)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)

        list(APPEND GGML_CPU_SOURCES
                    ggml-cpu/llamafile/sgemm.cpp
                    ggml-cpu/llamafile/sgemm.h)
    endif()

    if (GGML_CPU_HBM)
        find_library(memkind memkind REQUIRED)

        message(STATUS "Using memkind for CPU HBM")

        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)

        target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
    endif()

    # Per-architecture flag/definition selection.
    if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR
        CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
            CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))

        message(STATUS "ARM detected")

        if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
            message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
        else()
            check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
            if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
                list(APPEND ARCH_FLAGS -mfp16-format=ieee)
            endif()

            if (GGML_NATIVE)
                # -mcpu=native does not always enable all the features in some compilers,
                # so we check for them manually and enable them if available

                execute_process(
                    COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
                    INPUT_FILE "/dev/null"
                    OUTPUT_QUIET
                    ERROR_VARIABLE ARM_MCPU
                    RESULT_VARIABLE ARM_MCPU_RESULT
                )
                if (NOT ARM_MCPU_RESULT)
                    string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
                endif()
                if ("${ARM_MCPU_FLAG}" STREQUAL "")
                    set(ARM_MCPU_FLAG -mcpu=native)
                    message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
                endif()

                include(CheckCXXSourceRuns)

                # Probes one +<tag> ISA extension by compiling AND running a
                # snippet; appends +<tag> or +no<tag> to ARM_MCPU_FLAG_FIX.
                function(check_arm_feature tag code)
                    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
                    set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
                    check_cxx_source_runs(
                        "${code}"
                        GGML_MACHINE_SUPPORTS_${tag}
                    )
                    if (GGML_MACHINE_SUPPORTS_${tag})
                        set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
                    else()
                        set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
                    endif()
                    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
                endfunction()

                check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(i8mm    "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(sve     "#include <arm_sve.h>\nint main()  { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")

                list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
            else()
                if (GGML_CPU_ARM_ARCH)
                    list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
                endif()
            endif()

            # show enabled features
            if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
                set(FEAT_INPUT_FILE "NUL")
            else()
                set(FEAT_INPUT_FILE "/dev/null")
            endif()

            execute_process(
                COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
                INPUT_FILE ${FEAT_INPUT_FILE}
                OUTPUT_VARIABLE ARM_FEATURE
                RESULT_VARIABLE ARM_FEATURE_RESULT
            )
            if (ARM_FEATURE_RESULT)
                message(WARNING "Failed to get ARM features")
            else()
                foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
                    string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
                    if (NOT ${feature_pos} EQUAL -1)
                        message(STATUS "ARM feature ${feature} enabled")
                    endif()
                endforeach()
            endif()
        endif()
    elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
            (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
            CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))

        message(STATUS "x86 detected")

        if (MSVC)
            # instruction set detection for MSVC only
            if (GGML_NATIVE)
                include(ggml-cpu/cmake/FindSIMD.cmake)
            endif ()
            if (GGML_AVX512)
                list(APPEND ARCH_FLAGS /arch:AVX512)
                # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
                # MSVC has no compile-time flags enabling specific
                # AVX512 extensions, neither it defines the
                # macros corresponding to the extensions.
                # Do it manually.
                list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vbmi)
                    endif()
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vnni)
                    endif()
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512bf16)
                    endif()
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
                endif()
            elseif (GGML_AVX2)
                list(APPEND ARCH_FLAGS /arch:AVX2)
                list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
            elseif (GGML_AVX)
                list(APPEND ARCH_FLAGS /arch:AVX)
                list(APPEND ARCH_DEFINITIONS GGML_AVX)
            else ()
                list(APPEND ARCH_FLAGS /arch:SSE4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
            endif()
            if (GGML_AVX_VNNI)
                list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
            endif()
        else ()
            if (GGML_NATIVE)
                list(APPEND ARCH_FLAGS -march=native)
            else ()
                list(APPEND ARCH_FLAGS -msse4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
                if (GGML_F16C)
                    list(APPEND ARCH_FLAGS -mf16c)
                    list(APPEND ARCH_DEFINITIONS GGML_F16C)
                endif()
                if (GGML_FMA)
                    list(APPEND ARCH_FLAGS -mfma)
                    list(APPEND ARCH_DEFINITIONS GGML_FMA)
                endif()
                if (GGML_AVX)
                    list(APPEND ARCH_FLAGS -mavx)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX)
                endif()
                if (GGML_AVX2)
                    list(APPEND ARCH_FLAGS -mavx2)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX2)
                endif()
                if (GGML_AVX_VNNI)
                    list(APPEND ARCH_FLAGS -mavxvnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
                endif()
                if (GGML_AVX512)
                    list(APPEND ARCH_FLAGS -mavx512f)
                    list(APPEND ARCH_FLAGS -mavx512cd)
                    list(APPEND ARCH_FLAGS -mavx512vl)
                    list(APPEND ARCH_FLAGS -mavx512dq)
                    list(APPEND ARCH_FLAGS -mavx512bw)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                endif()
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_FLAGS -mavx512vbmi)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_FLAGS -mavx512vnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_FLAGS -mavx512bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_FLAGS -mamx-tile)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_FLAGS -mamx-int8)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_FLAGS -mamx-bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
                endif()
            endif()
        endif()
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
        message(STATUS "PowerPC detected")
        execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
        if (${POWER_M} MATCHES "POWER10")
            list(APPEND ARCH_FLAGS -mcpu=power10)
        elseif (${POWER_M} MATCHES "POWER9")
            list(APPEND ARCH_FLAGS -mcpu=power9)
        elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
            list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
        else()
            list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
        endif()
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
        message(STATUS "loongarch64 detected")

        list(APPEND ARCH_FLAGS -march=loongarch64)
        if (GGML_LASX)
            list(APPEND ARCH_FLAGS -mlasx)
        endif()
        if (GGML_LSX)
            list(APPEND ARCH_FLAGS -mlsx)
        endif()
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
        message(STATUS "RISC-V detected")
        if (GGML_RVV)
            list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
        endif()
    else()
        message(STATUS "Unknown architecture")
    endif()

    if (GGML_CPU_AARCH64)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
    endif()

    # Apply the accumulated sources, flags and definitions to the target.
    message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
    target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
    target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
    target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})

    if (GGML_BACKEND_DL)
        if (GGML_NATIVE)
            # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
            message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
        endif()

        # The feature detection code is compiled as a separate target so that
        # it can be built without the architecture flags
        # Since multiple variants of the CPU backend may be included in the same
        # build, using set_source_files_properties() to set the arch flags is not possible
        set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
        add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
        target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
        target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
        target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
        set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
        target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
    endif()

    if (EMSCRIPTEN)
        set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
    endif()
endfunction()
							
								
								
									
										220
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,220 @@ | ||||
| #include "amx.h" | ||||
| #include "common.h" | ||||
| #include "mmq.h" | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-impl.h" | ||||
| #include "ggml-cpu.h" | ||||
| #include "ggml-cpu-traits.h" | ||||
|  | ||||
| #if defined(__gnu_linux__) | ||||
| #include <sys/syscall.h> | ||||
| #include <unistd.h> | ||||
| #endif | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
|  | ||||
| #if defined(__AMX_INT8__) && defined(__AVX512VNNI__) | ||||
|  | ||||
| // AMX type_trais | ||||
| namespace ggml::cpu::amx { | ||||
| class tensor_traits : public ggml::cpu::tensor_traits { | ||||
|     bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override { | ||||
|         size = ggml_backend_amx_desired_wsize(op); | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override { | ||||
|         if (op->op == GGML_OP_MUL_MAT) { | ||||
|             ggml_backend_amx_mul_mat(params, op); | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
| }; | ||||
|  | ||||
// Returns the process-wide singleton tensor_traits instance; both arguments
// are ignored because the traits object carries no per-tensor state.
static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) {
    static tensor_traits traits;
    return &traits;
}
| }  // namespace ggml::cpu::amx | ||||
|  | ||||
| // AMX buffer interface | ||||
// Release the allocation backing an AMX buffer (buffer->context is the raw block).
// NOTE(review): the memory comes from ggml_aligned_malloc (see alloc_buffer below);
// confirm plain free() is the matching deallocator on every supported platform.
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}
|  | ||||
// The buffer context *is* the storage: return it as the base address.
static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *) (buffer->context);
}
|  | ||||
// Attach the shared AMX tensor_traits to each tensor placed in this buffer so
// that matmuls on it are routed through the AMX kernels (see compute_forward).
static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);

    GGML_UNUSED(buffer);
}
|  | ||||
// Fill `size` bytes of the tensor's storage, starting at `offset`, with `value`.
static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                                  uint8_t value, size_t offset, size_t size) {
    memset((char *) tensor->data + offset, value, size);

    GGML_UNUSED(buffer);
}
|  | ||||
// Upload data into a tensor stored in an AMX buffer. Quantized types with AMX
// kernels are repacked into the AMX weight layout on the way in
// (ggml_backend_amx_convert_weight); everything else is a plain byte copy.
static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                               const void * data, size_t offset, size_t size) {
    if (qtype_has_amx_kernels(tensor->type)) {
        GGML_LOG_DEBUG("%s: amx repack tensor %s of type %s\n", __func__, tensor->name, ggml_type_name(tensor->type));
        ggml_backend_amx_convert_weight(tensor, data, offset, size);
    } else {
        memcpy((char *) tensor->data + offset, data, size);
    }

    GGML_UNUSED(buffer);
}
|  | ||||
| /* | ||||
| // need to figure what we need to do with buffer->extra. | ||||
| static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { | ||||
|     GGML_ASSERT(!qtype_has_amx_kernels(tensor->type)); | ||||
|     memcpy(data, (const char *)tensor->data + offset, size); | ||||
|  | ||||
|     GGML_UNUSED(buffer); | ||||
| } | ||||
|  | ||||
| static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) { | ||||
|     if (ggml_backend_buffer_is_host(src->buffer)) { | ||||
|         if (qtype_has_amx_kernels(src->type)) { | ||||
|             ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_nbytes(dst)); | ||||
|         } else { | ||||
|             memcpy(dst->data, src->data, ggml_nbytes(src)); | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|     return false; | ||||
|  | ||||
|     GGML_UNUSED(buffer); | ||||
| } | ||||
| */ | ||||
|  | ||||
// Set the entire buffer contents to `value`.
static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    memset(buffer->context, value, buffer->size);
}
|  | ||||
// vtable for AMX buffers. get_tensor/cpy_tensor are nullptr: weights may be
// stored in a repacked layout (see set_tensor), so a raw byte copy out of the
// buffer is not generally valid (see the commented-out implementations above).
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer     = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base        = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor     = */ ggml_backend_amx_buffer_init_tensor,
    /* .memset_tensor   = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor      = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor      = */ nullptr,
    /* .cpy_tensor      = */ nullptr,
    /* .clear           = */ ggml_backend_amx_buffer_clear,
    /* .reset           = */ nullptr,
};
|  | ||||
// Human-readable name of this buffer type.
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "AMX";

    GGML_UNUSED(buft);
}
|  | ||||
// Allocate an aligned host block of `size` bytes and wrap it in a buffer that
// uses the AMX interface. Logs to stderr and returns NULL on allocation failure.
static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = ggml_aligned_malloc(size);
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
}
|  | ||||
// Required allocation alignment for this buffer type.
static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    return TENSOR_ALIGNMENT;

    GGML_UNUSED(buft);
}
|  | ||||
namespace ggml::cpu::amx {
// Decides which ops may take the AMX path and hands back the traits object
// stored on the weight tensor.
class extra_buffer_type : ggml::cpu::extra_buffer_type {
    bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
        // handle only 2d gemm for now
        auto is_contiguous_2d = [](const struct ggml_tensor * t) {
            return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
        };

        if (op->op == GGML_OP_MUL_MAT && is_contiguous_2d(op->src[0]) &&  // src0 must be contiguous
            is_contiguous_2d(op->src[1]) &&                               // src1 must be contiguous
            op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_amx_buffer_type() &&
            op->ne[0] % (TILE_N * 2) == 0 &&                              // out_features is 32x
            (qtype_has_amx_kernels(op->src[0]->type) || (op->src[0]->type == GGML_TYPE_F16))) {
            // src1 must be host buffer
            if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
                return false;
            }
            // src1 must be float32
            if (op->src[1]->type == GGML_TYPE_F32) {
                return true;
            }
        }
        return false;
    }

    // Return the AMX tensor_traits stashed in src0->extra (by init_tensor)
    // when the weight lives in an AMX buffer; nullptr otherwise.
    ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
        if (op->op == GGML_OP_MUL_MAT && op->src[0]->buffer &&
            op->src[0]->buffer->buft == ggml_backend_amx_buffer_type()) {
            return (ggml::cpu::tensor_traits *) op->src[0]->extra;
        }

        return nullptr;
    }
};
}  // namespace ggml::cpu::amx
|  | ||||
// Ask the AMX backend how many bytes `tensor` needs inside this buffer type
// (the repacked layout may require a different size than ggml_nbytes).
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
    return ggml_backend_amx_get_alloc_size(tensor);

    GGML_UNUSED(buft);
}
|  | ||||
| #define ARCH_GET_XCOMP_PERM     0x1022 | ||||
| #define ARCH_REQ_XCOMP_PERM     0x1023 | ||||
| #define XFEATURE_XTILECFG       17 | ||||
| #define XFEATURE_XTILEDATA      18 | ||||
|  | ||||
| static bool ggml_amx_init() { | ||||
| #if defined(__gnu_linux__) | ||||
|     if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) { | ||||
|         fprintf(stderr, "AMX is not ready to be used!\n"); | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| #elif defined(_WIN32) | ||||
|     return true; | ||||
| #endif | ||||
| } | ||||
|  | ||||
// Accessor for the AMX weight-buffer type (function-local static singleton).
// ggml_amx_init() is consulted on every call; when the OS refuses AMX this
// returns nullptr and callers treat the buffer type as unavailable.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
                        /* .get_name         = */ ggml_backend_amx_buffer_type_get_name,
                        /* .alloc_buffer     = */ ggml_backend_amx_buffer_type_alloc_buffer,
                        /* .get_alignment    = */ ggml_backend_amx_buffer_type_get_alignment,
                        /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
                        /* .get_alloc_size   = */ ggml_backend_amx_buffer_type_get_alloc_size,
                        /* .is_host          = */ nullptr,
                        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::amx::extra_buffer_type(),
    };

    if (!ggml_amx_init()) {
        return nullptr;
    }

    return &ggml_backend_buffer_type_amx;
}
|  | ||||
| #endif  // defined(__AMX_INT8__) && defined(__AVX512VNNI__) | ||||
							
								
								
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-cpu-impl.h" | ||||
|  | ||||
| // GGML internal header | ||||
|  | ||||
| #if defined(__AMX_INT8__) && defined(__AVX512VNNI__) | ||||
| ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void); | ||||
| #endif | ||||
							
								
								
									
										91
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ggml.h" | ||||
| #include "ggml-cpu-impl.h" | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <memory> | ||||
| #include <type_traits> | ||||
|  | ||||
| #if defined(GGML_USE_OPENMP) | ||||
| #include <omp.h> | ||||
| #endif | ||||
|  | ||||
| #define TILE_M 16 | ||||
| #define TILE_N 16 | ||||
| #define TILE_K 32 | ||||
| #define VNNI_BLK 4 | ||||
|  | ||||
| #define AMX_BLK_SIZE 32 | ||||
|  | ||||
| #define TMM0 0 | ||||
| #define TMM1 1 | ||||
| #define TMM2 2 | ||||
| #define TMM3 3 | ||||
| #define TMM4 4 | ||||
| #define TMM5 5 | ||||
| #define TMM6 6 | ||||
| #define TMM7 7 | ||||
|  | ||||
| // parallel routines | ||||
// Ceiling division for integral types: number of y-sized chunks covering x.
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T div_up(T x, T y) {
    const T biased = x + y - 1;  // round up before truncating division
    return biased / y;
}
|  | ||||
// Split n work items across nth workers; worker ith receives [n_start, n_end).
// Two partitioning schemes are kept: the onednn pattern (disabled, #if 0) and
// the pytorch/aten pattern (active): fixed ceil(n/nth)-sized chunks, with the
// final worker(s) possibly getting a short or empty range.
template <typename T>
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
#if 0
    // onednn partition pattern
    T& n_my = n_end;
    if (nth <= 1 || n == 0) {
        n_start = 0;
        n_my = n;
    } else {
        T n1 = div_up(n, nth);
        T n2 = n1 - 1;
        T T1 = n - n2 * nth;
        n_my = ith < T1 ? n1 : n2;
        n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2;
    }
    n_end += n_start;
#else
    // pytorch aten partition pattern
    T n_my = div_up(n, nth);
    n_start = ith * n_my;
    n_end = std::min(n_start + n_my, n);
#endif
}
|  | ||||
// Run f(begin, end) over [0, n): split across OpenMP threads when compiled
// with GGML_USE_OPENMP, otherwise executed as one chunk on the calling thread.
template <typename func_t>
inline void parallel_for(int n, const func_t& f) {
#if defined(GGML_USE_OPENMP)
#pragma omp parallel
{
    int nth = omp_get_num_threads();
    int ith = omp_get_thread_num();
    int tbegin, tend;
    balance211(n, nth, ith, tbegin, tend);
    f(tbegin, tend);
}
#else
    f(0, n);
#endif
}
|  | ||||
// Same range splitting as parallel_for, but uses the thread count (nth) and
// thread index (ith) supplied by ggml's compute params -- the threads are
// already running, so no thread pool is spawned here.
template <typename func_t>
inline void parallel_for_ggml(const ggml_compute_params * params, int n, const func_t & f) {
    int tbegin, tend;
    balance211(n, params->nth, params->ith, tbegin, tend);
    f(tbegin, tend);
}
|  | ||||
| // quantized types that have AMX support | ||||
| inline bool qtype_has_amx_kernels(const enum ggml_type type) { | ||||
|     // TODO: fix padding for vnni format | ||||
|     return (type == GGML_TYPE_Q4_0) || | ||||
|         (type == GGML_TYPE_Q4_1) || | ||||
|         (type == GGML_TYPE_Q8_0) || | ||||
|         (type == GGML_TYPE_Q4_K) || | ||||
|         (type == GGML_TYPE_Q5_K) || | ||||
|         (type == GGML_TYPE_Q6_K) || | ||||
|         (type == GGML_TYPE_IQ4_XS); | ||||
| } | ||||
							
								
								
									
										2511
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2511
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
#pragma once
#include "common.h"

// Internal API of the AMX matmul kernels (implemented in mmq.cpp).

// Scratch/work-buffer size the AMX matmul requires for the op `dst`.
size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst);

// Number of bytes needed to store `tensor` in the AMX buffer type.
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);

// Repack incoming weight bytes [offset, offset+size) into the tensor's AMX layout.
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

// Execute the GGML_OP_MUL_MAT op `dst` using the AMX kernels.
void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);
							
								
								
									
										100
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | ||||
# Detect x86 SIMD support by compiling AND running small test programs on the
# host, then set the corresponding GGML_* options. The flag lists passed to
# check_sse below are MSVC /arch options (see the note above the calls).
include(CheckCSourceRuns)

set(AVX_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 a;
        a = _mm256_set1_ps(0);
        return 0;
    }
")

set(AVX512_CODE "
    #include <immintrin.h>
    int main()
    {
        __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0);
        __m512i b = a;
        __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
        return 0;
    }
")

# NOTE(review): 'x' below is deliberately used to probe _mm256_extract_epi64
# support, but it is read uninitialized at run time -- confirm upstream intent.
set(AVX2_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256i a = {0};
        a = _mm256_abs_epi16(a);
        __m256i x;
        _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
        return 0;
    }
")

set(FMA_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 acc = _mm256_setzero_ps();
        const __m256 d = _mm256_setzero_ps();
        const __m256 p = _mm256_setzero_ps();
        acc = _mm256_fmadd_ps( d, p, acc );
        return 0;
    }
")

# check_sse(<type> <flags>): try each flag in <flags> (a ';'-separated list;
# the leading empty entry means "no extra flag") until ${type}_CODE runs.
# Caches ${type}_FOUND and the first working flag in ${type}_FLAGS.
macro(check_sse type flags)
    set(__FLAG_I 1)
    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
    foreach (__FLAG ${flags})
        if (NOT ${type}_FOUND)
            set(CMAKE_REQUIRED_FLAGS ${__FLAG})
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
            if (HAS_${type}_${__FLAG_I})
                set(${type}_FOUND TRUE CACHE BOOL "${type} support")
                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
            endif()
            math(EXPR __FLAG_I "${__FLAG_I}+1")
        endif()
    endforeach()
    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

    if (NOT ${type}_FOUND)
        set(${type}_FOUND FALSE CACHE BOOL "${type} support")
        set(${type}_FLAGS "" CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
endmacro()

# flags are for MSVC only!
check_sse("AVX" " ;/arch:AVX")
if (NOT ${AVX_FOUND})
    set(GGML_AVX OFF)
else()
    set(GGML_AVX ON)
endif()

# GGML_AVX2 requires both the AVX2 and FMA probes to succeed.
check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
    set(GGML_AVX2 OFF)
else()
    set(GGML_AVX2 ON)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (NOT ${AVX512_FOUND})
    set(GGML_AVX512 OFF)
else()
    set(GGML_AVX512 ON)
endif()
							
								
								
									
										323
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										323
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,323 @@ | ||||
| #include "ggml-backend-impl.h" | ||||
|  | ||||
| #if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) | ||||
|  | ||||
| #ifdef _MSC_VER | ||||
| #include <intrin.h> | ||||
| #endif | ||||
|  | ||||
| #include <cstring> | ||||
| #include <vector> | ||||
| #include <bitset> | ||||
| #include <array> | ||||
| #include <string> | ||||
|  | ||||
| // ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf | ||||
// Decodes x86 CPUID feature flags. Each accessor tests one bit of the cached
// leaf registers; vendor-specific flags are additionally gated on is_intel /
// is_amd because the same bit means different things across vendors.
struct cpuid_x86 {
    // leaf 1, ECX
    bool SSE3(void) { return f_1_ecx[0]; }
    bool PCLMULQDQ(void) { return f_1_ecx[1]; }
    bool MONITOR(void) { return f_1_ecx[3]; }
    bool SSSE3(void) { return f_1_ecx[9]; }
    bool FMA(void) { return f_1_ecx[12]; }
    bool CMPXCHG16B(void) { return f_1_ecx[13]; }
    bool SSE41(void) { return f_1_ecx[19]; }
    bool SSE42(void) { return f_1_ecx[20]; }
    bool MOVBE(void) { return f_1_ecx[22]; }
    bool POPCNT(void) { return f_1_ecx[23]; }
    bool AES(void) { return f_1_ecx[25]; }
    bool XSAVE(void) { return f_1_ecx[26]; }
    bool OSXSAVE(void) { return f_1_ecx[27]; }
    bool AVX(void) { return f_1_ecx[28]; }
    bool F16C(void) { return f_1_ecx[29]; }
    bool RDRAND(void) { return f_1_ecx[30]; }

    // leaf 1, EDX
    bool MSR(void) { return f_1_edx[5]; }
    bool CX8(void) { return f_1_edx[8]; }
    bool SEP(void) { return f_1_edx[11]; }
    bool CMOV(void) { return f_1_edx[15]; }
    bool CLFSH(void) { return f_1_edx[19]; }
    bool MMX(void) { return f_1_edx[23]; }
    bool FXSR(void) { return f_1_edx[24]; }
    bool SSE(void) { return f_1_edx[25]; }
    bool SSE2(void) { return f_1_edx[26]; }

    // leaf 7 (subleaf 0), EBX
    bool FSGSBASE(void) { return f_7_ebx[0]; }
    bool BMI1(void) { return f_7_ebx[3]; }
    bool HLE(void) { return is_intel && f_7_ebx[4]; }
    bool AVX2(void) { return f_7_ebx[5]; }
    bool BMI2(void) { return f_7_ebx[8]; }
    bool ERMS(void) { return f_7_ebx[9]; }
    bool INVPCID(void) { return f_7_ebx[10]; }
    bool RTM(void) { return is_intel && f_7_ebx[11]; }
    bool AVX512F(void) { return f_7_ebx[16]; }
    bool AVX512DQ(void) { return f_7_ebx[17]; }
    bool RDSEED(void) { return f_7_ebx[18]; }
    bool ADX(void) { return f_7_ebx[19]; }
    bool AVX512PF(void) { return f_7_ebx[26]; }
    bool AVX512ER(void) { return f_7_ebx[27]; }
    bool AVX512CD(void) { return f_7_ebx[28]; }
    bool AVX512BW(void) { return f_7_ebx[30]; }
    bool AVX512VL(void) { return f_7_ebx[31]; }

    bool SHA(void) { return f_7_ebx[29]; }

    // leaf 7 (subleaf 0), ECX
    bool PREFETCHWT1(void) { return f_7_ecx[0]; }

    // extended leaf 0x80000001, ECX (vendor-dependent bits)
    bool LAHF(void) { return f_81_ecx[0]; }
    bool LZCNT(void) { return is_intel && f_81_ecx[5]; }
    bool ABM(void) { return is_amd && f_81_ecx[5]; }
    bool SSE4a(void) { return is_amd && f_81_ecx[6]; }
    bool XOP(void) { return is_amd && f_81_ecx[11]; }
    bool TBM(void) { return is_amd && f_81_ecx[21]; }

    // extended leaf 0x80000001, EDX (vendor-dependent bits)
    bool SYSCALL(void) { return is_intel && f_81_edx[11]; }
    bool MMXEXT(void) { return is_amd && f_81_edx[22]; }
    bool RDTSCP(void) { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT(void) { return is_amd && f_81_edx[30]; }
    bool _3DNOW(void) { return is_amd && f_81_edx[31]; }

    // leaf 7 subleaf 0 ECX/EDX and subleaf 1 EAX: AVX-512 extensions and AMX
    bool AVX512_VBMI(void) { return f_7_ecx[1]; }
    bool AVX512_VNNI(void) { return f_7_ecx[11]; }
    bool AVX512_FP16(void) { return f_7_edx[23]; }
    bool AVX512_BF16(void) { return f_7_1_eax[5]; }
    bool AVX_VNNI(void) { return f_7_1_eax[4]; }

    bool AMX_TILE(void) { return f_7_edx[24]; }
    bool AMX_INT8(void) { return f_7_edx[25]; }
    bool AMX_FP16(void) { return f_7_1_eax[21]; }
    bool AMX_BF16(void) { return f_7_edx[22]; }

#ifdef _MSC_VER
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    // GCC/Clang: inline-asm CPUID; cpuid() fixes the subleaf (ECX) at 0.
    static void cpuid(int cpu_info[4], int eax) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    // Queries all basic and extended CPUID leaves once and caches the flag
    // registers in the bitsets below.
    cpuid_x86() {
        std::array<int, 4> cpui;
        std::vector<std::array<int, 4>> data;

        // calling __cpuid with 0x0 as the function_id argument
        // gets the number of the highest valid function ID.
        cpuid(cpui.data(), 0);
        int n_ids = cpui[0];

        for (int i = 0; i <= n_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            data.push_back(cpui);
        }

        // capture vendor string (12 chars packed into EBX, EDX, ECX of leaf 0)
        char vendor[0x20] = {};
        *reinterpret_cast<int *>(vendor)     = data[0][1];
        *reinterpret_cast<int *>(vendor + 4) = data[0][3];
        *reinterpret_cast<int *>(vendor + 8) = data[0][2];
        this->vendor = vendor;
        if (this->vendor == "GenuineIntel") {
            is_intel = true;
        } else if (this->vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // load bitset with flags for function 0x00000001
        if (n_ids >= 1) {
            f_1_ecx = data[1][2];
            f_1_edx = data[1][3];
        }

        // load bitset with flags for function 0x00000007
        if (n_ids >= 7) {
            f_7_ebx = data[7][1];
            f_7_ecx = data[7][2];
            f_7_edx = data[7][3];
            cpuidex(cpui.data(), 7, 1);
            f_7_1_eax = cpui[0];
        }

        // calling __cpuid with 0x80000000 as the function_id argument
        // gets the number of the highest valid extended ID.
        cpuid(cpui.data(), 0x80000000);
        unsigned int n_ex_ids = cpui[0];

        std::vector<std::array<int, 4>> ext_data;
        for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            ext_data.push_back(cpui);
        }

        // load bitset with flags for function 0x80000001
        if (n_ex_ids >= 0x80000001) {
            f_81_ecx = ext_data[1][2];
            f_81_edx = ext_data[1][3];
        }

        // interpret CPU brand string if reported (leaves 0x80000002..4)
        char brand[0x40] = {};
        if (n_ex_ids >= 0x80000004) {
            std::memcpy(brand, ext_data[2].data(), sizeof(cpui));
            std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui));
            std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui));
            this->brand = brand;
        }
    }

    bool is_intel = false;
    bool is_amd = false;
    std::string vendor;
    std::string brand;
    std::bitset<32> f_1_ecx;    // leaf 1 ECX
    std::bitset<32> f_1_edx;    // leaf 1 EDX
    std::bitset<32> f_7_ebx;    // leaf 7.0 EBX
    std::bitset<32> f_7_ecx;    // leaf 7.0 ECX
    std::bitset<32> f_7_edx;    // leaf 7.0 EDX
    std::bitset<32> f_7_1_eax;  // leaf 7.1 EAX
    std::bitset<32> f_81_ecx;   // leaf 0x80000001 ECX
    std::bitset<32> f_81_edx;   // leaf 0x80000001 EDX
};
|  | ||||
| #if 0 | ||||
| void test_x86_is() { | ||||
|     cpuid_x86 is; | ||||
|     printf("CPU Vendor: %s\n", is.vendor.c_str()); | ||||
|     printf("Brand: %s\n", is.brand.c_str()); | ||||
|     printf("is_intel: %d\n", is.is_intel); | ||||
|     printf("is_amd: %d\n", is.is_amd); | ||||
|     printf("sse3: %d\n", is.SSE3()); | ||||
|     printf("pclmulqdq: %d\n", is.PCLMULQDQ()); | ||||
|     printf("ssse3: %d\n", is.SSSE3()); | ||||
|     printf("fma: %d\n", is.FMA()); | ||||
|     printf("cmpxchg16b: %d\n", is.CMPXCHG16B()); | ||||
|     printf("sse41: %d\n", is.SSE41()); | ||||
|     printf("sse42: %d\n", is.SSE42()); | ||||
|     printf("movbe: %d\n", is.MOVBE()); | ||||
|     printf("popcnt: %d\n", is.POPCNT()); | ||||
|     printf("aes: %d\n", is.AES()); | ||||
|     printf("xsave: %d\n", is.XSAVE()); | ||||
|     printf("osxsave: %d\n", is.OSXSAVE()); | ||||
|     printf("avx: %d\n", is.AVX()); | ||||
|     printf("f16c: %d\n", is.F16C()); | ||||
|     printf("rdrand: %d\n", is.RDRAND()); | ||||
|     printf("msr: %d\n", is.MSR()); | ||||
|     printf("cx8: %d\n", is.CX8()); | ||||
|     printf("sep: %d\n", is.SEP()); | ||||
|     printf("cmov: %d\n", is.CMOV()); | ||||
|     printf("clflush: %d\n", is.CLFSH()); | ||||
|     printf("mmx: %d\n", is.MMX()); | ||||
|     printf("fxsr: %d\n", is.FXSR()); | ||||
|     printf("sse: %d\n", is.SSE()); | ||||
|     printf("sse2: %d\n", is.SSE2()); | ||||
|     printf("fsgsbase: %d\n", is.FSGSBASE()); | ||||
|     printf("bmi1: %d\n", is.BMI1()); | ||||
|     printf("hle: %d\n", is.HLE()); | ||||
|     printf("avx2: %d\n", is.AVX2()); | ||||
|     printf("bmi2: %d\n", is.BMI2()); | ||||
|     printf("erms: %d\n", is.ERMS()); | ||||
|     printf("invpcid: %d\n", is.INVPCID()); | ||||
|     printf("rtm: %d\n", is.RTM()); | ||||
|     printf("avx512f: %d\n", is.AVX512F()); | ||||
|     printf("rdseed: %d\n", is.RDSEED()); | ||||
|     printf("adx: %d\n", is.ADX()); | ||||
|     printf("avx512pf: %d\n", is.AVX512PF()); | ||||
|     printf("avx512er: %d\n", is.AVX512ER()); | ||||
|     printf("avx512cd: %d\n", is.AVX512CD()); | ||||
|     printf("sha: %d\n", is.SHA()); | ||||
|     printf("prefetchwt1: %d\n", is.PREFETCHWT1()); | ||||
|     printf("lahf: %d\n", is.LAHF()); | ||||
|     printf("lzcnt: %d\n", is.LZCNT()); | ||||
|     printf("abm: %d\n", is.ABM()); | ||||
|     printf("sse4a: %d\n", is.SSE4a()); | ||||
|     printf("xop: %d\n", is.XOP()); | ||||
|     printf("tbm: %d\n", is.TBM()); | ||||
|     printf("syscall: %d\n", is.SYSCALL()); | ||||
|     printf("mmxext: %d\n", is.MMXEXT()); | ||||
|     printf("rdtscp: %d\n", is.RDTSCP()); | ||||
|     printf("3dnowext: %d\n", is._3DNOWEXT()); | ||||
|     printf("3dnow: %d\n", is._3DNOW()); | ||||
|     printf("avx512_vbmi: %d\n", is.AVX512_VBMI()); | ||||
|     printf("avx512_vnni: %d\n", is.AVX512_VNNI()); | ||||
|     printf("avx512_fp16: %d\n", is.AVX512_FP16()); | ||||
|     printf("avx512_bf16: %d\n", is.AVX512_BF16()); | ||||
|     printf("amx_tile: %d\n", is.AMX_TILE()); | ||||
|     printf("amx_int8: %d\n", is.AMX_INT8()); | ||||
|     printf("amx_fp16: %d\n", is.AMX_FP16()); | ||||
|     printf("amx_bf16: %d\n", is.AMX_BF16()); | ||||
| } | ||||
| #endif | ||||
|  | ||||
// Score this CPU against the instruction-set extensions the binary was
// compiled with. Returns 0 as soon as any compiled-in requirement is missing;
// otherwise accumulates one bit per supported feature (more advanced features
// occupy higher bits), so the backend loader can pick the best variant.
static int ggml_backend_cpu_x86_score() {
    // FIXME: this does not check for OS support

    int score = 0;
    cpuid_x86 is;

#ifdef GGML_FMA
    if (!is.FMA()) { return 0; }
    score += 1;
#endif
#ifdef GGML_F16C
    if (!is.F16C()) { return 0; }
    score += 1<<1;
#endif
#ifdef GGML_SSE42
    if (!is.SSE42()) { return 0; }
    score += 1<<2;
#endif
#ifdef GGML_AVX
    if (!is.AVX()) { return 0; }
    score += 1<<4;
#endif
#ifdef GGML_AVX2
    if (!is.AVX2()) { return 0; }
    score += 1<<5;
#endif
#ifdef GGML_AVX_VNNI
    if (!is.AVX_VNNI()) { return 0; }
    score += 1<<6;
#endif
#ifdef GGML_AVX512
    // AVX-512 requires the full F/CD/VL/DQ/BW baseline used by the kernels.
    if (!is.AVX512F()) { return 0; }
    if (!is.AVX512CD()) { return 0; }
    if (!is.AVX512VL()) { return 0; }
    if (!is.AVX512DQ()) { return 0; }
    if (!is.AVX512BW()) { return 0; }
    score += 1<<7;
#endif
#ifdef GGML_AVX512_VBMI
    if (!is.AVX512_VBMI()) { return 0; }
    score += 1<<8;
#endif
#ifdef GGML_AVX512_BF16
    if (!is.AVX512_BF16()) { return 0; }
    score += 1<<9;
#endif
#ifdef GGML_AVX512_VNNI
    if (!is.AVX512_VNNI()) { return 0; }
    score += 1<<10;
#endif
#ifdef GGML_AMX_INT8
    if (!is.AMX_INT8()) { return 0; }
    score += 1<<11;
#endif

    return score;
}
|  | ||||
| GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score) | ||||
|  | ||||
| #endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) | ||||
							
								
								
									
										4247
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4247
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
#pragma once

#include "ggml-cpu-traits.h"
#include "ggml.h"

// GGML internal header

// Accessor for the aarch64 CPU backend's buffer type singleton; the
// definition lives in ggml-cpu-aarch64.cpp.
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
							
								
								
									
										55
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| #ifdef GGML_USE_CPU_HBM | ||||
|  | ||||
| #include "ggml-backend.h" | ||||
| #include "ggml-backend-impl.h" | ||||
| #include "ggml-cpu.h" | ||||
| #include "ggml-impl.h" | ||||
|  | ||||
| #include "ggml-cpu-hbm.h" | ||||
|  | ||||
| // buffer type HBM | ||||
|  | ||||
| #include <hbwmalloc.h> | ||||
|  | ||||
| static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) { | ||||
|     return "CPU_HBM"; | ||||
|  | ||||
|     GGML_UNUSED(buft); | ||||
| } | ||||
|  | ||||
| static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) { | ||||
|     hbw_free(buffer->context); | ||||
| } | ||||
|  | ||||
| static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, | ||||
|                                                                            size_t                     size) { | ||||
|     void * ptr; | ||||
|     int    result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size); | ||||
|     if (result != 0) { | ||||
|         GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size); | ||||
|     buffer->buft                 = buft; | ||||
|     buffer->iface.free_buffer    = ggml_backend_cpu_hbm_buffer_free_buffer; | ||||
|  | ||||
|     return buffer; | ||||
| } | ||||
|  | ||||
| ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) { | ||||
|     static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = { | ||||
|         /* .iface    = */ { | ||||
|                            /* .get_name         = */ ggml_backend_cpu_hbm_buffer_type_get_name, | ||||
|                            /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer, | ||||
|                            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment, | ||||
|                            /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX | ||||
|                            /* .get_alloc_size   = */ nullptr,  // defaults to ggml_nbytes | ||||
|                            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host, | ||||
|                            }, | ||||
|         /* .context  = */ nullptr, | ||||
|     }; | ||||
|  | ||||
|     return &ggml_backend_cpu_buffer_type_hbm; | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
#pragma once

#include "ggml-backend.h"
#include "ggml.h"

// GGML CPU internal header

// Buffer type backed by high-bandwidth memory; the implementation in
// ggml-cpu-hbm.cpp is compiled only when GGML_USE_CPU_HBM is defined.
ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user