2025-02-27 10:31:13 -08:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <string>
|
2025-05-19 15:02:18 -07:00
|
|
|
#include <vector>
|
|
|
|
|
|
2025-02-27 10:31:13 -08:00
|
|
|
#include "whisper.h"
|
|
|
|
|
|
|
|
|
|
class WhisperSession {
|
|
|
|
|
public:
|
2025-03-21 11:00:38 -07:00
|
|
|
WhisperSession(const std::string& modelPath, std::string lang, std::string prompt, bool shortAudioContext);
|
2025-02-27 10:31:13 -08:00
|
|
|
~WhisperSession();
|
2025-03-21 11:00:38 -07:00
|
|
|
// Adds to the buffer
|
|
|
|
|
void addAudio(const float *pAudio, int sizeAudio);
|
|
|
|
|
// Returns the next finalized slice of audio (if any) and updates the preview.
|
|
|
|
|
std::string transcribeNextChunk();
|
|
|
|
|
// Transcribes all buffered audio data that hasn't been finalized yet
|
|
|
|
|
std::string transcribeAll();
|
2025-02-27 10:31:13 -08:00
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
whisper_full_params buildWhisperParams_();
|
|
|
|
|
std::string transcribe_(const std::vector<float>& audio, size_t samplesToTranscribe);
|
|
|
|
|
std::string splitAndTranscribeBefore_(int transcribeUpTo, int trimTo);
|
2025-03-21 11:00:38 -07:00
|
|
|
|
|
|
|
|
bool isBufferSilent_();
|
2025-02-27 10:31:13 -08:00
|
|
|
|
|
|
|
|
whisper_context *pContext_;
|
|
|
|
|
const std::string lang_;
|
|
|
|
|
const std::string prompt_;
|
2025-03-27 13:56:56 -07:00
|
|
|
const bool shortAudioContext_;
|
2025-02-27 10:31:13 -08:00
|
|
|
|
|
|
|
|
std::vector<float> audioBuffer_;
|
|
|
|
|
};
|
|
|
|
|
|