1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-17 18:44:45 +02:00
joplin/packages/app-mobile/services/voiceTyping/utils/splitWhisperText.ts
Henry Heino 9f5282c8f5
Android: Allow switching the voice typing library to Whisper (#11158)
Co-authored-by: Laurent Cozic <laurent22@users.noreply.github.com>
2024-10-26 21:00:56 +01:00

66 lines
2.4 KiB
TypeScript

// Finds timestamp markers of the form `<|12.34|>`. Two markers written back-to-back
// (`<|a|><|b|>`) are consumed as a single match: capture group 1 holds the first
// value and capture group 2 (absent for a lone marker) holds the second.
const timestampExp = /<\|(\d+\.\d*)\|>(?:<\|(\d+\.\d*)\|>)?/g;

// Converts a single `timestampExp` match into its numeric timestamp.
// Throws if the captured text does not parse to a finite number.
const timestampMatchToNumber = (match: RegExpMatchArray) => {
	const [, leadingValue, trailingValue] = match;

	// When the match is a pair, the later value is used so that the silence
	// between the two markers is dropped.
	const parsed = Number(trailingValue || leadingValue);

	// The captured groups contain only digits and a decimal point, so a
	// non-finite result (e.g. NaN) is not expected here.
	if (!isFinite(parsed)) {
		throw new Error(`Timestamp match failed with ${match[0]}`);
	}

	return parsed;
};
/**
 * Chooses a timestamp marker at which to split timestamped transcription text.
 *
 * @param textWithTimestamps Text containing inline `<|number|>` timestamp markers.
 * @param recordingLengthSeconds Length of the recording; compared directly against
 *   the marker values.
 * @returns `trimTo`, the timestamp of the chosen marker, together with the text
 *   before (`dataBeforeTrim`) and after (`dataAfterTrim`) that marker. The marker
 *   itself appears in neither part. If the text has no markers, `trimTo` is `0`
 *   and the whole input is returned as `dataAfterTrim`.
 */
const splitWhisperText = (textWithTimestamps: string, recordingLengthSeconds: number) => {
	const markers = Array.from(textWithTimestamps.matchAll(timestampExp), match => {
		return { timestamp: timestampMatchToNumber(match), match };
	});

	if (markers.length === 0) {
		return { trimTo: 0, dataBeforeTrim: '', dataAfterTrim: textWithTimestamps };
	}

	// Trailing whitespace is ignored when deciding whether a marker sits at the end of the text.
	const contentEnd = textWithTimestamps.trimEnd().length;

	// Heuristic for whether content may be moved from the preview to the document at
	// a given marker. The thresholds are based on the maximum buffer length of 30
	// seconds: the longer the buffer, the more readily the data is split into
	// chunks. Where possible, the split lands near the end of a sentence.
	const isBreakCandidate = ({ match, timestamp }: typeof markers[number]) => {
		if (timestamp > 16) return true;

		const reachesEndOfData = match.index + match[0].length >= contentEnd;
		if (reachesEndOfData) return false;

		if (timestamp > 8) return true;

		// Look for sentence-ending punctuation (Latin scripts) in the three
		// characters just before the marker.
		const precedingText = textWithTimestamps.substring(Math.max(match.index - 3, 0), match.index);
		return timestamp > 4 && /[.?!]/.test(precedingText);
	};

	const finalMarker = markers[markers.length - 1];
	const endsWithLongPause = finalMarker.timestamp + 4 < recordingLengthSeconds;

	// After a long trailing pause, split at the last marker. Otherwise, use the
	// first marker accepted by the heuristic, falling back to the first marker.
	const breakAt = endsWithLongPause
		? finalMarker
		: (markers.find(isBreakCandidate) ?? markers[0]);

	const { match: breakMatch, timestamp: trimTo } = breakAt;
	const markerStart = breakMatch.index;
	const markerEnd = markerStart + breakMatch[0].length;

	return {
		trimTo,
		dataBeforeTrim: textWithTimestamps.substring(0, markerStart),
		dataAfterTrim: textWithTimestamps.substring(markerEnd),
	};
};

export default splitWhisperText;