joplin/packages/app-mobile/services/voiceTyping/utils/splitWhisperText.ts

// Matches either a single timestamp token (e.g. `<|1.50|>`) or two directly adjacent
// tokens (e.g. `<|1.50|><|2.00|>`), which are then treated as one match.
// Capture group 1 holds the first number; capture group 2 holds the second number and
// is only set when a second token is present. Each number needs at least one digit
// followed by a `.`; digits after the `.` are optional.
// The `g` flag is needed because the expression is used with `String.prototype.matchAll`.
const timestampExp = /<\|(\d+\.\d*)\|>(?:<\|(\d+\.\d*)\|>)?/g;

/**
 * Converts a timestamp match into its numeric value.
 *
 * @param match A match whose capture group 1 is the first timestamp and whose
 *   (optional) capture group 2 is the second timestamp of a pair.
 * @returns The second timestamp when the match has one, otherwise the first.
 * @throws Error if the selected capture group does not parse to a finite number.
 */
const timestampMatchToNumber = (match: RegExpMatchArray) => {
	const [fullMatch, firstText, secondText] = match;

	// When the match is a pair, the later timestamp wins so that leading
	// silence is removed.
	const parsed = Number(secondText || firstText);

	if (Number.isFinite(parsed)) {
		return parsed;
	}

	// Not expected to happen: the capture groups should always be numeric.
	throw new Error(`Timestamp match failed with ${fullMatch}`);
};

/**
 * Chooses one timestamp in `textWithTimestamps` and splits the text around it.
 *
 * @param textWithTimestamps Text containing timestamp tokens matched by `timestampExp`.
 * @param recordingLengthSeconds Length of the recording; compared against the last
 *   timestamp to detect a long pause at the end of the data.
 * @returns An object with:
 *   - `trimTo`: the numeric value of the chosen timestamp,
 *   - `dataBeforeTrim`: the text that precedes the chosen timestamp match,
 *   - `dataAfterTrim`: the text that follows the chosen timestamp match.
 *   When the text has no timestamps, `trimTo` is `0`, `dataBeforeTrim` is empty and
 *   `dataAfterTrim` is the unmodified input.
 */
const splitWhisperText = (textWithTimestamps: string, recordingLengthSeconds: number) => {
	const markers = Array.from(textWithTimestamps.matchAll(timestampExp), match => ({
		timestamp: timestampMatchToNumber(match),
		match,
	}));

	// Nothing to split on: keep everything in the "after" part.
	if (markers.length === 0) {
		return { trimTo: 0, dataBeforeTrim: '', dataAfterTrim: textWithTimestamps };
	}

	const finalMarker = markers[markers.length - 1];
	const endsWithLongPause = recordingLengthSeconds > finalMarker.timestamp + 4;

	// Length of the input, ignoring trailing whitespace.
	const contentLength = textWithTimestamps.trimEnd().length;

	// Heuristic for whether content can be moved from the preview to the document at a
	// given timestamp. The thresholds are based on the maximum buffer length of 30
	// seconds: the longer the buffer gets, the more readily the data is broken into
	// chunks. Where possible, the break is placed near the end of a sentence.
	const isBreakCandidate = ({ match, timestamp }: typeof finalMarker) => {
		// Late enough that a break is always allowed, even at the end of the data.
		if (timestamp > 16) return true;

		// All remaining rules refuse to break at the very end of the data.
		const reachesEndOfData = match.index + match[0].length >= contentLength;
		if (reachesEndOfData) return false;

		if (timestamp > 8) return true;
		if (timestamp <= 4) return false;

		// Between 4 and 8: only break when Latin sentence-ending punctuation appears
		// within the three characters before the timestamp.
		const precedingText = textWithTimestamps.substring(Math.max(match.index - 3, 0), match.index);
		return /[.?!]/.test(precedingText);
	};

	// A long pause after the data breaks at the last timestamp. Otherwise the first
	// timestamp accepted by the heuristic is used, defaulting to the very first one.
	const chosen = endsWithLongPause
		? finalMarker
		: (markers.find(isBreakCandidate) ?? markers[0]);

	const cutStart = chosen.match.index;
	const cutEnd = cutStart + chosen.match[0].length;

	return {
		trimTo: chosen.timestamp,
		dataBeforeTrim: textWithTimestamps.substring(0, cutStart),
		dataAfterTrim: textWithTimestamps.substring(cutEnd),
	};
};

export default splitWhisperText;