mirror of
https://github.com/laurent22/joplin.git
synced 2025-01-23 18:53:36 +02:00
9f5282c8f5
Co-authored-by: Laurent Cozic <laurent22@users.noreply.github.com>
62 lines
2.1 KiB
TypeScript
62 lines
2.1 KiB
TypeScript
import splitWhisperText from './splitWhisperText';
|
|
|
|
// Table-driven checks for splitWhisperText. Each case supplies a Whisper-style
// transcript (text interleaved with <|seconds|> timestamp markers), the total
// recording length, and the split the function is expected to report.
describe('splitWhisperText', () => {
	const splitCases = [
		// A sentence ends right before the <|5.00|><|6.00|> gap, so the split lands there.
		{
			input: '<|0.00|> This is a test. <|5.00|><|6.00|> This is another sentence. <|7.00|>',
			recordingLength: 8,
			expected: {
				trimTo: 6,
				dataBeforeTrim: '<|0.00|> This is a test. ',
				dataAfterTrim: ' This is another sentence. <|7.00|>',
			},
		},

		// Several candidate gaps exist; the one following a finished sentence
		// ("a test.") wins over the ones that fall mid-sentence.
		{
			input: '<|0.00|> This is <|4.00|><|4.50|> a test. <|5.00|><|5.50|> Testing, <|6.00|><|7.00|> this is a test. <|8.00|>',
			recordingLength: 8,
			expected: {
				trimTo: 5.5,
				dataBeforeTrim: '<|0.00|> This is <|4.00|><|4.50|> a test. ',
				dataAfterTrim: ' Testing, <|6.00|><|7.00|> this is a test. <|8.00|>',
			},
		},

		// With very small timestamps no split is made: nothing is placed before
		// the trim point and trimTo stays at 0.
		{
			input: '<|0.00|> This is a test. <|2.00|><|2.30|> Testing! <|3.00|>',
			recordingLength: 4,
			expected: {
				trimTo: 0,
				dataBeforeTrim: '',
				dataAfterTrim: ' This is a test. <|2.00|><|2.30|> Testing! <|3.00|>',
			},
		},

		// With larger timestamps, a pause is an acceptable split point even
		// though the preceding text ends in a comma rather than a sentence break.
		{
			input: '<|0.00|> This is a test, <|10.00|><|12.00|> of splitting on timestamps. <|15.00|>',
			recordingLength: 16,
			expected: {
				trimTo: 12,
				dataBeforeTrim: '<|0.00|> This is a test, ',
				dataAfterTrim: ' of splitting on timestamps. <|15.00|>',
			},
		},

		// Same transcript as the previous case, but the recording continues long
		// after the final timestamp, so the split is expected at the very end.
		{
			input: '<|0.00|> This is a test, <|10.00|><|12.00|> of splitting on timestamps. <|15.00|>',
			recordingLength: 30,
			expected: {
				trimTo: 15,
				dataBeforeTrim: '<|0.00|> This is a test, <|10.00|><|12.00|> of splitting on timestamps. ',
				dataAfterTrim: '',
			},
		},
	];

	test.each(splitCases)('should prefer to split at the end of sentences (case %#)', (testCase) => {
		const { trimTo, dataBeforeTrim, dataAfterTrim } = splitWhisperText(
			testCase.input,
			testCase.recordingLength,
		);

		// trimTo is a floating-point number of seconds, hence the tolerant comparison.
		expect(trimTo).toBeCloseTo(testCase.expected.trimTo);
		expect(dataBeforeTrim).toBe(testCase.expected.dataBeforeTrim);
		expect(dataAfterTrim).toBe(testCase.expected.dataAfterTrim);
	});
});
|