1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-15 20:02:56 +02:00

Remove obsolete TestCollectAggregateAdditional test file, refactor aggregation logic with skipping empty accumulators, add Length support to collectors and sorters, and update integration tests for grouping and array operations.

This commit is contained in:
Tim Voronov
2025-06-27 13:47:00 -04:00
parent 54c036e9c8
commit f48f819607
13 changed files with 252 additions and 239 deletions

View File

@@ -119,6 +119,18 @@ func (c *LoopCollectCompiler) compileAggregationFuncArgs(selectors []fql.ICollec
} }
func (c *LoopCollectCompiler) compileAggregationFuncCall(selectors []fql.ICollectAggregateSelectorContext, accumulator vm.Operand, argsPkg []int) { func (c *LoopCollectCompiler) compileAggregationFuncCall(selectors []fql.ICollectAggregateSelectorContext, accumulator vm.Operand, argsPkg []int) {
// Gets the number of records in the accumulator
cond := c.ctx.Registers.Allocate(core.Temp)
c.ctx.Emitter.EmitAB(vm.OpLength, cond, accumulator)
zero := loadConstant(c.ctx, runtime.ZeroInt)
// Check if the number equals to zero
c.ctx.Emitter.EmitEq(cond, cond, zero)
c.ctx.Registers.Free(zero)
// We skip the key retrieval and function call of there are no records in the accumulator
ifJump := c.ctx.Emitter.EmitJumpIfTrue(cond, core.JumpPlaceholder)
selectorVarRegs := make([]vm.Operand, len(selectors))
for i, selector := range selectors { for i, selector := range selectors {
argsNum := argsPkg[i] argsNum := argsPkg[i]
@@ -149,9 +161,20 @@ func (c *LoopCollectCompiler) compileAggregationFuncCall(selectors []fql.ICollec
// Since this temporary scope is only for aggregators and will be closed after the aggregation // Since this temporary scope is only for aggregators and will be closed after the aggregation
selectorVarName := selector.Identifier().GetText() selectorVarName := selector.Identifier().GetText()
varReg := c.ctx.Symbols.DeclareLocal(selectorVarName) varReg := c.ctx.Symbols.DeclareLocal(selectorVarName)
selectorVarRegs[i] = varReg
c.ctx.Emitter.EmitAB(vm.OpMove, varReg, result) c.ctx.Emitter.EmitAB(vm.OpMove, varReg, result)
c.ctx.Registers.Free(result) c.ctx.Registers.Free(result)
} }
elseJump := c.ctx.Emitter.EmitJump(core.JumpPlaceholder)
c.ctx.Emitter.PatchJumpNext(ifJump)
for _, varReg := range selectorVarRegs {
c.ctx.Emitter.EmitA(vm.OpLoadNone, varReg)
}
c.ctx.Emitter.PatchJumpNext(elseJump)
c.ctx.Registers.Free(cond)
} }
func (c *LoopCollectCompiler) loadAggregationArgKey(selector int, arg int) vm.Operand { func (c *LoopCollectCompiler) loadAggregationArgKey(selector int, arg int) vm.Operand {

View File

@@ -33,6 +33,10 @@ func (c *CounterCollector) Get(_ context.Context, _ runtime.Value) (runtime.Valu
return c.Value, nil return c.Value, nil
} }
func (c *CounterCollector) Length(_ context.Context) (runtime.Int, error) {
return 1, nil
}
func (c *CounterCollector) Close() error { func (c *CounterCollector) Close() error {
return nil return nil
} }

View File

@@ -64,12 +64,16 @@ func (c *KeyCollector) Get(ctx context.Context, key runtime.Value) (runtime.Valu
v, ok := c.grouping[k] v, ok := c.grouping[k]
if !ok { if !ok {
return runtime.None, runtime.ErrNotFound return runtime.None, runtime.Errorf(runtime.ErrNotFound, "collector key: %s", k)
} }
return v, nil return v, nil
} }
func (c *KeyCollector) Length(ctx context.Context) (runtime.Int, error) {
return c.Value.Length(ctx)
}
func (c *KeyCollector) Close() error { func (c *KeyCollector) Close() error {
val := c.Value val := c.Value
c.Value = nil c.Value = nil

View File

@@ -113,12 +113,16 @@ func (c *KeyCounterCollector) Get(ctx context.Context, key runtime.Value) (runti
v, ok := c.grouping[k] v, ok := c.grouping[k]
if !ok { if !ok {
return runtime.None, runtime.ErrNotFound return runtime.None, runtime.Errorf(runtime.ErrNotFound, "collector key: %s", k)
} }
return v, nil return v, nil
} }
func (c *KeyCounterCollector) Length(ctx context.Context) (runtime.Int, error) {
return c.Value.Length(ctx)
}
func (c *KeyCounterCollector) Close() error { func (c *KeyCounterCollector) Close() error {
val := c.Value val := c.Value
c.Value = nil c.Value = nil

View File

@@ -97,6 +97,10 @@ func (c *KeyGroupCollector) Get(ctx context.Context, key runtime.Value) (runtime
return v, nil return v, nil
} }
func (c *KeyGroupCollector) Length(ctx context.Context) (runtime.Int, error) {
return c.Value.Length(ctx)
}
func (c *KeyGroupCollector) Close() error { func (c *KeyGroupCollector) Close() error {
val := c.Value val := c.Value
c.Value = nil c.Value = nil

View File

@@ -63,6 +63,10 @@ func (s *Sorter) Get(_ context.Context, _ runtime.Value) (runtime.Value, error)
return runtime.None, runtime.ErrNotSupported return runtime.None, runtime.ErrNotSupported
} }
func (s *Sorter) Length(ctx context.Context) (runtime.Int, error) {
return s.Value.Length(ctx)
}
func (s *Sorter) Close() error { func (s *Sorter) Close() error {
val := s.Value val := s.Value
s.Value = nil s.Value = nil

View File

@@ -73,6 +73,10 @@ func (s *MultiSorter) Get(_ context.Context, _ runtime.Value) (runtime.Value, er
return runtime.None, runtime.ErrNotSupported return runtime.None, runtime.ErrNotSupported
} }
func (s *MultiSorter) Length(ctx context.Context) (runtime.Int, error) {
return s.Value.Length(ctx)
}
func (s *MultiSorter) Close() error { func (s *MultiSorter) Close() error {
val := s.Value val := s.Value
s.Value = nil s.Value = nil

View File

@@ -11,6 +11,7 @@ type Transformer interface {
runtime.Value runtime.Value
runtime.Iterable runtime.Iterable
runtime.Keyed runtime.Keyed
runtime.Measurable
io.Closer io.Closer
Add(ctx context.Context, key, value runtime.Value) error Add(ctx context.Context, key, value runtime.Value) error

View File

@@ -8,7 +8,6 @@ const (
OpJump OpJump
OpJumpIfFalse OpJumpIfFalse
OpJumpIfTrue OpJumpIfTrue
OpJumpIfEmpty
// Register Operations // Register Operations
OpMove // Move a value from register A to register B OpMove // Move a value from register A to register B
@@ -112,8 +111,6 @@ func (op Opcode) String() string {
return "JMPF" return "JMPF"
case OpJumpIfTrue: case OpJumpIfTrue:
return "JMPT" return "JMPT"
case OpJumpIfEmpty:
return "JMPE"
// Register Operations // Register Operations
case OpMove: case OpMove:

View File

@@ -84,23 +84,6 @@ loop:
if runtime.ToBoolean(reg[src1]) { if runtime.ToBoolean(reg[src1]) {
vm.pc = int(dst) vm.pc = int(dst)
} }
case OpJumpIfEmpty:
val, ok := reg[src1].(runtime.Measurable)
if ok {
size, err := val.Length(ctx)
if err != nil {
return nil, err
}
if size == 0 {
vm.pc = int(dst)
}
} else {
// If the value is not measurable, we consider it empty
vm.pc = int(dst)
}
case OpAdd: case OpAdd:
reg[dst] = internal.Add(ctx, reg[src1], reg[src2]) reg[dst] = internal.Add(ctx, reg[src1], reg[src2])
case OpSub: case OpSub:

View File

@@ -1,211 +0,0 @@
package vm_test
import (
"testing"
. "github.com/MontFerret/ferret/test/integration/base"
)
func TestCollectAggregateAdditional(t *testing.T) {
RunUseCases(t, []UseCase{
// Test 1: Multiple aggregation functions with complex expressions
// Test 2: Nested FOR loops with COLLECT AGGREGATE
// Test 3: Empty array handling
// Test 4: Null value handling
CaseArray(`
LET users = [
{
active: true,
age: null,
gender: "m",
married: true
},
{
active: true,
age: 25,
gender: "f",
married: false
},
{
active: true,
age: null,
gender: "m",
married: false
}
]
FOR u IN users
COLLECT gender = u.gender
AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
RETURN {
gender,
minAge,
maxAge
}
`, []any{
map[string]any{"gender": "f", "minAge": 25, "maxAge": 25},
map[string]any{"gender": "m", "minAge": nil, "maxAge": nil},
}, "Should handle null values in aggregation"),
// Test 5: Multiple grouping keys with aggregation
CaseArray(`
LET users = [
{
active: true,
age: 31,
gender: "m",
married: true,
department: "IT"
},
{
active: true,
age: 25,
gender: "f",
married: false,
department: "Marketing"
},
{
active: true,
age: 36,
gender: "m",
married: false,
department: "IT"
},
{
active: false,
age: 69,
gender: "m",
married: true,
department: "Management"
},
{
active: true,
age: 45,
gender: "f",
married: true,
department: "Marketing"
}
]
FOR u IN users
COLLECT
department = u.department,
gender = u.gender
AGGREGATE
minAge = MIN(u.age),
maxAge = MAX(u.age)
RETURN {
department,
gender,
minAge,
maxAge
}
`, []any{
map[string]any{"department": "IT", "gender": "m", "minAge": 31, "maxAge": 36},
map[string]any{"department": "Management", "gender": "m", "minAge": 69, "maxAge": 69},
map[string]any{"department": "Marketing", "gender": "f", "minAge": 25, "maxAge": 45},
}, "Should aggregate with multiple grouping keys"),
// Test 6: Aggregation with conditional expressions
CaseArray(`
LET users = [
{
active: true,
age: 31,
gender: "m",
married: true,
salary: 75000
},
{
active: true,
age: 25,
gender: "f",
married: false,
salary: 60000
},
{
active: true,
age: 36,
gender: "m",
married: false,
salary: 80000
},
{
active: false,
age: 69,
gender: "m",
married: true,
salary: 95000
},
{
active: true,
age: 45,
gender: "f",
married: true,
salary: 70000
}
]
FOR u IN users
COLLECT gender = u.gender
AGGREGATE
activeCount = SUM(u.active ? 1 : 0),
marriedCount = SUM(u.married ? 1 : 0),
highSalaryCount = SUM(u.salary > 70000 ? 1 : 0)
RETURN {
gender,
activeCount,
marriedCount,
highSalaryCount
}
`, []any{
map[string]any{
"gender": "f",
"activeCount": 2,
"marriedCount": 1,
"highSalaryCount": 0,
},
map[string]any{
"gender": "m",
"activeCount": 2,
"marriedCount": 2,
"highSalaryCount": 2,
},
}, "Should aggregate with conditional expressions"),
// Test 7: Aggregation with array operations
CaseArray(`
LET users = [
{
name: "John",
skills: ["JavaScript", "Python", "Go"]
},
{
name: "Jane",
skills: ["Java", "C++", "Python"]
},
{
name: "Bob",
skills: ["Go", "Rust"]
},
{
name: "Alice",
skills: ["JavaScript", "TypeScript"]
}
]
FOR u IN users
COLLECT AGGREGATE
allSkills = UNION(u.skills),
uniqueSkillCount = COUNT_DISTINCT(u.skills)
RETURN {
allSkills: SORTED(allSkills),
uniqueSkillCount
}
`, []any{
map[string]any{
"allSkills": []any{"C++", "Go", "Java", "JavaScript", "Python", "Rust", "TypeScript"},
"uniqueSkillCount": 7,
},
}, "Should aggregate with array operations"),
})
}

View File

@@ -9,14 +9,38 @@ import (
func TestCollectAggregate(t *testing.T) { func TestCollectAggregate(t *testing.T) {
RunUseCases(t, []UseCase{ RunUseCases(t, []UseCase{
CaseArray(` CaseArray(`
LET users = [] LET users = [
{
active: true,
age: null,
gender: "m",
married: true
},
{
active: true,
age: 25,
gender: "f",
married: false
},
{
active: true,
age: null,
gender: "m",
married: false
}
]
FOR u IN users FOR u IN users
COLLECT AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age) COLLECT gender = u.gender
AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
RETURN { RETURN {
gender,
minAge, minAge,
maxAge maxAge
} }
`, []any{}, "Should handle empty arrays gracefully"), `, []any{
map[string]any{"gender": "f", "minAge": 25, "maxAge": 25},
map[string]any{"gender": "m", "minAge": nil, "maxAge": nil},
}, "Should handle null values in aggregation"),
CaseArray(` CaseArray(`
LET users = [ LET users = [
{ {
@@ -63,6 +87,126 @@ FOR u IN users
map[string]any{"genderGroup": "f", "minAge": 25, "maxAge": 45}, map[string]any{"genderGroup": "f", "minAge": 25, "maxAge": 45},
map[string]any{"genderGroup": "m", "minAge": 31, "maxAge": 69}, map[string]any{"genderGroup": "m", "minAge": 31, "maxAge": 69},
}, "Should collect and aggregate values by a single key"), }, "Should collect and aggregate values by a single key"),
CaseArray(`
LET users = [
{
active: true,
age: 31,
gender: "m",
married: true,
department: "IT"
},
{
active: true,
age: 25,
gender: "f",
married: false,
department: "Marketing"
},
{
active: true,
age: 36,
gender: "m",
married: false,
department: "IT"
},
{
active: false,
age: 69,
gender: "m",
married: true,
department: "Management"
},
{
active: true,
age: 45,
gender: "f",
married: true,
department: "Marketing"
}
]
FOR u IN users
COLLECT
department = u.department,
gender = u.gender
AGGREGATE
minAge = MIN(u.age),
maxAge = MAX(u.age)
RETURN {
department,
gender,
minAge,
maxAge
}
`, []any{
map[string]any{"department": "IT", "gender": "m", "minAge": 31, "maxAge": 36},
map[string]any{"department": "Management", "gender": "m", "minAge": 69, "maxAge": 69},
map[string]any{"department": "Marketing", "gender": "f", "minAge": 25, "maxAge": 45},
}, "Should aggregate with multiple grouping keys"),
CaseArray(`
LET users = [
{
active: true,
age: 31,
gender: "m",
married: true,
salary: 75000
},
{
active: true,
age: 25,
gender: "f",
married: false,
salary: 60000
},
{
active: true,
age: 36,
gender: "m",
married: false,
salary: 80000
},
{
active: false,
age: 69,
gender: "m",
married: true,
salary: 95000
},
{
active: true,
age: 45,
gender: "f",
married: true,
salary: 70000
}
]
FOR u IN users
COLLECT gender = u.gender
AGGREGATE
activeCount = SUM(u.active ? 1 : 0),
marriedCount = SUM(u.married ? 1 : 0),
highSalaryCount = SUM(u.salary > 70000 ? 1 : 0)
RETURN {
gender,
activeCount,
marriedCount,
highSalaryCount
}
`, []any{
map[string]any{
"gender": "f",
"activeCount": 2,
"marriedCount": 1,
"highSalaryCount": 0,
},
map[string]any{
"gender": "m",
"activeCount": 2,
"marriedCount": 2,
"highSalaryCount": 2,
},
}, "Should aggregate with conditional expressions"),
CaseArray(` CaseArray(`
LET users = [ LET users = [
{ {
@@ -102,9 +246,20 @@ FOR u IN users
minAge, minAge,
maxAge maxAge
} }
`, []any{ `,
map[string]any{"minAge": 25, "maxAge": 69}, []any{map[string]any{"minAge": 25, "maxAge": 69}},
}, "Should collect and aggregate values without grouping"), "Should collect and aggregate values without grouping"),
CaseArray(`
LET users = []
FOR u IN users
COLLECT AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
RETURN {
minAge,
maxAge
}
`,
[]any{map[string]any{"minAge": nil, "maxAge": nil}},
"Should handle empty arrays gracefully"),
CaseArray(` CaseArray(`
LET users = [ LET users = [
{ {
@@ -322,5 +477,38 @@ FOR u IN users
"employeeCount": 2, "employeeCount": 2,
}, },
}, "Should aggregate multiple values with complex expressions"), }, "Should aggregate multiple values with complex expressions"),
CaseArray(`
LET users = [
{
name: "John",
skills: ["JavaScript", "Python", "Go"]
},
{
name: "Jane",
skills: ["Java", "C++", "Python"]
},
{
name: "Bob",
skills: ["Go", "Rust"]
},
{
name: "Alice",
skills: ["JavaScript", "TypeScript"]
}
]
FOR u IN users
COLLECT AGGREGATE
allSkills = UNION(u.skills),
uniqueSkillCount = COUNT_DISTINCT(u.skills)
RETURN {
allSkills: SORTED(allSkills),
uniqueSkillCount
}
`, []any{
map[string]any{
"allSkills": []any{"C++", "Go", "Java", "JavaScript", "Python", "Rust", "TypeScript"},
"uniqueSkillCount": 7,
},
}, "Should aggregate with array operations"),
}) })
} }

View File

@@ -87,6 +87,14 @@ func TestForCollect(t *testing.T) {
RETURN {x, gender} RETURN {x, gender}
`, "Should not have access to variables defined before COLLECT"), `, "Should not have access to variables defined before COLLECT"),
CaseArray(` CaseArray(`
LET users = []
FOR i IN users
COLLECT gender = i.gender
RETURN gender
`,
[]any{},
"Should handle empty arrays gracefully"),
CaseArray(`
LET users = [ LET users = [
{ {
active: true, active: true,