diff --git a/pkg/compiler/internal/core/emitter.go b/pkg/compiler/internal/core/emitter.go index 05b273ed..d7afbf72 100644 --- a/pkg/compiler/internal/core/emitter.go +++ b/pkg/compiler/internal/core/emitter.go @@ -27,6 +27,18 @@ func (e *Emitter) Position() int { return len(e.instructions) - 1 } +func (e *Emitter) Patchx(pos int, arg int) { + current := e.instructions[pos] + e.instructions[pos] = vm.Instruction{ + Opcode: current.Opcode, + Operands: [3]vm.Operand{ + current.Operands[0], + vm.Operand(arg), + current.Operands[2], + }, + } +} + // PatchSwapAB modifies an instruction at the given position to swap operands and update its operation and destination. func (e *Emitter) PatchSwapAB(pos int, op vm.Opcode, dst, src1 vm.Operand) { e.instructions[pos] = vm.Instruction{ diff --git a/pkg/compiler/internal/core/loops.go b/pkg/compiler/internal/core/loops.go index a5ed8e4d..b0c6011f 100644 --- a/pkg/compiler/internal/core/loops.go +++ b/pkg/compiler/internal/core/loops.go @@ -55,6 +55,18 @@ func (lt *LoopTable) Pop() *Loop { return top } +func (lt *LoopTable) FindParent(pos int) *Loop { + for i := pos - 1; i >= 0; i-- { + loop := lt.stack[i] + + if loop.Allocate { + return loop + } + } + + return nil +} + func (lt *LoopTable) Current() *Loop { if len(lt.stack) == 0 { return nil diff --git a/pkg/compiler/internal/loop.go b/pkg/compiler/internal/loop.go index 504f8a59..3a3af1a5 100644 --- a/pkg/compiler/internal/loop.go +++ b/pkg/compiler/internal/loop.go @@ -64,6 +64,19 @@ func (c *LoopCompiler) compileInitialization(ctx fql.IForExpressionContext) antl loop.EmitInitialization(c.ctx.Registers, c.ctx.Emitter) + if !loop.Allocate { + // If the current loop must push distinct items, we must patch the dest dataset + if loop.Distinct { + parent := c.ctx.Loops.FindParent(c.ctx.Loops.Depth()) + + if parent == nil { + panic("parent loop not found in loop table") + } + + c.ctx.Emitter.Patchx(parent.Pos, 1) + } + } + return returnRuleCtx } diff --git a/pkg/stdlib/collections/count_distinct.go b/pkg/stdlib/collections/count_distinct.go index f7c4958f..9baa5e9b 100644 --- a/pkg/stdlib/collections/count_distinct.go +++ b/pkg/stdlib/collections/count_distinct.go @@ -2,6 +2,7 @@ package collections import ( "context" + "github.com/MontFerret/ferret/pkg/runtime" "github.com/MontFerret/ferret/pkg/runtime/core" ) diff --git a/test/integration/vm/vm_for_distinct_test.go b/test/integration/vm/vm_for_distinct_test.go index 746b9ae9..e9113eb2 100644 --- a/test/integration/vm/vm_for_distinct_test.go +++ b/test/integration/vm/vm_for_distinct_test.go @@ -27,11 +27,11 @@ func TestForDistinct(t *testing.T) { []any{[]any{"foo"}, []any{"bar"}, []any{"qaz"}, []any{"abc"}}, ), CaseArray(` -LET strs = ["foo", "bar", "qaz", "foo", "abc", "bar"] + LET strs = ["foo", "bar", "qaz", "foo", "abc", "bar"] -FOR s IN strs - SORT s - RETURN DISTINCT s + FOR s IN strs + SORT s + RETURN DISTINCT s `, []any{"abc", "bar", "foo", "qaz"}, "Should sort and respect DISTINCT keyword"), CaseArray( ` @@ -135,7 +135,7 @@ FOR s IN strs map[string]any{"active": true}, }), CaseArray(` -LET users = [ + LET users = [ { active: true, age: 39, @@ -161,16 +161,220 @@ LET users = [ married: true } ] -FOR u IN users - COLLECT genderGroup = u.gender - AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age) - - RETURN DISTINCT { - minAge, - maxAge - } + FOR u IN users + COLLECT genderGroup = u.gender + AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age) + + RETURN DISTINCT { + minAge, + maxAge + } `, []any{ map[string]any{"maxAge": 45, "minAge": 39}, }, "Should collect and aggregate values by a single key"), + // Test DISTINCT with null values + CaseArray(` + LET users = [ + { + active: true, + age: null, + gender: "m" + }, + { + active: true, + age: 25, + gender: "f" + }, + { + active: true, + age: null, + gender: "m" + }, + { + active: false, + age: 45, + gender: "m" + } + ] + FOR u IN users + RETURN DISTINCT u.age + `, []any{nil, 25, 45}, "Should handle null values with DISTINCT"), + + // Test DISTINCT with nested FOR loops + CaseArray(` + LET departments = ["IT", "Marketing", "HR"] + + FOR dept IN departments + FOR gender IN ["m", "f"] + RETURN DISTINCT { department: dept, gender } + `, []any{ + map[string]any{"department": "IT", "gender": "m"}, + map[string]any{"department": "IT", "gender": "f"}, + map[string]any{"department": "Marketing", "gender": "m"}, + map[string]any{"department": "Marketing", "gender": "f"}, + map[string]any{"department": "HR", "gender": "m"}, + map[string]any{"department": "HR", "gender": "f"}, + }, "Should handle DISTINCT with nested FOR loops"), + + // Test DISTINCT with complex objects and nested properties + CaseArray(` + LET users = [ + { + name: "John", + department: { + name: "IT", + location: "Building A" + } + }, + { + name: "Jane", + department: { + name: "Marketing", + location: "Building B" + } + }, + { + name: "Bob", + department: { + name: "IT", + location: "Building A" + } + }, + { + name: "Alice", + department: { + name: "HR", + location: "Building B" + } + } + ] + FOR u IN users + RETURN DISTINCT u.department + `, []any{ + map[string]any{"name": "IT", "location": "Building A"}, + map[string]any{"name": "Marketing", "location": "Building B"}, + map[string]any{"name": "HR", "location": "Building B"}, + }, "Should handle DISTINCT with complex objects and nested properties"), + + // Test DISTINCT with calculated values + CaseArray(` + LET users = [ + { age: 25 }, + { age: 32 }, + { age: 45 }, + { age: 26 }, + { age: 31 } + ] + FOR u IN users + RETURN DISTINCT FLOOR(u.age / 10) * 10 + `, []any{20, 30, 40}, "Should handle DISTINCT with calculated values"), + + // Test DISTINCT with empty arrays + CaseArray(` + LET emptyArray = [] + FOR i IN emptyArray + RETURN DISTINCT i + `, []any{}, "Should handle DISTINCT with empty arrays"), + + // Test DISTINCT with SORT BY multiple fields + CaseArray(` + LET users = [ + { name: "John", age: 30, gender: "m" }, + { name: "Jane", age: 25, gender: "f" }, + { name: "Bob", age: 30, gender: "m" }, + { name: "Alice", age: 35, gender: "f" }, + { name: "Mike", age: 25, gender: "m" } + ] + FOR u IN users + SORT u.age DESC, u.gender + RETURN DISTINCT u.age + `, []any{35, 30, 25}, "Should handle DISTINCT with SORT BY multiple fields"), + + // Test DISTINCT with multiple levels of nesting + CaseArray(` + LET departments = ["IT", "Marketing", "HR"] + LET genders = ["m", "f"] + LET statuses = ["active", "inactive"] + + FOR dept IN departments + FOR gender IN genders + FOR status IN statuses + RETURN DISTINCT { + department: dept, + gender: gender + } + `, []any{ + map[string]any{"department": "IT", "gender": "m"}, + map[string]any{"department": "IT", "gender": "f"}, + map[string]any{"department": "Marketing", "gender": "m"}, + map[string]any{"department": "Marketing", "gender": "f"}, + map[string]any{"department": "HR", "gender": "m"}, + map[string]any{"department": "HR", "gender": "f"}, + }, "Should handle DISTINCT with multiple levels of nesting"), + + // Test DISTINCT with multiple levels of nesting + CaseArray(` + LET departments = ["IT", "Marketing", "HR"] + LET genders = ["m", "f"] + LET statuses = ["active", "inactive"] + + FOR dept IN departments + SORT dept + FOR gender IN genders + SORT gender + FOR status IN statuses + SORT status + RETURN DISTINCT { + department: dept, + gender: gender + } + `, []any{ + map[string]any{"department": "HR", "gender": "f"}, + map[string]any{"department": "HR", "gender": "m"}, + map[string]any{"department": "IT", "gender": "f"}, + map[string]any{"department": "IT", "gender": "m"}, + map[string]any{"department": "Marketing", "gender": "f"}, + map[string]any{"department": "Marketing", "gender": "m"}, + }, "Should handle DISTINCT with multiple levels of nesting with SORT"), + + // Test DISTINCT with a combination of COLLECT, AGGREGATE, and DISTINCT + CaseArray(` + LET users = [ + { name: "John", department: "IT", age: 30 }, + { name: "Jane", department: "Marketing", age: 25 }, + { name: "Bob", department: "IT", age: 40 }, + { name: "Alice", department: "HR", age: 35 }, + { name: "Mike", department: "Marketing", age: 45 } + ] + + FOR u IN users + COLLECT dept = u.department + AGGREGATE avgAge = AVERAGE(u.age) + RETURN DISTINCT { + department: dept, + ageCategory: avgAge > 35 ? "Senior" : "Junior" + } + `, []any{ + map[string]any{"department": "HR", "ageCategory": "Junior"}, + map[string]any{"department": "IT", "ageCategory": "Junior"}, + map[string]any{"department": "Marketing", "ageCategory": "Junior"}, + }, "Should handle DISTINCT with a combination of COLLECT, AGGREGATE, and DISTINCT"), + + // Test DISTINCT with array comparison and sorting + CaseArray(` + LET users = [ + { name: "John", skills: ["JavaScript", "Python"] }, + { name: "Jane", skills: ["Java", "C++"] }, + { name: "Bob", skills: ["JavaScript", "Python"] }, + { name: "Alice", skills: ["Python", "JavaScript"] } + ] + + FOR u IN users + SORT u.name + RETURN DISTINCT SORTED(u.skills) + `, []any{ + []any{"JavaScript", "Python"}, + []any{"C++", "Java"}, + }, "Should handle DISTINCT with array comparison and sorting"), }) }