1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-13 19:52:52 +02:00

Refactor loop and dataset handling; introduce Patchx method for emitter, add FindParent to loop table, and update DISTINCT logic with nested loop support. Expand integration tests for DISTINCT scenarios.

This commit is contained in:
Tim Voronov
2025-06-28 14:29:53 -04:00
parent 7dbcc24e04
commit e09f981560
5 changed files with 255 additions and 13 deletions

View File

@@ -27,6 +27,18 @@ func (e *Emitter) Position() int {
return len(e.instructions) - 1
}
// Patchx rewrites the instruction stored at pos so that its second operand
// becomes arg. The opcode and the first and third operands are carried over
// from the existing instruction unchanged.
func (e *Emitter) Patchx(pos int, arg int) {
	prev := e.instructions[pos]

	// Operands is an array, so this assignment copies it; only slot 1 is replaced.
	operands := prev.Operands
	operands[1] = vm.Operand(arg)

	e.instructions[pos] = vm.Instruction{
		Opcode:   prev.Opcode,
		Operands: operands,
	}
}
// PatchSwapAB modifies an instruction at the given position to swap operands and update its operation and destination.
func (e *Emitter) PatchSwapAB(pos int, op vm.Opcode, dst, src1 vm.Operand) {
e.instructions[pos] = vm.Instruction{

View File

@@ -55,6 +55,18 @@ func (lt *LoopTable) Pop() *Loop {
return top
}
// FindParent scans the loop stack downward, starting at the entry just below
// pos, and returns the first loop whose Allocate flag is set. It returns nil
// when no such loop exists below pos.
func (lt *LoopTable) FindParent(pos int) *Loop {
	idx := pos

	for idx > 0 {
		idx--

		if candidate := lt.stack[idx]; candidate.Allocate {
			return candidate
		}
	}

	return nil
}
func (lt *LoopTable) Current() *Loop {
if len(lt.stack) == 0 {
return nil

View File

@@ -64,6 +64,19 @@ func (c *LoopCompiler) compileInitialization(ctx fql.IForExpressionContext) antl
loop.EmitInitialization(c.ctx.Registers, c.ctx.Emitter)
if !loop.Allocate {
// If the current loop must push distinct items, we must patch the dest dataset
if loop.Distinct {
parent := c.ctx.Loops.FindParent(c.ctx.Loops.Depth())
if parent == nil {
panic("parent loop not found in loop table")
}
c.ctx.Emitter.Patchx(parent.Pos, 1)
}
}
return returnRuleCtx
}

View File

@@ -2,6 +2,7 @@ package collections
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime"
"github.com/MontFerret/ferret/pkg/runtime/core"
)

View File

@@ -27,11 +27,11 @@ func TestForDistinct(t *testing.T) {
[]any{[]any{"foo"}, []any{"bar"}, []any{"qaz"}, []any{"abc"}},
),
CaseArray(`
LET strs = ["foo", "bar", "qaz", "foo", "abc", "bar"]
LET strs = ["foo", "bar", "qaz", "foo", "abc", "bar"]
FOR s IN strs
SORT s
RETURN DISTINCT s
FOR s IN strs
SORT s
RETURN DISTINCT s
`, []any{"abc", "bar", "foo", "qaz"}, "Should sort and respect DISTINCT keyword"),
CaseArray(
`
@@ -135,7 +135,7 @@ FOR s IN strs
map[string]any{"active": true},
}),
CaseArray(`
LET users = [
LET users = [
{
active: true,
age: 39,
@@ -161,16 +161,220 @@ LET users = [
married: true
}
]
FOR u IN users
COLLECT genderGroup = u.gender
AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
RETURN DISTINCT {
minAge,
maxAge
}
FOR u IN users
COLLECT genderGroup = u.gender
AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
RETURN DISTINCT {
minAge,
maxAge
}
`, []any{
map[string]any{"maxAge": 45, "minAge": 39},
}, "Should collect and aggregate values by a single key"),
// Test DISTINCT with null values
CaseArray(`
LET users = [
{
active: true,
age: null,
gender: "m"
},
{
active: true,
age: 25,
gender: "f"
},
{
active: true,
age: null,
gender: "m"
},
{
active: false,
age: 45,
gender: "m"
}
]
FOR u IN users
RETURN DISTINCT u.age
`, []any{nil, 25, 45}, "Should handle null values with DISTINCT"),
// Test DISTINCT with nested FOR loops
CaseArray(`
LET departments = ["IT", "Marketing", "HR"]
FOR dept IN departments
FOR gender IN ["m", "f"]
RETURN DISTINCT { department: dept, gender }
`, []any{
map[string]any{"department": "IT", "gender": "m"},
map[string]any{"department": "IT", "gender": "f"},
map[string]any{"department": "Marketing", "gender": "m"},
map[string]any{"department": "Marketing", "gender": "f"},
map[string]any{"department": "HR", "gender": "m"},
map[string]any{"department": "HR", "gender": "f"},
}, "Should handle DISTINCT with nested FOR loops"),
// Test DISTINCT with complex objects and nested properties
CaseArray(`
LET users = [
{
name: "John",
department: {
name: "IT",
location: "Building A"
}
},
{
name: "Jane",
department: {
name: "Marketing",
location: "Building B"
}
},
{
name: "Bob",
department: {
name: "IT",
location: "Building A"
}
},
{
name: "Alice",
department: {
name: "HR",
location: "Building B"
}
}
]
FOR u IN users
RETURN DISTINCT u.department
`, []any{
map[string]any{"name": "IT", "location": "Building A"},
map[string]any{"name": "Marketing", "location": "Building B"},
map[string]any{"name": "HR", "location": "Building B"},
}, "Should handle DISTINCT with complex objects and nested properties"),
// Test DISTINCT with calculated values
CaseArray(`
LET users = [
{ age: 25 },
{ age: 32 },
{ age: 45 },
{ age: 26 },
{ age: 31 }
]
FOR u IN users
RETURN DISTINCT FLOOR(u.age / 10) * 10
`, []any{20, 30, 40}, "Should handle DISTINCT with calculated values"),
// Test DISTINCT with empty arrays
CaseArray(`
LET emptyArray = []
FOR i IN emptyArray
RETURN DISTINCT i
`, []any{}, "Should handle DISTINCT with empty arrays"),
// Test DISTINCT with SORT BY multiple fields
CaseArray(`
LET users = [
{ name: "John", age: 30, gender: "m" },
{ name: "Jane", age: 25, gender: "f" },
{ name: "Bob", age: 30, gender: "m" },
{ name: "Alice", age: 35, gender: "f" },
{ name: "Mike", age: 25, gender: "m" }
]
FOR u IN users
SORT u.age DESC, u.gender
RETURN DISTINCT u.age
`, []any{35, 30, 25}, "Should handle DISTINCT with SORT BY multiple fields"),
// Test DISTINCT with multiple levels of nesting
CaseArray(`
LET departments = ["IT", "Marketing", "HR"]
LET genders = ["m", "f"]
LET statuses = ["active", "inactive"]
FOR dept IN departments
FOR gender IN genders
FOR status IN statuses
RETURN DISTINCT {
department: dept,
gender: gender
}
`, []any{
map[string]any{"department": "IT", "gender": "m"},
map[string]any{"department": "IT", "gender": "f"},
map[string]any{"department": "Marketing", "gender": "m"},
map[string]any{"department": "Marketing", "gender": "f"},
map[string]any{"department": "HR", "gender": "m"},
map[string]any{"department": "HR", "gender": "f"},
}, "Should handle DISTINCT with multiple levels of nesting"),
// Test DISTINCT with multiple levels of nesting combined with SORT
CaseArray(`
LET departments = ["IT", "Marketing", "HR"]
LET genders = ["m", "f"]
LET statuses = ["active", "inactive"]
FOR dept IN departments
SORT dept
FOR gender IN genders
SORT gender
FOR status IN statuses
SORT status
RETURN DISTINCT {
department: dept,
gender: gender
}
`, []any{
map[string]any{"department": "HR", "gender": "f"},
map[string]any{"department": "HR", "gender": "m"},
map[string]any{"department": "IT", "gender": "f"},
map[string]any{"department": "IT", "gender": "m"},
map[string]any{"department": "Marketing", "gender": "f"},
map[string]any{"department": "Marketing", "gender": "m"},
}, "Should handle DISTINCT with multiple levels of nesting with SORT"),
// Test DISTINCT with a combination of COLLECT, AGGREGATE, and DISTINCT
CaseArray(`
LET users = [
{ name: "John", department: "IT", age: 30 },
{ name: "Jane", department: "Marketing", age: 25 },
{ name: "Bob", department: "IT", age: 40 },
{ name: "Alice", department: "HR", age: 35 },
{ name: "Mike", department: "Marketing", age: 45 }
]
FOR u IN users
COLLECT dept = u.department
AGGREGATE avgAge = AVERAGE(u.age)
RETURN DISTINCT {
department: dept,
ageCategory: avgAge > 35 ? "Senior" : "Junior"
}
`, []any{
map[string]any{"department": "HR", "ageCategory": "Junior"},
map[string]any{"department": "IT", "ageCategory": "Junior"},
map[string]any{"department": "Marketing", "ageCategory": "Junior"},
}, "Should handle DISTINCT with a combination of COLLECT, AGGREGATE, and DISTINCT"),
// Test DISTINCT with array comparison and sorting
CaseArray(`
LET users = [
{ name: "John", skills: ["JavaScript", "Python"] },
{ name: "Jane", skills: ["Java", "C++"] },
{ name: "Bob", skills: ["JavaScript", "Python"] },
{ name: "Alice", skills: ["Python", "JavaScript"] }
]
FOR u IN users
SORT u.name
RETURN DISTINCT SORTED(u.skills)
`, []any{
[]any{"JavaScript", "Python"},
[]any{"C++", "Java"},
}, "Should handle DISTINCT with array comparison and sorting"),
})
}