Refactor loop and dataset handling: introduce a Patchx method on the emitter, add FindParent to the loop table, and update the DISTINCT logic to support nested loops. Expand the integration tests to cover more DISTINCT scenarios.

2025-08-13 19:52:52 +02:00 · 2025-06-28 14:29:53 -04:00
parent 7dbcc24e04
commit e09f981560
5 changed files with 255 additions and 13 deletions
--- a/pkg/compiler/internal/core/emitter.go
+++ b/pkg/compiler/internal/core/emitter.go
@@ -27,6 +27,18 @@ func (e *Emitter) Position() int {
 	return len(e.instructions) - 1
 }

+func (e *Emitter) Patchx(pos int, arg int) {
+	current := e.instructions[pos]
+	e.instructions[pos] = vm.Instruction{
+		Opcode: current.Opcode,
+		Operands: [3]vm.Operand{
+			current.Operands[0],
+			vm.Operand(arg),
+			current.Operands[2],
+		},
+	}
+}
+
 // PatchSwapAB modifies an instruction at the given position to swap operands and update its operation and destination.
 func (e *Emitter) PatchSwapAB(pos int, op vm.Opcode, dst, src1 vm.Operand) {
 	e.instructions[pos] = vm.Instruction{
--- a/pkg/compiler/internal/core/loops.go
+++ b/pkg/compiler/internal/core/loops.go
@@ -55,6 +55,18 @@ func (lt *LoopTable) Pop() *Loop {
 	return top
 }

+func (lt *LoopTable) FindParent(pos int) *Loop {
+	for i := pos - 1; i >= 0; i-- {
+		loop := lt.stack[i]
+
+		if loop.Allocate {
+			return loop
+		}
+	}
+
+	return nil
+}
+
 func (lt *LoopTable) Current() *Loop {
 	if len(lt.stack) == 0 {
 		return nil
--- a/pkg/compiler/internal/loop.go
+++ b/pkg/compiler/internal/loop.go
@@ -64,6 +64,19 @@ func (c *LoopCompiler) compileInitialization(ctx fql.IForExpressionContext) antl

 	loop.EmitInitialization(c.ctx.Registers, c.ctx.Emitter)

+	if !loop.Allocate {
+		// If the current loop must push distinct items, patch the destination dataset instruction of the nearest allocating parent loop
+		if loop.Distinct {
+			parent := c.ctx.Loops.FindParent(c.ctx.Loops.Depth())
+
+			if parent == nil {
+				panic("parent loop not found in loop table")
+			}
+
+			c.ctx.Emitter.Patchx(parent.Pos, 1)
+		}
+	}
+
 	return returnRuleCtx
 }

--- a/pkg/stdlib/collections/count_distinct.go
+++ b/pkg/stdlib/collections/count_distinct.go
@@ -2,6 +2,7 @@ package collections

 import (
 	"context"
+
 	"github.com/MontFerret/ferret/pkg/runtime"
 	"github.com/MontFerret/ferret/pkg/runtime/core"
 )
--- a/test/integration/vm/vm_for_distinct_test.go
+++ b/test/integration/vm/vm_for_distinct_test.go
@@ -27,11 +27,11 @@ func TestForDistinct(t *testing.T) {
 			[]any{[]any{"foo"}, []any{"bar"}, []any{"qaz"}, []any{"abc"}},
 		),
 		CaseArray(`
-LET strs = ["foo", "bar", "qaz", "foo", "abc", "bar"]
+			LET strs = ["foo", "bar", "qaz", "foo", "abc", "bar"]

-FOR s IN strs
-	SORT s
-	RETURN DISTINCT s
+			FOR s IN strs
+				SORT s
+				RETURN DISTINCT s
 `, []any{"abc", "bar", "foo", "qaz"}, "Should sort and respect DISTINCT keyword"),
 		CaseArray(
 			`
@@ -135,7 +135,7 @@ FOR s IN strs
 				map[string]any{"active": true},
 			}),
 		CaseArray(`
-LET users = [
+			LET users = [
 				{
 					active: true,
 					age: 39,
@@ -161,16 +161,220 @@ LET users = [
 					married: true
 				}
 			]
-FOR u IN users
-  COLLECT genderGroup = u.gender
-   AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
-
-  RETURN DISTINCT {
-	minAge,
-    maxAge
-  }
+			FOR u IN users
+			  COLLECT genderGroup = u.gender
+			   AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age)
+			
+			  RETURN DISTINCT {
+				minAge,
+				maxAge
+			  }
 `, []any{
 			map[string]any{"maxAge": 45, "minAge": 39},
 		}, "Should collect and aggregate values by a single key"),
+		// Test DISTINCT with null values
+		CaseArray(`
+			LET users = [
+				{
+					active: true,
+					age: null,
+					gender: "m"
+				},
+				{
+					active: true,
+					age: 25,
+					gender: "f"
+				},
+				{
+					active: true,
+					age: null,
+					gender: "m"
+				},
+				{
+					active: false,
+					age: 45,
+					gender: "m"
+				}
+			]
+			FOR u IN users
+				RETURN DISTINCT u.age
+		`, []any{nil, 25, 45}, "Should handle null values with DISTINCT"),
+
+		// Test DISTINCT with nested FOR loops
+		CaseArray(`
+			LET departments = ["IT", "Marketing", "HR"]
+
+			FOR dept IN departments
+				FOR gender IN ["m", "f"]
+					RETURN DISTINCT { department: dept, gender }
+		`, []any{
+			map[string]any{"department": "IT", "gender": "m"},
+			map[string]any{"department": "IT", "gender": "f"},
+			map[string]any{"department": "Marketing", "gender": "m"},
+			map[string]any{"department": "Marketing", "gender": "f"},
+			map[string]any{"department": "HR", "gender": "m"},
+			map[string]any{"department": "HR", "gender": "f"},
+		}, "Should handle DISTINCT with nested FOR loops"),
+
+		// Test DISTINCT with complex objects and nested properties
+		CaseArray(`
+			LET users = [
+				{
+					name: "John",
+					department: {
+						name: "IT",
+						location: "Building A"
+					}
+				},
+				{
+					name: "Jane",
+					department: {
+						name: "Marketing",
+						location: "Building B"
+					}
+				},
+				{
+					name: "Bob",
+					department: {
+						name: "IT",
+						location: "Building A"
+					}
+				},
+				{
+					name: "Alice",
+					department: {
+						name: "HR",
+						location: "Building B"
+					}
+				}
+			]
+			FOR u IN users
+				RETURN DISTINCT u.department
+		`, []any{
+			map[string]any{"name": "IT", "location": "Building A"},
+			map[string]any{"name": "Marketing", "location": "Building B"},
+			map[string]any{"name": "HR", "location": "Building B"},
+		}, "Should handle DISTINCT with complex objects and nested properties"),
+
+		// Test DISTINCT with calculated values
+		CaseArray(`
+			LET users = [
+				{ age: 25 },
+				{ age: 32 },
+				{ age: 45 },
+				{ age: 26 },
+				{ age: 31 }
+			]
+			FOR u IN users
+				RETURN DISTINCT FLOOR(u.age / 10) * 10
+		`, []any{20, 30, 40}, "Should handle DISTINCT with calculated values"),
+
+		// Test DISTINCT with empty arrays
+		CaseArray(`
+			LET emptyArray = []
+			FOR i IN emptyArray
+				RETURN DISTINCT i
+		`, []any{}, "Should handle DISTINCT with empty arrays"),
+
+		// Test DISTINCT with SORT BY multiple fields
+		CaseArray(`
+			LET users = [
+				{ name: "John", age: 30, gender: "m" },
+				{ name: "Jane", age: 25, gender: "f" },
+				{ name: "Bob", age: 30, gender: "m" },
+				{ name: "Alice", age: 35, gender: "f" },
+				{ name: "Mike", age: 25, gender: "m" }
+			]
+			FOR u IN users
+				SORT u.age DESC, u.gender
+				RETURN DISTINCT u.age
+		`, []any{35, 30, 25}, "Should handle DISTINCT with SORT BY multiple fields"),
+
+		// Test DISTINCT with multiple levels of nesting
+		CaseArray(`
+			LET departments = ["IT", "Marketing", "HR"]
+			LET genders = ["m", "f"]
+			LET statuses = ["active", "inactive"]
+
+			FOR dept IN departments
+				FOR gender IN genders
+					FOR status IN statuses
+						RETURN DISTINCT { 
+							department: dept, 
+							gender: gender
+						}
+		`, []any{
+			map[string]any{"department": "IT", "gender": "m"},
+			map[string]any{"department": "IT", "gender": "f"},
+			map[string]any{"department": "Marketing", "gender": "m"},
+			map[string]any{"department": "Marketing", "gender": "f"},
+			map[string]any{"department": "HR", "gender": "m"},
+			map[string]any{"department": "HR", "gender": "f"},
+		}, "Should handle DISTINCT with multiple levels of nesting"),
+
+		// Test DISTINCT with multiple levels of nesting and SORT at each level
+		CaseArray(`
+			LET departments = ["IT", "Marketing", "HR"]
+			LET genders = ["m", "f"]
+			LET statuses = ["active", "inactive"]
+
+			FOR dept IN departments
+				SORT dept
+				FOR gender IN genders
+					SORT gender
+					FOR status IN statuses
+						SORT status
+						RETURN DISTINCT { 
+							department: dept, 
+							gender: gender
+						}
+		`, []any{
+			map[string]any{"department": "HR", "gender": "f"},
+			map[string]any{"department": "HR", "gender": "m"},
+			map[string]any{"department": "IT", "gender": "f"},
+			map[string]any{"department": "IT", "gender": "m"},
+			map[string]any{"department": "Marketing", "gender": "f"},
+			map[string]any{"department": "Marketing", "gender": "m"},
+		}, "Should handle DISTINCT with multiple levels of nesting with SORT"),
+
+		// Test DISTINCT with a combination of COLLECT, AGGREGATE, and DISTINCT
+		CaseArray(`
+			LET users = [
+				{ name: "John", department: "IT", age: 30 },
+				{ name: "Jane", department: "Marketing", age: 25 },
+				{ name: "Bob", department: "IT", age: 40 },
+				{ name: "Alice", department: "HR", age: 35 },
+				{ name: "Mike", department: "Marketing", age: 45 }
+			]
+
+			FOR u IN users
+				COLLECT dept = u.department
+				AGGREGATE avgAge = AVERAGE(u.age)
+				RETURN DISTINCT {
+					department: dept,
+					ageCategory: avgAge > 35 ? "Senior" : "Junior"
+				}
+		`, []any{
+			map[string]any{"department": "HR", "ageCategory": "Junior"},
+			map[string]any{"department": "IT", "ageCategory": "Junior"},
+			map[string]any{"department": "Marketing", "ageCategory": "Junior"},
+		}, "Should handle DISTINCT with a combination of COLLECT, AGGREGATE, and DISTINCT"),
+
+		// Test DISTINCT with array comparison and sorting
+		CaseArray(`
+			LET users = [
+				{ name: "John", skills: ["JavaScript", "Python"] },
+				{ name: "Jane", skills: ["Java", "C++"] },
+				{ name: "Bob", skills: ["JavaScript", "Python"] },
+				{ name: "Alice", skills: ["Python", "JavaScript"] }
+			]
+
+			FOR u IN users
+				SORT u.name
+				RETURN DISTINCT SORTED(u.skills)
+		`, []any{
+			[]any{"JavaScript", "Python"},
+			[]any{"C++", "Java"},
+		}, "Should handle DISTINCT with array comparison and sorting"),
 	})
 }