diff --git a/taskfile/node_git.go b/taskfile/node_git.go index 883b7480..eba1a5c6 100644 --- a/taskfile/node_git.go +++ b/taskfile/node_git.go @@ -7,7 +7,7 @@ import ( "os" "path/filepath" "strings" - "time" + "sync" giturls "github.com/chainguard-dev/git-urls" "github.com/hashicorp/go-getter" @@ -26,6 +26,36 @@ type GitNode struct { path string } +type gitRepoCache struct { + mu sync.Mutex // Protects the locks map + locks map[string]*sync.Mutex // One mutex per repo cache key +} + +func (c *gitRepoCache) getLockForRepo(cacheKey string) *sync.Mutex { + c.mu.Lock() + defer c.mu.Unlock() + + if _, exists := c.locks[cacheKey]; !exists { + c.locks[cacheKey] = &sync.Mutex{} + } + + return c.locks[cacheKey] +} + +var globalGitRepoCache = &gitRepoCache{ + locks: make(map[string]*sync.Mutex), +} + +func CleanGitCache() error { + // Clear the in-memory locks map to prevent memory leak + globalGitRepoCache.mu.Lock() + globalGitRepoCache.locks = make(map[string]*sync.Mutex) + globalGitRepoCache.mu.Unlock() + + cacheDir := filepath.Join(os.TempDir(), "task-git-repos") + return os.RemoveAll(cacheDir) +} + func NewGitNode( entrypoint string, dir string, @@ -83,42 +113,64 @@ func (node *GitNode) buildURL() string { return fmt.Sprintf("git::%s?ref=%s&depth=1", baseURL, ref) } -func (node *GitNode) ReadContext(ctx context.Context) ([]byte, error) { - return node.readWithGoGetter(ctx) -} +// getOrCloneRepo returns the path to a cached git repository. +// If the repository is not cached, it clones it first. +// This function is thread-safe: multiple goroutines cloning the same repo+ref +// will synchronize, and only one clone operation will occur. +// +// The cache directory is /tmp/task-git-repos/{cache_key}/ +func (node *GitNode) getOrCloneRepo(ctx context.Context) (string, error) { + cacheKey := node.repoCacheKey() -func (node *GitNode) readWithGoGetter(ctx context.Context) ([]byte, error) { - // IMPORTANT: Do NOT create tmpDir in advance! - // If the directory exists, go-getter will use update() instead of clone() - // which is 3x slower (git init + fetch --tags + pull instead of a simple clone) - tmpDir := filepath.Join(os.TempDir(), fmt.Sprintf("task-git-getter-%d", time.Now().UnixNano())) + repoMutex := globalGitRepoCache.getLockForRepo(cacheKey) + repoMutex.Lock() + defer repoMutex.Unlock() - defer func() { - _ = os.RemoveAll(tmpDir) - }() + // Check if context was cancelled while waiting for lock + if err := ctx.Err(); err != nil { + return "", fmt.Errorf("context cancelled while waiting for repository lock: %w", err) + } + + cacheDir := filepath.Join(os.TempDir(), "task-git-repos", cacheKey) + + // check if repo is already cached (under the lock) + gitDir := filepath.Join(cacheDir, ".git") + if _, err := os.Stat(gitDir); err == nil { + return cacheDir, nil + } getterURL := node.buildURL() client := &getter.Client{ Ctx: ctx, Src: getterURL, - Dst: tmpDir, + Dst: cacheDir, Mode: getter.ClientModeDir, } - // Clone repository into tmpdir if err := client.Get(); err != nil { + _ = os.RemoveAll(cacheDir) + return "", fmt.Errorf("failed to clone repository: %w", err) + } + + return cacheDir, nil +} + +func (node *GitNode) ReadContext(ctx context.Context) ([]byte, error) { + // Get or clone the repository into cache + repoDir, err := node.getOrCloneRepo(ctx) + if err != nil { return nil, err } - // Build path to Taskfile in tmpdir + // Build path to Taskfile in the cached repo taskfilePath := node.path if taskfilePath == "" { taskfilePath = "Taskfile.yml" } - filePath := filepath.Join(tmpDir, taskfilePath) + filePath := filepath.Join(repoDir, taskfilePath) - // Read file + // Read file from cached repo b, err := os.ReadFile(filePath) if err != nil { return nil, err @@ -168,6 +220,22 @@ func (node *GitNode) CacheKey() string { return fmt.Sprintf("git.%s.%s.%s", node.url.Host, prefix, checksum) } +// repoCacheKey generates a unique cache key for the repository+ref combination. +// Unlike CacheKey() which includes the file path, this identifies the repository itself. +// Two GitNodes with the same repo+ref but different file paths will share the same cache. +// +// Returns a path like: github.com/user/repo.git/main +func (node *GitNode) repoCacheKey() string { + repoPath := strings.Trim(node.url.Path, "/") + + ref := node.ref + if ref == "" { + ref = "HEAD" + } + + return filepath.Join(node.url.Host, repoPath, ref) +} + func splitURLOnDoubleSlash(u *url.URL) (string, string) { x := strings.Split(u.Path, "//") switch len(x) { diff --git a/taskfile/node_git_test.go b/taskfile/node_git_test.go index a84d6580..5afe4c19 100644 --- a/taskfile/node_git_test.go +++ b/taskfile/node_git_test.go @@ -1,6 +1,7 @@ package taskfile import ( + "fmt" "testing" "github.com/stretchr/testify/assert" @@ -149,3 +150,100 @@ func TestGitNode_buildURL(t *testing.T) { }) } } + +func TestRepoCacheKey_SameRepoSameRef(t *testing.T) { + t.Parallel() + + // Same repo, same ref, different files should have SAME cache key + node1, err := NewGitNode("https://github.com/foo/bar.git//file1.yml?ref=main", "", false) + require.NoError(t, err) + + node2, err := NewGitNode("https://github.com/foo/bar.git//dir/file2.yml?ref=main", "", false) + require.NoError(t, err) + + key1 := node1.repoCacheKey() + key2 := node2.repoCacheKey() + + assert.Equal(t, key1, key2, "Same repo+ref should generate same cache key regardless of file path") +} + +func TestRepoCacheKey_SameRepoDifferentRef(t *testing.T) { + t.Parallel() + + // Same repo, different ref should have DIFFERENT cache keys + node1, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false) + require.NoError(t, err) + + node2, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=dev", "", false) + require.NoError(t, err) + + key1 := node1.repoCacheKey() + key2 := node2.repoCacheKey() + + assert.NotEqual(t, key1, key2, "Different refs should generate different cache keys") +} + +func TestRepoCacheKey_DifferentRepos(t *testing.T) { + t.Parallel() + + // Different repos should have DIFFERENT cache keys + node1, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false) + require.NoError(t, err) + + node2, err := NewGitNode("https://github.com/foo/other.git//file.yml?ref=main", "", false) + require.NoError(t, err) + + key1 := node1.repoCacheKey() + key2 := node2.repoCacheKey() + + assert.NotEqual(t, key1, key2, "Different repos should generate different cache keys") +} + +func TestRepoCacheKey_NoRefVsHead(t *testing.T) { + t.Parallel() + + // No ref (defaults to HEAD) vs explicit HEAD should have SAME cache key + node1, err := NewGitNode("https://github.com/foo/bar.git//file.yml", "", false) + require.NoError(t, err) + + node2, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=HEAD", "", false) + require.NoError(t, err) + + key1 := node1.repoCacheKey() + key2 := node2.repoCacheKey() + + assert.Equal(t, key1, key2, "No ref and explicit HEAD should generate same cache key") +} + +func TestRepoCacheKey_SSHvsHTTPS(t *testing.T) { + t.Parallel() + + // SSH vs HTTPS pointing to same repo should have SAME cache key + // They clone the same repo, so we want to share the cache + node1, err := NewGitNode("git@github.com:foo/bar.git//file.yml?ref=main", "", false) + require.NoError(t, err) + + node2, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false) + require.NoError(t, err) + + key1 := node1.repoCacheKey() + key2 := node2.repoCacheKey() + + assert.Equal(t, key1, key2, "SSH and HTTPS for same repo should share cache") +} + +func TestRepoCacheKey_Consistency(t *testing.T) { + t.Parallel() + + // Calling repoCacheKey multiple times on same node should return same key + node, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false) + require.NoError(t, err) + + key1 := node.repoCacheKey() + fmt.Printf("key1 : %#v\n", key1) + key2 := node.repoCacheKey() + key3 := node.repoCacheKey() + + assert.Equal(t, key1, key2) + assert.Equal(t, key2, key3) +} diff --git a/taskfile/reader.go b/taskfile/reader.go index 402c3f72..99ad738e 100644 --- a/taskfile/reader.go +++ b/taskfile/reader.go @@ -187,9 +187,20 @@ func (o *promptFuncOption) ApplyToReader(r *Reader) { // building an [ast.TaskfileGraph] as it goes. If any errors occur, they will be // returned immediately. func (r *Reader) Read(ctx context.Context, node Node) (*ast.TaskfileGraph, error) { + startTime := time.Now() + + // Clean up git cache after reading all taskfiles + defer func() { + _ = CleanGitCache() + }() + if err := r.include(ctx, node); err != nil { return nil, err } + + elapsed := time.Since(startTime) + r.debugf("task: Taskfiles loaded in %.2fs\n", elapsed.Seconds()) + return r.graph, nil }