mirror of
https://github.com/go-task/task.git
synced 2025-11-23 22:24:45 +02:00
feat: add caching to avoid multiple clone
This commit is contained in:
@@ -7,7 +7,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"sync"
|
||||||
|
|
||||||
giturls "github.com/chainguard-dev/git-urls"
|
giturls "github.com/chainguard-dev/git-urls"
|
||||||
"github.com/hashicorp/go-getter"
|
"github.com/hashicorp/go-getter"
|
||||||
@@ -26,6 +26,36 @@ type GitNode struct {
|
|||||||
path string
|
path string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type gitRepoCache struct {
|
||||||
|
mu sync.Mutex // Protects the locks map
|
||||||
|
locks map[string]*sync.Mutex // One mutex per repo cache key
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *gitRepoCache) getLockForRepo(cacheKey string) *sync.Mutex {
|
||||||
|
c.mu.Lock()
|
||||||
|
defer c.mu.Unlock()
|
||||||
|
|
||||||
|
if _, exists := c.locks[cacheKey]; !exists {
|
||||||
|
c.locks[cacheKey] = &sync.Mutex{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.locks[cacheKey]
|
||||||
|
}
|
||||||
|
|
||||||
|
var globalGitRepoCache = &gitRepoCache{
|
||||||
|
locks: make(map[string]*sync.Mutex),
|
||||||
|
}
|
||||||
|
|
||||||
|
func CleanGitCache() error {
|
||||||
|
// Clear the in-memory locks map to prevent memory leak
|
||||||
|
globalGitRepoCache.mu.Lock()
|
||||||
|
globalGitRepoCache.locks = make(map[string]*sync.Mutex)
|
||||||
|
globalGitRepoCache.mu.Unlock()
|
||||||
|
|
||||||
|
cacheDir := filepath.Join(os.TempDir(), "task-git-repos")
|
||||||
|
return os.RemoveAll(cacheDir)
|
||||||
|
}
|
||||||
|
|
||||||
func NewGitNode(
|
func NewGitNode(
|
||||||
entrypoint string,
|
entrypoint string,
|
||||||
dir string,
|
dir string,
|
||||||
@@ -83,42 +113,64 @@ func (node *GitNode) buildURL() string {
|
|||||||
return fmt.Sprintf("git::%s?ref=%s&depth=1", baseURL, ref)
|
return fmt.Sprintf("git::%s?ref=%s&depth=1", baseURL, ref)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (node *GitNode) ReadContext(ctx context.Context) ([]byte, error) {
|
// getOrCloneRepo returns the path to a cached git repository.
|
||||||
return node.readWithGoGetter(ctx)
|
// If the repository is not cached, it clones it first.
|
||||||
}
|
// This function is thread-safe: multiple goroutines cloning the same repo+ref
|
||||||
|
// will synchronize, and only one clone operation will occur.
|
||||||
|
//
|
||||||
|
// The cache directory is /tmp/task-git-repos/{cache_key}/
|
||||||
|
func (node *GitNode) getOrCloneRepo(ctx context.Context) (string, error) {
|
||||||
|
cacheKey := node.repoCacheKey()
|
||||||
|
|
||||||
func (node *GitNode) readWithGoGetter(ctx context.Context) ([]byte, error) {
|
repoMutex := globalGitRepoCache.getLockForRepo(cacheKey)
|
||||||
// IMPORTANT: Do NOT create tmpDir in advance!
|
repoMutex.Lock()
|
||||||
// If the directory exists, go-getter will use update() instead of clone()
|
defer repoMutex.Unlock()
|
||||||
// which is 3x slower (git init + fetch --tags + pull instead of a simple clone)
|
|
||||||
tmpDir := filepath.Join(os.TempDir(), fmt.Sprintf("task-git-getter-%d", time.Now().UnixNano()))
|
|
||||||
|
|
||||||
defer func() {
|
// Check if context was cancelled while waiting for lock
|
||||||
_ = os.RemoveAll(tmpDir)
|
if err := ctx.Err(); err != nil {
|
||||||
}()
|
return "", fmt.Errorf("context cancelled while waiting for repository lock: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cacheDir := filepath.Join(os.TempDir(), "task-git-repos", cacheKey)
|
||||||
|
|
||||||
|
// check if repo is already cached (under the lock)
|
||||||
|
gitDir := filepath.Join(cacheDir, ".git")
|
||||||
|
if _, err := os.Stat(gitDir); err == nil {
|
||||||
|
return cacheDir, nil
|
||||||
|
}
|
||||||
|
|
||||||
getterURL := node.buildURL()
|
getterURL := node.buildURL()
|
||||||
|
|
||||||
client := &getter.Client{
|
client := &getter.Client{
|
||||||
Ctx: ctx,
|
Ctx: ctx,
|
||||||
Src: getterURL,
|
Src: getterURL,
|
||||||
Dst: tmpDir,
|
Dst: cacheDir,
|
||||||
Mode: getter.ClientModeDir,
|
Mode: getter.ClientModeDir,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clone repository into tmpdir
|
|
||||||
if err := client.Get(); err != nil {
|
if err := client.Get(); err != nil {
|
||||||
|
_ = os.RemoveAll(cacheDir)
|
||||||
|
return "", fmt.Errorf("failed to clone repository: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return cacheDir, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (node *GitNode) ReadContext(ctx context.Context) ([]byte, error) {
|
||||||
|
// Get or clone the repository into cache
|
||||||
|
repoDir, err := node.getOrCloneRepo(ctx)
|
||||||
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build path to Taskfile in tmpdir
|
// Build path to Taskfile in the cached repo
|
||||||
taskfilePath := node.path
|
taskfilePath := node.path
|
||||||
if taskfilePath == "" {
|
if taskfilePath == "" {
|
||||||
taskfilePath = "Taskfile.yml"
|
taskfilePath = "Taskfile.yml"
|
||||||
}
|
}
|
||||||
filePath := filepath.Join(tmpDir, taskfilePath)
|
filePath := filepath.Join(repoDir, taskfilePath)
|
||||||
|
|
||||||
// Read file
|
// Read file from cached repo
|
||||||
b, err := os.ReadFile(filePath)
|
b, err := os.ReadFile(filePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -168,6 +220,22 @@ func (node *GitNode) CacheKey() string {
|
|||||||
return fmt.Sprintf("git.%s.%s.%s", node.url.Host, prefix, checksum)
|
return fmt.Sprintf("git.%s.%s.%s", node.url.Host, prefix, checksum)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// repoCacheKey generates a unique cache key for the repository+ref combination.
|
||||||
|
// Unlike CacheKey() which includes the file path, this identifies the repository itself.
|
||||||
|
// Two GitNodes with the same repo+ref but different file paths will share the same cache.
|
||||||
|
//
|
||||||
|
// Returns a path like: github.com/user/repo.git/main
|
||||||
|
func (node *GitNode) repoCacheKey() string {
|
||||||
|
repoPath := strings.Trim(node.url.Path, "/")
|
||||||
|
|
||||||
|
ref := node.ref
|
||||||
|
if ref == "" {
|
||||||
|
ref = "HEAD"
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(node.url.Host, repoPath, ref)
|
||||||
|
}
|
||||||
|
|
||||||
func splitURLOnDoubleSlash(u *url.URL) (string, string) {
|
func splitURLOnDoubleSlash(u *url.URL) (string, string) {
|
||||||
x := strings.Split(u.Path, "//")
|
x := strings.Split(u.Path, "//")
|
||||||
switch len(x) {
|
switch len(x) {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package taskfile
|
package taskfile
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@@ -149,3 +150,100 @@ func TestGitNode_buildURL(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRepoCacheKey_SameRepoSameRef(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// Same repo, same ref, different files should have SAME cache key
|
||||||
|
node1, err := NewGitNode("https://github.com/foo/bar.git//file1.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
node2, err := NewGitNode("https://github.com/foo/bar.git//dir/file2.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
key1 := node1.repoCacheKey()
|
||||||
|
key2 := node2.repoCacheKey()
|
||||||
|
|
||||||
|
assert.Equal(t, key1, key2, "Same repo+ref should generate same cache key regardless of file path")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepoCacheKey_SameRepoDifferentRef(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// Same repo, different ref should have DIFFERENT cache keys
|
||||||
|
node1, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
node2, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=dev", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
key1 := node1.repoCacheKey()
|
||||||
|
key2 := node2.repoCacheKey()
|
||||||
|
|
||||||
|
assert.NotEqual(t, key1, key2, "Different refs should generate different cache keys")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepoCacheKey_DifferentRepos(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// Different repos should have DIFFERENT cache keys
|
||||||
|
node1, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
node2, err := NewGitNode("https://github.com/foo/other.git//file.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
key1 := node1.repoCacheKey()
|
||||||
|
key2 := node2.repoCacheKey()
|
||||||
|
|
||||||
|
assert.NotEqual(t, key1, key2, "Different repos should generate different cache keys")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepoCacheKey_NoRefVsHead(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// No ref (defaults to HEAD) vs explicit HEAD should have SAME cache key
|
||||||
|
node1, err := NewGitNode("https://github.com/foo/bar.git//file.yml", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
node2, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=HEAD", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
key1 := node1.repoCacheKey()
|
||||||
|
key2 := node2.repoCacheKey()
|
||||||
|
|
||||||
|
assert.Equal(t, key1, key2, "No ref and explicit HEAD should generate same cache key")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepoCacheKey_SSHvsHTTPS(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// SSH vs HTTPS pointing to same repo should have SAME cache key
|
||||||
|
// They clone the same repo, so we want to share the cache
|
||||||
|
node1, err := NewGitNode("git@github.com:foo/bar.git//file.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
node2, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
key1 := node1.repoCacheKey()
|
||||||
|
key2 := node2.repoCacheKey()
|
||||||
|
|
||||||
|
assert.Equal(t, key1, key2, "SSH and HTTPS for same repo should share cache")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepoCacheKey_Consistency(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// Calling repoCacheKey multiple times on same node should return same key
|
||||||
|
node, err := NewGitNode("https://github.com/foo/bar.git//file.yml?ref=main", "", false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
key1 := node.repoCacheKey()
|
||||||
|
fmt.Printf("key1 : %#v\n", key1)
|
||||||
|
key2 := node.repoCacheKey()
|
||||||
|
key3 := node.repoCacheKey()
|
||||||
|
|
||||||
|
assert.Equal(t, key1, key2)
|
||||||
|
assert.Equal(t, key2, key3)
|
||||||
|
}
|
||||||
|
|||||||
@@ -187,9 +187,20 @@ func (o *promptFuncOption) ApplyToReader(r *Reader) {
|
|||||||
// building an [ast.TaskfileGraph] as it goes. If any errors occur, they will be
|
// building an [ast.TaskfileGraph] as it goes. If any errors occur, they will be
|
||||||
// returned immediately.
|
// returned immediately.
|
||||||
func (r *Reader) Read(ctx context.Context, node Node) (*ast.TaskfileGraph, error) {
|
func (r *Reader) Read(ctx context.Context, node Node) (*ast.TaskfileGraph, error) {
|
||||||
|
startTime := time.Now()
|
||||||
|
|
||||||
|
// Clean up git cache after reading all taskfiles
|
||||||
|
defer func() {
|
||||||
|
_ = CleanGitCache()
|
||||||
|
}()
|
||||||
|
|
||||||
if err := r.include(ctx, node); err != nil {
|
if err := r.include(ctx, node); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
elapsed := time.Since(startTime)
|
||||||
|
r.debugf("task: Taskfiles loaded in %.2fs\n", elapsed.Seconds())
|
||||||
|
|
||||||
return r.graph, nil
|
return r.graph, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user