1
0
mirror of https://github.com/go-task/task.git synced 2024-12-04 10:24:45 +02:00

feat(checksum): replace md5 with xxh3 to improve performance (#1325)

This commit is contained in:
Reilly Brogan 2023-09-13 19:26:48 -05:00 committed by GitHub
parent 978d66e148
commit 1417f9f6cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 29 additions and 19 deletions

View File

@ -29,7 +29,7 @@ These are some major changes done on `v3`:
- A global `method:` was added to allow setting the default method, and Task's
default changed to `checksum`
- Two magic variables were added when using `status:`: `CHECKSUM` and
`TIMESTAMP` which contain, respectively, the md5 checksum and greatest
`TIMESTAMP` which contain, respectively, the XXH3 checksum and greatest
modification timestamp of the files listed on `sources:`
- Also, the `TASK` variable is always available with the current task name
- CLI variables are always treated as global variables

View File

@ -21,7 +21,7 @@ These are some major changes done on `v3`:
- Added support for `.env` like files
- Added `label:` setting to task so one can override how the task name appears in the logs
- A global `method:` was added to allow setting the default method, and Task's default changed to `checksum`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the md5 checksum and greatest modification timestamp of the files listed on `sources:`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the XXH3 checksum and greatest modification timestamp of the files listed on `sources:`
- Also, the `TASK` variable is always available with the current task name
- CLI variables are always treated as global variables
- Added `dir:` option to `includes` to allow choosing on which directory an included Taskfile will run:

View File

@ -21,7 +21,7 @@ These are some major changes done on `v3`:
- Added support for `.env` like files
- Added `label:` setting to task so one can override how the task name appears in the logs
- A global `method:` was added to allow setting the default method, and Task's default changed to `checksum`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the md5 checksum and greatest modification timestamp of the files listed on `sources:`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the XXH3 checksum and greatest modification timestamp of the files listed on `sources:`
- Also, the `TASK` variable is always available with the current task name
- CLI variables are always treated as global variables
- Added `dir:` option to `includes` to allow choosing on which directory an included Taskfile will run:

View File

@ -21,7 +21,7 @@ These are some major changes done on `v3`:
- Added support for `.env` like files
- Added `label:` setting to task so one can override how the task name appears in the logs
- A global `method:` was added to allow setting the default method, and Task's default changed to `checksum`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the md5 checksum and greatest modification timestamp of the files listed on `sources:`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the XXH3 checksum and greatest modification timestamp of the files listed on `sources:`
- Also, the `TASK` variable is always available with the current task name
- CLI variables are always treated as global variables
- Added `dir:` option to `includes` to allow choosing on which directory an included Taskfile will run:

View File

@ -21,7 +21,7 @@ These are some major changes done on `v3`:
- Added support for `.env` like files
- Added `label:` setting to task so one can override how the task name appears in the logs
- A global `method:` was added to allow setting the default method, and Task's default changed to `checksum`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the md5 checksum and greatest modification timestamp of the files listed on `sources:`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the XXH3 checksum and greatest modification timestamp of the files listed on `sources:`
- Also, the `TASK` variable is always available with the current task name
- CLI variables are always treated as global variables
- Added `dir:` option to `includes` to allow choosing on which directory an included Taskfile will run:

View File

@ -21,7 +21,7 @@ sidebar_position: 5
- Добавлена поддержка `.env` файлов
- Добавлен параметр `label:`. Появилась возможность переопределить имя задачи в логах
- Глобальный параметр `method:` был добавлен для установки метода по умолчанию, а значение по умолчанию в Task изменено на `checksum`
- Добавлены 2 магические переменные, используемые в функции `status:` - `CHECKSUM` и `TIMESTAMP`, которые содержат соответственно контрольную сумму md5 и наибольшую отметку времени изменения файлов, перечисленных в `sources:`
- Добавлены 2 магические переменные, используемые в функции `status:` - `CHECKSUM` и `TIMESTAMP`, которые содержат соответственно контрольную сумму XXH3 и наибольшую отметку времени изменения файлов, перечисленных в `sources:`
- Кроме того, переменная `TASK` всегда доступна по имени текущей задачи
- Переменные CLI всегда считаются глобальными переменными
- Добавлена опция `dir:` в `includes` для того, чтобы выбрать, в каком каталоге Taskfile будет запущен:

View File

@ -21,7 +21,7 @@ These are some major changes done on `v3`:
- Added support for `.env` like files
- Added `label:` setting to task so one can override how the task name appears in the logs
- A global `method:` was added to allow setting the default method, and Task's default changed to `checksum`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the md5 checksum and greatest modification timestamp of the files listed on `sources:`
- Two magic variables were added when using `status:`: `CHECKSUM` and `TIMESTAMP` which contain, respectively, the XXH3 checksum and greatest modification timestamp of the files listed on `sources:`
- Also, the `TASK` variable is always available with the current task name
- CLI variables are always treated as global variables
- Added `dir:` option to `includes` to allow choosing on which directory an included Taskfile will run:

View File

@ -21,7 +21,7 @@ Taskfile 文件的 `version:` 关键字接受语义化字符串, 所以 `2`, `
- 支持类 `.env` 文件
- 添加 `label:` 设置后可以覆盖任务名称在日志中的显示方式
- 添加了全局 `method:` 允许设置默认方法,Task 的默认值更改为 `checksum`
- 使用 `status:` 时新增了两个魔术变量 `CHECKSUM` 和 `TIMESTAMP`,分别包含 `sources:` 列出的文件的 md5 checksum 和最大修改时间戳
- 使用 `status:` 时新增了两个魔术变量 `CHECKSUM` 和 `TIMESTAMP`,分别包含 `sources:` 列出的文件的 XXH3 checksum 和最大修改时间戳
- 另外,`TASK` 变量总是可以使用当前的任务名称
- CLI 变量始终被视为全局变量
- 向 `includes` 添加了 `dir:` 选项,以允许选择包含的任务文件将在哪个目录上运行:

2
go.mod
View File

@ -13,6 +13,7 @@ require (
github.com/sajari/fuzzy v1.0.0
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
github.com/zeebo/xxh3 v1.0.2
golang.org/x/exp v0.0.0-20230212135524-a684f29349b6
golang.org/x/sync v0.3.0
golang.org/x/term v0.12.0
@ -22,6 +23,7 @@ require (
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect

5
go.sum
View File

@ -12,6 +12,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
@ -41,6 +43,9 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
golang.org/x/exp v0.0.0-20230212135524-a684f29349b6 h1:Ic9KukPQ7PegFzHckNiMTQXGgEszA7mY2Fn4ZMtnMbw=
golang.org/x/exp v0.0.0-20230212135524-a684f29349b6/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=

View File

@ -1,7 +1,6 @@
package fingerprint
import (
"crypto/md5"
"fmt"
"io"
"os"
@ -9,6 +8,8 @@ import (
"regexp"
"strings"
"github.com/zeebo/xxh3"
"github.com/go-task/task/v3/internal/filepathext"
"github.com/go-task/task/v3/taskfile"
)
@ -35,16 +36,16 @@ func (checker *ChecksumChecker) IsUpToDate(t *taskfile.Task) (bool, error) {
checksumFile := checker.checksumFilePath(t)
data, _ := os.ReadFile(checksumFile)
oldMd5 := strings.TrimSpace(string(data))
oldHash := strings.TrimSpace(string(data))
newMd5, err := checker.checksum(t)
newHash, err := checker.checksum(t)
if err != nil {
return false, nil
}
if !checker.dry && oldMd5 != newMd5 {
if !checker.dry && oldHash != newHash {
_ = os.MkdirAll(filepathext.SmartJoin(checker.tempDir, "checksum"), 0o755)
if err = os.WriteFile(checksumFile, []byte(newMd5+"\n"), 0o644); err != nil {
if err = os.WriteFile(checksumFile, []byte(newHash+"\n"), 0o644); err != nil {
return false, err
}
}
@ -65,7 +66,7 @@ func (checker *ChecksumChecker) IsUpToDate(t *taskfile.Task) (bool, error) {
}
}
return oldMd5 == newMd5, nil
return oldHash == newHash, nil
}
func (checker *ChecksumChecker) Value(t *taskfile.Task) (any, error) {
@ -89,23 +90,25 @@ func (c *ChecksumChecker) checksum(t *taskfile.Task) (string, error) {
return "", err
}
h := md5.New()
h := xxh3.New()
buf := make([]byte, 128*1024)
for _, f := range sources {
// also sum the filename, so checksum changes for renaming a file
if _, err := io.Copy(h, strings.NewReader(filepath.Base(f))); err != nil {
if _, err := io.CopyBuffer(h, strings.NewReader(filepath.Base(f)), buf); err != nil {
return "", err
}
f, err := os.Open(f)
if err != nil {
return "", err
}
if _, err = io.Copy(h, f); err != nil {
if _, err = io.CopyBuffer(h, f, buf); err != nil {
return "", err
}
f.Close()
}
return fmt.Sprintf("%x", h.Sum(nil)), nil
hash := h.Sum128()
return fmt.Sprintf("%x%x", hash.Hi, hash.Lo), nil
}
func (checker *ChecksumChecker) checksumFilePath(t *taskfile.Task) string {

View File

@ -846,7 +846,7 @@ func TestStatusVariables(t *testing.T) {
require.NoError(t, e.Setup())
require.NoError(t, e.Run(context.Background(), taskfile.Call{Task: "build"}))
assert.Contains(t, buff.String(), "a41e7948dcd321db412ce61d3d5c9864")
assert.Contains(t, buff.String(), "3e464c4b03f4b65d740e1e130d4d108a")
inf, err := os.Stat(filepathext.SmartJoin(dir, "source.txt"))
require.NoError(t, err)