1
0
mirror of https://github.com/mgechev/revive.git synced 2025-07-17 01:12:27 +02:00

Check string literals against regular expressions (#511)

Add string-format rule
This commit is contained in:
Keith Scroggs
2021-04-18 12:35:30 -04:00
committed by GitHub
parent 8635ef94ee
commit 2afe668e7b
7 changed files with 448 additions and 1 deletions

View File

@ -357,6 +357,7 @@ List of all available rules. The rules ported from `golint` are left unchanged a
| [`unhandled-error`](./RULES_DESCRIPTIONS.md#unhandled-error) | []string | Warns on unhandled errors returned by function calls | no | yes |
| [`cognitive-complexity`](./RULES_DESCRIPTIONS.md#cognitive-complexity) | int | Sets restriction for maximum Cognitive complexity. | no | no |
| [`string-of-int`](./RULES_DESCRIPTIONS.md#string-of-int) | n/a | Warns on suspicious casts from int to string | no | yes |
| [`string-format`](./RULES_DESCRIPTIONS.md#string-format) | map | Warns on specific string literals that fail one or more user-configured regular expressions | no | no |
| [`early-return`](./RULES_DESCRIPTIONS.md#early-return) | n/a | Spots if-then-else statements that can be refactored to simplify code reading | no | no |
| [`unconditional-recursion`](./RULES_DESCRIPTIONS.md#unconditional-recursion) | n/a | Warns on function calls that will lead to (direct) infinite recursion | no | no |
| [`identical-branches`](./RULES_DESCRIPTIONS.md#identical-branches) | n/a | Spots if-then-else statements with identical `then` and `else` branches | no | no |

View File

@ -53,6 +53,7 @@ List of all available rules.
- [redefines-builtin-id](#redefines-builtin-id)
- [string-of-int](#string-of-int)
- [struct-tag](#struct-tag)
- [string-format](#string-format)
- [superfluous-else](#superfluous-else)
- [time-naming](#time-naming)
- [var-naming](#var-naming)
@ -488,6 +489,29 @@ _Description_: explicit type conversion `string(i)` where `i` has an integer ty
_Configuration_: N/A
## string-format
_Description_: This rule allows you to configure a list of regular expressions that string literals in certain function calls are checked against.
This is geared towards user facing applications where string literals are often used for messages that will be presented to users, so it may be desirable to enforce consistent formatting.
_Configuration_: Each argument is a slice containing 2-3 strings: a scope, a regex, and an optional error message.
1. The first string defines a scope. This controls which string literals the regex will apply to, and is defined as a function argument. It must contain at least a function name (`core.WriteError`). Scopes may optionally contain a number specifying which argument in the function to check (`core.WriteError[1]`), as well as a struct field (`core.WriteError[1].Message`, only works for top level fields). Function arguments are counted starting at 0, so `[0]` would refer to the first argument, `[1]` would refer to the second, etc. If no argument number is provided, the first argument will be used (same as `[0]`).
2. The second string is a regular expression (beginning and ending with a `/` character), which will be used to check the string literals in the scope.
3. The third string (optional) is a message containing the purpose for the regex, which will be used in lint errors.
Example:
```toml
[rule.string-format]
arguments = [
["core.WriteError[1].Message", "/^([^A-Z]|$)/", "must not start with a capital letter"],
["fmt.Errorf[0]", "/(^|[^\\.!?])$/", "must not end in punctuation"],
["panic", "/^[^\\n]*$/", "must not contain line breaks"]]
```
## struct-tag
_Description_: Struct tags are not checked at compile time.

View File

@ -72,6 +72,7 @@ var allRules = append([]lint.Rule{
&rule.UnhandledErrorRule{},
&rule.CognitiveComplexityRule{},
&rule.StringOfIntRule{},
&rule.StringFormatRule{},
&rule.EarlyReturnRule{},
&rule.UnconditionalRecursionRule{},
&rule.IdenticalBranchesRule{},

1
go.sum
View File

@ -44,7 +44,6 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4 h1:myAQVi0cGEoqQVR5POX+8RR2mrocKqNN1hmeMqhX27k=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

281
rule/string-format.go Normal file
View File

@ -0,0 +1,281 @@
package rule
import (
"fmt"
"go/ast"
"go/token"
"regexp"
"strconv"
"github.com/mgechev/revive/lint"
)
// #region Revive API
// StringFormatRule lints string literals passed as arguments to configured
// function calls, checking them against the regular expressions supplied as
// rule Arguments.
type StringFormatRule struct{}
// Apply parses the rule arguments into subrules, walks the file's AST with
// them, and returns every failure reported during the walk.
// Invalid arguments cause parseArguments to panic.
func (r *StringFormatRule) Apply(file *lint.File, arguments lint.Arguments) []lint.Failure {
	var found []lint.Failure

	walker := lintStringFormatRule{
		// Collect failures as the visitor reports them.
		onFailure: func(f lint.Failure) {
			found = append(found, f)
		},
	}
	walker.parseArguments(arguments)

	ast.Walk(walker, file.AST)
	return found
}
// Name returns the rule name.
func (r *StringFormatRule) Name() string {
	const ruleName = "string-format"
	return ruleName
}
// ParseArgumentsTest is a public wrapper around parseArguments used for testing.
// It returns the message passed to panic while parsing, or nil if parsing
// completed without panicking.
func (r *StringFormatRule) ParseArgumentsTest(arguments lint.Arguments) *string {
	var message *string

	// Run the parser inside a closure so a deferred recover() can capture the
	// panic value and still let this function return normally.
	func() {
		defer func() {
			if recovered := recover(); recovered != nil {
				text := fmt.Sprintf("%s", recovered)
				message = &text
			}
		}()
		parser := lintStringFormatRule{}
		parser.parseArguments(arguments)
	}()

	return message
}
// #endregion
// #region Internal structure
// lintStringFormatRule is the AST visitor behind the string-format rule. It
// holds the parsed subrules and the callback used to report failures.
type lintStringFormatRule struct {
	// onFailure is called once for every failure found.
	onFailure func(lint.Failure)
	// rules holds one subrule per configured argument; filled by parseArguments.
	rules []stringFormatSubrule
	// NOTE(review): stringDeclarations is not referenced anywhere in this file — confirm whether it is still needed.
	stringDeclarations map[string]string
}
// stringFormatSubrule is one parsed configuration entry: a scope selecting
// which string literals to check, the regex they must match, and an optional
// custom failure message.
type stringFormatSubrule struct {
	// parent is the visitor that owns this subrule; failures are reported through parent.onFailure.
	parent *lintStringFormatRule
	// scope selects the function call, argument index and optional struct field to check.
	scope stringFormatSubruleScope
	// regexp is the expression the selected string literal must match.
	regexp *regexp.Regexp
	// errorMessage, when non-empty, replaces the default failure text.
	errorMessage string
}
// stringFormatSubruleScope identifies which string literal a subrule applies
// to, as parsed from a scope string such as "pkg.Func[1].Field".
type stringFormatSubruleScope struct {
	funcName string // Function name the rule is scoped to
	argument int    // (optional) Which argument in calls to the function is checked against the rule (the first argument is checked by default)
	field    string // (optional) If the argument to be checked is a struct, which member of the struct is checked against the rule (top level members only)
}
// identRegex is the sub-pattern for a valid function or struct field identifier.
const identRegex = "[_A-Za-z][_A-Za-z0-9]*"

// parseStringFormatScope splits a scope string into three submatches:
// the (optionally package-qualified) function name, the argument index, and
// the struct field name. The index and field groups are empty when omitted.
var parseStringFormatScope = regexp.MustCompile(fmt.Sprintf(
	`^(%[1]s(?:\.%[1]s)?)(?:\[([0-9]+)\](?:\.(%[1]s))?)?$`,
	identRegex,
))
// #endregion
// #region Argument parsing
// parseArguments converts every configured argument into a subrule and
// appends it to w.rules. parseArgument panics on an invalid argument, so this
// method panics as well.
func (w *lintStringFormatRule) parseArguments(arguments lint.Arguments) {
	for index := range arguments {
		parsedScope, compiled, message := w.parseArgument(arguments[index], index)
		subrule := stringFormatSubrule{
			parent:       w,
			scope:        parsedScope,
			regexp:       compiled,
			errorMessage: message,
		}
		w.rules = append(w.rules, subrule)
	}
}
// parseArgument validates and decodes one string-format argument.
//
// argument must be a slice of at least two strings: a scope
// ("pkg.Func[argIndex].Field", where the index and field are optional), a
// regular expression delimited by '/' characters, and an optional failure
// message. ruleNum is the position of the argument in the configuration and is
// only used in error messages.
//
// It returns the parsed scope, the compiled regex and the custom message (empty
// when none was given). Any problem is reported by panicking through
// configError or parseError.
func (w lintStringFormatRule) parseArgument(argument interface{}, ruleNum int) (scope stringFormatSubruleScope, regex *regexp.Regexp, errorMessage string) {
	g, ok := argument.([]interface{}) // Cast to generic slice first
	if !ok {
		w.configError("argument is not a slice", ruleNum, 0)
	}
	if len(g) < 2 {
		w.configError("less than two slices found in argument, scope and regex are required", ruleNum, len(g)-1)
	}
	rule := make([]string, len(g))
	for i, obj := range g {
		val, ok := obj.(string)
		if !ok {
			w.configError("unexpected value, string was expected", ruleNum, i)
		}
		rule[i] = val
	}

	// Validate scope and regex before using them.
	pattern := rule[1]
	switch {
	case len(rule[0]) == 0:
		w.configError("empty scope provided", ruleNum, 0)
	case len(pattern) < 2:
		w.configError("regex is too small (regexes should begin and end with '/')", ruleNum, 1)
	case pattern[0] != '/' || pattern[len(pattern)-1] != '/':
		// Without this check the first and last characters would be stripped
		// silently below, compiling a different regex than the user wrote.
		w.configError("regex must begin and end with '/'", ruleNum, 1)
	}

	// Parse rule scope
	scope = stringFormatSubruleScope{}
	matches := parseStringFormatScope.FindStringSubmatch(rule[0])
	if matches == nil {
		// The rule's scope didn't match the parsing regex at all, probably a configuration error
		w.parseError("unable to parse rule scope", ruleNum, 0)
	} else if len(matches) != 4 {
		// The rule's scope matched the parsing regex, but an unexpected number of submatches was returned, probably a bug
		w.parseError(fmt.Sprintf("unexpected number of submatches when parsing scope: %d, expected 4", len(matches)), ruleNum, 0)
	}
	scope.funcName = matches[1]
	if len(matches[2]) > 0 {
		var err error
		scope.argument, err = strconv.Atoi(matches[2])
		if err != nil {
			w.parseError("unable to parse argument number in rule scope", ruleNum, 0)
		}
	}
	if len(matches[3]) > 0 {
		scope.field = matches[3]
	}

	// Strip the '/' delimiters from the pattern before compiling
	regex, err := regexp.Compile(pattern[1 : len(pattern)-1])
	if err != nil {
		w.parseError(fmt.Sprintf("unable to compile %s as regexp", rule[1]), ruleNum, 1)
	}

	// Use custom error message if provided; any strings after the third are ignored
	if len(rule) >= 3 {
		errorMessage = rule[2]
	}
	return scope, regex, errorMessage
}
// configError panics with a message describing an invalid configuration, i.e.
// a mistake that is specifically the user's fault. ruleNum and option identify
// the offending argument and the position inside it.
func (w lintStringFormatRule) configError(msg string, ruleNum, option int) {
	const format = "invalid configuration for string-format: %s [argument %d, option %d]"
	panic(fmt.Sprintf(format, msg, ruleNum, option))
}
// parseError panics with a message describing a general configuration parsing
// failure. This may be the user's fault, but that isn't known for certain.
// ruleNum and option identify the offending argument and the position inside it.
func (w lintStringFormatRule) parseError(msg string, ruleNum, option int) {
	const format = "failed to parse configuration for string-format: %s [argument %d, option %d]"
	panic(fmt.Sprintf(format, msg, ruleNum, option))
}
// #endregion
// #region Node traversal
// Visit implements ast.Visitor. For every call expression whose name can be
// resolved, it applies each subrule scoped to that name. It always returns w
// so the walk continues into child nodes.
func (w lintStringFormatRule) Visit(node ast.Node) ast.Visitor {
	if call, isCall := node.(*ast.CallExpr); isCall {
		if name, resolved := w.getCallName(call); resolved {
			for i := range w.rules {
				if w.rules[i].scope.funcName == name {
					w.rules[i].Apply(call)
				}
			}
		}
	}
	return w
}
// getCallName returns the name of a call expression as "Func" for a plain
// identifier or "x.Func" for a selector on an identifier. ok is false for any
// other callee shape.
func (w lintStringFormatRule) getCallName(call *ast.CallExpr) (callName string, ok bool) {
	switch fun := call.Fun.(type) {
	case *ast.Ident:
		// Local function call
		return fun.Name, true
	case *ast.SelectorExpr:
		// Scoped function call; only a bare identifier on the left is supported
		if qualifier, isIdent := fun.X.(*ast.Ident); isIdent {
			return qualifier.Name + "." + fun.Sel.Name, true
		}
	}
	return "", false
}
// #endregion
// #region Linting logic
// Apply applies a single format rule to a call expression. It should be called
// only after verifying that the call expression matches the rule's scope.
//
// The argument selected by the scope is checked when it is a string literal,
// or, if the scope names a field, when it is a composite literal that sets
// that field to a string literal using key: value syntax. Anything else is
// skipped silently: too few arguments, non-literal values, positional
// composite literals, or a field that is not set.
func (rule stringFormatSubrule) Apply(call *ast.CallExpr) {
	if len(call.Args) <= rule.scope.argument {
		return
	}
	arg := call.Args[rule.scope.argument]
	var lit *ast.BasicLit
	if len(rule.scope.field) > 0 {
		// Try finding the scope's Field, treating arg as a composite literal
		composite, ok := arg.(*ast.CompositeLit)
		if !ok {
			return
		}
		for _, el := range composite.Elts {
			kv, ok := el.(*ast.KeyValueExpr)
			if !ok {
				continue
			}
			key, ok := kv.Key.(*ast.Ident)
			if !ok || key.Name != rule.scope.field {
				continue
			}
			// We're now dealing with the exact field in the rule's scope, so if anything fails, we can safely return instead of continuing the loop
			lit, ok = kv.Value.(*ast.BasicLit)
			if !ok || lit.Kind != token.STRING {
				return
			}
			break
		}
		if lit == nil {
			// The field is not set in this literal (or the literal is
			// positional), so there is no string to check. Without this guard
			// the slice expression below would dereference a nil pointer.
			return
		}
	} else {
		var ok bool
		// Treat arg as a string literal
		lit, ok = arg.(*ast.BasicLit)
		if !ok || lit.Kind != token.STRING {
			return
		}
	}
	// Strip the surrounding quote characters before linting; escape sequences
	// inside the literal are left as written in the source.
	unquoted := lit.Value[1 : len(lit.Value)-1]
	rule.lintMessage(unquoted, lit)
}
// lintMessage reports a failure on node when s does not match the subrule's
// regex. The custom error message is used when one was configured; otherwise
// a default message quoting the regex is generated.
func (rule stringFormatSubrule) lintMessage(s string, node ast.Node) {
	if rule.regexp.MatchString(s) {
		// The string satisfies the user's regex, nothing to report
		return
	}

	text := rule.errorMessage
	if len(text) == 0 {
		text = fmt.Sprintf("string literal doesn't match user defined regex /%s/", rule.regexp.String())
	}

	rule.parent.onFailure(lint.Failure{
		Confidence: 1,
		Failure:    text,
		Node:       node,
	})
}
// #endregion

111
test/string-format_test.go Normal file
View File

@ -0,0 +1,111 @@
package test
import (
"testing"
"github.com/mgechev/revive/lint"
"github.com/mgechev/revive/rule"
)
func TestStringFormat(t *testing.T) {
testRule(t, "string-format", &rule.StringFormatRule{}, &lint.RuleConfig{
Arguments: lint.Arguments{
[]interface{}{
"stringFormatMethod1", // The first argument is checked by default
"/^[A-Z]/",
"must start with a capital letter"},
[]interface{}{
"stringFormatMethod2[2].d",
"/[^\\.]$/"}, // Must not end with a period
[]interface{}{
"s.Method3[2]",
"/^[^Tt][^Hh]/",
"must not start with 'th'"}}})
}
// TestStringFormatArgumentParsing feeds valid and invalid configurations to
// the rule's argument parser and compares the resulting panic message (if any)
// with the expected one. A nil expectedError means parsing must succeed.
func TestStringFormatArgumentParsing(t *testing.T) {
	r := &rule.StringFormatRule{}
	type argumentsTest struct {
		name          string
		config        lint.Arguments
		expectedError *string
	}
	stringPtr := func(s string) *string {
		return &s
	}
	tests := []argumentsTest{
		{
			name: "Not a Slice",
			config: lint.Arguments{
				"this is not a slice"},
			expectedError: stringPtr("invalid configuration for string-format: argument is not a slice [argument 0, option 0]")},
		{
			name: "Missing Regex",
			config: lint.Arguments{
				[]interface{}{
					"method[0]"}},
			expectedError: stringPtr("invalid configuration for string-format: less than two slices found in argument, scope and regex are required [argument 0, option 0]")},
		{
			// Two elements are supplied so the length check passes and the
			// non-string value is what actually triggers the error.
			name: "Bad Argument Type",
			config: lint.Arguments{
				[]interface{}{
					1,
					"//"}},
			expectedError: stringPtr("invalid configuration for string-format: unexpected value, string was expected [argument 0, option 0]")},
		{
			name: "Empty Scope",
			config: lint.Arguments{
				[]interface{}{
					"",
					"//"}},
			expectedError: stringPtr("invalid configuration for string-format: empty scope provided [argument 0, option 0]")},
		{
			name: "Small or Empty Regex",
			config: lint.Arguments{
				[]interface{}{
					"method[1].a",
					"-"}},
			expectedError: stringPtr("invalid configuration for string-format: regex is too small (regexes should begin and end with '/') [argument 0, option 1]")},
		{
			name: "Bad Scope",
			config: lint.Arguments{
				[]interface{}{
					"1.a",
					"//"}},
			expectedError: stringPtr("failed to parse configuration for string-format: unable to parse rule scope [argument 0, option 0]")},
		{
			name: "Bad Regex",
			config: lint.Arguments{
				[]interface{}{
					"method[1].a",
					"/(/"}},
			expectedError: stringPtr("failed to parse configuration for string-format: unable to compile /(/ as regexp [argument 0, option 1]")},
		{
			name: "Sample Config",
			config: lint.Arguments{
				[]interface{}{
					"core.WriteError[1].Message", "/^([^A-Z]$)/", "must not start with a capital letter"},
				[]interface{}{
					"fmt.Errorf[0]", "/^|[^\\.!?]$/", "must not end in punctuation"},
				[]interface{}{
					"panic", "/^[^\\n]*$/", "must not contain line breaks"}}},
		{
			name: "Underscores in Scope",
			config: lint.Arguments{
				[]interface{}{
					"some_pkg._some_function_name[5].some_member",
					"//"}}}}
	for _, a := range tests {
		t.Run(a.name, func(t *testing.T) {
			err := r.ParseArgumentsTest(a.config)
			if err != nil {
				if a.expectedError == nil || *err != *a.expectedError {
					t.Errorf("unexpected panic message: %s", *err)
				}
			} else if a.expectedError != nil {
				t.Error("error expected but not received")
			}
		})
	}
}

30
testdata/string-format.go vendored Normal file
View File

@ -0,0 +1,30 @@
// Test string literal regex checks
package pkg
// stringFormatMethod1 is an empty fixture; only calls to it are inspected.
func stringFormatMethod1(a, b string) {
}
// stringFormatMethod2 is an empty fixture whose third parameter is a struct
// with a string field d.
func stringFormatMethod2(a, b string, c struct {
	d string
}) {
}
// stringFormatMethods is an empty fixture type used as the receiver of Method3.
type stringFormatMethods struct{}
// Method3 is an empty fixture method taking three string arguments.
func (s stringFormatMethods) Method3(a, b, c string) {
}
// stringFormat calls the fixtures above with string literals; the MATCH
// comments mark the lines on which the string-format rule is expected to fail.
func stringFormat() {
	stringFormatMethod1("This string is fine", "")
	stringFormatMethod1("this string is not capitalized", "") // MATCH /must start with a capital letter/
	stringFormatMethod2("", "", struct {
		d string
	}{
		d: "This string is capitalized, but ends with a period."}) // MATCH /string literal doesn't match user defined regex /[^\.]$//
	s := stringFormatMethods{}
	s.Method3("", "", "This string starts with th") // MATCH /must not start with 'th'/
}