1
0
mirror of https://github.com/go-task/task.git synced 2024-12-16 10:59:23 +02:00
task/vendor/github.com/mvdan/sh/syntax/lexer.go
2017-05-17 14:49:27 -03:00

1035 lines
17 KiB
Go

// Copyright (c) 2016, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package syntax
import (
"bytes"
"io"
"unicode/utf8"
)
// bytes that form or start a token
func regOps(r rune) bool {
switch r {
case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`':
return true
}
return false
}
// tokenize these inside parameter expansions
func paramOps(r rune) bool {
switch r {
case '}', '#', '!', ':', '-', '+', '=', '?', '%', '[', ']', '/', '^',
',', '@':
return true
}
return false
}
// tokenize these inside arithmetic expansions
func arithmOps(r rune) bool {
switch r {
case '+', '-', '!', '*', '/', '%', '(', ')', '^', '<', '>', ':', '=',
',', '?', '|', '&', ']':
return true
}
return false
}
func wordBreak(r rune) bool {
switch r {
case ' ', '\t', '\n', ';', '&', '>', '<', '|', '(', ')', '\r':
return true
}
return false
}
func (p *Parser) rune() rune {
retry:
if p.npos < len(p.bs) {
if b := p.bs[p.npos]; b < utf8.RuneSelf {
if p.npos++; b == '\n' {
p.f.lines = append(p.f.lines, p.getPos())
}
if p.litBs != nil {
p.litBs = append(p.litBs, b)
}
r := rune(b)
p.r = r
return r
}
if p.npos+utf8.UTFMax >= len(p.bs) {
// we might need up to 4 bytes to read a full
// non-ascii rune
p.fill()
}
var w int
p.r, w = utf8.DecodeRune(p.bs[p.npos:])
if p.litBs != nil {
p.litBs = append(p.litBs, p.bs[p.npos:p.npos+w]...)
}
p.npos += w
if p.r == utf8.RuneError && w == 1 {
p.posErr(p.getPos(), "invalid UTF-8 encoding")
}
} else {
if p.r == utf8.RuneSelf {
} else if p.fill(); p.bs == nil {
p.npos++
p.r = utf8.RuneSelf
} else {
goto retry
}
}
return p.r
}
func (p *Parser) unrune(r rune) {
if p.r != utf8.RuneSelf {
p.npos -= utf8.RuneLen(p.r)
p.r = r
}
}
// fill reads more bytes from the input src into readBuf. Any bytes that
// had not yet been used at the end of the buffer are slid into the
// beginning of the buffer.
func (p *Parser) fill() {
left := len(p.bs) - p.npos
p.offs += p.npos
copy(p.readBuf[:left], p.readBuf[p.npos:])
var n int
var err error
if p.readErr == nil {
n, err = p.src.Read(p.readBuf[left:])
p.readErr = err
} else {
n, err = 0, p.readErr
}
if n == 0 {
// don't use p.errPass as we don't want to overwrite p.tok
if err != nil && err != io.EOF {
p.err = err
}
if left > 0 {
p.bs = p.readBuf[:left]
} else {
p.bs = nil
}
} else {
p.bs = p.readBuf[:left+n]
}
p.npos = 0
}
func (p *Parser) nextKeepSpaces() {
r := p.r
if p.pos = p.getPos(); r > utf8.RuneSelf {
p.pos -= Pos(utf8.RuneLen(r) - 1)
}
switch p.quote {
case paramExpRepl:
switch r {
case '}', '/':
p.tok = p.paramToken(r)
case '`', '"', '$':
p.tok = p.dqToken(r)
default:
p.advanceLitOther(r)
}
case dblQuotes:
switch r {
case '`', '"', '$':
p.tok = p.dqToken(r)
default:
p.advanceLitDquote(r)
}
case hdocBody, hdocBodyTabs:
if r == '`' || r == '$' {
p.tok = p.dqToken(r)
} else if p.hdocStop == nil {
p.tok = illegalTok
} else {
p.advanceLitHdoc(r)
}
case paramExpExp:
switch r {
case '}':
p.rune()
p.tok = rightBrace
case '`', '"', '$':
p.tok = p.dqToken(r)
default:
p.advanceLitOther(r)
}
default: // sglQuotes
if r == '\'' {
p.rune()
p.tok = sglQuote
} else {
p.advanceLitOther(r)
}
}
}
func (p *Parser) next() {
if p.r == utf8.RuneSelf {
p.tok = _EOF
return
}
p.spaced, p.newLine = false, false
if p.quote&allKeepSpaces != 0 {
p.nextKeepSpaces()
return
}
r := p.r
skipSpace:
for {
switch r {
case utf8.RuneSelf:
p.tok = _EOF
return
case ' ', '\t', '\r':
p.spaced = true
r = p.rune()
case '\n':
if p.quote == arithmExprLet || p.quote == hdocWord {
p.tok = illegalTok
return
}
p.spaced, p.newLine = true, true
r = p.rune()
if len(p.heredocs) > p.buriedHdocs {
if p.doHeredocs(); p.tok == _EOF {
return
}
r = p.r
}
case '\\':
if !p.peekByte('\n') {
break skipSpace
}
p.rune()
r = p.rune()
default:
break skipSpace
}
}
if p.pos = p.getPos(); r > utf8.RuneSelf {
p.pos -= Pos(utf8.RuneLen(r) - 1)
}
switch {
case p.quote&allRegTokens != 0:
switch r {
case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`':
p.tok = p.regToken(r)
case '#':
r = p.rune()
p.newLit(r)
for r != utf8.RuneSelf && r != '\n' {
r = p.rune()
}
if p.keepComments {
p.f.Comments = append(p.f.Comments, &Comment{
Hash: p.pos,
Text: p.endLit(),
})
} else {
p.litBs = nil
}
p.next()
case '?', '*', '+', '@', '!':
if p.peekByte('(') {
switch r {
case '?':
p.tok = globQuest
case '*':
p.tok = globStar
case '+':
p.tok = globPlus
case '@':
p.tok = globAt
default: // '!'
p.tok = globExcl
}
p.rune()
p.rune()
} else {
p.advanceLitNone(r)
}
default:
p.advanceLitNone(r)
}
case p.quote&allArithmExpr != 0 && arithmOps(r):
p.tok = p.arithmToken(r)
case p.quote&allParamExp != 0 && paramOps(r):
p.tok = p.paramToken(r)
case p.quote == testRegexp:
if regOps(r) && r != '(' {
p.tok = p.regToken(r)
} else {
p.advanceLitRe(r)
}
case regOps(r):
p.tok = p.regToken(r)
default:
p.advanceLitOther(r)
}
if p.err != nil && p.tok != _EOF {
p.tok = _EOF
}
}
func (p *Parser) peekByte(b byte) bool {
if p.npos == len(p.bs) && p.readErr == nil {
p.fill()
}
return p.npos < len(p.bs) && p.bs[p.npos] == b
}
func (p *Parser) regToken(r rune) token {
switch r {
case '\'':
p.rune()
return sglQuote
case '"':
p.rune()
return dblQuote
case '`':
p.rune()
return bckQuote
case '&':
switch p.rune() {
case '&':
p.rune()
return andAnd
case '>':
if !p.bash() {
break
}
if p.rune() == '>' {
p.rune()
return appAll
}
return rdrAll
}
return and
case '|':
switch p.rune() {
case '|':
p.rune()
return orOr
case '&':
if !p.bash() {
break
}
p.rune()
return pipeAll
}
return or
case '$':
switch p.rune() {
case '\'':
if !p.bash() {
break
}
p.rune()
return dollSglQuote
case '"':
if !p.bash() {
break
}
p.rune()
return dollDblQuote
case '{':
p.rune()
return dollBrace
case '[':
if !p.bash() {
break
}
p.rune()
return dollBrack
case '(':
if p.rune() == '(' {
p.rune()
return dollDblParen
}
return dollParen
}
return dollar
case '(':
if p.rune() == '(' && p.bash() {
p.rune()
return dblLeftParen
}
return leftParen
case ')':
p.rune()
return rightParen
case ';':
switch p.rune() {
case ';':
if p.rune() == '&' && p.bash() {
p.rune()
return dblSemiFall
}
return dblSemicolon
case '&':
if !p.bash() {
break
}
p.rune()
return semiFall
}
return semicolon
case '<':
switch p.rune() {
case '<':
if r = p.rune(); r == '-' {
p.rune()
return dashHdoc
} else if r == '<' && p.bash() {
p.rune()
return wordHdoc
}
return hdoc
case '>':
p.rune()
return rdrInOut
case '&':
p.rune()
return dplIn
case '(':
if !p.bash() {
break
}
p.rune()
return cmdIn
}
return rdrIn
default: // '>'
switch p.rune() {
case '>':
p.rune()
return appOut
case '&':
p.rune()
return dplOut
case '|':
p.rune()
return clbOut
case '(':
if !p.bash() {
break
}
p.rune()
return cmdOut
}
return rdrOut
}
}
func (p *Parser) dqToken(r rune) token {
switch r {
case '"':
p.rune()
return dblQuote
case '`':
p.rune()
return bckQuote
default: // '$'
switch p.rune() {
case '{':
p.rune()
return dollBrace
case '[':
if !p.bash() {
break
}
p.rune()
return dollBrack
case '(':
if p.rune() == '(' {
p.rune()
return dollDblParen
}
return dollParen
}
return dollar
}
}
func (p *Parser) paramToken(r rune) token {
switch r {
case '}':
p.rune()
return rightBrace
case ':':
switch p.rune() {
case '+':
p.rune()
return colPlus
case '-':
p.rune()
return colMinus
case '?':
p.rune()
return colQuest
case '=':
p.rune()
return colAssgn
}
return colon
case '+':
p.rune()
return plus
case '-':
p.rune()
return minus
case '?':
p.rune()
return quest
case '=':
p.rune()
return assgn
case '%':
if p.rune() == '%' {
p.rune()
return dblPerc
}
return perc
case '#':
if p.rune() == '#' {
p.rune()
return dblHash
}
return hash
case '!':
p.rune()
return exclMark
case '[':
p.rune()
return leftBrack
case '/':
if p.rune() == '/' {
p.rune()
return dblSlash
}
return slash
case '^':
if p.rune() == '^' {
p.rune()
return dblCaret
}
return caret
case ',':
if p.rune() == ',' {
p.rune()
return dblComma
}
return comma
default: // '@'
p.rune()
return at
}
}
func (p *Parser) arithmToken(r rune) token {
switch r {
case '!':
if p.rune() == '=' {
p.rune()
return nequal
}
return exclMark
case '=':
if p.rune() == '=' {
p.rune()
return equal
}
return assgn
case '(':
p.rune()
return leftParen
case ')':
p.rune()
return rightParen
case '&':
switch p.rune() {
case '&':
p.rune()
return andAnd
case '=':
p.rune()
return andAssgn
}
return and
case '|':
switch p.rune() {
case '|':
p.rune()
return orOr
case '=':
p.rune()
return orAssgn
}
return or
case '<':
switch p.rune() {
case '<':
if p.rune() == '=' {
p.rune()
return shlAssgn
}
return hdoc
case '=':
p.rune()
return lequal
}
return rdrIn
case '>':
switch p.rune() {
case '>':
if p.rune() == '=' {
p.rune()
return shrAssgn
}
return appOut
case '=':
p.rune()
return gequal
}
return rdrOut
case '+':
switch p.rune() {
case '+':
p.rune()
return addAdd
case '=':
p.rune()
return addAssgn
}
return plus
case '-':
switch p.rune() {
case '-':
p.rune()
return subSub
case '=':
p.rune()
return subAssgn
}
return minus
case '%':
if p.rune() == '=' {
p.rune()
return remAssgn
}
return perc
case '*':
switch p.rune() {
case '*':
p.rune()
return power
case '=':
p.rune()
return mulAssgn
}
return star
case '/':
if p.rune() == '=' {
p.rune()
return quoAssgn
}
return slash
case '^':
if p.rune() == '=' {
p.rune()
return xorAssgn
}
return caret
case ']':
p.rune()
return rightBrack
case ',':
p.rune()
return comma
case '?':
p.rune()
return quest
default: // ':'
p.rune()
return colon
}
}
func (p *Parser) newLit(r rune) {
// don't let r == utf8.RuneSelf go to the second case as RuneLen
// would return -1
if r <= utf8.RuneSelf {
p.litBs = p.litBuf[:1]
p.litBs[0] = byte(r)
} else if p.npos <= len(p.bs) {
w := utf8.RuneLen(r)
p.litBs = append(p.litBuf[:0], p.bs[p.npos-w:p.npos]...)
}
}
func (p *Parser) discardLit(n int) { p.litBs = p.litBs[:len(p.litBs)-n] }
func (p *Parser) endLit() (s string) {
if p.r == utf8.RuneSelf {
s = string(p.litBs)
} else if len(p.litBs) > 0 {
s = string(p.litBs[:len(p.litBs)-1])
}
p.litBs = nil
return
}
func (p *Parser) advanceLitOther(r rune) {
tok := _LitWord
loop:
for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
switch r {
case '\\': // escaped byte follows
if r = p.rune(); r == '\n' {
p.discardLit(2)
}
case '\n':
switch p.quote {
case sglQuotes, paramExpRepl, paramExpExp:
default:
break loop
}
case '\'':
switch p.quote {
case paramExpExp, paramExpRepl:
default:
break loop
}
case '"', '`', '$':
if p.quote != sglQuotes {
tok = _Lit
break loop
}
case '}':
if p.quote&allParamExp != 0 {
break loop
}
case '/':
if p.quote&allParamExp != 0 && p.quote != paramExpExp {
break loop
}
case ']':
if p.quote&allRbrack != 0 {
break loop
}
case '!', '*':
if p.quote&allArithmExpr != 0 {
break loop
}
if p.quote == paramName && p.peekByte('(') {
tok = _Lit
break loop
}
case '?':
if p.quote == paramName && p.peekByte('(') {
tok = _Lit
break loop
}
fallthrough
case ':', '=', '%', '^', ',':
if p.quote&allArithmExpr != 0 || p.quote&allParamReg != 0 {
break loop
}
case '@':
if p.quote == paramName && p.peekByte('(') {
tok = _Lit
break loop
}
fallthrough
case '#', '[':
if p.quote&allParamReg != 0 {
break loop
}
if r == '[' && p.bash() && p.quote&allArithmExpr != 0 {
break loop
}
case '+':
if p.quote == paramName && p.peekByte('(') {
tok = _Lit
break loop
}
fallthrough
case '-':
switch p.quote {
case paramExpExp, paramExpRepl, sglQuotes:
default:
break loop
}
case ' ', '\t', ';', '&', '>', '<', '|', '(', ')', '\r':
switch p.quote {
case paramExpExp, paramExpRepl, sglQuotes:
default:
break loop
}
}
}
p.tok, p.val = tok, p.endLit()
}
func (p *Parser) advanceLitNone(r rune) {
p.asPos = 0
tok := _LitWord
loop:
for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
switch r {
case ' ', '\t', '\n', '\r', '&', '|', ';', '(', ')':
break loop
case '\\': // escaped byte follows
r = p.rune()
switch r {
case '\n':
p.discardLit(2)
case '\\':
// TODO: also treat escaped ` and $
// differently in backquotes
if p.quote == subCmdBckquo {
p.discardLit(1)
if r = p.rune(); r == '\\' {
p.discardLit(1)
p.rune()
}
}
}
case '>', '<':
if p.peekByte('(') {
tok = _Lit
} else {
tok = _LitRedir
}
break loop
case '`':
if p.quote != subCmdBckquo {
tok = _Lit
}
break loop
case '"', '\'', '$':
tok = _Lit
break loop
case '?', '*', '+', '@', '!':
if p.peekByte('(') {
tok = _Lit
break loop
}
case '=':
p.asPos = len(p.litBs) - 1
case '[':
if p.bash() && len(p.litBs) > 1 && p.litBs[0] != '[' {
tok = _Lit
break loop
}
}
}
p.tok, p.val = tok, p.endLit()
}
func (p *Parser) advanceLitDquote(r rune) {
tok := _LitWord
loop:
for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
switch r {
case '"':
break loop
case '\\': // escaped byte follows
p.rune()
case '`', '$':
tok = _Lit
break loop
}
}
p.tok, p.val = tok, p.endLit()
}
func (p *Parser) advanceLitHdoc(r rune) {
p.tok = _Lit
p.newLit(r)
if p.quote == hdocBodyTabs {
for r == '\t' {
r = p.rune()
}
}
lStart := len(p.litBs) - 1
loop:
for ; r != utf8.RuneSelf; r = p.rune() {
switch r {
case '`', '$':
break loop
case '\\': // escaped byte follows
p.rune()
case '\n':
if bytes.Equal(p.litBs[lStart:len(p.litBs)-1], p.hdocStop) {
p.val = p.endLit()[:lStart]
p.hdocStop = nil
return
}
if p.quote == hdocBodyTabs {
for p.peekByte('\t') {
p.rune()
}
}
lStart = len(p.litBs)
}
}
if bytes.Equal(p.litBs[lStart:], p.hdocStop) {
p.val = p.endLit()[:lStart]
p.hdocStop = nil
} else {
p.val = p.endLit()
}
}
func (p *Parser) hdocLitWord() *Word {
r := p.r
p.newLit(r)
pos, val := p.getPos(), ""
for {
if r == utf8.RuneSelf {
val = p.endLit()
break
}
if p.quote == hdocBodyTabs {
for r == '\t' {
r = p.rune()
}
}
lStart := len(p.litBs) - 1
for r != utf8.RuneSelf && r != '\n' {
r = p.rune()
}
lEnd := len(p.litBs)
if r != utf8.RuneSelf {
lEnd--
}
if bytes.Equal(p.litBs[lStart:lEnd], p.hdocStop) {
val = p.endLit()[:lStart]
break
}
r = p.rune()
}
l := p.lit(pos, val)
return p.word(p.wps(l))
}
func (p *Parser) advanceLitRe(r rune) {
lparens := 0
loop:
for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
switch r {
case '\\':
p.rune()
case '(':
lparens++
case ')':
lparens--
case ' ', '\t', '\r', '\n', ';':
if lparens == 0 {
break loop
}
}
}
p.tok, p.val = _LitWord, p.endLit()
}
func testUnaryOp(val string) token {
switch val {
case "!":
return exclMark
case "-e", "-a":
return tsExists
case "-f":
return tsRegFile
case "-d":
return tsDirect
case "-c":
return tsCharSp
case "-b":
return tsBlckSp
case "-p":
return tsNmPipe
case "-S":
return tsSocket
case "-L", "-h":
return tsSmbLink
case "-k":
return tsSticky
case "-g":
return tsGIDSet
case "-u":
return tsUIDSet
case "-G":
return tsGrpOwn
case "-O":
return tsUsrOwn
case "-N":
return tsModif
case "-r":
return tsRead
case "-w":
return tsWrite
case "-x":
return tsExec
case "-s":
return tsNoEmpty
case "-t":
return tsFdTerm
case "-z":
return tsEmpStr
case "-n":
return tsNempStr
case "-o":
return tsOptSet
case "-v":
return tsVarSet
case "-R":
return tsRefVar
default:
return illegalTok
}
}
func testBinaryOp(val string) token {
switch val {
case "==", "=":
return equal
case "!=":
return nequal
case "=~":
return tsReMatch
case "-nt":
return tsNewer
case "-ot":
return tsOlder
case "-ef":
return tsDevIno
case "-eq":
return tsEql
case "-ne":
return tsNeq
case "-le":
return tsLeq
case "-ge":
return tsGeq
case "-lt":
return tsLss
case "-gt":
return tsGtr
default:
return illegalTok
}
}