// Copyright (c) 2016, Daniel Martí
// See LICENSE for licensing information

package syntax

import "fmt"

// Node represents an AST node.
type Node interface {
	// Pos returns the position of the first character of the node.
	Pos() Pos
	// End returns the position of the character immediately after the
	// node.
	End() Pos
}

// File is a shell program.
type File struct {
	// Name is copied into the Filename field of every Position built by
	// File.Position.
	Name string

	Stmts    []*Stmt
	Comments []*Comment

	// lines is the table File.Position searches to turn a Pos into a
	// line and column number.
	// NOTE(review): presumably one entry per source line, in increasing
	// order, filled in by the parser - confirm against the parser code.
	lines []Pos
}

// Pos is the internal representation of a position within a source
// file.
type Pos uint32

// IsValid reports whether the position is valid, that is, whether it is
// greater than zero. All positions in nodes returned by Parse are valid.
func (p Pos) IsValid() bool { return p > 0 }

// maxPos is the largest value a Pos can hold (all 32 bits set).
const maxPos = Pos(^uint32(0))

// Position describes a position within a source file including the line
// and column location. A Position is valid if the line number is > 0.
type Position struct {
	Filename string // if any
	Offset   int    // byte offset, starting at 0
	Line     int    // line number, starting at 1
	Column   int    // column number, starting at 1 (in bytes)
}

// IsValid reports whether the position is valid, that is, whether its
// line number is greater than zero. All positions in nodes returned by
// Parse are valid.
func (p Position) IsValid() bool { return p.Line > 0 }

// String returns the position in the "file:line:column" form, or
// "line:column" if there is no filename available.
func (p Position) String() string {
	prefix := ""
	if p.Filename != "" {
		prefix = p.Filename + ":"
	}
	return fmt.Sprintf("%s%d:%d", prefix, p.Line, p.Column)
}

// Pos returns the position of the first statement in the file, or 0 if
// the file holds no statements.
func (f *File) Pos() Pos {
	if len(f.Stmts) == 0 {
		return 0
	}
	return f.Stmts[0].Pos()
}

// End returns the end position of the last statement in the file, or 0
// if the file holds no statements.
func (f *File) End() Pos {
	if len(f.Stmts) == 0 {
		return 0
	}
	return f.Stmts[len(f.Stmts)-1].End()
}

// Position expands p into a Position. Filename is set to f.Name and
// Offset to p minus one. Line and Column are only filled in when
// searchPos finds an entry of f.lines that is not greater than p;
// otherwise they are left at zero, which makes the result report itself
// as invalid.
func (f *File) Position(p Pos) (pos Position) {
	pos.Filename = f.Name
	pos.Offset = int(p) - 1
	if i := searchPos(f.lines, p); i >= 0 {
		// i is a zero-based index, while line numbers start at 1.
		pos.Line, pos.Column = i+1, int(p-f.lines[i])
	}
	return
}

// searchPos performs a binary search over a and returns the index of
// the last element that is less than or equal to x, or -1 if there is no
// such element. The result is only meaningful if a is sorted in
// increasing order.
func searchPos(a []Pos, x Pos) int {
	i, j := 0, len(a)
	for i < j {
		// Written as i + (j-i)/2 rather than (i+j)/2 so the midpoint
		// cannot overflow.
		h := i + (j-i)/2
		if a[h] <= x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i - 1
}

// posMax returns the larger of the two positions.
func posMax(p1, p2 Pos) Pos {
	if p2 > p1 {
		return p2
	}
	return p1
}

// Comment represents a single comment on a single line.
type Comment struct {
	Hash Pos
	Text string
}

// Pos returns the position stored in Hash.
func (c *Comment) Pos() Pos { return c.Hash }

// End returns Hash advanced by the byte length of Text.
// NOTE(review): whether Text includes the leading '#' is not visible
// here; if it does not, this result is one short of the real end of the
// comment - confirm against the parser.
func (c *Comment) End() Pos { return c.Hash + Pos(len(c.Text)) }

// Stmt represents a statement, otherwise known as a compound command.
// It is comprised of a command and other components that may come
// before or after it.
type Stmt struct {
	Cmd       Command
	Position  Pos
	Semicolon Pos

	Negated    bool // ! stmt
	Background bool // stmt &
	Coprocess  bool // mksh's |&

	Assigns []*Assign   // a=x b=y stmt
	Redirs  []*Redirect // stmt >a
	// NOTE(review): the source text is truncated at this point. The end
	// of the comment above, the closing brace of this struct, the
	// Stmt.Pos method and the beginning of Stmt.End are all missing; what
	// follows is only the surviving tail of what appears to be Stmt.End.
	// It is kept exactly as found - restore the missing text from version
	// control rather than reconstructing it by hand.
	0 {
		end = posMax(end, s.Assigns[len(s.Assigns)-1].End())
	}
	if len(s.Redirs) > 0 {
		end = posMax(end, s.Redirs[len(s.Redirs)-1].End())
	}
	return end
}

// Command represents all nodes that are simple commands, which are
// directly placed in a Stmt.
//
// These are *CallExpr, *IfClause, *WhileClause, *ForClause,
// *CaseClause, *Block, *Subshell, *BinaryCmd, *FuncDecl, *ArithmCmd,
// *TestClause, *DeclClause, *LetClause, *TimeClause, and *CoprocClause.
type Command interface { Node commandNode() } func (*CallExpr) commandNode() {} func (*IfClause) commandNode() {} func (*WhileClause) commandNode() {} func (*ForClause) commandNode() {} func (*CaseClause) commandNode() {} func (*Block) commandNode() {} func (*Subshell) commandNode() {} func (*BinaryCmd) commandNode() {} func (*FuncDecl) commandNode() {} func (*ArithmCmd) commandNode() {} func (*TestClause) commandNode() {} func (*DeclClause) commandNode() {} func (*LetClause) commandNode() {} func (*TimeClause) commandNode() {} func (*CoprocClause) commandNode() {} // Assign represents an assignment to a variable. type Assign struct { Append bool // += Naked bool // without '=' Name *Lit Index ArithmExpr // [i] Key *DblQuoted // ["k"] Value *Word // =val Array *ArrayExpr // =(arr) } func (a *Assign) Pos() Pos { return a.Name.Pos() } func (a *Assign) End() Pos { if a.Value != nil { return a.Value.End() } if a.Array != nil { return a.Array.End() } if a.Index != nil { return a.Index.End() + 2 } else if a.Key != nil { return a.Key.End() + 2 } if a.Naked { return a.Name.End() } return a.Name.End() + 1 } // Redirect represents an input/output redirection. type Redirect struct { OpPos Pos Op RedirOperator N *Lit // N> Word *Word // >word Hdoc *Word // here-document body } func (r *Redirect) Pos() Pos { if r.N != nil { return r.N.Pos() } return r.OpPos } func (r *Redirect) End() Pos { return r.Word.End() } // CallExpr represents a command execution or function call. type CallExpr struct { Args []*Word } func (c *CallExpr) Pos() Pos { return c.Args[0].Pos() } func (c *CallExpr) End() Pos { return c.Args[len(c.Args)-1].End() } // Subshell represents a series of commands that should be executed in a // nested shell environment. type Subshell struct { Lparen, Rparen Pos Stmts []*Stmt } func (s *Subshell) Pos() Pos { return s.Lparen } func (s *Subshell) End() Pos { return s.Rparen + 1 } // Block represents a series of commands that should be executed in a // nested scope. 
type Block struct { Lbrace, Rbrace Pos Stmts []*Stmt } func (b *Block) Pos() Pos { return b.Rbrace } func (b *Block) End() Pos { return b.Rbrace + 1 } // IfClause represents an if statement. type IfClause struct { If, Then, Else, Fi Pos CondStmts []*Stmt ThenStmts []*Stmt Elifs []*Elif ElseStmts []*Stmt } func (c *IfClause) Pos() Pos { return c.If } func (c *IfClause) End() Pos { return c.Fi + 2 } // Elif represents an "else if" case in an if clause. type Elif struct { Elif, Then Pos CondStmts []*Stmt ThenStmts []*Stmt } // WhileClause represents a while or an until clause. type WhileClause struct { While, Do, Done Pos Until bool CondStmts []*Stmt DoStmts []*Stmt } func (w *WhileClause) Pos() Pos { return w.While } func (w *WhileClause) End() Pos { return w.Done + 4 } // ForClause represents a for clause. type ForClause struct { For, Do, Done Pos Loop Loop DoStmts []*Stmt } func (f *ForClause) Pos() Pos { return f.For } func (f *ForClause) End() Pos { return f.Done + 4 } // Loop holds either *WordIter or *CStyleLoop. type Loop interface { Node loopNode() } func (*WordIter) loopNode() {} func (*CStyleLoop) loopNode() {} // WordIter represents the iteration of a variable over a series of // words in a for clause. type WordIter struct { Name *Lit Items []*Word } func (w *WordIter) Pos() Pos { return w.Name.Pos() } func (w *WordIter) End() Pos { return posMax(w.Name.End(), wordLastEnd(w.Items)) } // CStyleLoop represents the behaviour of a for clause similar to the C // language. // // This node will never appear when in PosixConformant mode. type CStyleLoop struct { Lparen, Rparen Pos Init, Cond, Post ArithmExpr } func (c *CStyleLoop) Pos() Pos { return c.Lparen } func (c *CStyleLoop) End() Pos { return c.Rparen + 2 } // BinaryCmd represents a binary expression between two statements. 
type BinaryCmd struct { OpPos Pos Op BinCmdOperator X, Y *Stmt } func (b *BinaryCmd) Pos() Pos { return b.X.Pos() } func (b *BinaryCmd) End() Pos { return b.Y.End() } // FuncDecl represents the declaration of a function. type FuncDecl struct { Position Pos RsrvWord bool // non-posix "function " style Name *Lit Body *Stmt } func (f *FuncDecl) Pos() Pos { return f.Position } func (f *FuncDecl) End() Pos { return f.Body.End() } // Word represents a non-empty list of nodes that are contiguous to each // other. The word is delimeted by word boundaries. type Word struct { Parts []WordPart } func (w *Word) Pos() Pos { return w.Parts[0].Pos() } func (w *Word) End() Pos { return w.Parts[len(w.Parts)-1].End() } // WordPart represents all nodes that can form a word. // // These are *Lit, *SglQuoted, *DblQuoted, *ParamExp, *CmdSubst, // *ArithmExp, *ProcSubst, and *ExtGlob. type WordPart interface { Node wordPartNode() } func (*Lit) wordPartNode() {} func (*SglQuoted) wordPartNode() {} func (*DblQuoted) wordPartNode() {} func (*ParamExp) wordPartNode() {} func (*CmdSubst) wordPartNode() {} func (*ArithmExp) wordPartNode() {} func (*ProcSubst) wordPartNode() {} func (*ExtGlob) wordPartNode() {} // Lit represents an unquoted string consisting of characters that were // not tokenized. type Lit struct { ValuePos, ValueEnd Pos Value string } func (l *Lit) Pos() Pos { return l.ValuePos } func (l *Lit) End() Pos { return l.ValueEnd } // SglQuoted represents a string within single quotes. type SglQuoted struct { Position Pos Dollar bool // $'' Value string } func (q *SglQuoted) Pos() Pos { return q.Position } func (q *SglQuoted) End() Pos { end := q.Position + 2 + Pos(len(q.Value)) if q.Dollar { end++ } return end } // DblQuoted represents a list of nodes within double quotes. 
type DblQuoted struct { Position Pos Dollar bool // $"" Parts []WordPart } func (q *DblQuoted) Pos() Pos { return q.Position } func (q *DblQuoted) End() Pos { if len(q.Parts) == 0 { if q.Dollar { return q.Position + 3 } return q.Position + 2 } return q.Parts[len(q.Parts)-1].End() + 1 } // CmdSubst represents a command substitution. type CmdSubst struct { Left, Right Pos Stmts []*Stmt TempFile bool // mksh's ${ foo;} ReplyVar bool // mksh's ${|foo;} } func (c *CmdSubst) Pos() Pos { return c.Left } func (c *CmdSubst) End() Pos { return c.Right + 1 } // ParamExp represents a parameter expansion. type ParamExp struct { Dollar, Rbrace Pos Short bool // $a instead of ${a} Indirect bool // ${!a} Length bool // ${#a} Width bool // ${%a} Param *Lit Index ArithmExpr // ${a[i]} Key *DblQuoted // ${a["k"]} Slice *Slice // ${a:x:y} Repl *Replace // ${a/x/y} Exp *Expansion // ${a:-b}, ${a#b}, etc } func (p *ParamExp) Pos() Pos { return p.Dollar } func (p *ParamExp) End() Pos { if !p.Short { return p.Rbrace + 1 } if p.Index != nil { return p.Index.End() + 1 } else if p.Key != nil { return p.Key.End() + 1 } return p.Param.End() } func (p *ParamExp) nakedIndex() bool { return p.Short && (p.Index != nil || p.Key != nil) } // Slice represents character slicing inside a ParamExp. // // This node will never appear when in PosixConformant mode. type Slice struct { Offset, Length ArithmExpr } // Replace represents a search and replace inside a ParamExp. type Replace struct { All bool Orig, With *Word } // Expansion represents string manipulation in a ParamExp other than // those covered by Replace. type Expansion struct { Op ParExpOperator Word *Word } // ArithmExp represents an arithmetic expansion. 
type ArithmExp struct { Left, Right Pos Bracket bool // deprecated $[expr] form Unsigned bool // mksh's $((# expr)) X ArithmExpr } func (a *ArithmExp) Pos() Pos { return a.Left } func (a *ArithmExp) End() Pos { if a.Bracket { return a.Right + 1 } return a.Right + 2 } // ArithmCmd represents an arithmetic command. // // This node will never appear when in PosixConformant mode. type ArithmCmd struct { Left, Right Pos Unsigned bool // mksh's ((# expr)) X ArithmExpr } func (a *ArithmCmd) Pos() Pos { return a.Left } func (a *ArithmCmd) End() Pos { return a.Right + 2 } // ArithmExpr represents all nodes that form arithmetic expressions. // // These are *BinaryArithm, *UnaryArithm, *ParenArithm, and *Word. type ArithmExpr interface { Node arithmExprNode() } func (*BinaryArithm) arithmExprNode() {} func (*UnaryArithm) arithmExprNode() {} func (*ParenArithm) arithmExprNode() {} func (*Word) arithmExprNode() {} // BinaryArithm represents a binary expression between two arithmetic // expression. // // If Op is any assign operator, X will be a word with a single *Lit // whose value is a valid name. // // Ternary operators like "a ? b : c" are fit into this structure. Thus, // if Op == Quest, Y will be a *BinaryArithm with Op == Colon. Op can // only be Colon in that scenario. type BinaryArithm struct { OpPos Pos Op BinAritOperator X, Y ArithmExpr } func (b *BinaryArithm) Pos() Pos { return b.X.Pos() } func (b *BinaryArithm) End() Pos { return b.Y.End() } // UnaryArithm represents an unary expression over a node, either before // or after it. // // If Op is Inc or Dec, X will be a word with a single *Lit whose value // is a valid name. type UnaryArithm struct { OpPos Pos Op UnAritOperator Post bool X ArithmExpr } func (u *UnaryArithm) Pos() Pos { if u.Post { return u.X.Pos() } return u.OpPos } func (u *UnaryArithm) End() Pos { if u.Post { return u.OpPos + 2 } return u.X.End() } // ParenArithm represents an expression within parentheses inside an // ArithmExp. 
type ParenArithm struct { Lparen, Rparen Pos X ArithmExpr } func (p *ParenArithm) Pos() Pos { return p.Lparen } func (p *ParenArithm) End() Pos { return p.Rparen + 1 } // CaseClause represents a case (switch) clause. type CaseClause struct { Case, Esac Pos Word *Word Items []*CaseItem } func (c *CaseClause) Pos() Pos { return c.Case } func (c *CaseClause) End() Pos { return c.Esac + 4 } // CaseItem represents a pattern list (case) within a CaseClause. type CaseItem struct { Op CaseOperator OpPos Pos Patterns []*Word Stmts []*Stmt } // TestClause represents a Bash extended test clause. // // This node will never appear when in PosixConformant mode. type TestClause struct { Left, Right Pos X TestExpr } func (t *TestClause) Pos() Pos { return t.Left } func (t *TestClause) End() Pos { return t.Right + 2 } // TestExpr represents all nodes that form arithmetic expressions. // // These are *BinaryTest, *UnaryTest, *ParenTest, and *Word. type TestExpr interface { Node testExprNode() } func (*BinaryTest) testExprNode() {} func (*UnaryTest) testExprNode() {} func (*ParenTest) testExprNode() {} func (*Word) testExprNode() {} // BinaryTest represents a binary expression between two arithmetic // expression. type BinaryTest struct { OpPos Pos Op BinTestOperator X, Y TestExpr } func (b *BinaryTest) Pos() Pos { return b.X.Pos() } func (b *BinaryTest) End() Pos { return b.Y.End() } // UnaryTest represents an unary expression over a node, either before // or after it. type UnaryTest struct { OpPos Pos Op UnTestOperator X TestExpr } func (u *UnaryTest) Pos() Pos { return u.OpPos } func (u *UnaryTest) End() Pos { return u.X.End() } // ParenTest represents an expression within parentheses inside an // TestExp. type ParenTest struct { Lparen, Rparen Pos X TestExpr } func (p *ParenTest) Pos() Pos { return p.Lparen } func (p *ParenTest) End() Pos { return p.Rparen + 1 } // DeclClause represents a Bash declare clause. // // This node will never appear when in PosixConformant mode. 
type DeclClause struct { Position Pos Variant string // "declare", "local", etc Opts []*Word Assigns []*Assign } func (d *DeclClause) Pos() Pos { return d.Position } func (d *DeclClause) End() Pos { if len(d.Assigns) > 0 { return d.Assigns[len(d.Assigns)-1].End() } return wordLastEnd(d.Opts) } // ArrayExpr represents a Bash array expression. // // This node will never appear when in PosixConformant mode. type ArrayExpr struct { Lparen, Rparen Pos Elems []*ArrayElem } func (a *ArrayExpr) Pos() Pos { return a.Lparen } func (a *ArrayExpr) End() Pos { return a.Rparen + 1 } type ArrayElem struct { Index ArithmExpr Key *DblQuoted Value *Word } func (a *ArrayElem) Pos() Pos { if a.Index != nil { return a.Index.Pos() } else if a.Key != nil { return a.Key.Pos() } return a.Value.Pos() } func (a *ArrayElem) End() Pos { return a.Value.End() } // ExtGlob represents a Bash extended globbing expression. Note that // these are parsed independently of whether shopt has been called or // not. // // This node will never appear when in PosixConformant mode. type ExtGlob struct { OpPos Pos Op GlobOperator Pattern *Lit } func (e *ExtGlob) Pos() Pos { return e.OpPos } func (e *ExtGlob) End() Pos { return e.Pattern.End() + 1 } // ProcSubst represents a Bash process substitution. // // This node will never appear when in PosixConformant mode. type ProcSubst struct { OpPos, Rparen Pos Op ProcOperator Stmts []*Stmt } func (s *ProcSubst) Pos() Pos { return s.OpPos } func (s *ProcSubst) End() Pos { return s.Rparen + 1 } // TimeClause represents a Bash time clause. // // This node will never appear when in PosixConformant mode. type TimeClause struct { Time Pos Stmt *Stmt } func (c *TimeClause) Pos() Pos { return c.Time } func (c *TimeClause) End() Pos { if c.Stmt == nil { return c.Time + 4 } return c.Stmt.End() } // CoprocClause represents a Bash coproc clause. // // This node will never appear when in PosixConformant mode. 
type CoprocClause struct { Coproc Pos Name *Lit Stmt *Stmt } func (c *CoprocClause) Pos() Pos { return c.Coproc } func (c *CoprocClause) End() Pos { return c.Stmt.End() } // LetClause represents a Bash let clause. // // This node will never appear when in PosixConformant mode. type LetClause struct { Let Pos Exprs []ArithmExpr } func (l *LetClause) Pos() Pos { return l.Let } func (l *LetClause) End() Pos { return l.Exprs[len(l.Exprs)-1].End() } func wordLastEnd(ws []*Word) Pos { if len(ws) == 0 { return 0 } return ws[len(ws)-1].End() }