1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-03-19 21:10:15 +02:00

Fix Dollar-Quoted Strings (postgres + cql)

This commit refactors code from the markdown lexer into the chroma
package, and alters the PostgreSQL and CQL lexers to make use of it.

Additionally, an example Markdown file exercising the various sublexers is added.
This commit is contained in:
Kenneth Shaw 2018-06-10 16:33:48 +07:00
parent 307ceefd96
commit 95d0a9381b
8 changed files with 743 additions and 27 deletions

View File

@ -23,6 +23,15 @@ var CassandraCQL = internal.Register(MustNewLexer(
{`(ascii|bigint|blob|boolean|counter|date|decimal|double|float|frozen|inet|int|list|map|set|smallint|text|time|timestamp|timeuuid|tinyint|tuple|uuid|varchar|varint)\b`, NameBuiltin, nil},
{Words(``, `\b`, `ADD`, `AGGREGATE`, `ALL`, `ALLOW`, `ALTER`, `AND`, `ANY`, `APPLY`, `AS`, `ASC`, `AUTHORIZE`, `BATCH`, `BEGIN`, `BY`, `CLUSTERING`, `COLUMNFAMILY`, `COMPACT`, `CONSISTENCY`, `COUNT`, `CREATE`, `CUSTOM`, `DELETE`, `DESC`, `DISTINCT`, `DROP`, `EACH_QUORUM`, `ENTRIES`, `EXISTS`, `FILTERING`, `FROM`, `FULL`, `GRANT`, `IF`, `IN`, `INDEX`, `INFINITY`, `INSERT`, `INTO`, `KEY`, `KEYS`, `KEYSPACE`, `KEYSPACES`, `LEVEL`, `LIMIT`, `LOCAL_ONE`, `LOCAL_QUORUM`, `MATERIALIZED`, `MODIFY`, `NAN`, `NORECURSIVE`, `NOSUPERUSER`, `NOT`, `OF`, `ON`, `ONE`, `ORDER`, `PARTITION`, `PASSWORD`, `PER`, `PERMISSION`, `PERMISSIONS`, `PRIMARY`, `QUORUM`, `RENAME`, `REVOKE`, `SCHEMA`, `SELECT`, `STATIC`, `STORAGE`, `SUPERUSER`, `TABLE`, `THREE`, `TO`, `TOKEN`, `TRUNCATE`, `TTL`, `TWO`, `TYPE`, `UNLOGGED`, `UPDATE`, `USE`, `USER`, `USERS`, `USING`, `VALUES`, `VIEW`, `WHERE`, `WITH`, `WRITETIME`, `REPLICATION`, `OR`, `REPLACE`, `FUNCTION`, `CALLED`, `INPUT`, `RETURNS`, `LANGUAGE`, `ROLE`, `ROLES`, `TRIGGER`, `DURABLE_WRITES`, `LOGIN`, `OPTIONS`, `LOGGED`, `SFUNC`, `STYPE`, `FINALFUNC`, `INITCOND`, `IS`, `CONTAINS`, `JSON`, `PAGING`, `OFF`), Keyword, nil},
{"[+*/<>=~!@#%^&|`?-]+", Operator, nil},
{`(?s)(java|javascript)(\s+)(AS)(\s+)('|\$\$)(.*?)(\5)`,
UsingByGroup(
internal.Get,
1, 6,
NameBuiltin, TextWhitespace, Keyword, TextWhitespace,
LiteralStringHeredoc, LiteralStringHeredoc, LiteralStringHeredoc,
),
nil,
},
{`(true|false|null)\b`, KeywordConstant, nil},
{`0x[0-9a-f]+`, LiteralNumberHex, nil},
{`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`, LiteralNumberHex, nil},

View File

@ -21,8 +21,15 @@ var Markdown = internal.Register(MustNewLexer(
{`^(\s*)([*-])(\s)(.+\n)`, ByGroups(Text, Keyword, Text, UsingSelf("inline")), nil},
{`^(\s*)([0-9]+\.)( .+\n)`, ByGroups(Text, Keyword, UsingSelf("inline")), nil},
{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
{"^(```\\n)([\\w\\W]*?)(^```$)", ByGroups(LiteralString, Text, LiteralString), nil},
{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)", EmitterFunc(markdownCodeBlock), nil},
{"^(```\\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil},
{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
UsingByGroup(
internal.Get,
2, 4,
String, String, String, Text, String,
),
nil,
},
Include("inline"),
},
"inline": {
@ -38,26 +45,3 @@ var Markdown = internal.Register(MustNewLexer(
},
},
))
// markdownCodeBlock emits the tokens for a fenced code block whose opening
// fence names a language.
//
// groups is expected to hold the regex match: groups[1] is the opening fence,
// groups[2] the language name, groups[3] the newline after it, groups[4] the
// code itself and groups[5] the closing fence. The language name is looked up
// with internal.Get; when a lexer is found the code is tokenised with it,
// otherwise the code is emitted as a single String token.
//
// The lexer argument is not consulted. Panics if the sublexer fails to
// tokenise the code.
func markdownCodeBlock(groups []string, lexer Lexer) Iterator {
	// Opening fence, language name and the newline that follows them.
	header := []*Token{
		{String, groups[1]},
		{String, groups[2]},
		{Text, groups[3]},
	}
	body := groups[4]

	var parts []Iterator
	if sublexer := internal.Get(groups[2]); sublexer != nil {
		highlighted, err := sublexer.Tokenise(nil, body)
		if err != nil {
			panic(err)
		}
		parts = []Iterator{Literator(header...), highlighted}
	} else {
		// No lexer registered under that name: keep the code as one literal token.
		header = append(header, &Token{String, body})
		parts = []Iterator{Literator(header...)}
	}

	// Closing fence.
	parts = append(parts, Literator(&Token{String, groups[5]}))
	return Concaterator(parts...)
}

View File

@ -21,6 +21,18 @@ var PostgreSQL = internal.Register(MustNewLexer(
{`--.*\n?`, CommentSingle, nil},
{`/\*`, CommentMultiline, Push("multiline-comments")},
{`(bigint|bigserial|bit|bit\s+varying|bool|boolean|box|bytea|char|character|character\s+varying|cidr|circle|date|decimal|double\s+precision|float4|float8|inet|int|int2|int4|int8|integer|interval|json|jsonb|line|lseg|macaddr|money|numeric|path|pg_lsn|point|polygon|real|serial|serial2|serial4|serial8|smallint|smallserial|text|time|timestamp|timestamptz|timetz|tsquery|tsvector|txid_snapshot|uuid|varbit|varchar|with\s+time\s+zone|without\s+time\s+zone|xml|anyarray|anyelement|anyenum|anynonarray|anyrange|cstring|fdw_handler|internal|language_handler|opaque|record|void)\b`, NameBuiltin, nil},
{`(?s)(DO)(\s+)(?:(LANGUAGE)?(\s+)('?)(\w+)?('?)(\s+))?(\$)([^$]*)(\$)(.*?)(\$)(\10)(\$)`,
UsingByGroup(
internal.Get,
6, 12,
Keyword, Text, Keyword, Text, // DO LANGUAGE
StringSingle, StringSingle, StringSingle, Text, // 'plpgsql'
StringHeredoc, StringHeredoc, StringHeredoc, // $tag$
StringHeredoc, // (code block)
StringHeredoc, StringHeredoc, StringHeredoc, // $tag$
),
nil,
},
{Words(``, `\b`, `ABORT`, `ABSOLUTE`, `ACCESS`, `ACTION`, `ADD`, `ADMIN`, `AFTER`, `AGGREGATE`, `ALL`, `ALSO`, `ALTER`, `ALWAYS`, `ANALYSE`, `ANALYZE`, `AND`, `ANY`, `ARRAY`, `AS`, `ASC`, `ASSERTION`, `ASSIGNMENT`, `ASYMMETRIC`, `AT`, `ATTRIBUTE`, `AUTHORIZATION`, `BACKWARD`, `BEFORE`, `BEGIN`, `BETWEEN`, `BIGINT`, `BINARY`, `BIT`, `BOOLEAN`, `BOTH`, `BY`, `CACHE`, `CALLED`, `CASCADE`, `CASCADED`, `CASE`, `CAST`, `CATALOG`, `CHAIN`, `CHAR`, `CHARACTER`, `CHARACTERISTICS`, `CHECK`, `CHECKPOINT`, `CLASS`, `CLOSE`, `CLUSTER`, `COALESCE`, `COLLATE`, `COLLATION`, `COLUMN`, `COMMENT`, `COMMENTS`, `COMMIT`, `COMMITTED`, `CONCURRENTLY`, `CONFIGURATION`, `CONNECTION`, `CONSTRAINT`, `CONSTRAINTS`, `CONTENT`, `CONTINUE`, `CONVERSION`, `COPY`, `COST`, `CREATE`, `CROSS`, `CSV`, `CURRENT`, `CURRENT_CATALOG`, `CURRENT_DATE`, `CURRENT_ROLE`, `CURRENT_SCHEMA`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `CURRENT_USER`, `CURSOR`, `CYCLE`, `DATA`, `DATABASE`, `DAY`, `DEALLOCATE`, `DEC`, `DECIMAL`, `DECLARE`, `DEFAULT`, `DEFAULTS`, `DEFERRABLE`, `DEFERRED`, `DEFINER`, `DELETE`, `DELIMITER`, `DELIMITERS`, `DESC`, `DICTIONARY`, `DISABLE`, `DISCARD`, `DISTINCT`, `DO`, `DOCUMENT`, `DOMAIN`, `DOUBLE`, `DROP`, `EACH`, `ELSE`, `ENABLE`, `ENCODING`, `ENCRYPTED`, `END`, `ENUM`, `ESCAPE`, `EVENT`, `EXCEPT`, `EXCLUDE`, `EXCLUDING`, `EXCLUSIVE`, `EXECUTE`, `EXISTS`, `EXPLAIN`, `EXTENSION`, `EXTERNAL`, `EXTRACT`, `FALSE`, `FAMILY`, `FETCH`, `FILTER`, `FIRST`, `FLOAT`, `FOLLOWING`, `FOR`, `FORCE`, `FOREIGN`, `FORWARD`, `FREEZE`, `FROM`, `FULL`, `FUNCTION`, `FUNCTIONS`, `GLOBAL`, `GRANT`, `GRANTED`, `GREATEST`, `GROUP`, `HANDLER`, `HAVING`, `HEADER`, `HOLD`, `HOUR`, `IDENTITY`, `IF`, `ILIKE`, `IMMEDIATE`, `IMMUTABLE`, `IMPLICIT`, `IN`, `INCLUDING`, `INCREMENT`, `INDEX`, `INDEXES`, `INHERIT`, `INHERITS`, `INITIALLY`, `INLINE`, `INNER`, `INOUT`, `INPUT`, `INSENSITIVE`, `INSERT`, `INSTEAD`, `INT`, `INTEGER`, `INTERSECT`, `INTERVAL`, `INTO`, `INVOKER`, `IS`, `ISNULL`, `ISOLATION`, `JOIN`, `KEY`, `LABEL`, 
`LANGUAGE`, `LARGE`, `LAST`, `LATERAL`, `LC_COLLATE`, `LC_CTYPE`, `LEADING`, `LEAKPROOF`, `LEAST`, `LEFT`, `LEVEL`, `LIKE`, `LIMIT`, `LISTEN`, `LOAD`, `LOCAL`, `LOCALTIME`, `LOCALTIMESTAMP`, `LOCATION`, `LOCK`, `MAPPING`, `MATCH`, `MATERIALIZED`, `MAXVALUE`, `MINUTE`, `MINVALUE`, `MODE`, `MONTH`, `MOVE`, `NAME`, `NAMES`, `NATIONAL`, `NATURAL`, `NCHAR`, `NEXT`, `NO`, `NONE`, `NOT`, `NOTHING`, `NOTIFY`, `NOTNULL`, `NOWAIT`, `NULL`, `NULLIF`, `NULLS`, `NUMERIC`, `OBJECT`, `OF`, `OFF`, `OFFSET`, `OIDS`, `ON`, `ONLY`, `OPERATOR`, `OPTION`, `OPTIONS`, `OR`, `ORDER`, `ORDINALITY`, `OUT`, `OUTER`, `OVER`, `OVERLAPS`, `OVERLAY`, `OWNED`, `OWNER`, `PARSER`, `PARTIAL`, `PARTITION`, `PASSING`, `PASSWORD`, `PLACING`, `PLANS`, `POLICY`, `POSITION`, `PRECEDING`, `PRECISION`, `PREPARE`, `PREPARED`, `PRESERVE`, `PRIMARY`, `PRIOR`, `PRIVILEGES`, `PROCEDURAL`, `PROCEDURE`, `PROGRAM`, `QUOTE`, `RANGE`, `READ`, `REAL`, `REASSIGN`, `RECHECK`, `RECURSIVE`, `REF`, `REFERENCES`, `REFRESH`, `REINDEX`, `RELATIVE`, `RELEASE`, `RENAME`, `REPEATABLE`, `REPLACE`, `REPLICA`, `RESET`, `RESTART`, `RESTRICT`, `RETURNING`, `RETURNS`, `REVOKE`, `RIGHT`, `ROLE`, `ROLLBACK`, `ROW`, `ROWS`, `RULE`, `SAVEPOINT`, `SCHEMA`, `SCROLL`, `SEARCH`, `SECOND`, `SECURITY`, `SELECT`, `SEQUENCE`, `SEQUENCES`, `SERIALIZABLE`, `SERVER`, `SESSION`, `SESSION_USER`, `SET`, `SETOF`, `SHARE`, `SHOW`, `SIMILAR`, `SIMPLE`, `SMALLINT`, `SNAPSHOT`, `SOME`, `STABLE`, `STANDALONE`, `START`, `STATEMENT`, `STATISTICS`, `STDIN`, `STDOUT`, `STORAGE`, `STRICT`, `STRIP`, `SUBSTRING`, `SYMMETRIC`, `SYSID`, `SYSTEM`, `TABLE`, `TABLES`, `TABLESPACE`, `TEMP`, `TEMPLATE`, `TEMPORARY`, `TEXT`, `THEN`, `TIME`, `TIMESTAMP`, `TO`, `TRAILING`, `TRANSACTION`, `TREAT`, `TRIGGER`, `TRIM`, `TRUE`, `TRUNCATE`, `TRUSTED`, `TYPE`, `TYPES`, `UNBOUNDED`, `UNCOMMITTED`, `UNENCRYPTED`, `UNION`, `UNIQUE`, `UNKNOWN`, `UNLISTEN`, `UNLOGGED`, `UNTIL`, `UPDATE`, `USER`, `USING`, `VACUUM`, `VALID`, `VALIDATE`, `VALIDATOR`, `VALUE`, `VALUES`, `VARCHAR`, 
`VARIADIC`, `VARYING`, `VERBOSE`, `VERSION`, `VIEW`, `VIEWS`, `VOLATILE`, `WHEN`, `WHERE`, `WHITESPACE`, `WINDOW`, `WITH`, `WITHIN`, `WITHOUT`, `WORK`, `WRAPPER`, `WRITE`, `XML`, `XMLATTRIBUTES`, `XMLCONCAT`, `XMLELEMENT`, `XMLEXISTS`, `XMLFOREST`, `XMLPARSE`, `XMLPI`, `XMLROOT`, `XMLSERIALIZE`, `YEAR`, `YES`, `ZONE`), Keyword, nil},
{"[+*/<>=~!@#%^&|`?-]+", Operator, nil},
{`::`, Operator, nil},
@ -29,7 +41,18 @@ var PostgreSQL = internal.Register(MustNewLexer(
{`[0-9]+`, LiteralNumberInteger, nil},
{`((?:E|U&)?)(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("string")},
{`((?:U&)?)(")`, ByGroups(LiteralStringAffix, LiteralStringName), Push("quoted-ident")},
// { `(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)`, ?? <function language_callback at 0x101105400> ??, nil },
{`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)(\s+)(LANGUAGE)?(\s+)('?)(\w+)?('?)`,
UsingByGroup(internal.Get,
12, 4,
StringHeredoc, StringHeredoc, StringHeredoc, // $tag$
StringHeredoc, // (code block)
StringHeredoc, StringHeredoc, StringHeredoc, // $tag$
Text, Keyword, Text, // <space> LANGUAGE <space>
StringSingle, StringSingle, StringSingle, // 'type'
),
nil,
},
{`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)`, LiteralStringHeredoc, nil},
{`[a-z_]\w*`, Name, nil},
{`:(['"]?)[a-z]\w*\b\1`, NameVariable, nil},
{`[;:()\[\]{},.]`, Punctuation, nil},

View File

@ -54,3 +54,15 @@ CREATE MATERIALIZED VIEW cyclist_by_country AS SELECT age, birthday, name, count
CREATE MATERIALIZED VIEW cyclist_by_birthday AS SELECT age, birthday, name, country FROM cyclist_mv WHERE birthday is NOT NULL AND cid IS NOT NULL PRIMARY KEY (birthday, cid);
DROP MATERIALIZED VIEW cyclist_by_age;
INSERT INTO cycling.calendar (race_id, race_name, race_start_date, race_end_date) VALUES (200, 'placeholder', '2015-05-27', '2015-05-27') USING TIMESTAMP 123456789;
CREATE FUNCTION IF NOT EXISTS cycling.left (column TEXT,num int)
RETURNS NULL ON NULL INPUT
RETURNS text
LANGUAGE javascript AS $$
column.substring(0,num)
$$;
CREATE OR REPLACE FUNCTION cycling.fLog (input double)
CALLED ON NULL INPUT
RETURNS double LANGUAGE java AS
'return Double.valueOf(Math.log(input.doubleValue()));';

View File

@ -969,5 +969,117 @@
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberFloat","value":"123456789"},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"FUNCTION"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"NOT"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"EXISTS"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"cycling"},
{"type":"Punctuation","value":"."},
{"type":"Name","value":"left"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"column"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"TEXT"},
{"type":"Punctuation","value":","},
{"type":"Name","value":"num"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"int"},
{"type":"Punctuation","value":")"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"RETURNS"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordConstant","value":"NULL"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"ON"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordConstant","value":"NULL"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"INPUT"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"RETURNS"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"text"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"LANGUAGE"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"javascript"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"AS"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringHeredoc","value":"$$"},
{"type":"Text","value":" \n "},
{"type":"NameOther","value":"column"},
{"type":"Punctuation","value":"."},
{"type":"NameOther","value":"substring"},
{"type":"Punctuation","value":"("},
{"type":"LiteralNumberInteger","value":"0"},
{"type":"Punctuation","value":","},
{"type":"NameOther","value":"num"},
{"type":"Punctuation","value":")"},
{"type":"Text","value":" \n"},
{"type":"LiteralStringHeredoc","value":"$$"},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"OR"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"REPLACE"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"FUNCTION"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"cycling"},
{"type":"Punctuation","value":"."},
{"type":"Name","value":"fLog"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"("},
{"type":"Keyword","value":"input"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"double"},
{"type":"Punctuation","value":")"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"CALLED"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"ON"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordConstant","value":"NULL"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"INPUT"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"RETURNS"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"double"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"LANGUAGE"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"java"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"AS"},
{"type":"TextWhitespace","value":"\n"},
{"type":"LiteralStringHeredoc","value":"'"},
{"type":"Keyword","value":"return"},
{"type":"Text","value":" "},
{"type":"Name","value":"Double"},
{"type":"Operator","value":"."},
{"type":"NameAttribute","value":"valueOf"},
{"type":"Operator","value":"("},
{"type":"Name","value":"Math"},
{"type":"Operator","value":"."},
{"type":"NameAttribute","value":"log"},
{"type":"Operator","value":"("},
{"type":"Name","value":"input"},
{"type":"Operator","value":"."},
{"type":"NameAttribute","value":"doubleValue"},
{"type":"Operator","value":"()));"},
{"type":"LiteralStringHeredoc","value":"'"},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":"\n"}
]

81
lexers/testdata/markdown.actual vendored Normal file
View File

@ -0,0 +1,81 @@
# about
## user defined function in cql
```javascript
column.substring(0,num)
```
```cql
CREATE FUNCTION IF NOT EXISTS cycling.left (column TEXT,num int)
RETURNS NULL ON NULL INPUT
RETURNS text
LANGUAGE javascript AS $$
column.substring(0,num)
$$;
CREATE OR REPLACE FUNCTION cycling.fLog (input double)
CALLED ON NULL INPUT
RETURNS double LANGUAGE java AS
'return Double.valueOf(Math.log(input.doubleValue()));';
```
```postgres
DROP TABLE IF EXISTS emp CASCADE;
CREATE TABLE emp (
empname text,
salary integer,
last_date timestamp,
last_user text
);
select
$my_tag$aoeuaoeu$my_tag$ as blah
;
CREATE OR REPLACE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$
BEGIN
-- Check that empname and salary are given
IF NEW.empname IS NULL THEN
RAISE EXCEPTION 'empname cannot be null';
END IF;
IF NEW.salary IS NULL THEN
RAISE EXCEPTION '% cannot have null salary', NEW.empname;
END IF;
-- Who works for us when she must pay for it?
IF NEW.salary < 0 THEN
RAISE EXCEPTION '% cannot have a negative salary', NEW.empname;
END IF;
-- Remember who changed the payroll when
NEW.last_date := current_timestamp;
NEW.last_user := current_user;
RETURN NEW;
END;
$emp_stamp$ LANGUAGE plpgsql;
CREATE TRIGGER emp_stamp BEFORE INSERT OR UPDATE ON emp
FOR EACH ROW EXECUTE PROCEDURE emp_stamp();
DO language plpgsql $$
declare r record;
begin
for r in select * from books
loop
execute 'select ''' || r.title || '''';
end loop;
end
$$;
DO $$
declare r record;
begin
for r in select * from books
loop
execute 'select ''' || r.title || '''';
end loop;
end
$$;
```

427
lexers/testdata/markdown.expected vendored Normal file
View File

@ -0,0 +1,427 @@
[
{"type":"GenericHeading","value":"#"},
{"type":"Text","value":" about\n"},
{"type":"Error","value":"\n"},
{"type":"GenericSubheading","value":"##"},
{"type":"Text","value":" user defined function in cql\n"},
{"type":"Error","value":"\n"},
{"type":"LiteralString","value":"```javascript\n"},
{"type":"Text","value":" "},
{"type":"NameOther","value":"column"},
{"type":"Punctuation","value":"."},
{"type":"NameOther","value":"substring"},
{"type":"Punctuation","value":"("},
{"type":"LiteralNumberInteger","value":"0"},
{"type":"Punctuation","value":","},
{"type":"NameOther","value":"num"},
{"type":"Punctuation","value":")"},
{"type":"Text","value":" \n"},
{"type":"LiteralString","value":"```"},
{"type":"Error","value":"\n\n"},
{"type":"LiteralString","value":"```cql\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"FUNCTION"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"NOT"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"EXISTS"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"cycling"},
{"type":"Punctuation","value":"."},
{"type":"Name","value":"left"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"column"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"TEXT"},
{"type":"Punctuation","value":","},
{"type":"Name","value":"num"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"int"},
{"type":"Punctuation","value":")"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"RETURNS"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordConstant","value":"NULL"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"ON"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordConstant","value":"NULL"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"INPUT"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"RETURNS"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"text"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"LANGUAGE"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"javascript"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"AS"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringHeredoc","value":"$$"},
{"type":"Text","value":" \n "},
{"type":"NameOther","value":"column"},
{"type":"Punctuation","value":"."},
{"type":"NameOther","value":"substring"},
{"type":"Punctuation","value":"("},
{"type":"LiteralNumberInteger","value":"0"},
{"type":"Punctuation","value":","},
{"type":"NameOther","value":"num"},
{"type":"Punctuation","value":")"},
{"type":"Text","value":" \n"},
{"type":"LiteralStringHeredoc","value":"$$"},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"OR"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"REPLACE"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"FUNCTION"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"cycling"},
{"type":"Punctuation","value":"."},
{"type":"Name","value":"fLog"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"("},
{"type":"Keyword","value":"input"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"double"},
{"type":"Punctuation","value":")"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"CALLED"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"ON"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordConstant","value":"NULL"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"INPUT"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":"RETURNS"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"double"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"LANGUAGE"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"java"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"AS"},
{"type":"TextWhitespace","value":"\n"},
{"type":"LiteralStringHeredoc","value":"'"},
{"type":"Keyword","value":"return"},
{"type":"Text","value":" "},
{"type":"Name","value":"Double"},
{"type":"Operator","value":"."},
{"type":"NameAttribute","value":"valueOf"},
{"type":"Operator","value":"("},
{"type":"Name","value":"Math"},
{"type":"Operator","value":"."},
{"type":"NameAttribute","value":"log"},
{"type":"Operator","value":"("},
{"type":"Name","value":"input"},
{"type":"Operator","value":"."},
{"type":"NameAttribute","value":"doubleValue"},
{"type":"Operator","value":"()));"},
{"type":"LiteralStringHeredoc","value":"'"},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":"\n"},
{"type":"LiteralString","value":"```"},
{"type":"Error","value":"\n\n"},
{"type":"LiteralString","value":"```postgres\n"},
{"type":"Keyword","value":"DROP"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"TABLE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"EXISTS"},
{"type":"Text","value":" "},
{"type":"Name","value":"emp"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"CASCADE"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"TABLE"},
{"type":"Text","value":" "},
{"type":"Name","value":"emp"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"("},
{"type":"Text","value":"\n "},
{"type":"Name","value":"empname"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"text"},
{"type":"Punctuation","value":","},
{"type":"Text","value":"\n "},
{"type":"Name","value":"salary"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"integer"},
{"type":"Punctuation","value":","},
{"type":"Text","value":"\n "},
{"type":"Name","value":"last_date"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"timestamp"},
{"type":"Punctuation","value":","},
{"type":"Text","value":"\n "},
{"type":"Name","value":"last_user"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"text"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":");"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"select"},
{"type":"Text","value":" \n "},
{"type":"LiteralStringHeredoc","value":"$my_tag$aoeuaoeu$my_tag$"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"as"},
{"type":"Text","value":" "},
{"type":"Name","value":"blah"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"OR"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"REPLACE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"FUNCTION"},
{"type":"Text","value":" "},
{"type":"Name","value":"emp_stamp"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"RETURNS"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"trigger"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"AS"},
{"type":"Text","value":" "},
{"type":"LiteralStringHeredoc","value":"$emp_stamp$"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"BEGIN"},
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"-- Check that empname and salary are given\n"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"empname"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IS"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"NULL"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"THEN"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"RAISE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"EXCEPTION"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"'empname cannot be null'"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"END"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"IF"},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"salary"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IS"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"NULL"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"THEN"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"RAISE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"EXCEPTION"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"'% cannot have null salary'"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"empname"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"END"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n\n "},
{"type":"CommentSingle","value":"-- Who works for us when she must pay for it?\n"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"salary"},
{"type":"Text","value":" "},
{"type":"Operator","value":"\u003c"},
{"type":"Text","value":" "},
{"type":"LiteralNumberFloat","value":"0"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"THEN"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"RAISE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"EXCEPTION"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"'% cannot have a negative salary'"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"empname"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"END"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"IF"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n\n "},
{"type":"CommentSingle","value":"-- Remember who changed the payroll when\n"},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"last_date"},
{"type":"Text","value":" "},
{"type":"Operator","value":":="},
{"type":"Text","value":" "},
{"type":"Keyword","value":"current_timestamp"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"NEW"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"last_user"},
{"type":"Text","value":" "},
{"type":"Operator","value":":="},
{"type":"Text","value":" "},
{"type":"Keyword","value":"current_user"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"RETURN"},
{"type":"Text","value":" "},
{"type":"Name","value":"NEW"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"END"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"LiteralStringHeredoc","value":"$emp_stamp$"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"LANGUAGE"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"plpgsql"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"CREATE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"TRIGGER"},
{"type":"Text","value":" "},
{"type":"Name","value":"emp_stamp"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"BEFORE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"INSERT"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"OR"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"UPDATE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"ON"},
{"type":"Text","value":" "},
{"type":"Name","value":"emp"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"FOR"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"EACH"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"ROW"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"EXECUTE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"PROCEDURE"},
{"type":"Text","value":" "},
{"type":"Name","value":"emp_stamp"},
{"type":"Punctuation","value":"();"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"DO"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"language"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"plpgsql"},
{"type":"Text","value":" "},
{"type":"LiteralStringHeredoc","value":"$$"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"declare"},
{"type":"Text","value":" "},
{"type":"Name","value":"r"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"record"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"begin"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"for"},
{"type":"Text","value":" "},
{"type":"Name","value":"r"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"in"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"select"},
{"type":"Text","value":" "},
{"type":"Operator","value":"*"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"from"},
{"type":"Text","value":" "},
{"type":"Name","value":"books"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"loop"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"execute"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"'select '''"},
{"type":"Text","value":" "},
{"type":"Operator","value":"||"},
{"type":"Text","value":" "},
{"type":"Name","value":"r"},
{"type":"LiteralNumberFloat","value":"."},
{"type":"Name","value":"title"},
{"type":"Text","value":" "},
{"type":"Operator","value":"||"},
{"type":"Text","value":" "},
{"type":"LiteralStringSingle","value":"''''"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"end"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"loop"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"Keyword","value":"end"},
{"type":"Text","value":"\n"},
{"type":"LiteralStringHeredoc","value":"$$"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"DO"},
{"type":"Text","value":" "},
{"type":"LiteralStringHeredoc","value":"$$\ndeclare r record;\nbegin\n for r in select * from books\nloop\n execute 'select ''' || r.title || '''';\nend loop;\nend\n$$"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"LiteralString","value":"```"},
{"type":"Error","value":"\n"}
]

View File

@ -33,7 +33,7 @@ func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(grou
func ByGroups(emitters ...Emitter) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
iterators := make([]Iterator, 0, len(groups)-1)
// NOTE: If this panics, there is a mismatch with groups. Uncomment the following line to debug.
// NOTE: If this panics, there is a mismatch with groups
for i, group := range groups[1:] {
iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
}
@ -41,6 +41,74 @@ func ByGroups(emitters ...Emitter) Emitter {
})
}
// UsingByGroup returns an Emitter that emits one token stream per matched
// regex group, delegating a single "code" group to a sublexer whose name is
// itself captured by another group. It is intended for code blocks that name
// their own language, such as a fenced Markdown block or an SQL block with a
// LANGUAGE clause.
//
// sublexerGetFunc (typically internal.Get) is called with the text captured
// by group sublexerNameGroup. If it returns a non-nil lexer, the text of
// group codeGroup is tokenised with that lexer. If it returns nil, the code
// group is handled like every other group: by the emitter at the matching
// position in emitters.
//
// Example:
//
//	var Markdown = internal.Register(MustNewLexer(
//		&Config{
//			Name:      "markdown",
//			Aliases:   []string{"md", "mkd"},
//			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
//			MimeTypes: []string{"text/x-markdown"},
//		},
//		Rules{
//			"root": {
//				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
//					UsingByGroup(
//						internal.Get,
//						2, 4,
//						String, String, String, Text, String,
//					),
//					nil,
//				},
//			},
//		},
//	))
//
// See lexers/m/markdown.go for the complete example.
//
// Note: panics if the number of emitters does not equal the number of
// matched groups in the regex, or if the sublexer fails to tokenise the code
// group.
func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
		// groups[0] is the whole match, so exactly one emitter per capture is required.
		if len(emitters) != len(groups)-1 {
			panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
		}

		// Resolve the sublexer from the captured language name; may be nil.
		sub := sublexerGetFunc(groups[sublexerNameGroup])

		captures := groups[1:]
		parts := make([]Iterator, 0, len(captures))
		for idx, text := range captures {
			// Every group except the code group (and the code group too, when no
			// sublexer is available) goes through its positional emitter.
			if sub == nil || idx+1 != codeGroup {
				parts = append(parts, emitters[idx].Emit([]string{text}, lexer))
				continue
			}

			tokens, err := sub.Tokenise(nil, text)
			if err != nil {
				panic(err)
			}
			parts = append(parts, tokens)
		}
		return Concaterator(parts...)
	})
}
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
func Using(lexer Lexer) Emitter {
return EmitterFunc(func(groups []string, _ Lexer) Iterator {