diff --git a/lexers/c/cql.go b/lexers/c/cql.go index 1126916..0d460b9 100644 --- a/lexers/c/cql.go +++ b/lexers/c/cql.go @@ -23,6 +23,15 @@ var CassandraCQL = internal.Register(MustNewLexer( {`(ascii|bigint|blob|boolean|counter|date|decimal|double|float|frozen|inet|int|list|map|set|smallint|text|time|timestamp|timeuuid|tinyint|tuple|uuid|varchar|varint)\b`, NameBuiltin, nil}, {Words(``, `\b`, `ADD`, `AGGREGATE`, `ALL`, `ALLOW`, `ALTER`, `AND`, `ANY`, `APPLY`, `AS`, `ASC`, `AUTHORIZE`, `BATCH`, `BEGIN`, `BY`, `CLUSTERING`, `COLUMNFAMILY`, `COMPACT`, `CONSISTENCY`, `COUNT`, `CREATE`, `CUSTOM`, `DELETE`, `DESC`, `DISTINCT`, `DROP`, `EACH_QUORUM`, `ENTRIES`, `EXISTS`, `FILTERING`, `FROM`, `FULL`, `GRANT`, `IF`, `IN`, `INDEX`, `INFINITY`, `INSERT`, `INTO`, `KEY`, `KEYS`, `KEYSPACE`, `KEYSPACES`, `LEVEL`, `LIMIT`, `LOCAL_ONE`, `LOCAL_QUORUM`, `MATERIALIZED`, `MODIFY`, `NAN`, `NORECURSIVE`, `NOSUPERUSER`, `NOT`, `OF`, `ON`, `ONE`, `ORDER`, `PARTITION`, `PASSWORD`, `PER`, `PERMISSION`, `PERMISSIONS`, `PRIMARY`, `QUORUM`, `RENAME`, `REVOKE`, `SCHEMA`, `SELECT`, `STATIC`, `STORAGE`, `SUPERUSER`, `TABLE`, `THREE`, `TO`, `TOKEN`, `TRUNCATE`, `TTL`, `TWO`, `TYPE`, `UNLOGGED`, `UPDATE`, `USE`, `USER`, `USERS`, `USING`, `VALUES`, `VIEW`, `WHERE`, `WITH`, `WRITETIME`, `REPLICATION`, `OR`, `REPLACE`, `FUNCTION`, `CALLED`, `INPUT`, `RETURNS`, `LANGUAGE`, `ROLE`, `ROLES`, `TRIGGER`, `DURABLE_WRITES`, `LOGIN`, `OPTIONS`, `LOGGED`, `SFUNC`, `STYPE`, `FINALFUNC`, `INITCOND`, `IS`, `CONTAINS`, `JSON`, `PAGING`, `OFF`), Keyword, nil}, {"[+*/<>=~!@#%^&|`?-]+", Operator, nil}, + {`(?s)(java|javascript)(\s+)(AS)(\s+)('|\$\$)(.*?)(\5)`, + UsingByGroup( + internal.Get, + 1, 6, + NameBuiltin, TextWhitespace, Keyword, TextWhitespace, + LiteralStringHeredoc, LiteralStringHeredoc, LiteralStringHeredoc, + ), + nil, + }, {`(true|false|null)\b`, KeywordConstant, nil}, {`0x[0-9a-f]+`, LiteralNumberHex, nil}, {`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`, LiteralNumberHex, nil}, diff 
--git a/lexers/m/markdown.go b/lexers/m/markdown.go index 753f986..b4690e3 100644 --- a/lexers/m/markdown.go +++ b/lexers/m/markdown.go @@ -21,8 +21,15 @@ var Markdown = internal.Register(MustNewLexer( {`^(\s*)([*-])(\s)(.+\n)`, ByGroups(Text, Keyword, Text, UsingSelf("inline")), nil}, {`^(\s*)([0-9]+\.)( .+\n)`, ByGroups(Text, Keyword, UsingSelf("inline")), nil}, {`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil}, - {"^(```\\n)([\\w\\W]*?)(^```$)", ByGroups(LiteralString, Text, LiteralString), nil}, - {"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)", EmitterFunc(markdownCodeBlock), nil}, + {"^(```\\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil}, + {"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)", + UsingByGroup( + internal.Get, + 2, 4, + String, String, String, Text, String, + ), + nil, + }, Include("inline"), }, "inline": { @@ -38,26 +45,3 @@ var Markdown = internal.Register(MustNewLexer( }, }, )) - -func markdownCodeBlock(groups []string, lexer Lexer) Iterator { - iterators := []Iterator{} - tokens := []*Token{ - {String, groups[1]}, - {String, groups[2]}, - {Text, groups[3]}, - } - code := groups[4] - lexer = internal.Get(groups[2]) - if lexer == nil { - tokens = append(tokens, &Token{String, code}) - iterators = append(iterators, Literator(tokens...)) - } else { - sub, err := lexer.Tokenise(nil, code) - if err != nil { - panic(err) - } - iterators = append(iterators, Literator(tokens...), sub) - } - iterators = append(iterators, Literator(&Token{String, groups[5]})) - return Concaterator(iterators...) 
-} diff --git a/lexers/p/postgres.go b/lexers/p/postgres.go index 3564330..3afa54d 100644 --- a/lexers/p/postgres.go +++ b/lexers/p/postgres.go @@ -21,6 +21,18 @@ var PostgreSQL = internal.Register(MustNewLexer( {`--.*\n?`, CommentSingle, nil}, {`/\*`, CommentMultiline, Push("multiline-comments")}, {`(bigint|bigserial|bit|bit\s+varying|bool|boolean|box|bytea|char|character|character\s+varying|cidr|circle|date|decimal|double\s+precision|float4|float8|inet|int|int2|int4|int8|integer|interval|json|jsonb|line|lseg|macaddr|money|numeric|path|pg_lsn|point|polygon|real|serial|serial2|serial4|serial8|smallint|smallserial|text|time|timestamp|timestamptz|timetz|tsquery|tsvector|txid_snapshot|uuid|varbit|varchar|with\s+time\s+zone|without\s+time\s+zone|xml|anyarray|anyelement|anyenum|anynonarray|anyrange|cstring|fdw_handler|internal|language_handler|opaque|record|void)\b`, NameBuiltin, nil}, + {`(?s)(DO)(\s+)(?:(LANGUAGE)?(\s+)('?)(\w+)?('?)(\s+))?(\$)([^$]*)(\$)(.*?)(\$)(\10)(\$)`, + UsingByGroup( + internal.Get, + 6, 12, + Keyword, Text, Keyword, Text, // DO LANGUAGE + StringSingle, StringSingle, StringSingle, Text, // 'plpgsql' + StringHeredoc, StringHeredoc, StringHeredoc, // $tag$ + StringHeredoc, // (code block) + StringHeredoc, StringHeredoc, StringHeredoc, // $tag$ + ), + nil, + }, {Words(``, `\b`, `ABORT`, `ABSOLUTE`, `ACCESS`, `ACTION`, `ADD`, `ADMIN`, `AFTER`, `AGGREGATE`, `ALL`, `ALSO`, `ALTER`, `ALWAYS`, `ANALYSE`, `ANALYZE`, `AND`, `ANY`, `ARRAY`, `AS`, `ASC`, `ASSERTION`, `ASSIGNMENT`, `ASYMMETRIC`, `AT`, `ATTRIBUTE`, `AUTHORIZATION`, `BACKWARD`, `BEFORE`, `BEGIN`, `BETWEEN`, `BIGINT`, `BINARY`, `BIT`, `BOOLEAN`, `BOTH`, `BY`, `CACHE`, `CALLED`, `CASCADE`, `CASCADED`, `CASE`, `CAST`, `CATALOG`, `CHAIN`, `CHAR`, `CHARACTER`, `CHARACTERISTICS`, `CHECK`, `CHECKPOINT`, `CLASS`, `CLOSE`, `CLUSTER`, `COALESCE`, `COLLATE`, `COLLATION`, `COLUMN`, `COMMENT`, `COMMENTS`, `COMMIT`, `COMMITTED`, `CONCURRENTLY`, `CONFIGURATION`, `CONNECTION`, `CONSTRAINT`, `CONSTRAINTS`, 
`CONTENT`, `CONTINUE`, `CONVERSION`, `COPY`, `COST`, `CREATE`, `CROSS`, `CSV`, `CURRENT`, `CURRENT_CATALOG`, `CURRENT_DATE`, `CURRENT_ROLE`, `CURRENT_SCHEMA`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `CURRENT_USER`, `CURSOR`, `CYCLE`, `DATA`, `DATABASE`, `DAY`, `DEALLOCATE`, `DEC`, `DECIMAL`, `DECLARE`, `DEFAULT`, `DEFAULTS`, `DEFERRABLE`, `DEFERRED`, `DEFINER`, `DELETE`, `DELIMITER`, `DELIMITERS`, `DESC`, `DICTIONARY`, `DISABLE`, `DISCARD`, `DISTINCT`, `DO`, `DOCUMENT`, `DOMAIN`, `DOUBLE`, `DROP`, `EACH`, `ELSE`, `ENABLE`, `ENCODING`, `ENCRYPTED`, `END`, `ENUM`, `ESCAPE`, `EVENT`, `EXCEPT`, `EXCLUDE`, `EXCLUDING`, `EXCLUSIVE`, `EXECUTE`, `EXISTS`, `EXPLAIN`, `EXTENSION`, `EXTERNAL`, `EXTRACT`, `FALSE`, `FAMILY`, `FETCH`, `FILTER`, `FIRST`, `FLOAT`, `FOLLOWING`, `FOR`, `FORCE`, `FOREIGN`, `FORWARD`, `FREEZE`, `FROM`, `FULL`, `FUNCTION`, `FUNCTIONS`, `GLOBAL`, `GRANT`, `GRANTED`, `GREATEST`, `GROUP`, `HANDLER`, `HAVING`, `HEADER`, `HOLD`, `HOUR`, `IDENTITY`, `IF`, `ILIKE`, `IMMEDIATE`, `IMMUTABLE`, `IMPLICIT`, `IN`, `INCLUDING`, `INCREMENT`, `INDEX`, `INDEXES`, `INHERIT`, `INHERITS`, `INITIALLY`, `INLINE`, `INNER`, `INOUT`, `INPUT`, `INSENSITIVE`, `INSERT`, `INSTEAD`, `INT`, `INTEGER`, `INTERSECT`, `INTERVAL`, `INTO`, `INVOKER`, `IS`, `ISNULL`, `ISOLATION`, `JOIN`, `KEY`, `LABEL`, `LANGUAGE`, `LARGE`, `LAST`, `LATERAL`, `LC_COLLATE`, `LC_CTYPE`, `LEADING`, `LEAKPROOF`, `LEAST`, `LEFT`, `LEVEL`, `LIKE`, `LIMIT`, `LISTEN`, `LOAD`, `LOCAL`, `LOCALTIME`, `LOCALTIMESTAMP`, `LOCATION`, `LOCK`, `MAPPING`, `MATCH`, `MATERIALIZED`, `MAXVALUE`, `MINUTE`, `MINVALUE`, `MODE`, `MONTH`, `MOVE`, `NAME`, `NAMES`, `NATIONAL`, `NATURAL`, `NCHAR`, `NEXT`, `NO`, `NONE`, `NOT`, `NOTHING`, `NOTIFY`, `NOTNULL`, `NOWAIT`, `NULL`, `NULLIF`, `NULLS`, `NUMERIC`, `OBJECT`, `OF`, `OFF`, `OFFSET`, `OIDS`, `ON`, `ONLY`, `OPERATOR`, `OPTION`, `OPTIONS`, `OR`, `ORDER`, `ORDINALITY`, `OUT`, `OUTER`, `OVER`, `OVERLAPS`, `OVERLAY`, `OWNED`, `OWNER`, `PARSER`, `PARTIAL`, `PARTITION`, `PASSING`, `PASSWORD`, 
`PLACING`, `PLANS`, `POLICY`, `POSITION`, `PRECEDING`, `PRECISION`, `PREPARE`, `PREPARED`, `PRESERVE`, `PRIMARY`, `PRIOR`, `PRIVILEGES`, `PROCEDURAL`, `PROCEDURE`, `PROGRAM`, `QUOTE`, `RANGE`, `READ`, `REAL`, `REASSIGN`, `RECHECK`, `RECURSIVE`, `REF`, `REFERENCES`, `REFRESH`, `REINDEX`, `RELATIVE`, `RELEASE`, `RENAME`, `REPEATABLE`, `REPLACE`, `REPLICA`, `RESET`, `RESTART`, `RESTRICT`, `RETURNING`, `RETURNS`, `REVOKE`, `RIGHT`, `ROLE`, `ROLLBACK`, `ROW`, `ROWS`, `RULE`, `SAVEPOINT`, `SCHEMA`, `SCROLL`, `SEARCH`, `SECOND`, `SECURITY`, `SELECT`, `SEQUENCE`, `SEQUENCES`, `SERIALIZABLE`, `SERVER`, `SESSION`, `SESSION_USER`, `SET`, `SETOF`, `SHARE`, `SHOW`, `SIMILAR`, `SIMPLE`, `SMALLINT`, `SNAPSHOT`, `SOME`, `STABLE`, `STANDALONE`, `START`, `STATEMENT`, `STATISTICS`, `STDIN`, `STDOUT`, `STORAGE`, `STRICT`, `STRIP`, `SUBSTRING`, `SYMMETRIC`, `SYSID`, `SYSTEM`, `TABLE`, `TABLES`, `TABLESPACE`, `TEMP`, `TEMPLATE`, `TEMPORARY`, `TEXT`, `THEN`, `TIME`, `TIMESTAMP`, `TO`, `TRAILING`, `TRANSACTION`, `TREAT`, `TRIGGER`, `TRIM`, `TRUE`, `TRUNCATE`, `TRUSTED`, `TYPE`, `TYPES`, `UNBOUNDED`, `UNCOMMITTED`, `UNENCRYPTED`, `UNION`, `UNIQUE`, `UNKNOWN`, `UNLISTEN`, `UNLOGGED`, `UNTIL`, `UPDATE`, `USER`, `USING`, `VACUUM`, `VALID`, `VALIDATE`, `VALIDATOR`, `VALUE`, `VALUES`, `VARCHAR`, `VARIADIC`, `VARYING`, `VERBOSE`, `VERSION`, `VIEW`, `VIEWS`, `VOLATILE`, `WHEN`, `WHERE`, `WHITESPACE`, `WINDOW`, `WITH`, `WITHIN`, `WITHOUT`, `WORK`, `WRAPPER`, `WRITE`, `XML`, `XMLATTRIBUTES`, `XMLCONCAT`, `XMLELEMENT`, `XMLEXISTS`, `XMLFOREST`, `XMLPARSE`, `XMLPI`, `XMLROOT`, `XMLSERIALIZE`, `YEAR`, `YES`, `ZONE`), Keyword, nil}, {"[+*/<>=~!@#%^&|`?-]+", Operator, nil}, {`::`, Operator, nil}, @@ -29,7 +41,18 @@ var PostgreSQL = internal.Register(MustNewLexer( {`[0-9]+`, LiteralNumberInteger, nil}, {`((?:E|U&)?)(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("string")}, {`((?:U&)?)(")`, ByGroups(LiteralStringAffix, LiteralStringName), Push("quoted-ident")}, - // { 
`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)`, ?? ??, nil }, + {`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)(\s+)(LANGUAGE)?(\s+)('?)(\w+)?('?)`, + UsingByGroup(internal.Get, + 12, 4, + StringHeredoc, StringHeredoc, StringHeredoc, // $tag$ + StringHeredoc, // (code block) + StringHeredoc, StringHeredoc, StringHeredoc, // $tag$ + Text, Keyword, Text, // LANGUAGE + StringSingle, StringSingle, StringSingle, // 'type' + ), + nil, + }, + {`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)`, LiteralStringHeredoc, nil}, {`[a-z_]\w*`, Name, nil}, {`:(['"]?)[a-z]\w*\b\1`, NameVariable, nil}, {`[;:()\[\]{},.]`, Punctuation, nil}, diff --git a/lexers/testdata/cql.actual b/lexers/testdata/cql.actual index 7925ecb..727171d 100644 --- a/lexers/testdata/cql.actual +++ b/lexers/testdata/cql.actual @@ -54,3 +54,15 @@ CREATE MATERIALIZED VIEW cyclist_by_country AS SELECT age, birthday, name, count CREATE MATERIALIZED VIEW cyclist_by_birthday AS SELECT age, birthday, name, country FROM cyclist_mv WHERE birthday is NOT NULL AND cid IS NOT NULL PRIMARY KEY (birthday, cid); DROP MATERIALIZED VIEW cyclist_by_age; INSERT INTO cycling.calendar (race_id, race_name, race_start_date, race_end_date) VALUES (200, 'placeholder', '2015-05-27', '2015-05-27') USING TIMESTAMP 123456789; + +CREATE FUNCTION IF NOT EXISTS cycling.left (column TEXT,num int) +RETURNS NULL ON NULL INPUT +RETURNS text +LANGUAGE javascript AS $$ + column.substring(0,num) +$$; + +CREATE OR REPLACE FUNCTION cycling.fLog (input double) +CALLED ON NULL INPUT +RETURNS double LANGUAGE java AS +'return Double.valueOf(Math.log(input.doubleValue()));'; diff --git a/lexers/testdata/cql.expected b/lexers/testdata/cql.expected index 17da6ba..bc5f849 100644 --- a/lexers/testdata/cql.expected +++ b/lexers/testdata/cql.expected @@ -969,5 +969,117 @@ {"type":"TextWhitespace","value":" "}, {"type":"LiteralNumberFloat","value":"123456789"}, {"type":"Punctuation","value":";"}, + {"type":"TextWhitespace","value":"\n\n"}, + {"type":"Keyword","value":"CREATE"}, + 
{"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"FUNCTION"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"NOT"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"EXISTS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Name","value":"cycling"}, + {"type":"Punctuation","value":"."}, + {"type":"Name","value":"left"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"("}, + {"type":"Name","value":"column"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"TEXT"}, + {"type":"Punctuation","value":","}, + {"type":"Name","value":"num"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"int"}, + {"type":"Punctuation","value":")"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"KeywordConstant","value":"NULL"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"ON"}, + {"type":"TextWhitespace","value":" "}, + {"type":"KeywordConstant","value":"NULL"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"INPUT"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"text"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"LANGUAGE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"javascript"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"AS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"LiteralStringHeredoc","value":"$$"}, + {"type":"Text","value":" \n "}, + {"type":"NameOther","value":"column"}, + {"type":"Punctuation","value":"."}, + {"type":"NameOther","value":"substring"}, + {"type":"Punctuation","value":"("}, + 
{"type":"LiteralNumberInteger","value":"0"}, + {"type":"Punctuation","value":","}, + {"type":"NameOther","value":"num"}, + {"type":"Punctuation","value":")"}, + {"type":"Text","value":" \n"}, + {"type":"LiteralStringHeredoc","value":"$$"}, + {"type":"Punctuation","value":";"}, + {"type":"TextWhitespace","value":"\n\n"}, + {"type":"Keyword","value":"CREATE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"OR"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"REPLACE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"FUNCTION"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Name","value":"cycling"}, + {"type":"Punctuation","value":"."}, + {"type":"Name","value":"fLog"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"("}, + {"type":"Keyword","value":"input"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"double"}, + {"type":"Punctuation","value":")"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"CALLED"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"ON"}, + {"type":"TextWhitespace","value":" "}, + {"type":"KeywordConstant","value":"NULL"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"INPUT"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"double"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"LANGUAGE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"java"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"AS"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"LiteralStringHeredoc","value":"'"}, + {"type":"Keyword","value":"return"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"Double"}, + {"type":"Operator","value":"."}, + {"type":"NameAttribute","value":"valueOf"}, + 
{"type":"Operator","value":"("}, + {"type":"Name","value":"Math"}, + {"type":"Operator","value":"."}, + {"type":"NameAttribute","value":"log"}, + {"type":"Operator","value":"("}, + {"type":"Name","value":"input"}, + {"type":"Operator","value":"."}, + {"type":"NameAttribute","value":"doubleValue"}, + {"type":"Operator","value":"()));"}, + {"type":"LiteralStringHeredoc","value":"'"}, + {"type":"Punctuation","value":";"}, {"type":"TextWhitespace","value":"\n"} ] diff --git a/lexers/testdata/markdown.actual b/lexers/testdata/markdown.actual new file mode 100644 index 0000000..3fc0a50 --- /dev/null +++ b/lexers/testdata/markdown.actual @@ -0,0 +1,81 @@ +# about + +## user defined function in cql + +```javascript + column.substring(0,num) +``` + +```cql +CREATE FUNCTION IF NOT EXISTS cycling.left (column TEXT,num int) +RETURNS NULL ON NULL INPUT +RETURNS text +LANGUAGE javascript AS $$ + column.substring(0,num) +$$; + +CREATE OR REPLACE FUNCTION cycling.fLog (input double) +CALLED ON NULL INPUT +RETURNS double LANGUAGE java AS +'return Double.valueOf(Math.log(input.doubleValue()));'; +``` + +```postgres +DROP TABLE IF EXISTS emp CASCADE; + +CREATE TABLE emp ( + empname text, + salary integer, + last_date timestamp, + last_user text +); + +select + $my_tag$aoeuaoeu$my_tag$ as blah +; + +CREATE OR REPLACE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$ +BEGIN + -- Check that empname and salary are given + IF NEW.empname IS NULL THEN + RAISE EXCEPTION 'empname cannot be null'; + END IF; + IF NEW.salary IS NULL THEN + RAISE EXCEPTION '% cannot have null salary', NEW.empname; + END IF; + + -- Who works for us when she must pay for it? 
+ IF NEW.salary < 0 THEN + RAISE EXCEPTION '% cannot have a negative salary', NEW.empname; + END IF; + + -- Remember who changed the payroll when + NEW.last_date := current_timestamp; + NEW.last_user := current_user; + RETURN NEW; +END; +$emp_stamp$ LANGUAGE plpgsql; + +CREATE TRIGGER emp_stamp BEFORE INSERT OR UPDATE ON emp + FOR EACH ROW EXECUTE PROCEDURE emp_stamp(); + +DO language plpgsql $$ +declare r record; +begin + for r in select * from books +loop + execute 'select ''' || r.title || ''''; +end loop; +end +$$; + +DO $$ +declare r record; +begin + for r in select * from books +loop + execute 'select ''' || r.title || ''''; +end loop; +end +$$; +``` diff --git a/lexers/testdata/markdown.expected b/lexers/testdata/markdown.expected new file mode 100644 index 0000000..2b5aa84 --- /dev/null +++ b/lexers/testdata/markdown.expected @@ -0,0 +1,427 @@ +[ + {"type":"GenericHeading","value":"#"}, + {"type":"Text","value":" about\n"}, + {"type":"Error","value":"\n"}, + {"type":"GenericSubheading","value":"##"}, + {"type":"Text","value":" user defined function in cql\n"}, + {"type":"Error","value":"\n"}, + {"type":"LiteralString","value":"```javascript\n"}, + {"type":"Text","value":" "}, + {"type":"NameOther","value":"column"}, + {"type":"Punctuation","value":"."}, + {"type":"NameOther","value":"substring"}, + {"type":"Punctuation","value":"("}, + {"type":"LiteralNumberInteger","value":"0"}, + {"type":"Punctuation","value":","}, + {"type":"NameOther","value":"num"}, + {"type":"Punctuation","value":")"}, + {"type":"Text","value":" \n"}, + {"type":"LiteralString","value":"```"}, + {"type":"Error","value":"\n\n"}, + {"type":"LiteralString","value":"```cql\n"}, + {"type":"Keyword","value":"CREATE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"FUNCTION"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"NOT"}, + {"type":"TextWhitespace","value":" "}, + 
{"type":"Keyword","value":"EXISTS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Name","value":"cycling"}, + {"type":"Punctuation","value":"."}, + {"type":"Name","value":"left"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"("}, + {"type":"Name","value":"column"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"TEXT"}, + {"type":"Punctuation","value":","}, + {"type":"Name","value":"num"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"int"}, + {"type":"Punctuation","value":")"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"KeywordConstant","value":"NULL"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"ON"}, + {"type":"TextWhitespace","value":" "}, + {"type":"KeywordConstant","value":"NULL"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"INPUT"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"text"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"LANGUAGE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"javascript"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"AS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"LiteralStringHeredoc","value":"$$"}, + {"type":"Text","value":" \n "}, + {"type":"NameOther","value":"column"}, + {"type":"Punctuation","value":"."}, + {"type":"NameOther","value":"substring"}, + {"type":"Punctuation","value":"("}, + {"type":"LiteralNumberInteger","value":"0"}, + {"type":"Punctuation","value":","}, + {"type":"NameOther","value":"num"}, + {"type":"Punctuation","value":")"}, + {"type":"Text","value":" \n"}, + {"type":"LiteralStringHeredoc","value":"$$"}, + {"type":"Punctuation","value":";"}, + 
{"type":"TextWhitespace","value":"\n\n"}, + {"type":"Keyword","value":"CREATE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"OR"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"REPLACE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"FUNCTION"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Name","value":"cycling"}, + {"type":"Punctuation","value":"."}, + {"type":"Name","value":"fLog"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"("}, + {"type":"Keyword","value":"input"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"double"}, + {"type":"Punctuation","value":")"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"CALLED"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"ON"}, + {"type":"TextWhitespace","value":" "}, + {"type":"KeywordConstant","value":"NULL"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"INPUT"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"double"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"LANGUAGE"}, + {"type":"TextWhitespace","value":" "}, + {"type":"NameBuiltin","value":"java"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Keyword","value":"AS"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"LiteralStringHeredoc","value":"'"}, + {"type":"Keyword","value":"return"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"Double"}, + {"type":"Operator","value":"."}, + {"type":"NameAttribute","value":"valueOf"}, + {"type":"Operator","value":"("}, + {"type":"Name","value":"Math"}, + {"type":"Operator","value":"."}, + {"type":"NameAttribute","value":"log"}, + {"type":"Operator","value":"("}, + {"type":"Name","value":"input"}, + {"type":"Operator","value":"."}, + 
{"type":"NameAttribute","value":"doubleValue"}, + {"type":"Operator","value":"()));"}, + {"type":"LiteralStringHeredoc","value":"'"}, + {"type":"Punctuation","value":";"}, + {"type":"TextWhitespace","value":"\n"}, + {"type":"LiteralString","value":"```"}, + {"type":"Error","value":"\n\n"}, + {"type":"LiteralString","value":"```postgres\n"}, + {"type":"Keyword","value":"DROP"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"TABLE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"EXISTS"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"emp"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"CASCADE"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"CREATE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"TABLE"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"emp"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"("}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"empname"}, + {"type":"Text","value":" "}, + {"type":"NameBuiltin","value":"text"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"salary"}, + {"type":"Text","value":" "}, + {"type":"NameBuiltin","value":"integer"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"last_date"}, + {"type":"Text","value":" "}, + {"type":"NameBuiltin","value":"timestamp"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"last_user"}, + {"type":"Text","value":" "}, + {"type":"NameBuiltin","value":"text"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":");"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"select"}, + {"type":"Text","value":" \n "}, + {"type":"LiteralStringHeredoc","value":"$my_tag$aoeuaoeu$my_tag$"}, + 
{"type":"Text","value":" "}, + {"type":"Keyword","value":"as"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"blah"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"CREATE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"OR"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"REPLACE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"FUNCTION"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"emp_stamp"}, + {"type":"Punctuation","value":"()"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"RETURNS"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"trigger"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"AS"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringHeredoc","value":"$emp_stamp$"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"BEGIN"}, + {"type":"Text","value":"\n "}, + {"type":"CommentSingle","value":"-- Check that empname and salary are given\n"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"empname"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IS"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"NULL"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"THEN"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"RAISE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"EXCEPTION"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"'empname cannot be null'"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"END"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + 
{"type":"Keyword","value":"IF"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"salary"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IS"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"NULL"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"THEN"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"RAISE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"EXCEPTION"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"'% cannot have null salary'"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"empname"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"END"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n "}, + {"type":"CommentSingle","value":"-- Who works for us when she must pay for it?\n"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"salary"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"\u003c"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberFloat","value":"0"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"THEN"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"RAISE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"EXCEPTION"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"'% cannot have a negative salary'"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + 
{"type":"Name","value":"empname"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"END"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"IF"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n "}, + {"type":"CommentSingle","value":"-- Remember who changed the payroll when\n"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"last_date"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":":="}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"current_timestamp"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"NEW"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"last_user"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":":="}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"current_user"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"RETURN"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"NEW"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"END"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"LiteralStringHeredoc","value":"$emp_stamp$"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"LANGUAGE"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"plpgsql"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"CREATE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"TRIGGER"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"emp_stamp"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"BEFORE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"INSERT"}, + {"type":"Text","value":" "}, + 
{"type":"Keyword","value":"OR"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"UPDATE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"ON"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"emp"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"FOR"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"EACH"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"ROW"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"EXECUTE"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"PROCEDURE"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"emp_stamp"}, + {"type":"Punctuation","value":"();"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"DO"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"language"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"plpgsql"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringHeredoc","value":"$$"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"declare"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"r"}, + {"type":"Text","value":" "}, + {"type":"NameBuiltin","value":"record"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"begin"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"for"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"r"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"in"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"select"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"*"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"from"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"books"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"loop"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"execute"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"'select 
'''"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"||"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"r"}, + {"type":"LiteralNumberFloat","value":"."}, + {"type":"Name","value":"title"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"||"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringSingle","value":"''''"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"end"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"loop"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"end"}, + {"type":"Text","value":"\n"}, + {"type":"LiteralStringHeredoc","value":"$$"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"DO"}, + {"type":"Text","value":" "}, + {"type":"LiteralStringHeredoc","value":"$$\ndeclare r record;\nbegin\n for r in select * from books\nloop\n execute 'select ''' || r.title || '''';\nend loop;\nend\n$$"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"LiteralString","value":"```"}, + {"type":"Error","value":"\n"} +] diff --git a/regexp.go b/regexp.go index 75fb5d0..7c0586b 100644 --- a/regexp.go +++ b/regexp.go @@ -33,7 +33,7 @@ func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(grou func ByGroups(emitters ...Emitter) Emitter { return EmitterFunc(func(groups []string, lexer Lexer) Iterator { iterators := make([]Iterator, 0, len(groups)-1) - // NOTE: If this panics, there is a mismatch with groups. Uncomment the following line to debug. + // NOTE: If this panics, there is a mismatch with groups for i, group := range groups[1:] { iterators = append(iterators, emitters[i].Emit([]string{group}, lexer)) } @@ -41,6 +41,74 @@ func ByGroups(emitters ...Emitter) Emitter { }) } +// UsingByGroup emits tokens for the matched groups in the regex using a +// "sublexer". 
Used when lexing code blocks where the name of a sublexer is +// contained within the block, for example on a Markdown text block or SQL +// language block. +// +// The sublexer will be retrieved using sublexerGetFunc (typically +// internal.Get), using the captured value from the matched sublexerNameGroup. +// +// If sublexerGetFunc returns a non-nil lexer for the captured sublexerNameGroup, +// then tokens for the matched codeGroup will be emitted using the retrieved +// lexer. Otherwise, if the sublexer is nil, then tokens will be emitted from +// the passed emitter. +// +// Example: +// +// var Markdown = internal.Register(MustNewLexer( +// &Config{ +// Name: "markdown", +// Aliases: []string{"md", "mkd"}, +// Filenames: []string{"*.md", "*.mkd", "*.markdown"}, +// MimeTypes: []string{"text/x-markdown"}, +// }, +// Rules{ +// "root": { +// {"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)", +// UsingByGroup( +// internal.Get, +// 2, 4, +// String, String, String, Text, String, +// ), +// nil, +// }, +// }, +// }, +// )) +// +// See lexers/m/markdown.go for the complete example. +// +// Note: panics if the number of emitters does not equal the number of matched +// groups in the regex. +func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter { + return EmitterFunc(func(groups []string, lexer Lexer) Iterator { + // bounds check + if len(emitters) != len(groups)-1 { + panic("UsingByGroup expects number of emitters to be the same as len(groups)-1") + } + + // grab sublexer + sublexer := sublexerGetFunc(groups[sublexerNameGroup]) + + // build iterators + iterators := make([]Iterator, len(groups)-1) + for i, group := range groups[1:] { + if i == codeGroup-1 && sublexer != nil { + var err error + iterators[i], err = sublexer.Tokenise(nil, groups[codeGroup]) + if err != nil { + panic(err) + } + } else { + iterators[i] = emitters[i].Emit([]string{group}, lexer) + } + } + + return Concaterator(iterators...) 
+ }) +} + // Using returns an Emitter that uses a given Lexer for parsing and emitting. func Using(lexer Lexer) Emitter { return EmitterFunc(func(groups []string, _ Lexer) Iterator {