1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-03-17 20:58:08 +02:00
chroma/lexers/sql.go
Kenneth Shaw 9401ea963f Expand single and double quote string lexing in mysql, sql, and transactsql lexers
This PR converts the simple string lexing in the mysql, sql, and
transactsql lexers to an expanded node, bringing their behavior in line
with the postgres lexer.

The rationale for this change is that currently, when using Chroma to
lex a partial SQL query with these three lexers (for example, while in
the middle of writing a database query), a string will not be matched by
the mysql, sql, and transactsql lexers until the closing single or
double-quote is encountered.

This behavior can be seen by running the following in a terminal:

    $ echo "select 'aoeu 0x9" |chroma -l sql
    $ echo "select 'aoeu 0x9'" |chroma -l sql

With the mysql, sql, and transactsql lexers, the above two lines will
have dramatically different output. Comparatively, if using the postgres
lexer instead of the sql lexer, the above will output both lines
identically (excluding the closing quote).

This change will make for a better and more consistent user experience
in any package using chroma to highlight partial, incomplete, or
incorrect SQL, and is arguably "more correct", as all SQL lexers will
now behave the same (at least with regard to single/double-quoted
strings or quoted identifiers).
2017-11-24 13:44:34 +11:00

49 lines
7.2 KiB
Go

package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Sql lexer.
//
// Sql is registered under the name "SQL" (alias "sql") for "*.sql" files and
// the "text/x-sql" MIME type. All patterns are matched case-insensitively
// (see CaseInsensitive in the Config below).
//
// States:
//   - "root": whitespace, comments, string openers, keywords, builtin type
//     names, operators, integers, identifiers and punctuation.
//   - "multiline-comments": body of a /* ... */ comment; the state pushes
//     itself on a nested "/*" and pops on "*/".
//   - "string" / "double-string": body of a single- or double-quoted literal.
//     The opening quote is emitted as its own token and the body is lexed in
//     a separate state, so text following an opening quote is tokenized as
//     string content even when no closing quote has been seen yet.
var Sql = Register(MustNewLexer(
	&Config{
		Name:            "SQL",
		Aliases:         []string{"sql"},
		Filenames:       []string{"*.sql"},
		MimeTypes:       []string{"text/x-sql"},
		NotMultiline:    true,
		CaseInsensitive: true,
	},
	Rules{
		"root": {
			{`\s+`, Text, nil},
			// Single-line comment: "--" through the end of the line.
			{`--.*\n?`, CommentSingle, nil},
			{`/\*`, CommentMultiline, Push("multiline-comments")},
			// Opening quotes switch to a dedicated state for the literal body.
			{`'`, LiteralStringSingle, Push("string")},
			{`"`, LiteralStringDouble, Push("double-string")},
			// Keywords, one initial letter per line. Every entry must be
			// followed by a word boundary (the `\b` suffix).
			{Words(``, `\b`,
				`ABORT`, `ABS`, `ABSOLUTE`, `ACCESS`, `ADA`, `ADD`, `ADMIN`, `AFTER`, `AGGREGATE`, `ALIAS`, `ALL`, `ALLOCATE`, `ALTER`, `ANALYSE`, `ANALYZE`, `AND`, `ANY`, `ARE`, `AS`, `ASC`, `ASENSITIVE`, `ASSERTION`, `ASSIGNMENT`, `ASYMMETRIC`, `AT`, `ATOMIC`, `AUTHORIZATION`, `AVG`,
				`BACKWARD`, `BEFORE`, `BEGIN`, `BETWEEN`, `BITVAR`, `BIT_LENGTH`, `BOTH`, `BREADTH`, `BY`,
				`C`, `CACHE`, `CALL`, `CALLED`, `CARDINALITY`, `CASCADE`, `CASCADED`, `CASE`, `CAST`, `CATALOG`, `CATALOG_NAME`, `CHAIN`, `CHARACTERISTICS`, `CHARACTER_LENGTH`, `CHARACTER_SET_CATALOG`, `CHARACTER_SET_NAME`, `CHARACTER_SET_SCHEMA`, `CHAR_LENGTH`, `CHECK`, `CHECKED`, `CHECKPOINT`, `CLASS`, `CLASS_ORIGIN`, `CLOB`, `CLOSE`, `CLUSTER`, `COALESCE`, `COBOL`, `COLLATE`, `COLLATION`, `COLLATION_CATALOG`, `COLLATION_NAME`, `COLLATION_SCHEMA`, `COLUMN`, `COLUMN_NAME`, `COMMAND_FUNCTION`, `COMMAND_FUNCTION_CODE`, `COMMENT`, `COMMIT`, `COMMITTED`, `COMPLETION`, `CONDITION_NUMBER`, `CONNECT`, `CONNECTION`, `CONNECTION_NAME`, `CONSTRAINT`, `CONSTRAINTS`, `CONSTRAINT_CATALOG`, `CONSTRAINT_NAME`, `CONSTRAINT_SCHEMA`, `CONSTRUCTOR`, `CONTAINS`, `CONTINUE`, `CONVERSION`, `CONVERT`, `COPY`, `CORRESPONDING`, `COUNT`, `CREATE`, `CREATEDB`, `CREATEUSER`, `CROSS`, `CUBE`, `CURRENT`, `CURRENT_DATE`, `CURRENT_PATH`, `CURRENT_ROLE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `CURRENT_USER`, `CURSOR`, `CURSOR_NAME`, `CYCLE`,
				`DATA`, `DATABASE`, `DATETIME_INTERVAL_CODE`, `DATETIME_INTERVAL_PRECISION`, `DAY`, `DEALLOCATE`, `DECLARE`, `DEFAULT`, `DEFAULTS`, `DEFERRABLE`, `DEFERRED`, `DEFINED`, `DEFINER`, `DELETE`, `DELIMITER`, `DELIMITERS`, `DEREF`, `DESC`, `DESCRIBE`, `DESCRIPTOR`, `DESTROY`, `DESTRUCTOR`, `DETERMINISTIC`, `DIAGNOSTICS`, `DICTIONARY`, `DISCONNECT`, `DISPATCH`, `DISTINCT`, `DO`, `DOMAIN`, `DROP`, `DYNAMIC`, `DYNAMIC_FUNCTION`, `DYNAMIC_FUNCTION_CODE`,
				`EACH`, `ELSE`, `ELSIF`, `ENCODING`, `ENCRYPTED`, `END`, `END-EXEC`, `EQUALS`, `ESCAPE`, `EVERY`, `EXCEPTION`, `EXCEPT`, `EXCLUDING`, `EXCLUSIVE`, `EXEC`, `EXECUTE`, `EXISTING`, `EXISTS`, `EXPLAIN`, `EXTERNAL`, `EXTRACT`,
				`FALSE`, `FETCH`, `FINAL`, `FIRST`, `FOR`, `FORCE`, `FOREIGN`, `FORTRAN`, `FORWARD`, `FOUND`, `FREE`, `FREEZE`, `FROM`, `FULL`, `FUNCTION`,
				`G`, `GENERAL`, `GENERATED`, `GET`, `GLOBAL`, `GO`, `GOTO`, `GRANT`, `GRANTED`, `GROUP`, `GROUPING`,
				`HANDLER`, `HAVING`, `HIERARCHY`, `HOLD`, `HOST`,
				`IDENTITY`, `IF`, `IGNORE`, `ILIKE`, `IMMEDIATE`, `IMMUTABLE`, `IMPLEMENTATION`, `IMPLICIT`, `IN`, `INCLUDING`, `INCREMENT`, `INDEX`, `INDICATOR`, `INFIX`, `INHERITS`, `INITIALIZE`, `INITIALLY`, `INNER`, `INOUT`, `INPUT`, `INSENSITIVE`, `INSERT`, `INSTANTIABLE`, `INSTEAD`, `INTERSECT`, `INTO`, `INVOKER`, `IS`, `ISNULL`, `ISOLATION`, `ITERATE`,
				`JOIN`,
				`KEY`, `KEY_MEMBER`, `KEY_TYPE`,
				`LANCOMPILER`, `LANGUAGE`, `LARGE`, `LAST`, `LATERAL`, `LEADING`, `LEFT`, `LENGTH`, `LESS`, `LEVEL`, `LIKE`, `LIMIT`, `LISTEN`, `LOAD`, `LOCAL`, `LOCALTIME`, `LOCALTIMESTAMP`, `LOCATION`, `LOCATOR`, `LOCK`, `LOWER`,
				`MAP`, `MATCH`, `MAX`, `MAXVALUE`, `MESSAGE_LENGTH`, `MESSAGE_OCTET_LENGTH`, `MESSAGE_TEXT`, `METHOD`, `MIN`, `MINUTE`, `MINVALUE`, `MOD`, `MODE`, `MODIFIES`, `MODIFY`, `MONTH`, `MORE`, `MOVE`, `MUMPS`,
				`NAMES`, `NATIONAL`, `NATURAL`, `NCHAR`, `NCLOB`, `NEW`, `NEXT`, `NO`, `NOCREATEDB`, `NOCREATEUSER`, `NONE`, `NOT`, `NOTHING`, `NOTIFY`, `NOTNULL`, `NULL`, `NULLABLE`, `NULLIF`,
				`OBJECT`, `OCTET_LENGTH`, `OF`, `OFF`, `OFFSET`, `OIDS`, `OLD`, `ON`, `ONLY`, `OPEN`, `OPERATION`, `OPERATOR`, `OPTION`, `OPTIONS`, `OR`, `ORDER`, `ORDINALITY`, `OUT`, `OUTER`, `OUTPUT`, `OVERLAPS`, `OVERLAY`, `OVERRIDING`, `OWNER`,
				`PAD`, `PARAMETER`, `PARAMETERS`, `PARAMETER_MODE`, `PARAMETER_NAME`, `PARAMETER_ORDINAL_POSITION`, `PARAMETER_SPECIFIC_CATALOG`, `PARAMETER_SPECIFIC_NAME`, `PARAMETER_SPECIFIC_SCHEMA`, `PARTIAL`, `PASCAL`, `PENDANT`, `PLACING`, `PLI`, `POSITION`, `POSTFIX`, `PRECISION`, `PREFIX`, `PREORDER`, `PREPARE`, `PRESERVE`, `PRIMARY`, `PRIOR`, `PRIVILEGES`, `PROCEDURAL`, `PROCEDURE`, `PUBLIC`,
				`READ`, `READS`, `RECHECK`, `RECURSIVE`, `REF`, `REFERENCES`, `REFERENCING`, `REINDEX`, `RELATIVE`, `RENAME`, `REPEATABLE`, `REPLACE`, `RESET`, `RESTART`, `RESTRICT`, `RESULT`, `RETURN`, `RETURNED_LENGTH`, `RETURNED_OCTET_LENGTH`, `RETURNED_SQLSTATE`, `RETURNS`, `REVOKE`, `RIGHT`, `ROLE`, `ROLLBACK`, `ROLLUP`, `ROUTINE`, `ROUTINE_CATALOG`, `ROUTINE_NAME`, `ROUTINE_SCHEMA`, `ROW`, `ROWS`, `ROW_COUNT`, `RULE`,
				`SAVE_POINT`, `SCALE`, `SCHEMA`, `SCHEMA_NAME`, `SCOPE`, `SCROLL`, `SEARCH`, `SECOND`, `SECURITY`, `SELECT`, `SELF`, `SENSITIVE`, `SERIALIZABLE`, `SERVER_NAME`, `SESSION`, `SESSION_USER`, `SET`, `SETOF`, `SETS`, `SHARE`, `SHOW`, `SIMILAR`, `SIMPLE`, `SIZE`, `SOME`, `SOURCE`, `SPACE`, `SPECIFIC`, `SPECIFICTYPE`, `SPECIFIC_NAME`, `SQL`, `SQLCODE`, `SQLERROR`, `SQLEXCEPTION`, `SQLSTATE`, `SQLWARNING`, `STABLE`, `START`, `STATE`, `STATEMENT`, `STATIC`, `STATISTICS`, `STDIN`, `STDOUT`, `STORAGE`, `STRICT`, `STRUCTURE`, `STYPE`, `SUBCLASS_ORIGIN`, `SUBLIST`, `SUBSTRING`, `SUM`, `SYMMETRIC`, `SYSID`, `SYSTEM`, `SYSTEM_USER`,
				`TABLE`, `TABLE_NAME`, `TEMP`, `TEMPLATE`, `TEMPORARY`, `TERMINATE`, `THAN`, `THEN`, `TIMESTAMP`, `TIMEZONE_HOUR`, `TIMEZONE_MINUTE`, `TO`, `TOAST`, `TRAILING`, `TRANSACTION`, `TRANSACTIONS_COMMITTED`, `TRANSACTIONS_ROLLED_BACK`, `TRANSACTION_ACTIVE`, `TRANSFORM`, `TRANSFORMS`, `TRANSLATE`, `TRANSLATION`, `TREAT`, `TRIGGER`, `TRIGGER_CATALOG`, `TRIGGER_NAME`, `TRIGGER_SCHEMA`, `TRIM`, `TRUE`, `TRUNCATE`, `TRUSTED`, `TYPE`,
				`UNCOMMITTED`, `UNDER`, `UNENCRYPTED`, `UNION`, `UNIQUE`, `UNKNOWN`, `UNLISTEN`, `UNNAMED`, `UNNEST`, `UNTIL`, `UPDATE`, `UPPER`, `USAGE`, `USER`, `USER_DEFINED_TYPE_CATALOG`, `USER_DEFINED_TYPE_NAME`, `USER_DEFINED_TYPE_SCHEMA`, `USING`,
				`VACUUM`, `VALID`, `VALIDATOR`, `VALUES`, `VARIABLE`, `VERBOSE`, `VERSION`, `VIEW`, `VOLATILE`,
				`WHEN`, `WHENEVER`, `WHERE`, `WITH`, `WITHOUT`, `WORK`, `WRITE`,
				`YEAR`,
				`ZONE`,
			), Keyword, nil},
			// Builtin data type names.
			{Words(``, `\b`, `ARRAY`, `BIGINT`, `BINARY`, `BIT`, `BLOB`, `BOOLEAN`, `CHAR`, `CHARACTER`, `DATE`, `DEC`, `DECIMAL`, `FLOAT`, `INT`, `INTEGER`, `INTERVAL`, `NUMBER`, `NUMERIC`, `REAL`, `SERIAL`, `SMALLINT`, `VARCHAR`, `VARYING`, `INT8`, `SERIAL8`, `TEXT`), NameBuiltin, nil},
			// Interpreted string here because the character class contains a backtick.
			{"[+*/<>=~!@#%^&|`?-]", Operator, nil},
			{`[0-9]+`, LiteralNumberInteger, nil},
			{`[a-z_][\w$]*`, Name, nil},
			{`[;:()\[\],.]`, Punctuation, nil},
		},
		"multiline-comments": {
			// A nested "/*" pushes this state again; each "*/" pops one level.
			{`/\*`, CommentMultiline, Push("multiline-comments")},
			{`\*/`, CommentMultiline, Pop(1)},
			{`[^/*]+`, CommentMultiline, nil},
			// A lone "/" or "*" that is not part of a comment delimiter.
			{`[/*]`, CommentMultiline, nil},
		},
		"string": {
			{`[^']+`, LiteralStringSingle, nil},
			// A doubled quote stays inside the literal; a single quote closes it.
			{`''`, LiteralStringSingle, nil},
			{`'`, LiteralStringSingle, Pop(1)},
		},
		"double-string": {
			{`[^"]+`, LiteralStringDouble, nil},
			// A doubled quote stays inside the literal; a single quote closes it.
			{`""`, LiteralStringDouble, nil},
			{`"`, LiteralStringDouble, Pop(1)},
		},
	},
))