mirror of
https://github.com/alecthomas/chroma.git
synced 2025-03-17 20:58:08 +02:00
Add CSV lexer (#1005)
This commit is contained in:
parent
5e7b53e590
commit
e76e1e2233
53
lexers/embedded/csv.xml
Normal file
53
lexers/embedded/csv.xml
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
<!--
|
||||||
|
Lexer for RFC-4180 compliant CSV subject to the following additions:
|
||||||
|
- UTF-8 encoding is accepted (the RFC requires 7-bit ASCII)
|
||||||
|
- The line terminator character can be LF or CRLF (the RFC allows CRLF only)
|
||||||
|
|
||||||
|
Link to the RFC-4180 specification: https://tools.ietf.org/html/rfc4180
|
||||||
|
|
||||||
|
Additions inspired by:
|
||||||
|
https://github.com/frictionlessdata/datapackage/issues/204#issuecomment-193242077
|
||||||
|
|
||||||
|
Future improvements:
|
||||||
|
- Identify non-quoted numbers as LiteralNumber
|
||||||
|
- Identify y as an error in "x"y. Currently it's identified as another string
|
||||||
|
literal.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>CSV</name>
|
||||||
|
<alias>csv</alias>
|
||||||
|
<filename>*.csv</filename>
|
||||||
|
<mime_type>text/csv</mime_type>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="\r?\n">
|
||||||
|
<token type="Punctuation" />
|
||||||
|
</rule>
|
||||||
|
<rule pattern=",">
|
||||||
|
<token type="Punctuation" />
|
||||||
|
</rule>
|
||||||
|
<rule pattern="&quot;">
|
||||||
|
<token type="LiteralStringDouble" />
|
||||||
|
<push state="escaped" />
|
||||||
|
</rule>
|
||||||
|
<rule pattern="[^\r\n,]+">
|
||||||
|
<token type="LiteralString" />
|
||||||
|
</rule>
|
||||||
|
</state>
|
||||||
|
<state name="escaped">
|
||||||
|
<rule pattern="&quot;&quot;">
|
||||||
|
<token type="LiteralStringEscape"/>
|
||||||
|
</rule>
|
||||||
|
<rule pattern="&quot;">
|
||||||
|
<token type="LiteralStringDouble" />
|
||||||
|
<pop depth="1"/>
|
||||||
|
</rule>
|
||||||
|
<rule pattern="[^&quot;]+">
|
||||||
|
<token type="LiteralStringDouble" />
|
||||||
|
</rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
2
lexers/testdata/csv.actual
vendored
Normal file
2
lexers/testdata/csv.actual
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
hello,"newline:
|
||||||
|
quote: "" comma: ,",1,"",""stuff,,1.1
|
17
lexers/testdata/csv.expected
vendored
Normal file
17
lexers/testdata/csv.expected
vendored
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
[
|
||||||
|
{"type":"LiteralString","value":"hello"},
|
||||||
|
{"type":"Punctuation","value":","},
|
||||||
|
{"type":"LiteralStringDouble","value":"\"newline:\nquote: "},
|
||||||
|
{"type":"LiteralStringEscape","value":"\"\""},
|
||||||
|
{"type":"LiteralStringDouble","value":" comma: ,\""},
|
||||||
|
{"type":"Punctuation","value":","},
|
||||||
|
{"type":"LiteralString","value":"1"},
|
||||||
|
{"type":"Punctuation","value":","},
|
||||||
|
{"type":"LiteralStringDouble","value":"\"\""},
|
||||||
|
{"type":"Punctuation","value":","},
|
||||||
|
{"type":"LiteralStringDouble","value":"\"\""},
|
||||||
|
{"type":"LiteralString","value":"stuff"},
|
||||||
|
{"type":"Punctuation","value":",,"},
|
||||||
|
{"type":"LiteralString","value":"1.1"},
|
||||||
|
{"type":"Punctuation","value":"\n"}
|
||||||
|
]
|
Loading…
x
Reference in New Issue
Block a user