From 290ff860b9f359e2e25d5745ea0e508355896314 Mon Sep 17 00:00:00 2001 From: thomas <18520168+yaythomas@users.noreply.github.com> Date: Sun, 6 Sep 2020 14:18:10 +0100 Subject: [PATCH] yaml: block scalar multiline improvements. --- lexers/testdata/yaml.actual | 49 ++++++++++++++ lexers/testdata/yaml.expected | 117 +++++++++++++++++++++++++++++++--- lexers/y/yaml.go | 2 +- 3 files changed, 157 insertions(+), 11 deletions(-) diff --git a/lexers/testdata/yaml.actual b/lexers/testdata/yaml.actual index ea865ab..772e43f 100644 --- a/lexers/testdata/yaml.actual +++ b/lexers/testdata/yaml.actual @@ -82,6 +82,55 @@ literal_block_with_keep_chomping: |+ The literal continues until de-dented, and the leading indentation is stripped. + +a: | + multiline literal + line 2 +b: > + multiline: folded + line 2 +c: |- + multiline # literal strip + line 2 +d: >- + multiline folded strip + line 2: test + + # not a comment + indented by 1 +e: |+ + multiline literal keep + line: 2 +# this is a comment +f: >+ + multiline folded keep one space + line 2 +g: test + +block_scalars_with_indent: + a: | + multiline literal + line 2 + b: > + multiline: folded + line 2 + c: |- + multiline # literal strip + line 2 6 leading spaces + + d: >- + multiline folded strip + line 2: test + # not a comment + e: |+ + multiline literal keep + line: 2 + # this is a comment + f: >+ + multiline folded keep + line 2 + g: test + #################### # COLLECTION TYPES # #################### diff --git a/lexers/testdata/yaml.expected b/lexers/testdata/yaml.expected index ff62f37..f054e3c 100644 --- a/lexers/testdata/yaml.expected +++ b/lexers/testdata/yaml.expected @@ -189,23 +189,116 @@ {"type":"NameTag","value":"literal_block"}, {"type":"Punctuation","value":":"}, {"type":"TextWhitespace","value":" "}, - {"type":"LiteralStringDoc","value":"|\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."}, - {"type":"TextWhitespace","value":"\n"}, + {"type":"Punctuation","value":"|"}, + {"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."}, + {"type":"TextWhitespace","value":" \n"}, {"type":"NameTag","value":"folded_style"}, {"type":"Punctuation","value":":"}, {"type":"TextWhitespace","value":" "}, - {"type":"LiteralStringDoc","value":"\u003e\n This entire block of text will be the value of 'folded_style', but this\n time, all newlines will be replaced with a single space.\n\n Blank lines, like above, are converted to a newline character.\n\n 'More-indented' lines keep their newlines, too -\n this text will appear over two lines."}, - {"type":"TextWhitespace","value":"\n"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of 'folded_style', but this\n time, all newlines will be replaced with a single space.\n\n Blank lines, like above, are converted to a newline character.\n\n 'More-indented' lines keep their newlines, too -\n this text will appear over two lines."}, + {"type":"TextWhitespace","value":" \n"}, {"type":"NameTag","value":"literal_block_with_strip_chomping"}, {"type":"Punctuation","value":":"}, {"type":"TextWhitespace","value":" "}, - {"type":"LiteralStringDoc","value":"|-\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the strip chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."}, - {"type":"TextWhitespace","value":"\n"}, + {"type":"Punctuation","value":"|-"}, + {"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the strip chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."}, + {"type":"TextWhitespace","value":" \n"}, {"type":"NameTag","value":"literal_block_with_keep_chomping"}, {"type":"Punctuation","value":":"}, {"type":"TextWhitespace","value":" "}, - {"type":"LiteralStringDoc","value":"|+\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the keep chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped."}, + {"type":"Punctuation","value":"|+"}, + {"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the keep chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped."}, + {"type":"TextWhitespace","value":" \n\n"}, + {"type":"NameTag","value":"a"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"|"}, + {"type":"LiteralStringDoc","value":"\n multiline literal\n line 2"}, + {"type":"TextWhitespace","value":" \n"}, + {"type":"NameTag","value":"b"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"LiteralStringDoc","value":"\n multiline: folded\n line 2"}, + {"type":"TextWhitespace","value":" \n"}, + {"type":"NameTag","value":"c"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"|-"}, + {"type":"LiteralStringDoc","value":"\n multiline # literal strip\n line 2"}, + {"type":"TextWhitespace","value":" \n"}, + {"type":"NameTag","value":"d"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"\u003e-"}, + {"type":"LiteralStringDoc","value":"\n multiline folded strip\n line 2: test\n\n # not a comment\n indented by 1"}, + {"type":"TextWhitespace","value":" \n"}, + {"type":"NameTag","value":"e"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"|+"}, + {"type":"LiteralStringDoc","value":"\n multiline literal keep\n line: 2"}, + {"type":"TextWhitespace","value":" \n"}, + {"type":"Comment","value":"# this is a comment"}, {"type":"TextWhitespace","value":"\n"}, + {"type":"NameTag","value":"f"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"\u003e+"}, + {"type":"LiteralStringDoc","value":"\n multiline folded keep one space\n line 2"}, + {"type":"TextWhitespace","value":" \n"}, + {"type":"NameTag","value":"g"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Literal","value":"test"}, + {"type":"TextWhitespace","value":"\n\n"}, + {"type":"NameTag","value":"block_scalars_with_indent"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":"\n "}, + {"type":"NameTag","value":"a"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"|"}, + {"type":"LiteralStringDoc","value":"\n multiline literal\n line 2"}, + {"type":"TextWhitespace","value":" \n "}, + {"type":"NameTag","value":"b"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"LiteralStringDoc","value":"\n multiline: folded\n line 2"}, + {"type":"TextWhitespace","value":" \n "}, + {"type":"NameTag","value":"c"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"|-"}, + {"type":"LiteralStringDoc","value":"\n multiline # literal strip\n line 2 6 leading spaces"}, + {"type":"TextWhitespace","value":" \n\n "}, + {"type":"NameTag","value":"d"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"\u003e-"}, + {"type":"LiteralStringDoc","value":"\n multiline folded strip\n line 2: test\n # not a comment"}, + {"type":"TextWhitespace","value":" \n "}, + {"type":"NameTag","value":"e"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"|+"}, + {"type":"LiteralStringDoc","value":"\n multiline literal keep\n line: 2"}, + {"type":"TextWhitespace","value":" \n "}, + {"type":"Comment","value":"# this is a comment"}, + {"type":"TextWhitespace","value":"\n "}, + {"type":"NameTag","value":"f"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Punctuation","value":"\u003e+"}, + {"type":"LiteralStringDoc","value":"\n multiline folded keep\n line 2"}, + {"type":"TextWhitespace","value":" \n "}, + {"type":"NameTag","value":"g"}, + {"type":"Punctuation","value":":"}, + {"type":"TextWhitespace","value":" "}, + {"type":"Literal","value":"test"}, + {"type":"TextWhitespace","value":"\n\n"}, {"type":"Comment","value":"####################"}, {"type":"TextWhitespace","value":"\n"}, {"type":"Comment","value":"# COLLECTION TYPES #"}, @@ -248,8 +341,9 @@ {"type":"TextWhitespace","value":"\n"}, {"type":"Punctuation","value":"?"}, {"type":"TextWhitespace","value":" "}, - {"type":"LiteralStringDoc","value":"|\n This is a key\n that has multiple lines"}, - {"type":"TextWhitespace","value":"\n"}, + {"type":"Punctuation","value":"|"}, + {"type":"LiteralStringDoc","value":"\n This is a key\n that has multiple lines"}, + {"type":"TextWhitespace","value":" \n"}, {"type":"Punctuation","value":":"}, {"type":"TextWhitespace","value":" "}, {"type":"Literal","value":"and this is its value"}, @@ -514,7 +608,10 @@ {"type":"TextWhitespace","value":" "}, {"type":"CommentPreproc","value":"!!binary"}, {"type":"TextWhitespace","value":" "}, - {"type":"LiteralStringDoc","value":"|\n R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5\n OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+\n +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC\n AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs=\n\n# YAML also has a set type, which looks like this:"}, + {"type":"Punctuation","value":"|"}, + {"type":"LiteralStringDoc","value":"\n R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5\n OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+\n +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC\n AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs="}, + {"type":"TextWhitespace","value":" \n\n"}, + {"type":"Comment","value":"# YAML also has a set type, which looks like this:"}, {"type":"TextWhitespace","value":"\n"}, {"type":"NameTag","value":"set"}, {"type":"Punctuation","value":":"}, diff --git a/lexers/y/yaml.go b/lexers/y/yaml.go index 85ba226..72b7743 100644 --- a/lexers/y/yaml.go +++ b/lexers/y/yaml.go @@ -23,13 +23,13 @@ var YAML = internal.Register(MustNewLexer( {`&[^\s]+`, CommentPreproc, nil}, {`\*[^\s]+`, CommentPreproc, nil}, {`^%include\s+[^\n\r]+`, CommentPreproc, nil}, - {`[>|](?:[+-])?\s(?:^(?:[ \n]{1})+.*\n?)*$`, StringDoc, nil}, Include("key"), Include("value"), {`[?:,\[\]]`, Punctuation, nil}, {`.`, Text, nil}, }, "value": { + {`([>|](?:[+-])?)(\n(^ {1,})(?:.*\n*(?:^\3 *).*)*)`, ByGroups(Punctuation, StringDoc, Whitespace), nil}, {Words(``, `\b`, "true", "True", "TRUE", "false", "False", "FALSE", "null", "y", "Y", "yes", "Yes", "YES", "n", "N", "no", "No", "NO", "on", "On", "ON", "off", "Off", "OFF"), KeywordConstant, nil},