mirror of
https://github.com/alecthomas/chroma.git
synced 2025-03-17 20:58:08 +02:00
Add pygments2chroma_xml.py
This script automatically generates a lexer in xml format from Pygments.
This commit is contained in:
parent
65c601bb6f
commit
739028eb93
191
_tools/pygments2chroma_xml.py
Normal file
191
_tools/pygments2chroma_xml.py
Normal file
@ -0,0 +1,191 @@
|
||||
import functools
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import types
|
||||
import html
|
||||
|
||||
import pystache
|
||||
from pygments import lexer as pygments_lexer
|
||||
from pygments.token import _TokenType
|
||||
|
||||
TEMPLATE = r'''
|
||||
<lexer>
|
||||
<config>
|
||||
<name>{{name}}</name>
|
||||
{{#aliases}}
|
||||
<alias>{{alias}}</alias>
|
||||
{{/aliases}}
|
||||
{{#filenames}}
|
||||
<filename>{{filename}}</filename>
|
||||
{{/filenames}}
|
||||
{{#mimetypes}}
|
||||
<mime_type>{{mimetype}}</mime_type>
|
||||
{{/mimetypes}}
|
||||
{{#re_ignorecase}}
|
||||
<case_insensitive>true</case_insensitive>
|
||||
{{/re_ignorecase}}
|
||||
{{#re_dotall}}
|
||||
<dot_all>true</dot_all>
|
||||
{{/re_dotall}}
|
||||
{{#re_not_multiline}}
|
||||
<not_multiline>true</not_multiline>
|
||||
{{/re_not_multiline}}
|
||||
</config>
|
||||
<rules>
|
||||
{{#tokens}}
|
||||
<state name="{{state}}">
|
||||
{{#rules}}
|
||||
{{{.}}}
|
||||
{{/rules}}
|
||||
</state>
|
||||
{{/tokens}}
|
||||
</rules>
|
||||
</lexer>
|
||||
'''
|
||||
|
||||
|
||||
def xml_regex(s):
|
||||
return xml_string(s)
|
||||
|
||||
def xml_string(s):
|
||||
s = html.escape(s)
|
||||
return '"' + s + '"'
|
||||
|
||||
|
||||
def to_camel_case(snake_str):
|
||||
components = snake_str.split('_')
|
||||
return ''.join(x.title() for x in components)
|
||||
|
||||
|
||||
def warning(message):
|
||||
print('warning: ' + message, file=sys.stderr)
|
||||
|
||||
|
||||
def resolve_emitter(emitter):
|
||||
if isinstance(emitter, types.FunctionType):
|
||||
if repr(emitter).startswith('<function bygroups.'):
|
||||
args = emitter.__closure__[0].cell_contents
|
||||
emitter = '<bygroups>%s</bygroups>' % ''.join(resolve_emitter(e) for e in args)
|
||||
elif repr(emitter).startswith('<function using.'):
|
||||
args = emitter.__closure__[0].cell_contents
|
||||
if isinstance(args, dict):
|
||||
state = 'root'
|
||||
if 'stack' in args:
|
||||
state = args['stack'][1]
|
||||
args.pop('stack')
|
||||
assert args == {}, args
|
||||
emitter = '<usingself state="%s"/>' % state
|
||||
elif issubclass(args, pygments_lexer.Lexer):
|
||||
name = args.__name__
|
||||
if name.endswith('Lexer'):
|
||||
name = name[:-5]
|
||||
emitter = '<using state="%s"/>' % state
|
||||
else:
|
||||
raise ValueError('only support "using" with lexer classes, not %r' % args)
|
||||
else:
|
||||
warning('unsupported emitter function %r' % emitter)
|
||||
emitter = '?? %r ??' % emitter
|
||||
elif isinstance(emitter, _TokenType):
|
||||
emitter = '<token type="%s"/>' % str(emitter).replace('.', '')[5:]
|
||||
elif emitter is None:
|
||||
return 'None'
|
||||
else:
|
||||
raise ValueError('unsupported emitter type %r' % emitter)
|
||||
assert isinstance(emitter, str)
|
||||
return emitter
|
||||
|
||||
|
||||
def process_state_action(action):
|
||||
if isinstance(action, tuple):
|
||||
return functools.reduce(lambda a, b: a + b, (process_state_action(a) for a in action))
|
||||
if action.startswith('#'):
|
||||
action = action[1:]
|
||||
if action== 'pop':
|
||||
action = '<pop depth="1"/>'
|
||||
elif action.startswith('pop:'):
|
||||
action = '<pop depth="%s"/>' % action[4:]
|
||||
elif action == 'push':
|
||||
action = '<push/>'
|
||||
elif action.startswith('push:'):
|
||||
action = '<push state="%s"/>' % action[5:]
|
||||
else:
|
||||
raise ValueError('unsupported action %r' % (action,))
|
||||
else:
|
||||
action = '<push state="%s"/>' % action
|
||||
return (action,)
|
||||
|
||||
|
||||
def translate_rules(rules):
|
||||
out = []
|
||||
for rule in rules:
|
||||
if isinstance(rule, tuple):
|
||||
regex = rule[0]
|
||||
if isinstance(regex, str):
|
||||
regex = xml_regex(regex)
|
||||
elif isinstance(regex, pygments_lexer.words):
|
||||
regex = xml_string('%s(%s)%s' % (regex.prefix,
|
||||
'|'.join(w for w in regex.words),
|
||||
regex.suffix))
|
||||
else:
|
||||
raise ValueError('expected regex string but got %r' % regex)
|
||||
emitter = resolve_emitter(rule[1])
|
||||
if len(rule) == 2:
|
||||
modifier = ''
|
||||
elif type(rule[2]) is str:
|
||||
modifier = process_state_action(rule[2])[0]
|
||||
elif isinstance(rule[2], pygments_lexer.combined):
|
||||
modifier = '<combined state="%s"/>' % '" state="'.join(rule[2])
|
||||
elif type(rule[2]) is tuple:
|
||||
modifier = '<push state="%s"/>' % '" state="'.join(rule[2])
|
||||
else:
|
||||
raise ValueError('unsupported modifier %r' % (rule[2],))
|
||||
out.append('<rule pattern={}>{}{}</rule>'.format(regex, emitter, modifier))
|
||||
elif isinstance(rule, pygments_lexer.include):
|
||||
out.append('<rule><include state="{}"/></rule>'.format(rule))
|
||||
elif isinstance(rule, pygments_lexer.default):
|
||||
process_state_action(rule.state)
|
||||
out.append('<rule>{}</rule>'.format(''.join(process_state_action(rule.state))))
|
||||
else:
|
||||
raise ValueError('unsupported rule %r' % (rule,))
|
||||
return out
|
||||
|
||||
|
||||
class TemplateView(object):
|
||||
def __init__(self, **kwargs):
|
||||
for key, value in kwargs.items():
|
||||
setattr(self, key, value)
|
||||
|
||||
def re_not_multiline(self):
|
||||
return not (self.regex_flags & re.MULTILINE)
|
||||
|
||||
def re_dotall(self):
|
||||
return self.regex_flags & re.DOTALL
|
||||
|
||||
def re_ignorecase(self):
|
||||
return self.regex_flags & re.IGNORECASE
|
||||
|
||||
|
||||
def main():
|
||||
package_name, symbol_name = sys.argv[1].rsplit(sep=".", maxsplit=1)
|
||||
|
||||
package = importlib.import_module(package_name)
|
||||
|
||||
lexer_cls = getattr(package, symbol_name)
|
||||
|
||||
assert issubclass(lexer_cls, pygments_lexer.RegexLexer), 'can only translate from RegexLexer'
|
||||
|
||||
print(pystache.render(TEMPLATE, TemplateView(
|
||||
name=lexer_cls.name,
|
||||
regex_flags=lexer_cls.flags,
|
||||
aliases=[{'alias': alias} for alias in lexer_cls.aliases],
|
||||
filenames=[{'filename': filename} for filename in lexer_cls.filenames],
|
||||
mimetypes=[{'mimetype': mimetype} for mimetype in lexer_cls.mimetypes],
|
||||
tokens=[{'state': state, 'rules': translate_rules(rules)} for (state, rules) in lexer_cls.get_tokendefs().items()],
|
||||
)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user