mirror of
https://github.com/alecthomas/chroma.git
synced 2025-02-13 13:28:27 +02:00
This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
219 lines
7.5 KiB
Go
219 lines
7.5 KiB
Go
package lexers_test
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/alecthomas/chroma/v2"
|
|
"github.com/alecthomas/chroma/v2/lexers"
|
|
)
|
|
|
|
const lexerBenchSource = `/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
package org.apache.kafka.server.common;
|
|
|
|
import org.apache.kafka.common.utils.Utils;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.BufferedWriter;
|
|
import java.io.File;
|
|
import java.io.FileOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.OutputStreamWriter;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.nio.file.FileAlreadyExistsException;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Paths;
|
|
import java.util.ArrayList;
|
|
import java.util.Collection;
|
|
import java.util.Collections;
|
|
import java.util.List;
|
|
import java.util.Optional;
|
|
|
|
/**
|
|
* This class represents a utility to capture a checkpoint in a file. It writes down to the file in the below format.
|
|
*
|
|
* ========= File beginning =========
|
|
* version: int
|
|
* entries-count: int
|
|
* entry-as-string-on-each-line
|
|
* ========= File end ===============
|
|
*
|
|
* Each entry is represented as a string on each line in the checkpoint file. {@link EntryFormatter} is used
|
|
* to convert the entry into a string and vice versa.
|
|
*
|
|
* @param <T> entry type.
|
|
*/
|
|
public class CheckpointFile<T> {
|
|
|
|
private final int version;
|
|
private final EntryFormatter<T> formatter;
|
|
private final Object lock = new Object();
|
|
private final Path absolutePath;
|
|
private final Path tempPath;
|
|
|
|
public CheckpointFile(File file,
|
|
int version,
|
|
EntryFormatter<T> formatter) throws IOException {
|
|
this.version = version;
|
|
this.formatter = formatter;
|
|
try {
|
|
// Create the file if it does not exist.
|
|
Files.createFile(file.toPath());
|
|
} catch (FileAlreadyExistsException ex) {
|
|
// Ignore if file already exists.
|
|
}
|
|
absolutePath = file.toPath().toAbsolutePath();
|
|
tempPath = Paths.get(absolutePath.toString() + ".tmp");
|
|
}
|
|
|
|
public void write(Collection<T> entries) throws IOException {
|
|
synchronized (lock) {
|
|
// write to temp file and then swap with the existing file
|
|
try (FileOutputStream fileOutputStream = new FileOutputStream(tempPath.toFile());
|
|
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8))) {
|
|
// Write the version
|
|
writer.write(Integer.toString(version));
|
|
writer.newLine();
|
|
|
|
// Write the entries count
|
|
writer.write(Integer.toString(entries.size()));
|
|
writer.newLine();
|
|
|
|
// Write each entry on a new line.
|
|
for (T entry : entries) {
|
|
writer.write(formatter.toString(entry));
|
|
writer.newLine();
|
|
}
|
|
|
|
writer.flush();
|
|
fileOutputStream.getFD().sync();
|
|
}
|
|
|
|
Utils.atomicMoveWithFallback(tempPath, absolutePath);
|
|
}
|
|
}
|
|
|
|
public List<T> read() throws IOException {
|
|
synchronized (lock) {
|
|
try (BufferedReader reader = Files.newBufferedReader(absolutePath)) {
|
|
CheckpointReadBuffer<T> checkpointBuffer = new CheckpointReadBuffer<>(absolutePath.toString(), reader, version, formatter);
|
|
return checkpointBuffer.read();
|
|
}
|
|
}
|
|
}
|
|
|
|
private static class CheckpointReadBuffer<T> {
|
|
|
|
private final String location;
|
|
private final BufferedReader reader;
|
|
private final int version;
|
|
private final EntryFormatter<T> formatter;
|
|
|
|
CheckpointReadBuffer(String location,
|
|
BufferedReader reader,
|
|
int version,
|
|
EntryFormatter<T> formatter) {
|
|
this.location = location;
|
|
this.reader = reader;
|
|
this.version = version;
|
|
this.formatter = formatter;
|
|
}
|
|
|
|
List<T> read() throws IOException {
|
|
String line = reader.readLine();
|
|
if (line == null)
|
|
return Collections.emptyList();
|
|
|
|
int readVersion = toInt(line);
|
|
if (readVersion != version) {
|
|
throw new IOException("Unrecognised version:" + readVersion + ", expected version: " + version
|
|
+ " in checkpoint file at: " + location);
|
|
}
|
|
|
|
line = reader.readLine();
|
|
if (line == null) {
|
|
return Collections.emptyList();
|
|
}
|
|
int expectedSize = toInt(line);
|
|
List<T> entries = new ArrayList<>(expectedSize);
|
|
line = reader.readLine();
|
|
while (line != null) {
|
|
Optional<T> maybeEntry = formatter.fromString(line);
|
|
if (!maybeEntry.isPresent()) {
|
|
throw buildMalformedLineException(line);
|
|
}
|
|
entries.add(maybeEntry.get());
|
|
line = reader.readLine();
|
|
}
|
|
|
|
if (entries.size() != expectedSize) {
|
|
throw new IOException("Expected [" + expectedSize + "] entries in checkpoint file ["
|
|
+ location + "], but found only [" + entries.size() + "]");
|
|
}
|
|
|
|
return entries;
|
|
}
|
|
|
|
private int toInt(String line) throws IOException {
|
|
try {
|
|
return Integer.parseInt(line);
|
|
} catch (NumberFormatException e) {
|
|
throw buildMalformedLineException(line);
|
|
}
|
|
}
|
|
|
|
private IOException buildMalformedLineException(String line) {
|
|
return new IOException(String.format("Malformed line in checkpoint file [%s]: %s", location, line));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This is used to convert the given entry of type {@code T} into a string and vice versa.
|
|
*
|
|
* @param <T> entry type
|
|
*/
|
|
public interface EntryFormatter<T> {
|
|
|
|
/**
|
|
* @param entry entry to be converted into string.
|
|
* @return String representation of the given entry.
|
|
*/
|
|
String toString(T entry);
|
|
|
|
/**
|
|
* @param value string representation of an entry.
|
|
* @return entry converted from the given string representation if possible. {@link Optional#empty()} represents
|
|
* that the given string representation could not be converted into an entry.
|
|
*/
|
|
Optional<T> fromString(String value);
|
|
}
|
|
}
|
|
`
|
|
|
|
func Benchmark(b *testing.B) {
|
|
b.ReportAllocs()
|
|
for i := 0; i < b.N; i++ {
|
|
it, err := lexers.GlobalLexerRegistry.Get("Java").Tokenise(nil, lexerBenchSource)
|
|
assert.NoError(b, err)
|
|
for t := it(); t != chroma.EOF; t = it() {
|
|
}
|
|
}
|
|
}
|