mirror of
https://github.com/google/comprehensive-rust.git
synced 2025-03-05 16:36:19 +02:00
Use mdbook-i18n-helpers crate (#552)
The i18n-helpers are now available as a stand-alone crate: https://crates.io/crates/mdbook-i18n-helpers. Because we cache the Rust binaries in our GitHub workflows, I bumped the cache prefix to ensure we use a clean cache. Otherwise, Cargo won’t install the new binaries in mdbook-i18n-helpers because it sees the old ones from this repository.
This commit is contained in:
parent
c299bf58a8
commit
49bf110b31
37
.github/workflows/build.yml
vendored
37
.github/workflows/build.yml
vendored
@ -86,43 +86,6 @@ jobs:
|
||||
working-directory: ${{ matrix.directory }}
|
||||
run: cargo build
|
||||
|
||||
i18n-helpers:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Rust cache
|
||||
uses: ./.github/workflows/setup-rust-cache
|
||||
|
||||
- name: Install Gettext
|
||||
run: sudo apt install gettext
|
||||
|
||||
- name: Install mdbook
|
||||
uses: ./.github/workflows/install-mdbook
|
||||
|
||||
- name: Generate po/messages.pot
|
||||
run: mdbook build -d po
|
||||
env:
|
||||
MDBOOK_OUTPUT: '{"xgettext": {"pot-file": "messages.pot"}}'
|
||||
|
||||
- name: Test messages.pot
|
||||
run: msgfmt --statistics -o /dev/null po/messages.pot
|
||||
|
||||
- name: Expand includes without translation
|
||||
run: mdbook build -d expanded
|
||||
env:
|
||||
MDBOOK_OUTPUT: '{"markdown": {}}'
|
||||
|
||||
- name: Expand includes with no-op translation
|
||||
run: mdbook build -d no-op
|
||||
env:
|
||||
MDBOOK_OUTPUT: '{"markdown": {}}'
|
||||
MDBOOK_PREPROCESSOR__GETTEXT__PO_FILE: po/messages.pot
|
||||
|
||||
- name: Compare no translation to no-op translation
|
||||
run: diff --color=always --unified --recursive expanded no-op
|
||||
|
||||
find-translations:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
|
6
.github/workflows/install-mdbook/action.yml
vendored
6
.github/workflows/install-mdbook/action.yml
vendored
@ -1,6 +1,6 @@
|
||||
name: Install mdbook and dependencies
|
||||
|
||||
description: Install the mdbook with the dependencies we need.
|
||||
description: Install mdbook with the dependencies we need.
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
@ -15,6 +15,6 @@ runs:
|
||||
run: cargo install mdbook-svgbob --locked --version 0.2.1
|
||||
shell: bash
|
||||
|
||||
- name: Install i18n-helpers
|
||||
run: cargo install --path i18n-helpers --locked
|
||||
- name: Install mdbook-i18n-helpers
|
||||
run: cargo install mdbook-i18n-helpers --locked --version 0.1.0
|
||||
shell: bash
|
||||
|
@ -7,3 +7,5 @@ runs:
|
||||
steps:
|
||||
- name: Setup Rust cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
prefix-key: v1 # Remove after merging #1322.
|
||||
|
1079
Cargo.lock
generated
1079
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,5 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"i18n-helpers",
|
||||
"src/exercises",
|
||||
"src/bare-metal/useful-crates/allocator-example",
|
||||
"src/bare-metal/useful-crates/zerocopy-example",
|
||||
|
10
README.md
10
README.md
@ -27,16 +27,16 @@ trigger when going through the code samples. We hope to improve on this via
|
||||
## Building
|
||||
|
||||
The course is built using a few tools:
|
||||
- [mdBook](https://github.com/rust-lang/mdBook)
|
||||
- [Svgbob plugin](https://github.com/boozook/mdbook-svgbob)
|
||||
- [i18n-helpers](TRANSLATIONS.md#i18n-helpers)
|
||||
- [mdbook](https://github.com/rust-lang/mdBook)
|
||||
- [mdbook-svgbob](https://github.com/boozook/mdbook-svgbob)
|
||||
- [mdbook-i18n-helpers](TRANSLATIONS.md#i18n-helpers)
|
||||
|
||||
Install these tools with
|
||||
Install these tools with:
|
||||
|
||||
```shell
|
||||
$ cargo install mdbook
|
||||
$ cargo install mdbook-svgbob
|
||||
$ cargo install --path i18n-helpers
|
||||
$ cargo install mdbook-i18n-helpers
|
||||
```
|
||||
|
||||
Then run
|
||||
|
@ -30,10 +30,10 @@ We use two helpers for the translations:
|
||||
* `mdbook-gettext`: This program translates the book into a target language. It
|
||||
is an mdbook preprocessor.
|
||||
|
||||
Install both helpers with the following command from the root of the course:
|
||||
Install both helpers with:
|
||||
|
||||
```shell
|
||||
$ cargo install --path i18n-helpers
|
||||
$ cargo install mdbook-i18n-helpers
|
||||
```
|
||||
|
||||
## Creating and Updating Translations
|
||||
|
@ -1,14 +0,0 @@
|
||||
[package]
|
||||
name = "i18n-helpers"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.68"
|
||||
mdbook = "0.4.25"
|
||||
polib = "0.2.0"
|
||||
pulldown-cmark = { version = "0.9.2", default-features = false }
|
||||
semver = "1.0.16"
|
||||
serde_json = "1.0.91"
|
||||
toml = "0.5.1"
|
@ -1,251 +0,0 @@
|
||||
// Copyright 2023 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! `gettext` for `mdbook`
|
||||
//!
|
||||
//! This program works like `gettext`, meaning it will translate
|
||||
//! strings in your book.
|
||||
//!
|
||||
//! The translations come from GNU Gettext `xx.po` files. The PO file is
|
||||
//! found under the `po` directory based on the `book.language`.
|
||||
//! For example, if `book.language` is set to `ko`, then `po/ko.po` is used.
|
||||
//! You can set `preprocessor.gettext.po-dir` to specify where to find PO
|
||||
//! files. If the PO file is not found, you'll get the untranslated book.
|
||||
//!
|
||||
//! See `TRANSLATIONS.md` in the repository root for more information.
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use i18n_helpers::extract_msgs;
|
||||
use mdbook::book::Book;
|
||||
use mdbook::preprocess::{CmdPreprocessor, PreprocessorContext};
|
||||
use mdbook::BookItem;
|
||||
use polib::catalog::Catalog;
|
||||
use polib::po_file;
|
||||
use semver::{Version, VersionReq};
|
||||
use std::{io, process};
|
||||
use toml::Value;
|
||||
|
||||
fn translate(text: &str, catalog: &Catalog) -> String {
|
||||
let mut consumed = 0; // bytes of text consumed so far
|
||||
let mut output = String::with_capacity(text.len());
|
||||
|
||||
for msg in extract_msgs(text) {
|
||||
let span = msg.span();
|
||||
|
||||
// Copy over any bytes of text that precede this message.
|
||||
if consumed < span.start {
|
||||
output.push_str(&text[consumed..span.start]);
|
||||
}
|
||||
|
||||
// Insert the translated text
|
||||
let msg_text = msg.text(text);
|
||||
let translated = catalog
|
||||
.find_message(None, msg_text, None)
|
||||
.filter(|msg| !msg.flags().is_fuzzy())
|
||||
.and_then(|msg| msg.msgstr().ok())
|
||||
.filter(|msgstr| !msgstr.is_empty())
|
||||
.unwrap_or(msg_text);
|
||||
output.push_str(translated);
|
||||
consumed = span.end;
|
||||
}
|
||||
|
||||
// Handle any text left over after the last message.
|
||||
let suffix = &text[consumed..];
|
||||
output.push_str(suffix);
|
||||
output
|
||||
}
|
||||
|
||||
fn translate_book(ctx: &PreprocessorContext, mut book: Book) -> anyhow::Result<Book> {
|
||||
// no-op when the target language is not set
|
||||
if ctx.config.book.language.is_none() {
|
||||
return Ok(book);
|
||||
}
|
||||
|
||||
// the target language
|
||||
let language = ctx.config.book.language.as_ref().unwrap();
|
||||
|
||||
// Find PO file for the target language
|
||||
let cfg = ctx
|
||||
.config
|
||||
.get_preprocessor("gettext")
|
||||
.ok_or_else(|| anyhow!("Could not read preprocessor.gettext configuration"))?;
|
||||
let po_dir = cfg.get("po-dir").and_then(Value::as_str).unwrap_or("po");
|
||||
let path = ctx.root.join(po_dir).join(format!("{language}.po"));
|
||||
|
||||
// no-op when PO file is missing
|
||||
if !path.exists() {
|
||||
return Ok(book);
|
||||
}
|
||||
|
||||
let catalog = po_file::parse(&path)
|
||||
.map_err(|err| anyhow!("{err}"))
|
||||
.with_context(|| format!("Could not parse {:?} as PO file", path))?;
|
||||
book.for_each_mut(|item| match item {
|
||||
BookItem::Chapter(ch) => {
|
||||
ch.content = translate(&ch.content, &catalog);
|
||||
ch.name = translate(&ch.name, &catalog);
|
||||
}
|
||||
BookItem::Separator => {}
|
||||
BookItem::PartTitle(title) => {
|
||||
*title = translate(title, &catalog);
|
||||
}
|
||||
});
|
||||
|
||||
Ok(book)
|
||||
}
|
||||
|
||||
fn preprocess() -> anyhow::Result<()> {
|
||||
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?;
|
||||
let book_version = Version::parse(&ctx.mdbook_version)?;
|
||||
let version_req = VersionReq::parse(mdbook::MDBOOK_VERSION)?;
|
||||
if !version_req.matches(&book_version) {
|
||||
eprintln!(
|
||||
"Warning: The gettext preprocessor was built against \
|
||||
mdbook version {}, but we're being called from version {}",
|
||||
mdbook::MDBOOK_VERSION,
|
||||
ctx.mdbook_version
|
||||
);
|
||||
}
|
||||
|
||||
let translated_book = translate_book(&ctx, book)?;
|
||||
serde_json::to_writer(io::stdout(), &translated_book)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
if std::env::args().len() == 3 {
|
||||
assert_eq!(std::env::args().nth(1).as_deref(), Some("supports"));
|
||||
if let Some("xgettext") = std::env::args().nth(2).as_deref() {
|
||||
process::exit(1)
|
||||
} else {
|
||||
// Signal that we support all other renderers.
|
||||
process::exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
preprocess()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use polib::message::Message;
|
||||
use polib::metadata::CatalogMetadata;
|
||||
|
||||
fn create_catalog(translations: &[(&str, &str)]) -> Catalog {
|
||||
let mut catalog = Catalog::new(CatalogMetadata::new());
|
||||
for (msgid, msgstr) in translations {
|
||||
let message = Message::build_singular()
|
||||
.with_msgid(String::from(*msgid))
|
||||
.with_msgstr(String::from(*msgstr))
|
||||
.done();
|
||||
catalog.append_or_update(message);
|
||||
}
|
||||
catalog
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_translate_single_line() {
|
||||
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
|
||||
assert_eq!(translate("foo bar", &catalog), "FOO BAR");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_translate_single_paragraph() {
|
||||
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
|
||||
assert_eq!(translate("foo bar\n", &catalog), "FOO BAR\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_translate_paragraph_with_leading_newlines() {
|
||||
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
|
||||
assert_eq!(translate("\n\n\nfoo bar\n", &catalog), "\n\n\nFOO BAR\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_translate_paragraph_with_trailing_newlines() {
|
||||
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
|
||||
assert_eq!(translate("foo bar\n\n\n", &catalog), "FOO BAR\n\n\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_translate_multiple_paragraphs() {
|
||||
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
|
||||
assert_eq!(
|
||||
translate(
|
||||
"first paragraph\n\
|
||||
\n\
|
||||
foo bar\n\
|
||||
\n\
|
||||
last paragraph\n",
|
||||
&catalog
|
||||
),
|
||||
"first paragraph\n\
|
||||
\n\
|
||||
FOO BAR\n\
|
||||
\n\
|
||||
last paragraph\n"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_translate_multiple_paragraphs_extra_newlines() {
|
||||
// Notice how the translated paragraphs have more lines.
|
||||
let catalog = create_catalog(&[
|
||||
(
|
||||
"first\n\
|
||||
paragraph",
|
||||
"FIRST\n\
|
||||
TRANSLATED\n\
|
||||
PARAGRAPH",
|
||||
),
|
||||
(
|
||||
"last\n\
|
||||
paragraph",
|
||||
"LAST\n\
|
||||
TRANSLATED\n\
|
||||
PARAGRAPH",
|
||||
),
|
||||
]);
|
||||
// Paragraph separation is kept intact while translating.
|
||||
assert_eq!(
|
||||
translate(
|
||||
"\n\
|
||||
first\n\
|
||||
paragraph\n\
|
||||
\n\
|
||||
\n\
|
||||
\n\
|
||||
last\n\
|
||||
paragraph\n\
|
||||
\n\
|
||||
\n",
|
||||
&catalog
|
||||
),
|
||||
"\n\
|
||||
FIRST\n\
|
||||
TRANSLATED\n\
|
||||
PARAGRAPH\n\
|
||||
\n\
|
||||
\n\
|
||||
\n\
|
||||
LAST\n\
|
||||
TRANSLATED\n\
|
||||
PARAGRAPH\n\
|
||||
\n\
|
||||
\n"
|
||||
);
|
||||
}
|
||||
}
|
@ -1,120 +0,0 @@
|
||||
// Copyright 2023 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! `xgettext` for `mdbook`
|
||||
//!
|
||||
//! This program works like `xgettext`, meaning it will extract
|
||||
//! translatable strings from your book. The strings are saved in a
|
||||
//! GNU Gettext `messages.pot` file in your build directory (typically
|
||||
//! `po/messages.pot`).
|
||||
//!
|
||||
//! See `TRANSLATIONS.md` in the repository root for more information.
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use mdbook::renderer::RenderContext;
|
||||
use mdbook::BookItem;
|
||||
use polib::catalog::Catalog;
|
||||
use polib::message::Message;
|
||||
use polib::metadata::CatalogMetadata;
|
||||
use std::{fs, io};
|
||||
|
||||
fn add_message(catalog: &mut Catalog, msgid: &str, source: &str) {
|
||||
let sources = match catalog.find_message(None, msgid, None) {
|
||||
Some(msg) => format!("{}\n{}", msg.source(), source),
|
||||
None => String::from(source),
|
||||
};
|
||||
let message = Message::build_singular()
|
||||
.with_source(sources)
|
||||
.with_msgid(String::from(msgid))
|
||||
.done();
|
||||
catalog.append_or_update(message);
|
||||
}
|
||||
|
||||
fn create_catalog(ctx: &RenderContext) -> anyhow::Result<Catalog> {
|
||||
let mut metadata = CatalogMetadata::new();
|
||||
if let Some(title) = &ctx.config.book.title {
|
||||
metadata.project_id_version = String::from(title);
|
||||
}
|
||||
if let Some(lang) = &ctx.config.book.language {
|
||||
metadata.language = String::from(lang);
|
||||
}
|
||||
metadata.mime_version = String::from("1.0");
|
||||
metadata.content_type = String::from("text/plain; charset=UTF-8");
|
||||
metadata.content_transfer_encoding = String::from("8bit");
|
||||
let mut catalog = Catalog::new(metadata);
|
||||
|
||||
// First, add all chapter names and part titles from SUMMARY.md.
|
||||
// The book items are in order of the summary, so we can assign
|
||||
// correct line numbers for duplicate lines by tracking the index
|
||||
// of our last search.
|
||||
let summary_path = ctx.config.book.src.join("SUMMARY.md");
|
||||
let summary = std::fs::read_to_string(ctx.root.join(&summary_path))?;
|
||||
let mut last_idx = 0;
|
||||
for item in ctx.book.iter() {
|
||||
let line = match item {
|
||||
BookItem::Chapter(chapter) => &chapter.name,
|
||||
BookItem::PartTitle(title) => title,
|
||||
BookItem::Separator => continue,
|
||||
};
|
||||
|
||||
let idx = summary[last_idx..].find(line).ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Could not find {line:?} in SUMMARY.md after line {} -- \
|
||||
please remove any formatting from SUMMARY.md",
|
||||
summary[..last_idx].lines().count()
|
||||
)
|
||||
})?;
|
||||
last_idx += idx;
|
||||
let lineno = summary[..last_idx].lines().count();
|
||||
let source = format!("{}:{}", summary_path.display(), lineno);
|
||||
add_message(&mut catalog, line, &source);
|
||||
}
|
||||
|
||||
// Next, we add the chapter contents.
|
||||
for item in ctx.book.iter() {
|
||||
if let BookItem::Chapter(chapter) = item {
|
||||
let path = match &chapter.path {
|
||||
Some(path) => ctx.config.book.src.join(path),
|
||||
None => continue,
|
||||
};
|
||||
for msg in i18n_helpers::extract_msgs(&chapter.content) {
|
||||
let source = format!("{}:{}", path.display(), msg.line_number());
|
||||
add_message(&mut catalog, msg.text(&chapter.content), &source);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(catalog)
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let ctx = RenderContext::from_json(&mut io::stdin()).context("Parsing stdin")?;
|
||||
let cfg = ctx
|
||||
.config
|
||||
.get_renderer("xgettext")
|
||||
.ok_or_else(|| anyhow!("Could not read output.xgettext configuration"))?;
|
||||
let path = cfg
|
||||
.get("pot-file")
|
||||
.ok_or_else(|| anyhow!("Missing output.xgettext.pot-file config value"))?
|
||||
.as_str()
|
||||
.ok_or_else(|| anyhow!("Expected a string for output.xgettext.pot-file"))?;
|
||||
fs::create_dir_all(&ctx.destination)
|
||||
.with_context(|| format!("Could not create {}", ctx.destination.display()))?;
|
||||
let output_path = ctx.destination.join(path);
|
||||
let catalog = create_catalog(&ctx).context("Extracting messages")?;
|
||||
polib::po_file::write(&catalog, &output_path)
|
||||
.with_context(|| format!("Writing messages to {}", output_path.display()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,497 +0,0 @@
|
||||
// Copyright 2023 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use mdbook::utils::new_cmark_parser;
|
||||
use pulldown_cmark::{Event, Tag};
|
||||
use std::ops::Range;
|
||||
|
||||
/// A translatable message.
///
/// A message is identified only by its byte span within the source
/// document; call [`Message::text`] with that same document to get the
/// actual string.
#[derive(PartialEq, Debug)]
pub struct Message {
    /// Line number where this message begins (1-based).
    line: usize,

    /// Span of the input text containing this message.
    span: Range<usize>,
}

impl Message {
    /// Create a message beginning at `line` and covering `span` of the
    /// source document.
    fn new(line: usize, span: Range<usize>) -> Self {
        Self { line, span }
    }

    /// Get the text of this message, as a slice of the document from which it was generated.
    pub fn text<'doc>(&self, document: &'doc str) -> &'doc str {
        &document[self.span.clone()]
    }

    /// Get the line number at which this message begins.
    pub fn line_number(&self) -> usize {
        self.line
    }

    /// Get the span of the source document from which this message is drawn.
    pub fn span(&self) -> Range<usize> {
        self.span.clone()
    }

    /// Extend this message to the given offset.
    fn extend(&mut self, to_end: usize) {
        self.span.end = to_end;
    }

    /// Trim trailing newlines from this message.
    fn trim_right(&mut self, document: &str) {
        // Shrink the span so it no longer covers trailing '\n' bytes;
        // the start offset (and hence the line number) is unchanged.
        let trimmed_len = document[self.span.clone()].trim_end_matches('\n').len();
        self.span.end = self.span.start + trimmed_len;
    }
}
|
||||
|
||||
/// Accumulator for translatable messages based on input from the Markdown parser.
///
/// Events are fed in one at a time via `push_event`; contiguous
/// translatable spans of the source document are collected into
/// [`Message`] values.
struct MsgAccumulator<'a> {
    /// The input document.
    document: &'a str,

    /// Offsets of each newline in the input, used to calculate line numbers from byte offsets.
    offsets: Vec<usize>,

    /// The resulting messages, as ranges of the input document.
    msgs: Vec<Message>,

    /// Current nesting depth of Start/End events.
    depth: usize,

    /// If set, skip until the nesting depth returns to this level.
    skip_until_depth: Option<usize>,

    /// True if the last message can still be appended to. If this is true then
    /// msgs has at least one element.
    message_open: bool,
}
|
||||
|
||||
impl<'a> MsgAccumulator<'a> {
|
||||
fn new(document: &'a str) -> Self {
|
||||
Self {
|
||||
document: document,
|
||||
offsets: document
|
||||
.match_indices("\n")
|
||||
.map(|(offset, _)| offset)
|
||||
.collect(),
|
||||
msgs: vec![],
|
||||
depth: 0,
|
||||
skip_until_depth: None,
|
||||
message_open: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark the current message as finished.
|
||||
fn finish_message(&mut self) {
|
||||
self.message_open = false;
|
||||
}
|
||||
|
||||
/// Add a new text message, or extend an existing one.
|
||||
fn push_message(&mut self, span: Range<usize>) {
|
||||
// try to combine with an existing message.
|
||||
if self.message_open {
|
||||
if let Some(last) = self.msgs.last_mut() {
|
||||
last.extend(span.end);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.msgs
|
||||
.push(Message::new(self.line_number(span.start), span));
|
||||
self.message_open = true;
|
||||
}
|
||||
|
||||
/// Calculate the line number for the given offset.
|
||||
fn line_number(&self, offset: usize) -> usize {
|
||||
self.offsets.partition_point(|&o| o < offset) + 1
|
||||
}
|
||||
|
||||
/// Push a new Markdown event into the accumulator.
|
||||
fn push_event(&mut self, evt: Event<'a>, span: Range<usize>) {
|
||||
#[cfg(test)]
|
||||
println!("{evt:?} -- {:?}", &self.document[span.start..span.end]);
|
||||
|
||||
// Track the nesting depth.
|
||||
match evt {
|
||||
Event::Start(_) => self.depth += 1,
|
||||
Event::End(_) => self.depth -= 1,
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Handle skip_until_depth, including skipping the End event that
|
||||
// returned to the desired level.
|
||||
if let Some(depth) = self.skip_until_depth {
|
||||
if self.depth <= depth {
|
||||
self.skip_until_depth = None;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
match evt {
|
||||
// Consider "inline" tags to be just part of the text.
|
||||
Event::Start(
|
||||
Tag::Emphasis | Tag::Strong | Tag::Strikethrough | Tag::Link(..),
|
||||
) => self.push_message(span),
|
||||
Event::End(
|
||||
Tag::Emphasis | Tag::Strong | Tag::Strikethrough | Tag::Link(..),
|
||||
) => self.push_message(span),
|
||||
|
||||
// We want to translate everything: text, code (from backticks, `..`), or HTML.
|
||||
Event::Text(_) | Event::Code(_) | Event::Html(_) => self.push_message(span),
|
||||
|
||||
// For many event types we just take the entire text from Start to End, which is
|
||||
// already encompassed in the event span.
|
||||
Event::Start(
|
||||
Tag::CodeBlock(_)
|
||||
| Tag::Heading(..)
|
||||
| Tag::List(..)
|
||||
| Tag::BlockQuote
|
||||
| Tag::Table(..),
|
||||
) => {
|
||||
self.finish_message();
|
||||
self.push_message(span);
|
||||
self.finish_message();
|
||||
// Skip until we get to a nesting depth outside of this Start event.
|
||||
self.skip_until_depth = Some(self.depth - 1);
|
||||
}
|
||||
|
||||
// For any other Start or End events, finish the current message but do
|
||||
// not begin a new one.
|
||||
Event::Start(_) | Event::End(_) => self.finish_message(),
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the resulting list of messages.
|
||||
fn into_msgs(mut self) -> Vec<Message> {
|
||||
let parser = new_cmark_parser(self.document, false);
|
||||
for (evt, span) in parser.into_offset_iter() {
|
||||
self.push_event(evt, span);
|
||||
}
|
||||
for msg in &mut self.msgs {
|
||||
msg.trim_right(self.document);
|
||||
}
|
||||
self.msgs
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract translatable messages from the markdown text.
///
/// Returns a vector of [`Message`] values in document order; line numbers
/// begin at 1. Each message records only its byte span — pass the same
/// `document` to [`Message::text`] to recover the string.
pub fn extract_msgs(document: &str) -> Vec<Message> {
    MsgAccumulator::new(document).into_msgs()
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn offset_to_line_empty() {
|
||||
assert_eq!(MsgAccumulator::new("").line_number(0), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_to_line_multiline() {
|
||||
let input = "abc\ndef\nghi";
|
||||
let acc = MsgAccumulator::new(input);
|
||||
let line_nums: Vec<_> = input
|
||||
.chars()
|
||||
.enumerate()
|
||||
.map(|(idx, ch)| (acc.line_number(idx), ch))
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
line_nums,
|
||||
vec![
|
||||
(1, 'a'),
|
||||
(1, 'b'),
|
||||
(1, 'c'),
|
||||
(1, '\n'),
|
||||
(2, 'd'),
|
||||
(2, 'e'),
|
||||
(2, 'f'),
|
||||
(2, '\n'),
|
||||
(3, 'g'),
|
||||
(3, 'h'),
|
||||
(3, 'i'),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
fn msg_to_tuple<'doc>(msg: &Message, document: &'doc str) -> (usize, &'doc str) {
|
||||
(msg.line_number(), msg.text(document))
|
||||
}
|
||||
|
||||
macro_rules! assert_extract_msgs {
|
||||
($document:expr, $exp:expr) => {{
|
||||
let document = $document;
|
||||
assert_eq!(
|
||||
extract_msgs(document)
|
||||
.iter()
|
||||
.map(|m| msg_to_tuple(m, document))
|
||||
.collect::<Vec<_>>(),
|
||||
$exp
|
||||
)
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_empty() {
|
||||
assert_extract_msgs!("", vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_single_line() {
|
||||
assert_extract_msgs!("This is a paragraph.", vec![(1, "This is a paragraph.")]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_simple() {
|
||||
assert_extract_msgs!(
|
||||
"This is\n\
|
||||
the first\n\
|
||||
paragraph.🦀\n\
|
||||
\n\
|
||||
Second paragraph.",
|
||||
vec![
|
||||
(1, "This is\nthe first\nparagraph.🦀"),
|
||||
(5, "Second paragraph.")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_leading_newlines() {
|
||||
assert_extract_msgs!(
|
||||
"\n\
|
||||
\n\
|
||||
\n\
|
||||
This is the\n\
|
||||
first paragraph.",
|
||||
vec![(4, "This is the\nfirst paragraph.")]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_trailing_newlines() {
|
||||
assert_extract_msgs!(
|
||||
"This is\n\
|
||||
a paragraph.\n\
|
||||
\n\
|
||||
\n",
|
||||
vec![(1, "This is\na paragraph.")]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_styled_text() {
|
||||
assert_extract_msgs!(
|
||||
"**This** ~~message~~ _has_ `code` *style*\n",
|
||||
vec![(1, "**This** ~~message~~ _has_ `code` *style*")]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_inline_html() {
|
||||
assert_extract_msgs!(
|
||||
"Hi <script>alert('there');</script>",
|
||||
vec![(1, "Hi <script>alert('there');</script>")]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn extract_msgs_links() {
    // An inline link is part of the surrounding paragraph, so the whole
    // sentence — including the link markup — is one message.
    // (The expected-value line was corrupted in this copy of the source;
    // reconstructed from the input string and the sibling tests' shape.)
    assert_extract_msgs!(
        "See [this page](https://example.com) for more info.",
        vec![(1, "See [this page](https://example.com) for more info.")]
    );
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_links_footer() {
|
||||
assert_extract_msgs!(
|
||||
r#"
|
||||
* [Brazilian Portuguese][pt-BR] and
|
||||
* [Korean][ko]
|
||||
|
||||
[pt-BR]: https://google.github.io/comprehensive-rust/pt-BR/
|
||||
[ko]: https://google.github.io/comprehensive-rust/ko/
|
||||
"#,
|
||||
// The parser does not include the referenced links in the events it produces. This is
|
||||
// probably OK: links would not have been translated, anyway.
|
||||
vec![(2, "* [Brazilian Portuguese][pt-BR] and\n* [Korean][ko]"),]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_block_quote() {
|
||||
assert_extract_msgs!(
|
||||
r#"One of my favorite quotes is:
|
||||
|
||||
> Don't believe everything you read on the Internet.
|
||||
>
|
||||
> I didn't say this second part, but I needed a paragraph for testing.
|
||||
|
||||
--Abraham Lincoln
|
||||
"#,
|
||||
vec![
|
||||
(1, "One of my favorite quotes is:"),
|
||||
(3, "> Don't believe everything you read on the Internet.\n>\n> I didn't say this second part, but I needed a paragraph for testing."),
|
||||
(7, "--Abraham Lincoln"),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_table() {
|
||||
let table = r#"| Module Type | Description
|
||||
|-------------------|------------------------------------------------------------------------
|
||||
| `rust_binary` | Produces a Rust binary.
|
||||
| `rust_library` | Produces a Rust library, and provides both `rlib` and `dylib` variants."#;
|
||||
let input = format!("Hey, a table\n\n{table}\n\nFooter.\n");
|
||||
// tables are included as part of the text.
|
||||
assert_extract_msgs!(
|
||||
&input,
|
||||
vec![(1, "Hey, a table"), (3, table), (8, "Footer."),]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_code_block() {
|
||||
assert_extract_msgs!("Preamble\n```rust\nfn hello() {\n some_code()\n\n todo!()\n}\n```\nPostamble",
|
||||
vec![
|
||||
(1, "Preamble"),
|
||||
(2, "```rust\nfn hello() {\n some_code()\n\n todo!()\n}\n```"),
|
||||
(9, "Postamble")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_details() {
|
||||
// This isn't great, because the parser treats any data following a tag as also HTML,
|
||||
// but works well enough when `<details>` has blank lines before and after.
|
||||
assert_extract_msgs!(
|
||||
"Preamble\n<details>\nSome Details\n</details>\n\nPostamble",
|
||||
vec![
|
||||
(1, "Preamble"),
|
||||
(2, "<details>\nSome Details\n</details>"),
|
||||
(6, "Postamble")
|
||||
]
|
||||
);
|
||||
assert_extract_msgs!(
|
||||
"Preamble\n\n<details>\n\nSome Details\n\n</details>\n\nPostamble",
|
||||
vec![
|
||||
(1, "Preamble"),
|
||||
(3, "<details>"),
|
||||
(5, "Some Details"),
|
||||
(7, "</details>"),
|
||||
(9, "Postamble")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_list() {
|
||||
assert_extract_msgs!(
|
||||
"Some text\n * List item 1🦀\n * List item 2\n\nMore text",
|
||||
vec![
|
||||
(1, "Some text"),
|
||||
(2, " * List item 1🦀\n * List item 2"),
|
||||
(5, "More text")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_multilevel_list() {
|
||||
assert_extract_msgs!("Some text\n * List item 1\n * List item 2\n * Sublist 1\n * Sublist 2\n\nMore text",
|
||||
vec![
|
||||
(1, "Some text"),
|
||||
(2, " * List item 1\n * List item 2\n * Sublist 1\n * Sublist 2"),
|
||||
(7, "More text")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_list_with_paras() {
|
||||
assert_extract_msgs!(
|
||||
r#"* Item 1.
|
||||
* Item 2,
|
||||
two lines.
|
||||
|
||||
* Sub 1.
|
||||
* Sub 2.
|
||||
|
||||
More paragraph.
|
||||
|
||||
Top level.
|
||||
"#,
|
||||
vec![
|
||||
(1, "* Item 1.\n* Item 2,\n two lines.\n\n * Sub 1.\n * Sub 2.\n\n More paragraph."),
|
||||
(10, "Top level."),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_headings() {
|
||||
assert_extract_msgs!(
|
||||
r#"Some text
|
||||
# Headline News🦀
|
||||
|
||||
* A
|
||||
* List
|
||||
|
||||
## Subheading
|
||||
"#,
|
||||
vec![
|
||||
(1, "Some text"),
|
||||
(2, "# Headline News🦀"),
|
||||
(4, "* A\n* List"),
|
||||
(7, "## Subheading")
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_msgs_code_followed_by_details() {
|
||||
// This is a regression test for an error that would incorrectly combine
|
||||
// CodeBlock and HTML.
|
||||
assert_extract_msgs!(
|
||||
r#"```bob
|
||||
BOB
|
||||
```
|
||||
|
||||
<details>
|
||||
|
||||
* Blah blah
|
||||
|
||||
</details>
|
||||
"#,
|
||||
vec![
|
||||
(1, "```bob\nBOB\n```"),
|
||||
(5, "<details>"),
|
||||
(7, "* Blah blah"),
|
||||
(9, "</details>"),
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user