1
0
mirror of https://github.com/google/comprehensive-rust.git synced 2025-06-08 02:26:14 +02:00

212 lines
6.0 KiB
Rust
Raw Normal View History

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ANCHOR: solution
use thiserror::Error;
// ANCHOR: types
use std::iter::Peekable;
use std::str::Chars;
/// An arithmetic operator.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Op {
Add,
Sub,
}
/// A token in the expression language.
#[derive(Debug, PartialEq)]
enum Token {
Number(String),
Identifier(String),
Operator(Op),
}
/// An expression in the expression language.
#[derive(Debug, PartialEq)]
enum Expression {
/// A reference to a variable.
Var(String),
/// A literal number.
Number(u32),
/// A binary operation.
Operation(Box<Expression>, Op, Box<Expression>),
}
// ANCHOR_END: types
fn tokenize(input: &str) -> Tokenizer {
return Tokenizer(input.chars().peekable());
}
#[derive(Debug, Error)]
enum TokenizerError {
#[error("Unexpected character '{0}' in input")]
UnexpectedCharacter(char),
}
struct Tokenizer<'a>(Peekable<Chars<'a>>);
impl<'a> Iterator for Tokenizer<'a> {
type Item = Result<Token, TokenizerError>;
fn next(&mut self) -> Option<Result<Token, TokenizerError>> {
let c = self.0.next()?;
match c {
'0'..='9' => {
let mut num = String::from(c);
while let Some(c @ '0'..='9') = self.0.peek() {
num.push(*c);
self.0.next();
}
Some(Ok(Token::Number(num)))
}
'a'..='z' => {
let mut ident = String::from(c);
while let Some(c @ ('a'..='z' | '_' | '0'..='9')) = self.0.peek() {
ident.push(*c);
self.0.next();
}
Some(Ok(Token::Identifier(ident)))
}
'+' => Some(Ok(Token::Operator(Op::Add))),
'-' => Some(Ok(Token::Operator(Op::Sub))),
_ => Some(Err(TokenizerError::UnexpectedCharacter(c))),
}
}
}
#[derive(Debug, Error)]
enum ParserError {
#[error("Tokenizer error: {0}")]
TokenizerError(#[from] TokenizerError),
#[error("Unexpected end of input")]
UnexpectedEOF,
#[error("Unexpected token {0:?}")]
UnexpectedToken(Token),
#[error("Invalid number")]
InvalidNumber(#[from] std::num::ParseIntError),
}
fn parse(input: &str) -> Result<Expression, ParserError> {
let mut tokens = tokenize(input);
Format all Markdown files with `dprint` (#1157) This is the result of running `dprint fmt` after removing `src/` from the list of excluded directories. This also reformats the Rust code: we might want to tweak this a bit in the future since some of the changes removes the hand-formatting. Of course, this formatting can be seen as a mis-feature, so maybe this is good overall. Thanks to mdbook-i18n-helpers 0.2, the POT file is nearly unchanged after this, meaning that all existing translations remain valid! A few messages were changed because of stray whitespace characters: msgid "" "Slices always borrow from another object. In this example, `a` has to remain " -"'alive' (in scope) for at least as long as our slice. " +"'alive' (in scope) for at least as long as our slice." msgstr "" The formatting is enforced in CI and we will have to see how annoying this is in practice for the many contributors. If it becomes annoying, we should look into fixing dprint/check#11 so that `dprint` can annotate the lines that need fixing directly, then I think we can consider more strict formatting checks. I added more customization to `rustfmt.toml`. This is to better emulate the dense style used in the course: - `max_width = 85` allows lines to take up the full width available in our code blocks (when taking margins and the line numbers into account). - `wrap_comments = true` ensures that we don't show very long comments in the code examples. I edited some comments to shorten them and avoid unnecessary line breaks — please trim other unnecessarily long comments when you see them! Remember we're writing code for slides :smile: - `use_small_heuristics = "Max"` allows for things like struct literals and if-statements to take up the full line width configured above. The formatting settings apply to all our Rust code right now — I think we could improve this with https://github.com/dprint/dprint/issues/711 which lets us add per-directory `dprint` configuration files. However, the `inherit: true` setting is not yet implemented (as far as I can tell), so a nested configuration file will have to copy most or all of the top-level file.
2023-12-31 00:15:07 +01:00
fn parse_expr<'a>(
tokens: &mut Tokenizer<'a>,
) -> Result<Expression, ParserError> {
let tok = tokens.next().ok_or(ParserError::UnexpectedEOF)??;
let expr = match tok {
Token::Number(num) => {
let v = num.parse()?;
Expression::Number(v)
}
Token::Identifier(ident) => Expression::Var(ident),
Token::Operator(_) => return Err(ParserError::UnexpectedToken(tok)),
};
// Look ahead to parse a binary operation if present.
Ok(match tokens.next() {
None => expr,
Format all Markdown files with `dprint` (#1157) This is the result of running `dprint fmt` after removing `src/` from the list of excluded directories. This also reformats the Rust code: we might want to tweak this a bit in the future since some of the changes removes the hand-formatting. Of course, this formatting can be seen as a mis-feature, so maybe this is good overall. Thanks to mdbook-i18n-helpers 0.2, the POT file is nearly unchanged after this, meaning that all existing translations remain valid! A few messages were changed because of stray whitespace characters: msgid "" "Slices always borrow from another object. In this example, `a` has to remain " -"'alive' (in scope) for at least as long as our slice. " +"'alive' (in scope) for at least as long as our slice." msgstr "" The formatting is enforced in CI and we will have to see how annoying this is in practice for the many contributors. If it becomes annoying, we should look into fixing dprint/check#11 so that `dprint` can annotate the lines that need fixing directly, then I think we can consider more strict formatting checks. I added more customization to `rustfmt.toml`. This is to better emulate the dense style used in the course: - `max_width = 85` allows lines to take up the full width available in our code blocks (when taking margins and the line numbers into account). - `wrap_comments = true` ensures that we don't show very long comments in the code examples. I edited some comments to shorten them and avoid unnecessary line breaks — please trim other unnecessarily long comments when you see them! Remember we're writing code for slides :smile: - `use_small_heuristics = "Max"` allows for things like struct literals and if-statements to take up the full line width configured above. The formatting settings apply to all our Rust code right now — I think we could improve this with https://github.com/dprint/dprint/issues/711 which lets us add per-directory `dprint` configuration files. However, the `inherit: true` setting is not yet implemented (as far as I can tell), so a nested configuration file will have to copy most or all of the top-level file.
2023-12-31 00:15:07 +01:00
Some(Ok(Token::Operator(op))) => Expression::Operation(
Box::new(expr),
op,
Box::new(parse_expr(tokens)?),
),
Some(Err(e)) => return Err(e.into()),
Some(Ok(tok)) => return Err(ParserError::UnexpectedToken(tok)),
})
}
parse_expr(&mut tokens)
}
fn main() -> anyhow::Result<()> {
let expr = parse("10+foo+20-30")?;
println!("{expr:?}");
Ok(())
}
// ANCHOR_END: solution
/*
// ANCHOR: panics
fn tokenize(input: &str) -> Tokenizer {
return Tokenizer(input.chars().peekable());
}
struct Tokenizer<'a>(Peekable<Chars<'a>>);
impl<'a> Iterator for Tokenizer<'a> {
type Item = Token;
fn next(&mut self) -> Option<Token> {
let c = self.0.next()?;
match c {
'0'..='9' => {
let mut num = String::from(c);
while let Some(c @ '0'..='9') = self.0.peek() {
num.push(*c);
self.0.next();
}
Some(Token::Number(num))
}
'a'..='z' => {
let mut ident = String::from(c);
while let Some(c @ ('a'..='z' | '_' | '0'..='9')) = self.0.peek() {
ident.push(*c);
self.0.next();
}
Some(Token::Identifier(ident))
}
'+' => Some(Token::Operator(Op::Add)),
'-' => Some(Token::Operator(Op::Sub)),
_ => panic!("Unexpected character {c}"),
}
}
}
fn parse(input: &str) -> Expression {
let mut tokens = tokenize(input);
fn parse_expr<'a>(tokens: &mut Tokenizer<'a>) -> Expression {
let Some(tok) = tokens.next() else {
panic!("Unexpected end of input");
};
let expr = match tok {
Token::Number(num) => {
let v = num.parse().expect("Invalid 32-bit integer'");
Expression::Number(v)
}
Token::Identifier(ident) => Expression::Var(ident),
Token::Operator(_) => panic!("Unexpected token {tok:?}"),
};
// Look ahead to parse a binary operation if present.
match tokens.next() {
None => expr,
Format all Markdown files with `dprint` (#1157) This is the result of running `dprint fmt` after removing `src/` from the list of excluded directories. This also reformats the Rust code: we might want to tweak this a bit in the future since some of the changes removes the hand-formatting. Of course, this formatting can be seen as a mis-feature, so maybe this is good overall. Thanks to mdbook-i18n-helpers 0.2, the POT file is nearly unchanged after this, meaning that all existing translations remain valid! A few messages were changed because of stray whitespace characters: msgid "" "Slices always borrow from another object. In this example, `a` has to remain " -"'alive' (in scope) for at least as long as our slice. " +"'alive' (in scope) for at least as long as our slice." msgstr "" The formatting is enforced in CI and we will have to see how annoying this is in practice for the many contributors. If it becomes annoying, we should look into fixing dprint/check#11 so that `dprint` can annotate the lines that need fixing directly, then I think we can consider more strict formatting checks. I added more customization to `rustfmt.toml`. This is to better emulate the dense style used in the course: - `max_width = 85` allows lines to take up the full width available in our code blocks (when taking margins and the line numbers into account). - `wrap_comments = true` ensures that we don't show very long comments in the code examples. I edited some comments to shorten them and avoid unnecessary line breaks — please trim other unnecessarily long comments when you see them! Remember we're writing code for slides :smile: - `use_small_heuristics = "Max"` allows for things like struct literals and if-statements to take up the full line width configured above. The formatting settings apply to all our Rust code right now — I think we could improve this with https://github.com/dprint/dprint/issues/711 which lets us add per-directory `dprint` configuration files. However, the `inherit: true` setting is not yet implemented (as far as I can tell), so a nested configuration file will have to copy most or all of the top-level file.
2023-12-31 00:15:07 +01:00
Some(Token::Operator(op)) => Expression::Operation(
Box::new(expr),
op,
Box::new(parse_expr(tokens)),
),
Some(tok) => panic!("Unexpected token {tok:?}"),
}
}
parse_expr(&mut tokens)
}
fn main() {
let expr = parse("10+foo+20-30");
println!("{expr:?}");
}
// ANCHOR_END: panics
*/