
Update Concurrency course with times (#2007)

As I mentioned in #1536:

* Break into segments at approximately the places @fw-immunant put breaks
* Move all of the files into `src/concurrency`
 * Add timings and segment/session metadata so course outlines appear

There's room for more work here, including some additional feedback from
@fw-immunant after the session I observed, but let's do one step at a
time :)
Dustin J. Mitchell authored on 2024-04-23 09:26:41 -04:00, committed by GitHub
parent a03b7e68e5
commit face5af783
58 changed files with 385 additions and 246 deletions

View File: `Cargo.toml`

@@ -0,0 +1,21 @@
[package]
name = "sync-exercises"
version = "0.1.0"
edition = "2021"
publish = false

[[bin]]
name = "dining-philosophers"
path = "dining-philosophers.rs"

[[bin]]
name = "link-checker"
path = "link-checker.rs"

[dependencies]
reqwest = { version = "0.12.4", features = ["blocking"] }
scraper = "0.19.0"
thiserror = "1.0.59"

[dev-dependencies]
tempfile = "3.10.1"

View File: `dining-philosophers.md`

@@ -0,0 +1,54 @@
---
minutes: 20
---

# Dining Philosophers

The dining philosophers problem is a classic problem in concurrency:

> Five philosophers dine together at the same table. Each philosopher has their
> own place at the table. There is a fork between each plate. The dish served is
> a kind of spaghetti which has to be eaten with two forks. Each philosopher can
> only alternately think and eat. Moreover, a philosopher can only eat their
> spaghetti when they have both a left and right fork. Thus two forks will only
> be available when their two nearest neighbors are thinking, not eating. After
> an individual philosopher finishes eating, they will put down both forks.

You will need a local [Cargo installation](../../cargo/running-locally.md) for
this exercise. Copy the code below to a file called `src/main.rs`, fill out the
blanks, and test that `cargo run` does not deadlock:

<!-- File src/main.rs -->
```rust,compile_fail
{{#include dining-philosophers.rs:Philosopher}}
// left_fork: ...
// right_fork: ...
// thoughts: ...
}
{{#include dining-philosophers.rs:Philosopher-think}}
{{#include dining-philosophers.rs:Philosopher-eat}}
// Pick up forks...
{{#include dining-philosophers.rs:Philosopher-eat-end}}
// Create forks
// Create philosophers
// Make each of them think and eat 100 times
// Output their thoughts
}
```

You can use the following `Cargo.toml`:

<!-- File Cargo.toml -->
```toml
[package]
name = "dining-philosophers"
version = "0.1.0"
edition = "2021"
```
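
As a hint for the blanks: each fork is shared between two philosophers and must
be locked while eating, so `Arc<Mutex<Fork>>` is a natural fit. Here is a
minimal sketch of two threads sharing one fork this way (the variable names are
illustrative, not part of the exercise template):

```rust
use std::sync::{Arc, Mutex};
use std::thread;

struct Fork;

fn main() {
    // One fork, two owners: `Arc` gives shared ownership across threads,
    // `Mutex` ensures only one philosopher holds the fork at a time.
    let fork = Arc::new(Mutex::new(Fork));
    let fork2 = Arc::clone(&fork);

    let handle = thread::spawn(move || {
        let _guard = fork2.lock().unwrap(); // released when `_guard` drops
    });

    {
        let _guard = fork.lock().unwrap();
    }
    handle.join().unwrap();
}
```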

View File: `dining-philosophers.rs`

@@ -0,0 +1,95 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ANCHOR: solution
// ANCHOR: Philosopher
use std::sync::{mpsc, Arc, Mutex};
use std::thread;
use std::time::Duration;

struct Fork;

struct Philosopher {
name: String,
// ANCHOR_END: Philosopher
left_fork: Arc<Mutex<Fork>>,
right_fork: Arc<Mutex<Fork>>,
thoughts: mpsc::SyncSender<String>,
}

// ANCHOR: Philosopher-think
impl Philosopher {
fn think(&self) {
self.thoughts
.send(format!("Eureka! {} has a new idea!", &self.name))
.unwrap();
    }
    // ANCHOR_END: Philosopher-think

    // ANCHOR: Philosopher-eat
    fn eat(&self) {
// ANCHOR_END: Philosopher-eat
println!("{} is trying to eat", &self.name);
let _left = self.left_fork.lock().unwrap();
let _right = self.right_fork.lock().unwrap();
// ANCHOR: Philosopher-eat-end
println!("{} is eating...", &self.name);
thread::sleep(Duration::from_millis(10));
}
}

static PHILOSOPHERS: &[&str] =
    &["Socrates", "Hypatia", "Plato", "Aristotle", "Pythagoras"];

fn main() {
// ANCHOR_END: Philosopher-eat-end
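    // A bounded channel: `send` blocks once 10 unread thoughts are queued,
    // applying backpressure to the philosopher threads.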
let (tx, rx) = mpsc::sync_channel(10);
let forks = (0..PHILOSOPHERS.len())
.map(|_| Arc::new(Mutex::new(Fork)))
.collect::<Vec<_>>();
for i in 0..forks.len() {
let tx = tx.clone();
let mut left_fork = Arc::clone(&forks[i]);
let mut right_fork = Arc::clone(&forks[(i + 1) % forks.len()]);
// To avoid a deadlock, we have to break the symmetry
// somewhere. This will swap the forks without deinitializing
// either of them.
if i == forks.len() - 1 {
std::mem::swap(&mut left_fork, &mut right_fork);
}
let philosopher = Philosopher {
name: PHILOSOPHERS[i].to_string(),
thoughts: tx,
left_fork,
right_fork,
};
thread::spawn(move || {
for _ in 0..100 {
philosopher.eat();
philosopher.think();
}
});
}
drop(tx);
for thought in rx {
println!("{thought}");
}
}

View File: `link-checker.md`

@@ -0,0 +1,85 @@
---
minutes: 20
---

# Multi-threaded Link Checker

Let us use our new knowledge to create a multi-threaded link checker. It should
start at a webpage and check that links on the page are valid. It should
recursively check other pages on the same domain and keep doing this until all
pages have been validated.

For this, you will need an HTTP client such as [`reqwest`][1]. You will also
need a way to find links; we can use [`scraper`][2] for that. Finally, we'll
need some way of handling errors; we will use [`thiserror`][3] for that.

Create a new Cargo project and add `reqwest` as a dependency with:

```shell
cargo new link-checker
cd link-checker
cargo add --features blocking,rustls-tls reqwest
cargo add scraper
cargo add thiserror
```

> If `cargo add` fails with `error: no such subcommand`, then please edit the
> `Cargo.toml` file by hand. Add the dependencies listed below.

The `cargo add` calls will update the `Cargo.toml` file to look like this:

<!-- File Cargo.toml -->
```toml
[package]
name = "link-checker"
version = "0.1.0"
edition = "2021"
publish = false

[dependencies]
reqwest = { version = "0.12.4", features = ["blocking", "rustls-tls"] }
scraper = "0.19.0"
thiserror = "1.0.59"
```

You can now download the start page. Try with a small site such as
`https://www.google.org/`.
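
For example, a minimal blocking fetch might look like this (error handling is
just `unwrap` for brevity; the URL is the suggested test site):

```rust
use reqwest::blocking::Client;

fn main() {
    let client = Client::new();
    let response = client.get("https://www.google.org").send().unwrap();
    println!("Status: {}", response.status());
    println!("Body length: {} bytes", response.text().unwrap().len());
}
```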

Your `src/main.rs` file should look something like this:

<!-- File src/main.rs -->
```rust,compile_fail
{{#include link-checker.rs:setup}}
{{#include link-checker.rs:visit_page}}
fn main() {
let client = Client::new();
let start_url = Url::parse("https://www.google.org").unwrap();
    let crawl_command = CrawlCommand { url: start_url, extract_links: true };
match visit_page(&client, &crawl_command) {
Ok(links) => println!("Links: {links:#?}"),
Err(err) => println!("Could not extract links: {err:#}"),
}
}
```

Run the code in `src/main.rs` with

```shell
cargo run
```

## Tasks

- Use threads to check the links in parallel: send the URLs to be checked to a
  channel and let a few threads check the URLs in parallel (a sketch of this
  worker pattern follows the list).
- Extend this to recursively extract links from all pages on the
  `www.google.org` domain. Put an upper limit of around 100 pages so that you
  don't end up being blocked by the site.
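
A possible shape for the worker side, as a minimal sketch using a plain
`String` queue for illustration: wrap the single `mpsc::Receiver` in an
`Arc<Mutex<...>>` so that several worker threads can pull URLs from the same
queue.

```rust
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

fn main() {
    let (tx, rx) = mpsc::channel::<String>();
    let rx = Arc::new(Mutex::new(rx));

    let handles: Vec<_> = (0..4)
        .map(|_| {
            let rx = Arc::clone(&rx);
            thread::spawn(move || loop {
                // Lock only long enough to take one URL off the queue.
                let msg = rx.lock().unwrap().recv();
                let Ok(url) = msg else { break }; // channel closed
                println!("would check {url}");
            })
        })
        .collect();

    tx.send("https://www.google.org/".to_string()).unwrap();
    drop(tx); // close the channel so the workers exit

    for handle in handles {
        handle.join().unwrap();
    }
}
```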
[1]: https://docs.rs/reqwest/
[2]: https://docs.rs/scraper/
[3]: https://docs.rs/thiserror/

View File: `link-checker.rs`

@@ -0,0 +1,181 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ANCHOR: solution
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

// ANCHOR: setup
use reqwest::blocking::Client;
use reqwest::Url;
use scraper::{Html, Selector};
use thiserror::Error;
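
// `thiserror` turns the `#[error("...")]` attributes below into a `Display`
// impl, and `#[from]` into a `From<reqwest::Error>` conversion.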
#[derive(Error, Debug)]
enum Error {
#[error("request error: {0}")]
ReqwestError(#[from] reqwest::Error),
#[error("bad http response: {0}")]
BadResponse(String),
}
// ANCHOR_END: setup

// ANCHOR: visit_page
#[derive(Debug)]
struct CrawlCommand {
url: Url,
extract_links: bool,
}

fn visit_page(client: &Client, command: &CrawlCommand) -> Result<Vec<Url>, Error> {
println!("Checking {:#}", command.url);
let response = client.get(command.url.clone()).send()?;
if !response.status().is_success() {
return Err(Error::BadResponse(response.status().to_string()));
}
let mut link_urls = Vec::new();
if !command.extract_links {
return Ok(link_urls);
}
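    // Resolve relative links against the final response URL, which may differ
    // from the requested URL after redirects.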
let base_url = response.url().to_owned();
let body_text = response.text()?;
let document = Html::parse_document(&body_text);
let selector = Selector::parse("a").unwrap();
let href_values = document
.select(&selector)
.filter_map(|element| element.value().attr("href"));
for href in href_values {
match base_url.join(href) {
Ok(link_url) => {
link_urls.push(link_url);
}
Err(err) => {
println!("On {base_url:#}: ignored unparsable {href:?}: {err}");
}
}
}
Ok(link_urls)
}
// ANCHOR_END: visit_page

struct CrawlState {
domain: String,
visited_pages: std::collections::HashSet<String>,
}

impl CrawlState {
fn new(start_url: &Url) -> CrawlState {
let mut visited_pages = std::collections::HashSet::new();
visited_pages.insert(start_url.as_str().to_string());
CrawlState { domain: start_url.domain().unwrap().to_string(), visited_pages }
    }

    /// Determine whether links within the given page should be extracted.
fn should_extract_links(&self, url: &Url) -> bool {
let Some(url_domain) = url.domain() else {
return false;
};
url_domain == self.domain
    }

    /// Mark the given page as visited, returning false if it had already
/// been visited.
fn mark_visited(&mut self, url: &Url) -> bool {
self.visited_pages.insert(url.as_str().to_string())
}
}

type CrawlResult = Result<Vec<Url>, (Url, Error)>;

fn spawn_crawler_threads(
command_receiver: mpsc::Receiver<CrawlCommand>,
result_sender: mpsc::Sender<CrawlResult>,
thread_count: u32,
) {
let command_receiver = Arc::new(Mutex::new(command_receiver));
for _ in 0..thread_count {
let result_sender = result_sender.clone();
let command_receiver = command_receiver.clone();
thread::spawn(move || {
let client = Client::new();
loop {
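                // Hold the mutex only while receiving, so that other workers
                // can take commands as soon as this one has its own.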
let command_result = {
let receiver_guard = command_receiver.lock().unwrap();
receiver_guard.recv()
};
let Ok(crawl_command) = command_result else {
// The sender got dropped. No more commands coming in.
break;
};
let crawl_result = match visit_page(&client, &crawl_command) {
Ok(link_urls) => Ok(link_urls),
Err(error) => Err((crawl_command.url, error)),
};
result_sender.send(crawl_result).unwrap();
}
});
}
}

fn control_crawl(
start_url: Url,
command_sender: mpsc::Sender<CrawlCommand>,
result_receiver: mpsc::Receiver<CrawlResult>,
) -> Vec<Url> {
let mut crawl_state = CrawlState::new(&start_url);
let start_command = CrawlCommand { url: start_url, extract_links: true };
command_sender.send(start_command).unwrap();
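    // Count commands in flight; when this reaches zero, every discovered page
    // has been visited and the crawl is complete.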
let mut pending_urls = 1;
let mut bad_urls = Vec::new();
while pending_urls > 0 {
let crawl_result = result_receiver.recv().unwrap();
pending_urls -= 1;
match crawl_result {
Ok(link_urls) => {
for url in link_urls {
if crawl_state.mark_visited(&url) {
let extract_links = crawl_state.should_extract_links(&url);
let crawl_command = CrawlCommand { url, extract_links };
command_sender.send(crawl_command).unwrap();
pending_urls += 1;
}
}
}
Err((url, error)) => {
bad_urls.push(url);
println!("Got crawling error: {:#}", error);
continue;
}
}
}
bad_urls
}

fn check_links(start_url: Url) -> Vec<Url> {
let (result_sender, result_receiver) = mpsc::channel::<CrawlResult>();
let (command_sender, command_receiver) = mpsc::channel::<CrawlCommand>();
spawn_crawler_threads(command_receiver, result_sender, 16);
control_crawl(start_url, command_sender, result_receiver)
}

fn main() {
let start_url = reqwest::Url::parse("https://www.google.org").unwrap();
let bad_urls = check_links(start_url);
println!("Bad URLs: {:#?}", bad_urls);
}

View File: `solutions.md`

@@ -0,0 +1,17 @@
---
minutes: 30
---

# Solutions

## Dining Philosophers

```rust
{{#include dining-philosophers.rs:solution}}
```

## Link Checker

```rust,compile_fail
{{#include link-checker.rs:solution}}
```