
Update Concurrency course with times (#2007)

As I mentioned in #1536:

* Break into segments at approximately the places @fw-immunant put breaks
* Move all of the files into `src/concurrency`
 * Add timings and segment/session metadata so course outlines appear

There's room for more work here, including some additional feedback from
@fw-immunant after the session I observed, but let's do one step at a
time :)
Dustin J. Mitchell authored on 2024-04-23 09:26:41 -04:00, committed by GitHub
parent a03b7e68e5
commit face5af783
58 changed files with 385 additions and 246 deletions

View File: `Cargo.toml`

@@ -0,0 +1,21 @@
[package]
name = "sync-exercises"
version = "0.1.0"
edition = "2021"
publish = false

[[bin]]
name = "dining-philosophers"
path = "dining-philosophers.rs"

[[bin]]
name = "link-checker"
path = "link-checker.rs"

[dependencies]
reqwest = { version = "0.12.4", features = ["blocking"] }
scraper = "0.19.0"
thiserror = "1.0.59"

[dev-dependencies]
tempfile = "3.10.1"

View File: `dining-philosophers.md`

@@ -0,0 +1,54 @@
---
minutes: 20
---

# Dining Philosophers

The dining philosophers problem is a classic problem in concurrency:

> Five philosophers dine together at the same table. Each philosopher has their
> own place at the table. There is a fork between each plate. The dish served is
> a kind of spaghetti which has to be eaten with two forks. Each philosopher can
> only alternately think and eat. Moreover, a philosopher can only eat their
> spaghetti when they have both a left and right fork. Thus two forks will only
> be available when their two nearest neighbors are thinking, not eating. After
> an individual philosopher finishes eating, they will put down both forks.

You will need a local [Cargo installation](../../cargo/running-locally.md) for
this exercise. Copy the code below to a file called `src/main.rs`, fill out the
blanks, and test that `cargo run` does not deadlock:

<!-- File src/main.rs -->
```rust,compile_fail
{{#include dining-philosophers.rs:Philosopher}}
// left_fork: ...
// right_fork: ...
// thoughts: ...
}
{{#include dining-philosophers.rs:Philosopher-think}}
{{#include dining-philosophers.rs:Philosopher-eat}}
// Pick up forks...
{{#include dining-philosophers.rs:Philosopher-eat-end}}
// Create forks
// Create philosophers
// Make each of them think and eat 100 times
// Output their thoughts
}
```

You can use the following `Cargo.toml`:

<!-- File Cargo.toml -->
```toml
[package]
name = "dining-philosophers"
version = "0.1.0"
edition = "2021"
```
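
As a hint for the blanks: each fork is shared between two philosophers and must
be locked while eating, so `Arc<Mutex<Fork>>` is a natural fit. Here is a
minimal sketch of two threads sharing one fork this way (the variable names are
illustrative, not part of the exercise template):

```rust
use std::sync::{Arc, Mutex};
use std::thread;

struct Fork;

fn main() {
    // One fork, two owners: `Arc` gives shared ownership across threads,
    // `Mutex` ensures only one philosopher holds the fork at a time.
    let fork = Arc::new(Mutex::new(Fork));
    let fork2 = Arc::clone(&fork);

    let handle = thread::spawn(move || {
        let _guard = fork2.lock().unwrap(); // released when `_guard` drops
    });

    {
        let _guard = fork.lock().unwrap();
    }
    handle.join().unwrap();
}
```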

View File: `dining-philosophers.rs`

@@ -0,0 +1,95 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ANCHOR: solution
// ANCHOR: Philosopher
use std::sync::{mpsc, Arc, Mutex};
use std::thread;
use std::time::Duration;

struct Fork;

struct Philosopher {
name: String,
// ANCHOR_END: Philosopher
left_fork: Arc<Mutex<Fork>>,
right_fork: Arc<Mutex<Fork>>,
thoughts: mpsc::SyncSender<String>,
}

// ANCHOR: Philosopher-think
impl Philosopher {
fn think(&self) {
self.thoughts
.send(format!("Eureka! {} has a new idea!", &self.name))
.unwrap();
    }
    // ANCHOR_END: Philosopher-think

    // ANCHOR: Philosopher-eat
    fn eat(&self) {
// ANCHOR_END: Philosopher-eat
println!("{} is trying to eat", &self.name);
let _left = self.left_fork.lock().unwrap();
let _right = self.right_fork.lock().unwrap();
// ANCHOR: Philosopher-eat-end
println!("{} is eating...", &self.name);
thread::sleep(Duration::from_millis(10));
}
}

static PHILOSOPHERS: &[&str] =
    &["Socrates", "Hypatia", "Plato", "Aristotle", "Pythagoras"];

fn main() {
// ANCHOR_END: Philosopher-eat-end
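    // A bounded channel: `send` blocks once 10 unread thoughts are queued,
    // applying backpressure to the philosopher threads.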
let (tx, rx) = mpsc::sync_channel(10);
let forks = (0..PHILOSOPHERS.len())
.map(|_| Arc::new(Mutex::new(Fork)))
.collect::<Vec<_>>();
for i in 0..forks.len() {
let tx = tx.clone();
let mut left_fork = Arc::clone(&forks[i]);
let mut right_fork = Arc::clone(&forks[(i + 1) % forks.len()]);
// To avoid a deadlock, we have to break the symmetry
// somewhere. This will swap the forks without deinitializing
// either of them.
if i == forks.len() - 1 {
std::mem::swap(&mut left_fork, &mut right_fork);
}
let philosopher = Philosopher {
name: PHILOSOPHERS[i].to_string(),
thoughts: tx,
left_fork,
right_fork,
};
thread::spawn(move || {
for _ in 0..100 {
philosopher.eat();
philosopher.think();
}
});
}
drop(tx);
for thought in rx {
println!("{thought}");
}
}

View File: `link-checker.md`

@@ -0,0 +1,85 @@
---
minutes: 20
---

# Multi-threaded Link Checker

Let us use our new knowledge to create a multi-threaded link checker. It should
start at a webpage and check that links on the page are valid. It should
recursively check other pages on the same domain and keep doing this until all
pages have been validated.

For this, you will need an HTTP client such as [`reqwest`][1]. You will also
need a way to find links; we can use [`scraper`][2] for that. Finally, we'll
need some way of handling errors; we will use [`thiserror`][3] for that.

Create a new Cargo project and add `reqwest` as a dependency with:

```shell
cargo new link-checker
cd link-checker
cargo add --features blocking,rustls-tls reqwest
cargo add scraper
cargo add thiserror
```

> If `cargo add` fails with `error: no such subcommand`, then please edit the
> `Cargo.toml` file by hand. Add the dependencies listed below.

The `cargo add` calls will update the `Cargo.toml` file to look like this:

<!-- File Cargo.toml -->
```toml
[package]
name = "link-checker"
version = "0.1.0"
edition = "2021"
publish = false

[dependencies]
reqwest = { version = "0.12.4", features = ["blocking", "rustls-tls"] }
scraper = "0.19.0"
thiserror = "1.0.59"
```

You can now download the start page. Try with a small site such as
`https://www.google.org/`.
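
For example, a minimal blocking fetch might look like this (error handling is
just `unwrap` for brevity; the URL is the suggested test site):

```rust
use reqwest::blocking::Client;

fn main() {
    let client = Client::new();
    let response = client.get("https://www.google.org").send().unwrap();
    println!("Status: {}", response.status());
    println!("Body length: {} bytes", response.text().unwrap().len());
}
```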

Your `src/main.rs` file should look something like this:

<!-- File src/main.rs -->
```rust,compile_fail
{{#include link-checker.rs:setup}}
{{#include link-checker.rs:visit_page}}
fn main() {
let client = Client::new();
let start_url = Url::parse("https://www.google.org").unwrap();
    let crawl_command = CrawlCommand { url: start_url, extract_links: true };
match visit_page(&client, &crawl_command) {
Ok(links) => println!("Links: {links:#?}"),
Err(err) => println!("Could not extract links: {err:#}"),
}
}
```

Run the code in `src/main.rs` with

```shell
cargo run
```

## Tasks

- Use threads to check the links in parallel: send the URLs to be checked to a
  channel and let a few threads check the URLs in parallel (a sketch of this
  worker pattern follows the list).
- Extend this to recursively extract links from all pages on the
  `www.google.org` domain. Put an upper limit of around 100 pages so that you
  don't end up being blocked by the site.
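
A possible shape for the worker side, as a minimal sketch using a plain
`String` queue for illustration: wrap the single `mpsc::Receiver` in an
`Arc<Mutex<...>>` so that several worker threads can pull URLs from the same
queue.

```rust
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

fn main() {
    let (tx, rx) = mpsc::channel::<String>();
    let rx = Arc::new(Mutex::new(rx));

    let handles: Vec<_> = (0..4)
        .map(|_| {
            let rx = Arc::clone(&rx);
            thread::spawn(move || loop {
                // Lock only long enough to take one URL off the queue.
                let msg = rx.lock().unwrap().recv();
                let Ok(url) = msg else { break }; // channel closed
                println!("would check {url}");
            })
        })
        .collect();

    tx.send("https://www.google.org/".to_string()).unwrap();
    drop(tx); // close the channel so the workers exit

    for handle in handles {
        handle.join().unwrap();
    }
}
```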
[1]: https://docs.rs/reqwest/
[2]: https://docs.rs/scraper/
[3]: https://docs.rs/thiserror/

View File: `link-checker.rs`

@@ -0,0 +1,181 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ANCHOR: solution
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

// ANCHOR: setup
use reqwest::blocking::Client;
use reqwest::Url;
use scraper::{Html, Selector};
use thiserror::Error;
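
// `thiserror` turns the `#[error("...")]` attributes below into a `Display`
// impl, and `#[from]` into a `From<reqwest::Error>` conversion.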
#[derive(Error, Debug)]
enum Error {
#[error("request error: {0}")]
ReqwestError(#[from] reqwest::Error),
#[error("bad http response: {0}")]
BadResponse(String),
}
// ANCHOR_END: setup

// ANCHOR: visit_page
#[derive(Debug)]
struct CrawlCommand {
url: Url,
extract_links: bool,
}

fn visit_page(client: &Client, command: &CrawlCommand) -> Result<Vec<Url>, Error> {
println!("Checking {:#}", command.url);
let response = client.get(command.url.clone()).send()?;
if !response.status().is_success() {
return Err(Error::BadResponse(response.status().to_string()));
}
let mut link_urls = Vec::new();
if !command.extract_links {
return Ok(link_urls);
}
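    // Resolve relative links against the final response URL, which may differ
    // from the requested URL after redirects.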
let base_url = response.url().to_owned();
let body_text = response.text()?;
let document = Html::parse_document(&body_text);
let selector = Selector::parse("a").unwrap();
let href_values = document
.select(&selector)
.filter_map(|element| element.value().attr("href"));
for href in href_values {
match base_url.join(href) {
Ok(link_url) => {
link_urls.push(link_url);
}
Err(err) => {
println!("On {base_url:#}: ignored unparsable {href:?}: {err}");
}
}
}
Ok(link_urls)
}
// ANCHOR_END: visit_page

struct CrawlState {
domain: String,
visited_pages: std::collections::HashSet<String>,
}

impl CrawlState {
fn new(start_url: &Url) -> CrawlState {
let mut visited_pages = std::collections::HashSet::new();
visited_pages.insert(start_url.as_str().to_string());
CrawlState { domain: start_url.domain().unwrap().to_string(), visited_pages }
    }

    /// Determine whether links within the given page should be extracted.
fn should_extract_links(&self, url: &Url) -> bool {
let Some(url_domain) = url.domain() else {
return false;
};
url_domain == self.domain
    }

    /// Mark the given page as visited, returning false if it had already
/// been visited.
fn mark_visited(&mut self, url: &Url) -> bool {
self.visited_pages.insert(url.as_str().to_string())
}
}

type CrawlResult = Result<Vec<Url>, (Url, Error)>;

fn spawn_crawler_threads(
command_receiver: mpsc::Receiver<CrawlCommand>,
result_sender: mpsc::Sender<CrawlResult>,
thread_count: u32,
) {
let command_receiver = Arc::new(Mutex::new(command_receiver));
for _ in 0..thread_count {
let result_sender = result_sender.clone();
let command_receiver = command_receiver.clone();
thread::spawn(move || {
let client = Client::new();
loop {
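                // Hold the mutex only while receiving, so that other workers
                // can take commands as soon as this one has its own.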
let command_result = {
let receiver_guard = command_receiver.lock().unwrap();
receiver_guard.recv()
};
let Ok(crawl_command) = command_result else {
// The sender got dropped. No more commands coming in.
break;
};
let crawl_result = match visit_page(&client, &crawl_command) {
Ok(link_urls) => Ok(link_urls),
Err(error) => Err((crawl_command.url, error)),
};
result_sender.send(crawl_result).unwrap();
}
});
}
}

fn control_crawl(
start_url: Url,
command_sender: mpsc::Sender<CrawlCommand>,
result_receiver: mpsc::Receiver<CrawlResult>,
) -> Vec<Url> {
let mut crawl_state = CrawlState::new(&start_url);
let start_command = CrawlCommand { url: start_url, extract_links: true };
command_sender.send(start_command).unwrap();
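    // Count commands in flight; when this reaches zero, every discovered page
    // has been visited and the crawl is complete.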
let mut pending_urls = 1;
let mut bad_urls = Vec::new();
while pending_urls > 0 {
let crawl_result = result_receiver.recv().unwrap();
pending_urls -= 1;
match crawl_result {
Ok(link_urls) => {
for url in link_urls {
if crawl_state.mark_visited(&url) {
let extract_links = crawl_state.should_extract_links(&url);
let crawl_command = CrawlCommand { url, extract_links };
command_sender.send(crawl_command).unwrap();
pending_urls += 1;
}
}
}
Err((url, error)) => {
bad_urls.push(url);
println!("Got crawling error: {:#}", error);
continue;
}
}
}
bad_urls
}

fn check_links(start_url: Url) -> Vec<Url> {
let (result_sender, result_receiver) = mpsc::channel::<CrawlResult>();
let (command_sender, command_receiver) = mpsc::channel::<CrawlCommand>();
spawn_crawler_threads(command_receiver, result_sender, 16);
control_crawl(start_url, command_sender, result_receiver)
}

fn main() {
let start_url = reqwest::Url::parse("https://www.google.org").unwrap();
let bad_urls = check_links(start_url);
println!("Bad URLs: {:#?}", bad_urls);
}

View File: `solutions.md`

@@ -0,0 +1,17 @@
---
minutes: 30
---

# Solutions

## Dining Philosophers

```rust
{{#include dining-philosophers.rs:solution}}
```

## Link Checker

```rust,compile_fail
{{#include link-checker.rs:solution}}
```