diff --git a/src/SUMMARY.md b/src/SUMMARY.md
index 8d1339dc..59319d6f 100644
--- a/src/SUMMARY.md
+++ b/src/SUMMARY.md
@@ -228,6 +228,7 @@
# Bare Metal: Afternoon
- [Application Processors](bare-metal/aps.md)
+ - [Getting Ready to Rust](bare-metal/aps/entry-point.md)
- [Inline Assembly](bare-metal/aps/inline-assembly.md)
- [MMIO](bare-metal/aps/mmio.md)
- [Let's Write a UART Driver](bare-metal/aps/uart.md)
@@ -239,6 +240,7 @@
- [Using It](bare-metal/aps/better-uart/using.md)
- [Logging](bare-metal/aps/logging.md)
- [Using It](bare-metal/aps/logging/using.md)
+ - [Exceptions](bare-metal/aps/exceptions.md)
- [Other Projects](bare-metal/aps/other-projects.md)
- [Useful Crates](bare-metal/useful-crates.md)
- [zerocopy](bare-metal/useful-crates/zerocopy.md)
diff --git a/src/bare-metal/aps/entry-point.md b/src/bare-metal/aps/entry-point.md
new file mode 100644
index 00000000..91dbaa43
--- /dev/null
+++ b/src/bare-metal/aps/entry-point.md
@@ -0,0 +1,35 @@
+# Getting Ready to Rust
+
+Before we can start running Rust code, we need to do some initialisation.
+
+```armasm
+{{#include examples/entry.S:entry}}
+```
+
+
+
+* This is the same as it would be for C: initialising the processor state, zeroing the BSS, and
+ setting up the stack pointer.
+  * The BSS (block starting symbol, for historical reasons) is the part of the object file which
+    contains statically allocated variables which are initialised to zero. They are omitted from
+ the image, to avoid wasting space on zeroes. The compiler assumes that the loader will take care
+ of zeroing them.
+* The BSS may already be zeroed, depending on how memory is initialised and the image is loaded, but
+ we zero it to be sure.
+* We need to enable the MMU and cache before reading or writing any memory. If we don't:
+ * Unaligned accesses will fault. We build the Rust code for the `aarch64-unknown-none` target
+ which sets `+strict-align` to prevent the compiler generating unaligned accesses, so it should
+ be fine in this case, but this is not necessarily the case in general.
+  * If it were running in a VM, this could lead to cache coherency issues. The problem is that the VM
+    is accessing memory directly with the cache disabled, while the host has cacheable aliases to the
+ same memory. Even if the host doesn't explicitly access the memory, speculative accesses can
+ lead to cache fills, and then changes from one or the other will get lost when the cache is
+ cleaned or the VM enables the cache. (Cache is keyed by physical address, not VA or IPA.)
+* For simplicity, we just use a hardcoded pagetable (see `idmap.S`) which identity maps the first 1
+ GiB of address space for devices, the next 1 GiB for DRAM, and another 1 GiB higher up for more
+ devices. This matches the memory layout that QEMU uses.
+* We also set up the exception vector (`vbar_el1`), which we'll see more about later.
+* All examples this afternoon assume we will be running at exception level 1 (EL1). If you need to
+ run at a different exception level you'll need to modify `entry.S` accordingly.
+
+
diff --git a/src/bare-metal/aps/examples/entry.S b/src/bare-metal/aps/examples/entry.S
index cd554f28..0a427a43 100644
--- a/src/bare-metal/aps/examples/entry.S
+++ b/src/bare-metal/aps/examples/entry.S
@@ -15,19 +15,19 @@
*/
.macro adr_l, reg:req, sym:req
- adrp \reg, \sym
- add \reg, \reg, :lo12:\sym
+ adrp \reg, \sym
+ add \reg, \reg, :lo12:\sym
.endm
.macro mov_i, reg:req, imm:req
- movz \reg, :abs_g3:\imm
- movk \reg, :abs_g2_nc:\imm
- movk \reg, :abs_g1_nc:\imm
- movk \reg, :abs_g0_nc:\imm
+ movz \reg, :abs_g3:\imm
+ movk \reg, :abs_g2_nc:\imm
+ movk \reg, :abs_g1_nc:\imm
+ movk \reg, :abs_g0_nc:\imm
.endm
-.set .L_MAIR_DEV_nGnRE, 0x04
-.set .L_MAIR_MEM_WBWA, 0xff
+.set .L_MAIR_DEV_nGnRE, 0x04
+.set .L_MAIR_MEM_WBWA, 0xff
.set .Lmairval, .L_MAIR_DEV_nGnRE | (.L_MAIR_MEM_WBWA << 8)
/* 4 KiB granule size for TTBR0_EL1. */
@@ -77,66 +77,71 @@
* prepares the stack, enables floating point, and sets up the exception vector. It preserves x0-x3
* for the Rust entry point, as these may contain boot parameters.
*/
+// ANCHOR: entry
.section .init.entry, "ax"
.global entry
entry:
- /* Load and apply the memory management configuration, ready to enable MMU and caches. */
- adrp x30, idmap
- msr ttbr0_el1, x30
+ /*
+ * Load and apply the memory management configuration, ready to enable MMU and
+ * caches.
+ */
+ adrp x30, idmap
+ msr ttbr0_el1, x30
- mov_i x30, .Lmairval
- msr mair_el1, x30
+ mov_i x30, .Lmairval
+ msr mair_el1, x30
- mov_i x30, .Ltcrval
- /* Copy the supported PA range into TCR_EL1.IPS. */
- mrs x29, id_aa64mmfr0_el1
- bfi x30, x29, #32, #4
+ mov_i x30, .Ltcrval
+ /* Copy the supported PA range into TCR_EL1.IPS. */
+ mrs x29, id_aa64mmfr0_el1
+ bfi x30, x29, #32, #4
- msr tcr_el1, x30
+ msr tcr_el1, x30
- mov_i x30, .Lsctlrval
+ mov_i x30, .Lsctlrval
- /*
- * Ensure everything before this point has completed, then invalidate any potentially stale
- * local TLB entries before they start being used.
- */
- isb
- tlbi vmalle1
- ic iallu
- dsb nsh
- isb
+ /*
+ * Ensure everything before this point has completed, then invalidate any
+ * potentially stale local TLB entries before they start being used.
+ */
+ isb
+ tlbi vmalle1
+ ic iallu
+ dsb nsh
+ isb
- /*
- * Configure sctlr_el1 to enable MMU and cache and don't proceed until this has completed.
- */
- msr sctlr_el1, x30
- isb
+ /*
+ * Configure sctlr_el1 to enable MMU and cache and don't proceed until this
+ * has completed.
+ */
+ msr sctlr_el1, x30
+ isb
- /* Disable trapping floating point access in EL1. */
- mrs x30, cpacr_el1
- orr x30, x30, #(0x3 << 20)
- msr cpacr_el1, x30
- isb
+ /* Disable trapping floating point access in EL1. */
+ mrs x30, cpacr_el1
+ orr x30, x30, #(0x3 << 20)
+ msr cpacr_el1, x30
+ isb
- /* Zero out the bss section. */
- adr_l x29, bss_begin
- adr_l x30, bss_end
-0: cmp x29, x30
- b.hs 1f
- stp xzr, xzr, [x29], #16
- b 0b
+ /* Zero out the bss section. */
+ adr_l x29, bss_begin
+ adr_l x30, bss_end
+0: cmp x29, x30
+ b.hs 1f
+ stp xzr, xzr, [x29], #16
+ b 0b
-1: /* Prepare the stack. */
- adr_l x30, boot_stack_end
- mov sp, x30
+1: /* Prepare the stack. */
+ adr_l x30, boot_stack_end
+ mov sp, x30
- /* Set up exception vector. */
- adr x30, vector_table_el1
- msr vbar_el1, x30
+ /* Set up exception vector. */
+ adr x30, vector_table_el1
+ msr vbar_el1, x30
- /* Call into Rust code. */
- bl main
+ /* Call into Rust code. */
+ bl main
- /* Loop forever waiting for interrupts. */
-2: wfi
- b 2b
+ /* Loop forever waiting for interrupts. */
+2: wfi
+ b 2b
diff --git a/src/bare-metal/aps/examples/src/exceptions.rs b/src/bare-metal/aps/examples/src/exceptions.rs
index 4ad90381..2ae4e27a 100644
--- a/src/bare-metal/aps/examples/src/exceptions.rs
+++ b/src/bare-metal/aps/examples/src/exceptions.rs
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// ANCHOR: exceptions
use log::error;
use smccc::psci::system_off;
use smccc::Hvc;
diff --git a/src/bare-metal/aps/exceptions.md b/src/bare-metal/aps/exceptions.md
new file mode 100644
index 00000000..cfe06b90
--- /dev/null
+++ b/src/bare-metal/aps/exceptions.md
@@ -0,0 +1,27 @@
+# Exceptions
+
+AArch64 defines an exception vector table with 16 entries, for 4 types of exceptions (synchronous,
+IRQ, FIQ, SError) from 4 states (current EL with SP0, current EL with SPx, lower EL using AArch64,
+lower EL using AArch32). We implement this in assembly to save volatile registers to the stack
+before calling into Rust code:
+
+```rust,editable,compile_fail
+{{#include examples/src/exceptions.rs:exceptions}}
+```
+
+
+
+* EL is exception level; all our examples this afternoon run in EL1.
+* For simplicity we aren't distinguishing between SP0 and SPx for the current EL exceptions, or
+ between AArch32 and AArch64 for the lower EL exceptions.
+* For this example we just log the exception and power down, as we don't expect any of them to
+ actually happen.
+* We can think of exception handlers and our main execution context more or less like different
+ threads. [`Send` and `Sync`][1] will control what we can share between them, just like with threads.
+ For example, if we want to share some value between exception handlers and the rest of the
+ program, and it's `Send` but not `Sync`, then we'll need to wrap it in something like a `Mutex`
+ and put it in a static.
+
+
+
+[1]: ../../concurrency/send-sync.md
diff --git a/src/bare-metal/aps/other-projects.md b/src/bare-metal/aps/other-projects.md
index 7f212899..ed0af1b4 100644
--- a/src/bare-metal/aps/other-projects.md
+++ b/src/bare-metal/aps/other-projects.md
@@ -5,7 +5,25 @@
* Supports x86, aarch64 and RISC-V.
* Relies on LinuxBoot rather than having many drivers itself.
* [Rust RaspberryPi OS tutorial](https://github.com/rust-embedded/rust-raspberrypi-OS-tutorials)
- * Initialisation, UART driver, simple bootloader, JTAG, exception levels, exception handling, page tables
- * Not all very well written, so beware.
+ * Initialisation, UART driver, simple bootloader, JTAG, exception levels, exception handling,
+ page tables
+ * Some dodginess around cache maintenance and initialisation in Rust, not necessarily a good
+ example to copy for production code.
* [`cargo-call-stack`](https://crates.io/crates/cargo-call-stack)
* Static analysis to determine maximum stack usage.
+
+
+
+* The RaspberryPi OS tutorial runs Rust code before the MMU and caches are enabled. This will read
+ and write memory (e.g. the stack). However:
+ * Without the MMU and cache, unaligned accesses will fault. It builds with `aarch64-unknown-none`
+ which sets `+strict-align` to prevent the compiler generating unaligned accesses so it should be
+ alright, but this is not necessarily the case in general.
+  * If it were running in a VM, this could lead to cache coherency issues. The problem is that the VM
+    is accessing memory directly with the cache disabled, while the host has cacheable aliases to the
+ same memory. Even if the host doesn't explicitly access the memory, speculative accesses can
+ lead to cache fills, and then changes from one or the other will get lost. Again this is alright
+ in this particular case (running directly on the hardware with no hypervisor), but isn't a good
+ pattern in general.
+
+