From 54fd2578d329f3565075dd98e92b3b00a8dcdac7 Mon Sep 17 00:00:00 2001 From: Andrew Walbran Date: Wed, 14 Jun 2023 19:27:07 +0100 Subject: [PATCH] Add pages about entry point and exception handling on APs (#802) * Add page about entry point before Rust code. * Convert tabs to spaces. mdbook doesn't seem to handle tabs in code properly. * Add page about handling exceptions. * More nuanced discussion of Rust Raspberry Pi OS tutorial. * Add note about EL1 to entry point page too. --- src/SUMMARY.md | 2 + src/bare-metal/aps/entry-point.md | 35 ++++++ src/bare-metal/aps/examples/entry.S | 117 +++++++++--------- src/bare-metal/aps/examples/src/exceptions.rs | 1 + src/bare-metal/aps/exceptions.md | 27 ++++ src/bare-metal/aps/other-projects.md | 22 +++- 6 files changed, 146 insertions(+), 58 deletions(-) create mode 100644 src/bare-metal/aps/entry-point.md create mode 100644 src/bare-metal/aps/exceptions.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 8d1339dc..59319d6f 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -228,6 +228,7 @@ # Bare Metal: Afternoon - [Application Processors](bare-metal/aps.md) + - [Getting Ready to Rust](bare-metal/aps/entry-point.md) - [Inline Assembly](bare-metal/aps/inline-assembly.md) - [MMIO](bare-metal/aps/mmio.md) - [Let's Write a UART Driver](bare-metal/aps/uart.md) @@ -239,6 +240,7 @@ - [Using It](bare-metal/aps/better-uart/using.md) - [Logging](bare-metal/aps/logging.md) - [Using It](bare-metal/aps/logging/using.md) + - [Exceptions](bare-metal/aps/exceptions.md) - [Other Projects](bare-metal/aps/other-projects.md) - [Useful Crates](bare-metal/useful-crates.md) - [zerocopy](bare-metal/useful-crates/zerocopy.md) diff --git a/src/bare-metal/aps/entry-point.md b/src/bare-metal/aps/entry-point.md new file mode 100644 index 00000000..91dbaa43 --- /dev/null +++ b/src/bare-metal/aps/entry-point.md @@ -0,0 +1,35 @@ +# Getting Ready to Rust + +Before we can start running Rust code, we need to do some initialisation. 
+ +```armasm +{{#include examples/entry.S:entry}} +``` + +
+ +* This is the same as it would be for C: initialising the processor state, zeroing the BSS, and + setting up the stack pointer. + * The BSS (block starting symbol, for historical reasons) is the part of the object file which + contains statically allocated variables which are initialised to zero. They are omitted from + the image, to avoid wasting space on zeroes. The compiler assumes that the loader will take care + of zeroing them. +* The BSS may already be zeroed, depending on how memory is initialised and the image is loaded, but + we zero it to be sure. +* We need to enable the MMU and cache before reading or writing any memory. If we don't: + * Unaligned accesses will fault. We build the Rust code for the `aarch64-unknown-none` target + which sets `+strict-align` to prevent the compiler generating unaligned accesses, so it should + be fine in this case, but this is not necessarily the case in general. + * If it were running in a VM, this can lead to cache coherency issues. The problem is that the VM + is accessing memory directly with the cache disabled, while the host has cacheable aliases to the + same memory. Even if the host doesn't explicitly access the memory, speculative accesses can + lead to cache fills, and then changes from one or the other will get lost when the cache is + cleaned or the VM enables the cache. (Cache is keyed by physical address, not VA or IPA.) +* For simplicity, we just use a hardcoded pagetable (see `idmap.S`) which identity maps the first 1 + GiB of address space for devices, the next 1 GiB for DRAM, and another 1 GiB higher up for more + devices. This matches the memory layout that QEMU uses. +* We also set up the exception vector (`vbar_el1`), which we'll see more about later. +* All examples this afternoon assume we will be running at exception level 1 (EL1). If you need to + run at a different exception level you'll need to modify `entry.S` accordingly. + +
diff --git a/src/bare-metal/aps/examples/entry.S b/src/bare-metal/aps/examples/entry.S index cd554f28..0a427a43 100644 --- a/src/bare-metal/aps/examples/entry.S +++ b/src/bare-metal/aps/examples/entry.S @@ -15,19 +15,19 @@ */ .macro adr_l, reg:req, sym:req - adrp \reg, \sym - add \reg, \reg, :lo12:\sym + adrp \reg, \sym + add \reg, \reg, :lo12:\sym .endm .macro mov_i, reg:req, imm:req - movz \reg, :abs_g3:\imm - movk \reg, :abs_g2_nc:\imm - movk \reg, :abs_g1_nc:\imm - movk \reg, :abs_g0_nc:\imm + movz \reg, :abs_g3:\imm + movk \reg, :abs_g2_nc:\imm + movk \reg, :abs_g1_nc:\imm + movk \reg, :abs_g0_nc:\imm .endm -.set .L_MAIR_DEV_nGnRE, 0x04 -.set .L_MAIR_MEM_WBWA, 0xff +.set .L_MAIR_DEV_nGnRE, 0x04 +.set .L_MAIR_MEM_WBWA, 0xff .set .Lmairval, .L_MAIR_DEV_nGnRE | (.L_MAIR_MEM_WBWA << 8) /* 4 KiB granule size for TTBR0_EL1. */ @@ -77,66 +77,71 @@ * prepares the stack, enables floating point, and sets up the exception vector. It preserves x0-x3 * for the Rust entry point, as these may contain boot parameters. */ +// ANCHOR: entry .section .init.entry, "ax" .global entry entry: - /* Load and apply the memory management configuration, ready to enable MMU and caches. */ - adrp x30, idmap - msr ttbr0_el1, x30 + /* + * Load and apply the memory management configuration, ready to enable MMU and + * caches. + */ + adrp x30, idmap + msr ttbr0_el1, x30 - mov_i x30, .Lmairval - msr mair_el1, x30 + mov_i x30, .Lmairval + msr mair_el1, x30 - mov_i x30, .Ltcrval - /* Copy the supported PA range into TCR_EL1.IPS. */ - mrs x29, id_aa64mmfr0_el1 - bfi x30, x29, #32, #4 + mov_i x30, .Ltcrval + /* Copy the supported PA range into TCR_EL1.IPS. */ + mrs x29, id_aa64mmfr0_el1 + bfi x30, x29, #32, #4 - msr tcr_el1, x30 + msr tcr_el1, x30 - mov_i x30, .Lsctlrval + mov_i x30, .Lsctlrval - /* - * Ensure everything before this point has completed, then invalidate any potentially stale - * local TLB entries before they start being used. 
- */ - isb - tlbi vmalle1 - ic iallu - dsb nsh - isb + /* + * Ensure everything before this point has completed, then invalidate any + * potentially stale local TLB entries before they start being used. + */ + isb + tlbi vmalle1 + ic iallu + dsb nsh + isb - /* - * Configure sctlr_el1 to enable MMU and cache and don't proceed until this has completed. - */ - msr sctlr_el1, x30 - isb + /* + * Configure sctlr_el1 to enable MMU and cache and don't proceed until this + * has completed. + */ + msr sctlr_el1, x30 + isb - /* Disable trapping floating point access in EL1. */ - mrs x30, cpacr_el1 - orr x30, x30, #(0x3 << 20) - msr cpacr_el1, x30 - isb + /* Disable trapping floating point access in EL1. */ + mrs x30, cpacr_el1 + orr x30, x30, #(0x3 << 20) + msr cpacr_el1, x30 + isb - /* Zero out the bss section. */ - adr_l x29, bss_begin - adr_l x30, bss_end -0: cmp x29, x30 - b.hs 1f - stp xzr, xzr, [x29], #16 - b 0b + /* Zero out the bss section. */ + adr_l x29, bss_begin + adr_l x30, bss_end +0: cmp x29, x30 + b.hs 1f + stp xzr, xzr, [x29], #16 + b 0b -1: /* Prepare the stack. */ - adr_l x30, boot_stack_end - mov sp, x30 +1: /* Prepare the stack. */ + adr_l x30, boot_stack_end + mov sp, x30 - /* Set up exception vector. */ - adr x30, vector_table_el1 - msr vbar_el1, x30 + /* Set up exception vector. */ + adr x30, vector_table_el1 + msr vbar_el1, x30 - /* Call into Rust code. */ - bl main + /* Call into Rust code. */ + bl main - /* Loop forever waiting for interrupts. */ -2: wfi - b 2b + /* Loop forever waiting for interrupts. */ +2: wfi + b 2b diff --git a/src/bare-metal/aps/examples/src/exceptions.rs b/src/bare-metal/aps/examples/src/exceptions.rs index 4ad90381..2ae4e27a 100644 --- a/src/bare-metal/aps/examples/src/exceptions.rs +++ b/src/bare-metal/aps/examples/src/exceptions.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+// ANCHOR: exceptions use log::error; use smccc::psci::system_off; use smccc::Hvc; diff --git a/src/bare-metal/aps/exceptions.md b/src/bare-metal/aps/exceptions.md new file mode 100644 index 00000000..cfe06b90 --- /dev/null +++ b/src/bare-metal/aps/exceptions.md @@ -0,0 +1,27 @@ +# Exceptions + +AArch64 defines an exception vector table with 16 entries, for 4 types of exceptions (synchronous, +IRQ, FIQ, SError) from 4 states (current EL with SP0, current EL with SPx, lower EL using AArch64, +lower EL using AArch32). We implement this in assembly to save volatile registers to the stack +before calling into Rust code: + +```rust,editable,compile_fail +{{#include examples/src/exceptions.rs:exceptions}} +``` + +
+ +* EL is exception level; all our examples this afternoon run in EL1. +* For simplicity we aren't distinguishing between SP0 and SPx for the current EL exceptions, or + between AArch32 and AArch64 for the lower EL exceptions. +* For this example we just log the exception and power down, as we don't expect any of them to + actually happen. +* We can think of exception handlers and our main execution context more or less like different + threads. [`Send` and `Sync`][1] will control what we can share between them, just like with threads. + For example, if we want to share some value between exception handlers and the rest of the + program, and it's `Send` but not `Sync`, then we'll need to wrap it in something like a `Mutex` + and put it in a static. + +
+ +[1]: ../../concurrency/send-sync.md diff --git a/src/bare-metal/aps/other-projects.md b/src/bare-metal/aps/other-projects.md index 7f212899..ed0af1b4 100644 --- a/src/bare-metal/aps/other-projects.md +++ b/src/bare-metal/aps/other-projects.md @@ -5,7 +5,25 @@ * Supports x86, aarch64 and RISC-V. * Relies on LinuxBoot rather than having many drivers itself. * [Rust RaspberryPi OS tutorial](https://github.com/rust-embedded/rust-raspberrypi-OS-tutorials) - * Initialisation, UART driver, simple bootloader, JTAG, exception levels, exception handling, page tables - * Not all very well written, so beware. + * Initialisation, UART driver, simple bootloader, JTAG, exception levels, exception handling, + page tables + * Some dodginess around cache maintenance and initialisation in Rust, not necessarily a good + example to copy for production code. * [`cargo-call-stack`](https://crates.io/crates/cargo-call-stack) * Static analysis to determine maximum stack usage. + +
+ +* The RaspberryPi OS tutorial runs Rust code before the MMU and caches are enabled. This will read + and write memory (e.g. the stack). However: + * Without the MMU and cache, unaligned accesses will fault. It builds with `aarch64-unknown-none` + which sets `+strict-align` to prevent the compiler generating unaligned accesses, so it should be + alright, but this is not necessarily the case in general. + * If it were running in a VM, this can lead to cache coherency issues. The problem is that the VM + is accessing memory directly with the cache disabled, while the host has cacheable aliases to the + same memory. Even if the host doesn't explicitly access the memory, speculative accesses can + lead to cache fills, and then changes from one or the other will get lost. Again this is alright + in this particular case (running directly on the hardware with no hypervisor), but isn't a good + pattern in general. + +