Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions litebox_platform_lvbs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ default = ["optee_syscall"]
optee_syscall = []
linux_syscall = []
devbox = []
# Shrink the preemption timer quantum from the 8 s production budget to 10 ms
# so runaway-TA preemption can be tested quickly. Test builds only.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the production budget?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR currently uses 8 seconds, which is slightly below the Linux kernel's hard CPU lockup timeout (10 seconds).

preemption_test_quantum = []

[lints]
workspace = true
20 changes: 20 additions & 0 deletions litebox_platform_lvbs/src/arch/x86/interrupts.S
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,15 @@ isr_with_err_code isr_alignment_check alignment_check_handler_impl 17
/* Vector 19: SIMD Floating-Point Exception (#XM) - No error code */
isr_no_err_code isr_simd_floating_point simd_floating_point_handler_impl 19

/*
* Vector 0x40: Hyper-V STIMER preemption timer - No error code
*
* Common case is a fire in USER mode (a TA overran its quantum): the macro's
* user-mode path routes it to exception_callback. The kernel path into
* stimer_handler_impl is a safety net only. See arch::timer.
*/
isr_no_err_code isr_stimer stimer_handler_impl 0x40

/*
* Hypervisor synthetic interrupt handler (vector 0xf3)
*
Expand All @@ -207,3 +216,14 @@ isr_no_err_code isr_simd_floating_point simd_floating_point_handler_impl 19
.global isr_hyperv_sint
isr_hyperv_sint:
iretq

/*
* Spurious interrupt handler (vector 0xff, programmed into the SVR)
*
* Delivered when the APIC raised an interrupt that vanished before the core
* acknowledged it. It carries no work and requires no EOI (per the Intel SDM),
* so the handler is a bare iretq that touches no registers. See arch::timer
* (SPURIOUS_VECTOR).
*/
.global isr_spurious
isr_spurious:
iretq
27 changes: 27 additions & 0 deletions litebox_platform_lvbs/src/arch/x86/interrupts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
//! - **MCE (Vector 18)**: Machine Check Exceptions are delivered to VTL0 and handled
//! by the VTL0 kernel. VTL1 does not receive MCEs.

use super::timer::{SPURIOUS_VECTOR, STIMER_VECTOR};
use crate::mshv::HYPERVISOR_CALLBACK_VECTOR;
use core::ops::IndexMut;
use litebox_common_linux::PtRegs;
Expand All @@ -42,6 +43,8 @@ unsafe extern "C" {
fn isr_alignment_check();
fn isr_simd_floating_point();
fn isr_hyperv_sint();
fn isr_stimer();
fn isr_spurious();
}

const DOUBLE_FAULT_IST_INDEX: u16 = 0;
Expand Down Expand Up @@ -87,6 +90,10 @@ fn idt() -> &'static InterruptDescriptorTable {
.set_handler_addr(VirtAddr::from_ptr(isr_simd_floating_point as *const ()));
idt.index_mut(HYPERVISOR_CALLBACK_VECTOR)
.set_handler_addr(VirtAddr::from_ptr(isr_hyperv_sint as *const ()));
idt.index_mut(STIMER_VECTOR)
.set_handler_addr(VirtAddr::from_ptr(isr_stimer as *const ()));
idt.index_mut(SPURIOUS_VECTOR)
.set_handler_addr(VirtAddr::from_ptr(isr_spurious as *const ()));
}
idt
})
Expand Down Expand Up @@ -192,6 +199,26 @@ extern "C" fn simd_floating_point_handler_impl(regs: &PtRegs) {
panic!("EXCEPTION: SIMD FLOATING-POINT ERROR\n{regs:#x?}");
}

/// Kernel-mode safety net for an STIMER preemption-timer fire (vector 0x40).
///
/// The common case fires in user mode and is routed to `exception_callback`;
/// this handler only runs when the fire is delivered in kernel mode. It
/// always ACKs the interrupt at the local APIC and then re-arms the one-shot
/// timer, but only while the per-CPU `preemption_armed` flag is set. A stale
/// fire (the flag is clear) is just ACKed.
///
/// Two invariants keep the re-arm safe:
/// - `arm_preemption` sets the flag *before* programming the STIMER MSR, and
///   `disarm_preemption` clears it *before* disabling the MSR. Every fire of
///   a live timer therefore sees the flag set, while a stale fire that lands
///   during disarm is ACKed without re-arming, so the MSR is never left armed
///   once the VP is handed back.
/// - In-VTL1 handlers run with IF clear, so an in-scope kernel-mode fire only
///   lands in the bounded init/reenter prologue, where the re-arm just
///   refreshes that prologue's quantum.
#[unsafe(no_mangle)]
extern "C" fn stimer_handler_impl(_regs: &PtRegs) {
    // ACK unconditionally: every delivered timer interrupt needs an EOI or the
    // APIC will not deliver further interrupts.
    super::timer::eoi();
    // `rearm_preemption` is itself a no-op unless the timer is configured and
    // `preemption_armed` is set, so no separate flag check is needed here.
    super::timer::rearm_preemption();
}

// Note: isr_hyperv_sint is defined in interrupts.S as a minimal stub that only
// performs iretq. This synthetic interrupt is an exception for VTL0 security
// violations (e.g., tampering with write-protected MSRs) delivered by Hyper-V
Expand Down
1 change: 1 addition & 0 deletions litebox_platform_lvbs/src/arch/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub mod interrupts;
pub mod ioport;
pub mod mm;
pub mod msr;
pub mod timer;

pub(crate) use x86_64::{
addr::{PhysAddr, VirtAddr},
Expand Down
243 changes: 243 additions & 0 deletions litebox_platform_lvbs/src/arch/x86/timer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

//! Hyper-V synthetic-timer (STIMER) preemption timer: forcefully terminates
//! runaway user-mode code in VTL1.
//!
//! VTL1 has no preemptive scheduler and VTL0 cannot interrupt VTL1, so
//! user-mode code that spins without returning holds the VP forever and freezes
//! VTL0 too. In lieu of a scheduler, VTL1 arms a VTL1-local Hyper-V synthetic
//! timer (STIMER0 in direct mode) that user-mode code cannot tamper with; on
//! expiry it fires `STIMER_VECTOR` and the shim terminates the offending
//! thread.
Comment on lines +11 to +12

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does the shim terminate the thread?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A user-mode STIMER interrupt is delivered to the OP-TEE shim, which terminates the thread. This PR adds no extra code for that: OP-TEE has no notion of interrupts, so its shim currently just terminates the thread whenever an interrupt arrives.

//!
//! The timer is armed when entering user-mode code (`arm_preemption`) and
//! disarmed at the VTL0-return boundary (`vtl_switch`). Disarming there is
//! the hard invariant: VTL1 never hands the VP back to VTL0 with the timer
//! live. The deadline spans a whole dispatch, so it bounds the *cumulative*
//! VTL1 residency of a dispatch that runs guest code: the timer stays armed
//! across the guest's syscalls and faults (VTL1's own kernel work is trusted
//! and bounded). This is what keeps VTL0 from tripping its CPU lockup watchdog.
//!
//! Direct mode injects `STIMER_VECTOR` straight into the local APIC, so the
//! usual fire path is an ordinary user-mode interrupt (ISR -> exception_callback
//! -> kill) with a rare in-kernel safety net (`interrupts::stimer_handler_impl`,
//! which re-arms via `rearm_preemption`).

use super::instrs::{rdmsr, wrmsr};
use crate::host::per_cpu_variables::with_per_cpu_variables;
use crate::mshv::{
HV_FEATURE_REFERENCE_COUNTER, HV_FEATURE_STIMER_DIRECT, HV_FEATURE_SYNTHETIC_TIMER,
HV_STIMER_CONFIG_DIRECT_MODE, HV_STIMER_CONFIG_ENABLE, HV_STIMER_CONFIG_VECTOR_SHIFT,
HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_STIMER0_COUNT, HV_X64_MSR_TIME_REF_COUNT,
HYPERV_CPUID_FEATURES, HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, HYPERV_HYPERVISOR_PRESENT_BIT,
};
use core::arch::x86_64::__cpuid_count as cpuid_count;

/// Vector the preemption timer fires on. Above the 0..31 exception range and
/// clear of the Hyper-V SINT vector (0xf3).
pub(crate) const STIMER_VECTOR: u8 = 0x40;

/// Vector the local APIC delivers for a *spurious* interrupt (programmed
/// into the SVR). `0xff` is conventional (top of range). Requires no EOI;
/// handled by the bare `iretq` stub `isr_spurious`.
pub(crate) const SPURIOUS_VECTOR: u8 = 0xff;

// Architectural x86 local-APIC (x2APIC) MSRs and the bit fields we use.
/// APIC base MSR; read and written by `enable_x2apic` to switch APIC modes.
const IA32_APIC_BASE: u32 = 0x1b;
/// `IA32_APIC_BASE` bit 11: xAPIC global enable.
const IA32_APIC_BASE_EN: u64 = 1 << 11; // xAPIC global enable
/// `IA32_APIC_BASE` bit 10: x2APIC mode enable.
const IA32_APIC_BASE_EXTD: u64 = 1 << 10; // x2APIC mode enable
/// x2APIC MSR for the Spurious Interrupt Vector Register (SVR).
const X2APIC_SVR: u32 = 0x80f; // Spurious Interrupt Vector Register
/// SVR bit 8: APIC software-enable.
const X2APIC_SVR_ENABLE: u64 = 1 << 8; // APIC software-enable
/// x2APIC end-of-interrupt MSR; `eoi` writes 0 to it.
const X2APIC_EOI: u32 = 0x80b; // End-of-interrupt (write 0)

// CPUID standard feature-information leaf (EAX=1) and the ECX bits we read.
/// CPUID leaf number passed in EAX to query standard feature information.
const CPUID_FEATURE_INFO: u32 = 1;
/// ECX bit 21 of the feature-information leaf: x2APIC supported.
const CPUID_FEATURE_INFO_ECX_X2APIC: u32 = 1 << 21;

/// Per-entry execution budget in microseconds (production builds). 8 s sits
/// under Linux's default 10 s hard-lockup watchdog, so VTL1 kills a runaway
/// guest and returns the VP before VTL0 declares its CPU locked, with margin
/// for the kill/return path.
#[cfg(not(feature = "preemption_test_quantum"))]
const QUANTUM_MICROS: u64 = 8_000_000; // 8 s

/// Tight budget, in microseconds, selected by the `preemption_test_quantum`
/// feature so a runaway-guest kill fires in ~10 ms. Test builds only.
#[cfg(feature = "preemption_test_quantum")]
const QUANTUM_MICROS: u64 = 10_000; // 10 ms

/// Partition reference counter granularity: 100 ns ticks, i.e., 10 per microsecond.
const REF_TICKS_PER_MICRO: u64 = 10;

/// Quantum as a reference-counter tick count (STIMER deadlines are in ticks):
/// 80_000_000 ticks for the 8 s production budget, 100_000 ticks for the
/// 10 ms test budget. `program_stimer_deadline` adds this to the current
/// reference count to form the deadline.
const QUANTUM_100NS: u64 = QUANTUM_MICROS * REF_TICKS_PER_MICRO;

// TODO: This backend is Hyper-V specific (STIMER direct mode). For non-Hyper-V
// platforms, add alternative one-shot timer sources behind the same
// arm/disarm/eoi interface and have `init` pick one per platform:
// - x86: the LAPIC TSC-deadline timer (deadline via the IA32_TSC_DEADLINE MSR,
// armed through the LVT timer in TSC-deadline mode, delivered to the same
// vector; x2APIC is already enabled here).
// - Arm: the architected generic timer (a CNTV/CNTP compare delivering a PPI
// via the GIC).

/// Set up the preemption timer on the calling CPU.
///
/// Runs two soft-failing steps in order: switch the local APIC into x2APIC
/// mode (needed to EOI the timer interrupt), then verify that the hypervisor
/// offers STIMER direct mode and park STIMER0 in a disabled state. On success
/// the per-CPU `preemption_timer_enabled` gate is set; on any failure the
/// reason is logged and preemption simply stays off for this CPU.
///
/// Call once per CPU after the IDT is loaded.
pub fn init() {
    // The direct-mode STIMER interrupt is acknowledged through the x2APIC EOI
    // register, so without a working x2APIC there is nothing to set up.
    let cpu_has_x2apic =
        cpuid_count(CPUID_FEATURE_INFO, 0x0).ecx & CPUID_FEATURE_INFO_ECX_X2APIC != 0;
    if !(cpu_has_x2apic && enable_x2apic()) {
        crate::serial_println!("preemption disabled: x2APIC unavailable");
        return;
    }

    if !init_stimer() {
        crate::serial_println!("preemption disabled: no STIMER direct-mode");
        return;
    }

    // Open the gate that arm/rearm/disarm consult before touching the MSRs.
    with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.set(true));
    crate::debug_serial_println!("STIMER direct-mode (quantum {QUANTUM_MICROS} us)");
}

/// Enable x2APIC mode (if not already) and software-enable the local APIC with
/// spurious vector [`SPURIOUS_VECTOR`].
///
/// Returns `false` if the x2APIC mode bit does not read back as set after the
/// switch was requested; returns `true` once the SVR has been programmed.
fn enable_x2apic() -> bool {
    // The low byte of the SVR holds the spurious vector number.
    const SVR_VECTOR_MASK: u64 = 0xff;

    let base = rdmsr(IA32_APIC_BASE);
    if base & IA32_APIC_BASE_EXTD == 0 {
        // The SDM requires enabling xAPIC (EN) before x2APIC (EXTD); writing both
        // from a fully-disabled APIC is a documented #GP, so set EN first.
        if base & IA32_APIC_BASE_EN == 0 {
            wrmsr(IA32_APIC_BASE, base | IA32_APIC_BASE_EN);
        }
        wrmsr(
            IA32_APIC_BASE,
            base | IA32_APIC_BASE_EN | IA32_APIC_BASE_EXTD,
        );
        // Read back to confirm the mode switch actually took effect.
        if rdmsr(IA32_APIC_BASE) & IA32_APIC_BASE_EXTD == 0 {
            return false;
        }
    }
    // Software-enable the APIC with spurious vector SPURIOUS_VECTOR. Clear the
    // old vector field first so the result is exactly SPURIOUS_VECTOR rather
    // than the bitwise OR of the old and new vectors; all other SVR bits are
    // preserved.
    let svr = rdmsr(X2APIC_SVR);
    wrmsr(
        X2APIC_SVR,
        (svr & !SVR_VECTOR_MASK) | X2APIC_SVR_ENABLE | u64::from(SPURIOUS_VECTOR),
    );
    true
}

/// Check that the hypervisor can drive the preemption timer and park STIMER0.
///
/// Requires, in order: the CPUID hypervisor-present bit, a maximum hypervisor
/// leaf that reaches the feature leaf, and the reference-counter,
/// synthetic-timer and STIMER-direct-mode feature bits. The raw feature leaf
/// is logged before the feature bits are tested. On success STIMER0 CONFIG is
/// zeroed (disabled) and `true` is returned; [`arm_preemption`] programs the
/// real configuration later. Returns `false` as soon as any requirement is
/// missing, without touching the STIMER MSRs.
fn init_stimer() -> bool {
    let hypervisor_present =
        cpuid_count(CPUID_FEATURE_INFO, 0x0).ecx & HYPERV_HYPERVISOR_PRESENT_BIT != 0;
    if !hypervisor_present {
        return false;
    }

    let max_hv_leaf = cpuid_count(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, 0x0).eax;
    if max_hv_leaf < HYPERV_CPUID_FEATURES {
        return false;
    }

    let leaf = cpuid_count(HYPERV_CPUID_FEATURES, 0x0);
    crate::debug_serial_println!(
        "HV feature leaf {HYPERV_CPUID_FEATURES:#x}: eax={:#010x} edx={:#010x}",
        leaf.eax,
        leaf.edx
    );

    let has_reference_counter = leaf.eax & HV_FEATURE_REFERENCE_COUNTER != 0;
    let has_synthetic_timer = leaf.eax & HV_FEATURE_SYNTHETIC_TIMER != 0;
    let has_direct_mode = leaf.edx & HV_FEATURE_STIMER_DIRECT != 0;
    if !(has_reference_counter && has_synthetic_timer && has_direct_mode) {
        return false;
    }

    // Known-disabled starting state; arm_preemption writes the full config.
    wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0);
    true
}

/// Point STIMER0 at a deadline one quantum past the current reference count
/// and enable it (direct mode, delivering `STIMER_VECTOR`).
///
/// COUNT is written first and CONFIG, which carries the Enable bit, second.
/// This helper does no gating of its own: the caller is responsible for the
/// `preemption_timer_enabled` check and for maintaining `preemption_armed`.
#[inline]
fn program_stimer_deadline() {
    let vector_field = u64::from(STIMER_VECTOR) << HV_STIMER_CONFIG_VECTOR_SHIFT;
    let config = vector_field | HV_STIMER_CONFIG_DIRECT_MODE | HV_STIMER_CONFIG_ENABLE;

    // Deadline in reference-counter ticks; wraps rather than overflowing.
    let deadline = rdmsr(HV_X64_MSR_TIME_REF_COUNT).wrapping_add(QUANTUM_100NS);

    wrmsr(HV_X64_MSR_STIMER0_COUNT, deadline);
    wrmsr(HV_X64_MSR_STIMER0_CONFIG, config);
}

/// Start the preemption timer for a VTL1 residency: one quantum from now.
///
/// Does nothing when the timer was never configured on this CPU, or when a
/// residency is already armed. In the latter case (a nested re-entry) the
/// in-flight deadline is left untouched, so the whole nested chain shares a
/// single quantum.
#[inline]
pub(crate) fn arm_preemption() {
    with_per_cpu_variables(|pcv| {
        let configured = pcv.preemption_timer_enabled.get();
        let already_armed = pcv.preemption_armed.get();
        if configured && !already_armed {
            // Flag first, MSR second: the timer can only fire once the MSR is
            // programmed, so any fire during this residency observes the flag
            // as set.
            pcv.preemption_armed.set(true);
            program_stimer_deadline();
        }
    });
}

/// Give the current residency a fresh deadline after a kernel-mode fire.
///
/// The timer is one-shot and disables itself on expiry, so the in-kernel
/// safety net (`interrupts::stimer_handler_impl`) calls this to re-program it
/// and let the entry/exit prologue the fire landed in run to completion.
/// Does nothing unless the timer is configured on this CPU *and* a residency
/// is currently armed.
#[inline]
pub(crate) fn rearm_preemption() {
    with_per_cpu_variables(|pcv| {
        let residency_armed = pcv.preemption_timer_enabled.get() && pcv.preemption_armed.get();
        if residency_armed {
            program_stimer_deadline();
        }
    });
}

/// Latch the per-CPU "user-mode code was killed by the preemption timer"
/// flag. The flag stays set until [`take_user_timeout_kill`] consumes it.
#[inline]
pub(crate) fn mark_user_timeout_kill() {
    with_per_cpu_variables(|pcv| {
        let killed_flag = &pcv.preemption_timeout_killed_user;
        killed_flag.set(true);
    });
}

/// Read and clear the per-CPU user-timeout kill flag.
///
/// Returns `true` if [`mark_user_timeout_kill`] was called on this CPU since
/// the last take; the flag is `false` afterwards either way.
#[inline]
pub(crate) fn take_user_timeout_kill() -> bool {
    // `Cell::replace` stores `false` and hands back the previous value.
    with_per_cpu_variables(|pcv| pcv.preemption_timeout_killed_user.replace(false))
}

/// Stop the preemption timer by zeroing STIMER0 CONFIG (which clears Enable)
/// before the VP is handed back to VTL0.
///
/// Called at the VTL0-return boundary (the `vtl_switch` loop). If the timer
/// is not configured on this CPU, or the dispatch never armed it (HVCI/HEKI),
/// the function returns without touching the MSR.
#[inline]
pub(crate) fn disarm_preemption() {
    with_per_cpu_variables(|pcv| {
        let timer_live = pcv.preemption_timer_enabled.get() && pcv.preemption_armed.get();
        if timer_live {
            // Flag first, MSR second: a stale fire that slips into this window
            // finds the flag clear and is ACKed without re-arming, so the MSR
            // cannot end up armed after this function returns.
            pcv.preemption_armed.set(false);
            wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0);
        }
    });
}

/// Acknowledge the in-service interrupt at the local APIC by writing 0 to the
/// x2APIC EOI MSR.
///
/// Every delivered preemption timer interrupt must be followed by this call;
/// otherwise the APIC will not deliver further interrupts.
#[inline]
pub(crate) fn eoi() {
    let ack: u64 = 0;
    wrmsr(X2APIC_EOI, ack);
}
8 changes: 8 additions & 0 deletions litebox_platform_lvbs/src/host/per_cpu_variables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ pub struct PerCpuVariables {
/// via `rdmsr(HV_REGISTER_VP_INDEX)` and immutable thereafter.
/// Uses `u32::MAX` as the "uninitialized" sentinel.
vp_index: Cell<u32>,
/// Set once this CPU's preemption timer is configured (see `arch::timer`).
/// Zero-initialized to `false`.
pub(crate) preemption_timer_enabled: Cell<bool>,
/// True while the preemption timer is armed (see `arch::timer`).
/// Zero-initialized to `false`.
pub(crate) preemption_armed: Cell<bool>,
/// Set when a preemption timer killed user-mode code.
pub(crate) preemption_timeout_killed_user: Cell<bool>,
}

// These Hyper-V pages must be page-aligned.
Expand Down
Loading