From e4329fc446618b63f77293c707497dcadc960a6b Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Wed, 10 Jun 2026 02:42:14 +0000 Subject: [PATCH 1/7] preemption timer for bounding TA execution --- litebox_platform_lvbs/Cargo.toml | 3 + .../src/arch/x86/interrupts.S | 20 ++ .../src/arch/x86/interrupts.rs | 24 ++ litebox_platform_lvbs/src/arch/x86/mod.rs | 1 + litebox_platform_lvbs/src/arch/x86/timer.rs | 225 ++++++++++++++++++ .../src/host/per_cpu_variables.rs | 6 + litebox_platform_lvbs/src/lib.rs | 24 +- litebox_platform_lvbs/src/mshv/mod.rs | 16 ++ litebox_runner_lvbs/Cargo.toml | 1 + litebox_runner_lvbs/src/lib.rs | 6 +- 10 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 litebox_platform_lvbs/src/arch/x86/timer.rs diff --git a/litebox_platform_lvbs/Cargo.toml b/litebox_platform_lvbs/Cargo.toml index 0c628a346..598f5d71a 100644 --- a/litebox_platform_lvbs/Cargo.toml +++ b/litebox_platform_lvbs/Cargo.toml @@ -46,6 +46,9 @@ default = ["optee_syscall"] optee_syscall = [] linux_syscall = [] devbox = [] +# Tighten the preemption timer quantum so runaway-TA preemption can be tested +# without waiting out the production budget. Test builds only. +preemption_test_quantum = [] [lints] workspace = true diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index e4a92e096..f69ec4832 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -192,6 +192,15 @@ isr_with_err_code isr_alignment_check alignment_check_handler_impl 17 /* Vector 19: SIMD Floating-Point Exception (#XM) - No error code */ isr_no_err_code isr_simd_floating_point simd_floating_point_handler_impl 19 +/* + * Vector 0x40: Hyper-V STIMER preemption timer - No error code + * + * Common case is a fire in USER mode (a TA overran its quantum): the macro's + * user-mode path routes it to exception_callback. The kernel path into + * stimer_handler_impl is a safety net only. 
See arch::timer. + */ +isr_no_err_code isr_stimer stimer_handler_impl 0x40 + /* * Hypervisor synthetic interrupt handler (vector 0xf3) * @@ -207,3 +216,14 @@ isr_no_err_code isr_simd_floating_point simd_floating_point_handler_impl 19 .global isr_hyperv_sint isr_hyperv_sint: iretq + +/* + * Spurious interrupt handler (vector 0xff, programmed into the SVR) + * + * Delivered when the APIC raised an interrupt that vanished before the core + * acknowledged it. It carries no work and takes no EOI (the Intel SDM), so + * a bare iretq (no registers touched). See arch::timer (SPURIOUS_VECTOR). + */ +.global isr_spurious +isr_spurious: + iretq diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 4b031b245..1ecec445f 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -16,6 +16,7 @@ //! - **MCE (Vector 18)**: Machine Check Exceptions are delivered to VTL0 and handled //! by the VTL0 kernel. VTL1 does not receive MCEs. 
+use super::timer::{SPURIOUS_VECTOR, STIMER_VECTOR}; use crate::mshv::HYPERVISOR_CALLBACK_VECTOR; use core::ops::IndexMut; use litebox_common_linux::PtRegs; @@ -42,6 +43,8 @@ unsafe extern "C" { fn isr_alignment_check(); fn isr_simd_floating_point(); fn isr_hyperv_sint(); + fn isr_stimer(); + fn isr_spurious(); } const DOUBLE_FAULT_IST_INDEX: u16 = 0; @@ -87,6 +90,10 @@ fn idt() -> &'static InterruptDescriptorTable { .set_handler_addr(VirtAddr::from_ptr(isr_simd_floating_point as *const ())); idt.index_mut(HYPERVISOR_CALLBACK_VECTOR) .set_handler_addr(VirtAddr::from_ptr(isr_hyperv_sint as *const ())); + idt.index_mut(STIMER_VECTOR) + .set_handler_addr(VirtAddr::from_ptr(isr_stimer as *const ())); + idt.index_mut(SPURIOUS_VECTOR) + .set_handler_addr(VirtAddr::from_ptr(isr_spurious as *const ())); } idt }) @@ -192,6 +199,23 @@ extern "C" fn simd_floating_point_handler_impl(regs: &PtRegs) { panic!("EXCEPTION: SIMD FLOATING-POINT ERROR\n{regs:#x?}"); } +/// Safety net for an STIMER preemption-timer fire delivered in *kernel* mode +/// (vector 0x40); the common case fires in user mode and routes to +/// `exception_callback`. Always EOI, but re-arm only while a TA is in scope +/// (`is_in_user`)---e.g., an expiry in the IF-enabled shim init/reenter window. +/// Out of scope the fire is stale (latched while interrupts were disabled and +/// delivered after the TA exited and `scoped` disarmed). Re-arming would leave a +/// wall-clock timer counting with no TA, which Hyper-V later delivers via a +/// spurious VTL switch into VTL1. +#[unsafe(no_mangle)] +extern "C" fn stimer_handler_impl(_regs: &PtRegs) { + use crate::host::per_cpu_variables::with_per_cpu_variables; + super::timer::eoi(); + if with_per_cpu_variables(|pcv| pcv.asm.is_in_user()) { + super::timer::arm_preemption(); + } +} + // Note: isr_hyperv_sint is defined in interrupts.S as a minimal stub that only // performs iretq. 
This synthetic interrupt is an exception for VTL0 security // violations (e.g., tampering with write-protected MSRs) delivered by Hyper-V diff --git a/litebox_platform_lvbs/src/arch/x86/mod.rs b/litebox_platform_lvbs/src/arch/x86/mod.rs index 26b744658..9a58b6b8d 100644 --- a/litebox_platform_lvbs/src/arch/x86/mod.rs +++ b/litebox_platform_lvbs/src/arch/x86/mod.rs @@ -7,6 +7,7 @@ pub mod interrupts; pub mod ioport; pub mod mm; pub mod msr; +pub mod timer; pub(crate) use x86_64::{ addr::{PhysAddr, VirtAddr}, diff --git a/litebox_platform_lvbs/src/arch/x86/timer.rs b/litebox_platform_lvbs/src/arch/x86/timer.rs new file mode 100644 index 000000000..88e7a0ec9 --- /dev/null +++ b/litebox_platform_lvbs/src/arch/x86/timer.rs @@ -0,0 +1,225 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//! Hyper-V synthetic-timer (STIMER) preemption timer: VTL1 preempts a runaway TA. +//! +//! VTL0 cannot interrupt VTL1 and OP-TEE has no scheduler, so a TA that +//! spins without returning would hold the VP forever and freeze VTL0 too. +//! VTL1 arms a VTL1-local Hyper-V synthetic timer (STIMER0 in direct mode) +//! that the TA cannot tamper with; on expiry it is delivered as +//! `STIMER_VECTOR` and the shim kills the TA with +//! `TEE_ERROR_TARGET_DEAD`. +//! +//! `scoped` brackets a whole TA entry: arm before `run_thread_arch`, +//! disarm once it fully returns to the VTL1 kernel: i.e., after the TA has +//! left ring 3, and before the VP is handed back to VTL0. The timer stays +//! armed across the TA's syscalls and faults (VTL1's own kernel work is +//! trusted and bounded), so it bounds the *cumulative* time the VP is held +//! in VTL1 per entry, which is what keeps VTL0 from tripping its +//! CPU lockup watchdog. +//! +//! Direct mode injects `STIMER_VECTOR` straight into the local APIC, so +//! the usual fire path is an ordinary user-mode interrupt (ISR -> +//! exception_callback -> kill) with a rare in-kernel safety net +//! 
(`interrupts::stimer_handler_impl`). + +use super::instrs::{rdmsr, wrmsr}; +use crate::host::per_cpu_variables::with_per_cpu_variables; +use crate::mshv::{ + HV_FEATURE_REFERENCE_COUNTER, HV_FEATURE_STIMER_DIRECT, HV_FEATURE_SYNTHETIC_TIMER, + HV_STIMER_CONFIG_DIRECT_MODE, HV_STIMER_CONFIG_ENABLE, HV_STIMER_CONFIG_VECTOR_SHIFT, + HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_STIMER0_COUNT, HV_X64_MSR_TIME_REF_COUNT, + HYPERV_CPUID_FEATURES, HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, HYPERV_HYPERVISOR_PRESENT_BIT, +}; + +/// Vector the preemption timer fires on. Above the 0..31 exception range and +/// clear of the Hyper-V SINT vector (0xf3). +pub(crate) const STIMER_VECTOR: u8 = 0x40; + +/// Vector the local APIC delivers for a *spurious* interrupt (programmed +/// into the SVR). `0xff` is conventional (top of range). Requires no EOI; +/// handled by the bare `iretq` stub `isr_spurious`. +pub(crate) const SPURIOUS_VECTOR: u8 = 0xff; + +// Architectural x86 local-APIC (x2APIC) MSRs and the bit fields we use. +const IA32_APIC_BASE: u32 = 0x1b; +const IA32_APIC_BASE_EN: u64 = 1 << 11; // xAPIC global enable +const IA32_APIC_BASE_EXTD: u64 = 1 << 10; // x2APIC mode enable +const X2APIC_SVR: u32 = 0x80f; // Spurious Interrupt Vector Register +const X2APIC_SVR_ENABLE: u64 = 1 << 8; // APIC software-enable +const X2APIC_EOI: u32 = 0x80b; // End-of-interrupt (write 0) + +// CPUID standard feature-information leaf (EAX=1) and the ECX bits we read. +const CPUID_FEATURE_INFO: u32 = 1; +const CPUID_FEATURE_INFO_ECX_X2APIC: u32 = 1 << 21; + +/// Per-entry execution budget in microseconds. 8 s sits under Linux's default +/// 10 s hard-lockup watchdog, so VTL1 kills a runaway TA and returns the VP +/// before VTL0 declares its CPU locked, with margin for the kill/return path. +#[cfg(not(feature = "preemption_test_quantum"))] +const QUANTUM_MICROS: u64 = 8_000_000; // 8 s + +/// Tight budget under the `preemption_test_quantum` feature so a runaway-TA +/// kill fires in ~10 ms. 
Test builds only. +#[cfg(feature = "preemption_test_quantum")] +const QUANTUM_MICROS: u64 = 10_000; // 10 ms + +/// Partition reference counter granularity: 100 ns ticks, i.e., 10 per microsecond. +const REF_TICKS_PER_MICRO: u64 = 10; + +/// Quantum as a reference-counter tick count (STIMER deadlines are in ticks). +const QUANTUM_100NS: u64 = QUANTUM_MICROS * REF_TICKS_PER_MICRO; + +// TODO: This backend is Hyper-V specific (STIMER direct mode). For non-Hyper-V +// platforms, add alternative one-shot timer sources behind the same +// arm/disarm/is_armed/eoi interface and have `init` pick one per platform: +// - x86: the LAPIC TSC-deadline timer (deadline via the IA32_TSC_DEADLINE MSR, +// armed through the LVT timer in TSC-deadline mode, delivered to the same +// vector; x2APIC is already enabled here). +// - Arm: the architected generic timer (a CNTV/CNTP compare delivering a PPI +// via the GIC). + +/// Configure the preemption timer on the current CPU: enable x2APIC (for EOI) +/// and, if the hypervisor advertises STIMER direct mode, prepare STIMER0. +/// Idempotent and per-CPU; leaves the timer disabled (logged) rather than +/// crashing if any step is unsupported. +/// +/// Call once per CPU after the IDT is loaded. +pub fn init() { + use core::arch::x86_64::__cpuid; + + let leaf1 = __cpuid(CPUID_FEATURE_INFO); + // x2APIC software-enable is needed to EOI the direct-mode STIMER interrupt. 
+ if leaf1.ecx & CPUID_FEATURE_INFO_ECX_X2APIC == 0 || !enable_x2apic() { + crate::serial_println!("preemption disabled: x2APIC unavailable"); + return; + } + + if leaf1.ecx & HYPERV_HYPERVISOR_PRESENT_BIT != 0 + && __cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS).eax >= HYPERV_CPUID_FEATURES + { + let feat = __cpuid(HYPERV_CPUID_FEATURES); + crate::debug_serial_println!( + "HV feature leaf {HYPERV_CPUID_FEATURES:#x}: eax={:#010x} edx={:#010x}", + feat.eax, + feat.edx + ); + } else { + crate::serial_println!("no Hyper-V timer-capability leaf"); + } + + if init_stimer() { + with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.set(true)); + crate::debug_serial_println!("STIMER direct-mode (quantum {QUANTUM_MICROS} us)"); + } else { + crate::serial_println!("preemption disabled: no STIMER direct-mode"); + } +} + +/// Enable x2APIC mode (if not already) and software-enable the local APIC with +/// spurious vector [`SPURIOUS_VECTOR`]. Returns `false` if x2APIC did not enable. +fn enable_x2apic() -> bool { + let apic_base = rdmsr(IA32_APIC_BASE); + if apic_base & IA32_APIC_BASE_EXTD == 0 { + wrmsr( + IA32_APIC_BASE, + apic_base | IA32_APIC_BASE_EN | IA32_APIC_BASE_EXTD, + ); + if rdmsr(IA32_APIC_BASE) & IA32_APIC_BASE_EXTD == 0 { + return false; + } + } + // Software-enable the APIC with spurious vector SPURIOUS_VECTOR. + let svr = rdmsr(X2APIC_SVR); + wrmsr( + X2APIC_SVR, + svr | X2APIC_SVR_ENABLE | u64::from(SPURIOUS_VECTOR), + ); + true +} + +/// True if the hypervisor advertises everything STIMER needs. 
+fn stimer_direct_available() -> bool { + use core::arch::x86_64::__cpuid; + if __cpuid(CPUID_FEATURE_INFO).ecx & HYPERV_HYPERVISOR_PRESENT_BIT == 0 + || __cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS).eax < HYPERV_CPUID_FEATURES + { + return false; + } + let feat = __cpuid(HYPERV_CPUID_FEATURES); + feat.eax & HV_FEATURE_REFERENCE_COUNTER != 0 + && feat.eax & HV_FEATURE_SYNTHETIC_TIMER != 0 + && feat.edx & HV_FEATURE_STIMER_DIRECT != 0 +} + +/// Prepare STIMER0: verify capabilities and leave it disabled (armed later via +/// [`arm_preemption`]). Returns `false` if unsupported. +fn init_stimer() -> bool { + if !stimer_direct_available() { + return false; + } + // Known-disabled starting state; arm_preemption writes the full config. + wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); + true +} + +/// Arm the preemption timer to fire one quantum from now. Normally driven by +/// [`scoped`]; also re-armed by the kernel-mode-fire safety net +/// (`interrupts::stimer_handler_impl`). No-op if STIMER is not configured. +#[inline] +pub(crate) fn arm_preemption() { + if !with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.get()) { + return; + } + // One-shot at reference-now + quantum; write COUNT before CONFIG (Enable). + let now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + wrmsr(HV_X64_MSR_STIMER0_COUNT, now.wrapping_add(QUANTUM_100NS)); + let cfg = HV_STIMER_CONFIG_ENABLE + | HV_STIMER_CONFIG_DIRECT_MODE + | (u64::from(STIMER_VECTOR) << HV_STIMER_CONFIG_VECTOR_SHIFT); + wrmsr(HV_X64_MSR_STIMER0_CONFIG, cfg); +} + +/// Run `f` with the preemption timer armed, disarming when it returns. +/// The single arm/disarm pairing; used to bracket a TA entry (see the module doc). +#[inline] +pub(crate) fn scoped<R>(f: impl FnOnce() -> R) -> R { + /// Disarms on drop so an early return cannot leave the timer live.
+ struct Disarm; + impl Drop for Disarm { + fn drop(&mut self) { + disarm_preemption(); + } + } + + arm_preemption(); + let _disarm = Disarm; + f() +} + +/// Disarm the preemption timer (clear STIMER0 CONFIG.Enable). Only +/// [`scoped`]'s drop guard disarms. No-op if STIMER is not configured. +#[inline] +fn disarm_preemption() { + if !with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.get()) { + return; + } + wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); +} + +/// True if STIMER0 is still armed and has not fired. A one-shot STIMER +/// auto-clears Enable on fire, so false means it fired or was never armed. +#[inline] +pub(crate) fn is_armed() -> bool { + if !with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.get()) { + return false; + } + rdmsr(HV_X64_MSR_STIMER0_CONFIG) & HV_STIMER_CONFIG_ENABLE != 0 +} + +/// Signal end-of-interrupt to the local APIC. Must be called for every delivered +/// preemption timer interrupt or the APIC will not deliver further interrupts. +#[inline] +pub(crate) fn eoi() { + wrmsr(X2APIC_EOI, 0); +} diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index fc618cf3e..d9929cd6a 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -55,6 +55,9 @@ pub struct PerCpuVariables { /// via `rdmsr(HV_REGISTER_VP_INDEX)` and immutable thereafter. /// Uses `u32::MAX` as the "uninitialized" sentinel. vp_index: Cell<u32>, + /// Set once this CPU's preemption timer is configured (see `arch::timer`). + /// Zero-initialized to `false`. + pub(crate) preemption_timer_enabled: Cell<bool>, } // These Hyper-V pages must be page-aligned.
@@ -408,6 +411,9 @@ impl PerCpuVariablesAsm { pub const fn is_in_user_offset() -> usize { offset_of!(PerCpuVariablesAsm, is_in_user) } + pub(crate) fn is_in_user(&self) -> bool { + self.is_in_user.get() != 0 + } pub fn get_exception(&self) -> litebox::shim::Exception { litebox::shim::Exception(self.exception_trapno.get()) } diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index e41f43ea0..d3755644c 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1632,7 +1632,13 @@ fn run_thread_inner( // `thread_ctx` will be passed to `syscall_handler` later. // `ctx_ptr` is to let `run_thread_arch` easily access `ctx` (i.e., not to deal with // member variable offset calculation in assembly code). - unsafe { run_thread_arch(&mut thread_ctx, ctx_ptr, u8::from(reenter)) }; + // + // Bracket the whole TA entry with the preemption timer. `run_thread_arch` + // returns exactly once, so the deadline spans all of the TA's user execution and + // in-VTL1 syscall/fault handling, bounding cumulative VTL1 occupancy. + crate::arch::timer::scoped(|| unsafe { + run_thread_arch(&mut thread_ctx, ctx_ptr, u8::from(reenter)); + }); } /// Save callee-saved registers onto the stack. @@ -2141,6 +2147,21 @@ unsafe extern "C" fn exception_handler( kernel_mode: false, } }; + // The preemption timer fires as a user-mode interrupt (STIMER_VECTOR). + if !kernel_mode && info.exception.0 == crate::arch::timer::STIMER_VECTOR { + let still_armed = crate::arch::timer::is_armed(); + crate::arch::timer::eoi(); + if still_armed { + // A stale, latched fire from prior quantum, not a real timeout. + // Resume on the still-armed timer. + if is_valid_user_ctx(thread_ctx.ctx) { + unsafe { switch_to_user(thread_ctx.ctx) } + } + return 0; + } + // Genuine timeout: fall through to the shim, which kills the TA. 
+ crate::serial_println!("TA exceeded its execution quantum; terminating"); + } match thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)) { ContinueOperation::Resume => { if kernel_mode { @@ -2203,6 +2224,7 @@ unsafe extern "C" fn switch_to_user(_ctx: &litebox_common_linux::PtRegs) -> ! { #[rustfmt::skip] core::arch::naked_asm!( "switch_to_user_start:", + "cli", // Flush TLB by reloading CR3 "mov rax, cr3", "mov cr3, rax", diff --git a/litebox_platform_lvbs/src/mshv/mod.rs b/litebox_platform_lvbs/src/mshv/mod.rs index 826daaa39..a055c7225 100644 --- a/litebox_platform_lvbs/src/mshv/mod.rs +++ b/litebox_platform_lvbs/src/mshv/mod.rs @@ -56,13 +56,29 @@ pub const HV_X64_MSR_SIMP: u32 = 0x_4000_0083; pub const HV_X64_MSR_SIMP_ENABLE: u32 = 0x_0000_0001; pub const HV_X64_MSR_SINT0: u32 = 0x_4000_0090; +// Partition reference counter and synthetic timer 0 (STIMER0). +pub const HV_X64_MSR_TIME_REF_COUNT: u32 = 0x_4000_0020; +pub const HV_X64_MSR_STIMER0_CONFIG: u32 = 0x_4000_00b0; +pub const HV_X64_MSR_STIMER0_COUNT: u32 = 0x_4000_00b1; + +// `HV_X64_MSR_STIMERn_CONFIG` bit layout (Periodic bit 1 left 0 => one-shot). +pub const HV_STIMER_CONFIG_ENABLE: u64 = 1 << 0; +pub const HV_STIMER_CONFIG_DIRECT_MODE: u64 = 1 << 12; +pub const HV_STIMER_CONFIG_VECTOR_SHIFT: u32 = 4; // ApicVector occupies bits 4..=11 + pub const HYPERVISOR_CALLBACK_VECTOR: u8 = 0xf3; pub const HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: u32 = 0x_4000_0000; pub const HYPERV_CPUID_INTERFACE: u32 = 0x_4000_0001; +pub const HYPERV_CPUID_FEATURES: u32 = 0x_4000_0003; pub const HYPERV_CPUID_IMPLEMENT_LIMITS: u32 = 0x_4000_0005; pub const HYPERV_HYPERVISOR_PRESENT_BIT: u32 = 0x_8000_0000; +// `HYPERV_CPUID_FEATURES` partition privilege / feature bits. 
+pub const HV_FEATURE_REFERENCE_COUNTER: u32 = 1 << 1; // EAX[1] +pub const HV_FEATURE_SYNTHETIC_TIMER: u32 = 1 << 3; // EAX[3] +pub const HV_FEATURE_STIMER_DIRECT: u32 = 1 << 19; // EDX[19] + pub const HV_PARTITION_ID_SELF: u64 = u64::MAX; pub const HV_VP_INDEX_SELF: u32 = u32::MAX - 1; diff --git a/litebox_runner_lvbs/Cargo.toml b/litebox_runner_lvbs/Cargo.toml index f5fc10f79..a1d789429 100644 --- a/litebox_runner_lvbs/Cargo.toml +++ b/litebox_runner_lvbs/Cargo.toml @@ -21,6 +21,7 @@ x86_64 = { version = "0.15.2", default-features = false, features = ["instructio [features] devbox = ["litebox_platform_lvbs/devbox"] +preemption_test_quantum = ["litebox_platform_lvbs/preemption_test_quantum"] [lints] workspace = true diff --git a/litebox_runner_lvbs/src/lib.rs b/litebox_runner_lvbs/src/lib.rs index 0ae319e59..b8fa2016b 100644 --- a/litebox_runner_lvbs/src/lib.rs +++ b/litebox_runner_lvbs/src/lib.rs @@ -20,7 +20,7 @@ use litebox_common_optee::{ OpteeSmcReturnCode, TeeOrigin, TeeResult, UteeEntryFunc, UteeParams, optee_msg_args_total_size, }; use litebox_platform_lvbs::{ - arch::{gdt, instrs::hlt_loop, interrupts}, + arch::{gdt, instrs::hlt_loop, interrupts, timer}, debug_serial_println, host::{bootparam::get_vtl1_memory_info, per_cpu_variables}, mm::MemoryProvider, @@ -225,6 +225,10 @@ pub fn init(is_bsp: bool) -> Option<&'static Platform> { x86_64::instructions::interrupts::enable(); Platform::enable_syscall_support(); + // Configure this CPU's STIMER preemption timer (VTL1 self-preemption). + // Per-CPU; safe to call on BSP and APs. 
+ timer::init(); + ret } From 1186f1559f20d8db51537cffbd40c1cd28113e8d Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Thu, 11 Jun 2026 04:20:53 +0000 Subject: [PATCH 2/7] simplified --- .../src/arch/x86/interrupts.rs | 18 +++--- litebox_platform_lvbs/src/arch/x86/timer.rs | 63 +++++++------------ litebox_platform_lvbs/src/lib.rs | 13 +--- 3 files changed, 35 insertions(+), 59 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 1ecec445f..a4f2c8f8f 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -199,19 +199,21 @@ extern "C" fn simd_floating_point_handler_impl(regs: &PtRegs) { panic!("EXCEPTION: SIMD FLOATING-POINT ERROR\n{regs:#x?}"); } -/// Safety net for an STIMER preemption-timer fire delivered in *kernel* mode -/// (vector 0x40); the common case fires in user mode and routes to -/// `exception_callback`. Always EOI, but re-arm only while a TA is in scope -/// (`is_in_user`)---e.g., an expiry in the IF-enabled shim init/reenter window. -/// Out of scope the fire is stale (latched while interrupts were disabled and -/// delivered after the TA exited and `scoped` disarmed). Re-arming would leave a -/// wall-clock timer counting with no TA, which Hyper-V later delivers via a -/// spurious VTL switch into VTL1. +/// Handles an STIMER preemption-timer fire delivered in *kernel* mode (vector +/// 0x40); the common case fires in user mode (`exception_callback`). EOI +/// always; re-arm only while a TA is in scope (`is_in_user`) - out of scope +/// there is no TA to bound. +/// +/// In-scope kernel-mode fires only occur in the bounded shim init/reenter +/// prologue (other in-VTL1 handlers keep IF clear via `SFMASK` / exception +/// gates), so the re-arm cannot let a runaway TA reset its quantum — load-bearing +/// for the cumulative bound in `arch::timer`. 
#[unsafe(no_mangle)] extern "C" fn stimer_handler_impl(_regs: &PtRegs) { use crate::host::per_cpu_variables::with_per_cpu_variables; super::timer::eoi(); if with_per_cpu_variables(|pcv| pcv.asm.is_in_user()) { + crate::debug_serial_println!("preemption timer fired in kernel mode (in scope)"); super::timer::arm_preemption(); } } diff --git a/litebox_platform_lvbs/src/arch/x86/timer.rs b/litebox_platform_lvbs/src/arch/x86/timer.rs index 88e7a0ec9..712128207 100644 --- a/litebox_platform_lvbs/src/arch/x86/timer.rs +++ b/litebox_platform_lvbs/src/arch/x86/timer.rs @@ -72,7 +72,7 @@ const QUANTUM_100NS: u64 = QUANTUM_MICROS * REF_TICKS_PER_MICRO; // TODO: This backend is Hyper-V specific (STIMER direct mode). For non-Hyper-V // platforms, add alternative one-shot timer sources behind the same -// arm/disarm/is_armed/eoi interface and have `init` pick one per platform: +// arm/disarm/eoi interface and have `init` pick one per platform: // - x86: the LAPIC TSC-deadline timer (deadline via the IA32_TSC_DEADLINE MSR, // armed through the LVT timer in TSC-deadline mode, delivered to the same // vector; x2APIC is already enabled here). @@ -88,26 +88,12 @@ const QUANTUM_100NS: u64 = QUANTUM_MICROS * REF_TICKS_PER_MICRO; pub fn init() { use core::arch::x86_64::__cpuid; - let leaf1 = __cpuid(CPUID_FEATURE_INFO); // x2APIC software-enable is needed to EOI the direct-mode STIMER interrupt. 
- if leaf1.ecx & CPUID_FEATURE_INFO_ECX_X2APIC == 0 || !enable_x2apic() { + if __cpuid(CPUID_FEATURE_INFO).ecx & CPUID_FEATURE_INFO_ECX_X2APIC == 0 || !enable_x2apic() { crate::serial_println!("preemption disabled: x2APIC unavailable"); return; } - if leaf1.ecx & HYPERV_HYPERVISOR_PRESENT_BIT != 0 - && __cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS).eax >= HYPERV_CPUID_FEATURES - { - let feat = __cpuid(HYPERV_CPUID_FEATURES); - crate::debug_serial_println!( - "HV feature leaf {HYPERV_CPUID_FEATURES:#x}: eax={:#010x} edx={:#010x}", - feat.eax, - feat.edx - ); - } else { - crate::serial_println!("no Hyper-V timer-capability leaf"); - } - if init_stimer() { with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.set(true)); crate::debug_serial_println!("STIMER direct-mode (quantum {QUANTUM_MICROS} us)"); @@ -119,11 +105,16 @@ pub fn init() { /// Enable x2APIC mode (if not already) and software-enable the local APIC with /// spurious vector [`SPURIOUS_VECTOR`]. Returns `false` if x2APIC did not enable. fn enable_x2apic() -> bool { - let apic_base = rdmsr(IA32_APIC_BASE); - if apic_base & IA32_APIC_BASE_EXTD == 0 { + let base = rdmsr(IA32_APIC_BASE); + if base & IA32_APIC_BASE_EXTD == 0 { + // The SDM requires enabling xAPIC (EN) before x2APIC (EXTD); writing both + // from a fully-disabled APIC is a documented #GP, so set EN first. + if base & IA32_APIC_BASE_EN == 0 { + wrmsr(IA32_APIC_BASE, base | IA32_APIC_BASE_EN); + } wrmsr( IA32_APIC_BASE, - apic_base | IA32_APIC_BASE_EN | IA32_APIC_BASE_EXTD, + base | IA32_APIC_BASE_EN | IA32_APIC_BASE_EXTD, ); if rdmsr(IA32_APIC_BASE) & IA32_APIC_BASE_EXTD == 0 { return false; @@ -138,8 +129,10 @@ fn enable_x2apic() -> bool { true } -/// True if the hypervisor advertises everything STIMER needs. -fn stimer_direct_available() -> bool { +/// Verify STIMER capabilities (reference counter, synthetic-timer MSRs, direct +/// mode), log the raw feature leaf, and leave STIMER0 disabled (armed later via +/// [`arm_preemption`]). 
Returns `false` if any capability is missing. +fn init_stimer() -> bool { use core::arch::x86_64::__cpuid; if __cpuid(CPUID_FEATURE_INFO).ecx & HYPERV_HYPERVISOR_PRESENT_BIT == 0 || __cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS).eax < HYPERV_CPUID_FEATURES @@ -147,15 +140,15 @@ fn stimer_direct_available() -> bool { return false; } let feat = __cpuid(HYPERV_CPUID_FEATURES); - feat.eax & HV_FEATURE_REFERENCE_COUNTER != 0 - && feat.eax & HV_FEATURE_SYNTHETIC_TIMER != 0 - && feat.edx & HV_FEATURE_STIMER_DIRECT != 0 -} - -/// Prepare STIMER0: verify capabilities and leave it disabled (armed later via -/// [`arm_preemption`]). Returns `false` if unsupported. -fn init_stimer() -> bool { - if !stimer_direct_available() { + crate::debug_serial_println!( + "HV feature leaf {HYPERV_CPUID_FEATURES:#x}: eax={:#010x} edx={:#010x}", + feat.eax, + feat.edx + ); + if feat.eax & HV_FEATURE_REFERENCE_COUNTER == 0 + || feat.eax & HV_FEATURE_SYNTHETIC_TIMER == 0 + || feat.edx & HV_FEATURE_STIMER_DIRECT == 0 + { return false; } // Known-disabled starting state; arm_preemption writes the full config. @@ -207,16 +200,6 @@ fn disarm_preemption() { wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); } -/// True if STIMER0 is still armed and has not fired. A one-shot STIMER -/// auto-clears Enable on fire, so false means it fired or was never armed. -#[inline] -pub(crate) fn is_armed() -> bool { - if !with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.get()) { - return false; - } - rdmsr(HV_X64_MSR_STIMER0_CONFIG) & HV_STIMER_CONFIG_ENABLE != 0 -} - /// Signal end-of-interrupt to the local APIC. Must be called for every delivered /// preemption timer interrupt or the APIC will not deliver further interrupts. 
#[inline] diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index d3755644c..2b30204c1 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -2147,19 +2147,10 @@ unsafe extern "C" fn exception_handler( kernel_mode: false, } }; - // The preemption timer fires as a user-mode interrupt (STIMER_VECTOR). + // A user-mode STIMER_VECTOR fire is the preemption timeout: EOI it and fall + // through to the shim, which kills the TA with TEE_ERROR_TARGET_DEAD. if !kernel_mode && info.exception.0 == crate::arch::timer::STIMER_VECTOR { - let still_armed = crate::arch::timer::is_armed(); crate::arch::timer::eoi(); - if still_armed { - // A stale, latched fire from prior quantum, not a real timeout. - // Resume on the still-armed timer. - if is_valid_user_ctx(thread_ctx.ctx) { - unsafe { switch_to_user(thread_ctx.ctx) } - } - return 0; - } - // Genuine timeout: fall through to the shim, which kills the TA. crate::serial_println!("TA exceeded its execution quantum; terminating"); } match thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)) { From 57397b81f8d700ea426a7ddd88aed96f3ff0a662 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Thu, 11 Jun 2026 16:11:17 +0000 Subject: [PATCH 3/7] close potential re-arm gap --- .../src/arch/x86/interrupts.rs | 20 +++++----- litebox_platform_lvbs/src/arch/x86/timer.rs | 38 ++++++++++++------- .../src/host/per_cpu_variables.rs | 6 +-- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index a4f2c8f8f..e6482090e 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -199,20 +199,22 @@ extern "C" fn simd_floating_point_handler_impl(regs: &PtRegs) { panic!("EXCEPTION: SIMD FLOATING-POINT ERROR\n{regs:#x?}"); } -/// Handles an STIMER preemption-timer fire delivered in *kernel* mode (vector -/// 0x40); 
the common case fires in user mode (`exception_callback`). EOI -/// always; re-arm only while a TA is in scope (`is_in_user`) - out of scope -/// there is no TA to bound. +/// Handles an STIMER preemption-timer fire delivered in kernel mode (vector +/// 0x40); the common case fires in user mode (`exception_callback`). +/// Re-arm only while the `preemption_armed` flag is set. A stale fire +/// (the flag is clear) is just ACKed. /// -/// In-scope kernel-mode fires only occur in the bounded shim init/reenter -/// prologue (other in-VTL1 handlers keep IF clear via `SFMASK` / exception -/// gates), so the re-arm cannot let a runaway TA reset its quantum — load-bearing -/// for the cumulative bound in `arch::timer`. +/// Two invariants keep the re-arm safe: `arm`/`disarm` set the flag before / +/// clear it after the STIMER MSR, so no fire leaves the timer disarmed while +/// the preemption target (i.e., user-mode code) keeps running. In-VTL1 +/// handlers run with IF clear, so an in-scope kernel-mode fire only lands +/// in the bounded init/reenter prologue, where the re-arm just refreshes +/// that prologue's quantum. #[unsafe(no_mangle)] extern "C" fn stimer_handler_impl(_regs: &PtRegs) { use crate::host::per_cpu_variables::with_per_cpu_variables; super::timer::eoi(); - if with_per_cpu_variables(|pcv| pcv.asm.is_in_user()) { + if with_per_cpu_variables(|pcv| pcv.preemption_armed.get()) { crate::debug_serial_println!("preemption timer fired in kernel mode (in scope)"); super::timer::arm_preemption(); } diff --git a/litebox_platform_lvbs/src/arch/x86/timer.rs b/litebox_platform_lvbs/src/arch/x86/timer.rs index 712128207..69145b4fc 100644 --- a/litebox_platform_lvbs/src/arch/x86/timer.rs +++ b/litebox_platform_lvbs/src/arch/x86/timer.rs @@ -161,16 +161,21 @@ fn init_stimer() -> bool { /// (`interrupts::stimer_handler_impl`). No-op if STIMER is not configured. 
#[inline] pub(crate) fn arm_preemption() { - if !with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.get()) { - return; - } - // One-shot at reference-now + quantum; write COUNT before CONFIG (Enable). - let now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - wrmsr(HV_X64_MSR_STIMER0_COUNT, now.wrapping_add(QUANTUM_100NS)); - let cfg = HV_STIMER_CONFIG_ENABLE - | HV_STIMER_CONFIG_DIRECT_MODE - | (u64::from(STIMER_VECTOR) << HV_STIMER_CONFIG_VECTOR_SHIFT); - wrmsr(HV_X64_MSR_STIMER0_CONFIG, cfg); + with_per_cpu_variables(|pcv| { + if !pcv.preemption_timer_enabled.get() { + return; + } + // Mark armed *before* touching the MSR: a fire is only possible once the + // MSR is armed, so this guarantees every in-entry fire sees the flag set. + pcv.preemption_armed.set(true); + // One-shot at reference-now + quantum; write COUNT before CONFIG (Enable). + let now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + wrmsr(HV_X64_MSR_STIMER0_COUNT, now.wrapping_add(QUANTUM_100NS)); + let cfg = HV_STIMER_CONFIG_ENABLE + | HV_STIMER_CONFIG_DIRECT_MODE + | (u64::from(STIMER_VECTOR) << HV_STIMER_CONFIG_VECTOR_SHIFT); + wrmsr(HV_X64_MSR_STIMER0_CONFIG, cfg); + }); } /// Run `f` with the preemption timer armed, disarming when it returns. @@ -194,10 +199,15 @@ pub(crate) fn scoped(f: impl FnOnce() -> R) -> R { /// [`scoped`]'s drop guard disarms. No-op if STIMER is not configured. #[inline] fn disarm_preemption() { - if !with_per_cpu_variables(|pcv| pcv.preemption_timer_enabled.get()) { - return; - } - wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); + with_per_cpu_variables(|pcv| { + if !pcv.preemption_timer_enabled.get() { + return; + } + // Clear armed *before* disarming the MSR: a stale fire in this window is + // then ACKed without re-arming, and the MSR is never left armed. + pcv.preemption_armed.set(false); + wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); + }); } /// Signal end-of-interrupt to the local APIC. 
Must be called for every delivered diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index d9929cd6a..0a913d218 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -58,6 +58,9 @@ pub struct PerCpuVariables { /// Set once this CPU's preemption timer is configured (see `arch::timer`). /// Zero-initialized to `false`. pub(crate) preemption_timer_enabled: Cell, + /// True while the preemption timer is armed (see `arch::timer`). + /// Zero-initialized to `false`. + pub(crate) preemption_armed: Cell, } // These Hyper-V pages must be page-aligned. @@ -411,9 +414,6 @@ impl PerCpuVariablesAsm { pub const fn is_in_user_offset() -> usize { offset_of!(PerCpuVariablesAsm, is_in_user) } - pub(crate) fn is_in_user(&self) -> bool { - self.is_in_user.get() != 0 - } pub fn get_exception(&self) -> litebox::shim::Exception { litebox::shim::Exception(self.exception_trapno.get()) } From d7f79ae7343028cb3d049d75d4789eb1eb663a5c Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Fri, 12 Jun 2026 16:58:28 +0000 Subject: [PATCH 4/7] improve invariant --- .../src/arch/x86/interrupts.rs | 8 +- litebox_platform_lvbs/src/arch/x86/timer.rs | 109 ++++++++++-------- litebox_platform_lvbs/src/lib.rs | 12 +- litebox_platform_lvbs/src/mshv/vtl_switch.rs | 2 + 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index e6482090e..b9f9cae1d 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -200,9 +200,9 @@ extern "C" fn simd_floating_point_handler_impl(regs: &PtRegs) { } /// Handles an STIMER preemption-timer fire delivered in kernel mode (vector -/// 0x40); the common case fires in user mode (`exception_callback`). -/// Re-arm only while the `preemption_armed` flag is set. 
A stale fire -/// (the flag is clear) is just ACKed. +/// 0x40); the common case fires in user mode (`exception_callback`). Re-arm +/// only while the `preemption_armed` flag is set. A stale fire (the flag is +/// clear) is just ACKed. /// /// Two invariants keep the re-arm safe: `arm`/`disarm` set the flag before / /// clear it after the STIMER MSR, so no fire leaves the timer disarmed while @@ -216,7 +216,7 @@ extern "C" fn stimer_handler_impl(_regs: &PtRegs) { super::timer::eoi(); if with_per_cpu_variables(|pcv| pcv.preemption_armed.get()) { crate::debug_serial_println!("preemption timer fired in kernel mode (in scope)"); - super::timer::arm_preemption(); + super::timer::rearm_preemption(); } } diff --git a/litebox_platform_lvbs/src/arch/x86/timer.rs b/litebox_platform_lvbs/src/arch/x86/timer.rs index 69145b4fc..05627b2ab 100644 --- a/litebox_platform_lvbs/src/arch/x86/timer.rs +++ b/litebox_platform_lvbs/src/arch/x86/timer.rs @@ -1,27 +1,28 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -//! Hyper-V synthetic-timer (STIMER) preemption timer: VTL1 preempts a runaway TA. +//! Hyper-V synthetic-timer (STIMER) preemption timer: forcefully terminates +//! runaway user-mode code in VTL1. //! -//! VTL0 cannot interrupt VTL1 and OP-TEE has no scheduler, so a TA that -//! spins without returning would hold the VP forever and freeze VTL0 too. -//! VTL1 arms a VTL1-local Hyper-V synthetic timer (STIMER0 in direct mode) -//! that the TA cannot tamper with; on expiry it is delivered as -//! `STIMER_VECTOR` and the shim kills the TA with -//! `TEE_ERROR_TARGET_DEAD`. +//! VTL1 has no preemptive scheduler and VTL0 cannot interrupt VTL1, so +//! user-mode code that spins without returning holds the VP forever and freezes +//! VTL0 too. In lieu of a scheduler, VTL1 arms a VTL1-local Hyper-V synthetic +//! timer (STIMER0 in direct mode) that user-mode code cannot tamper with; on +//! 
expiry it fires `STIMER_VECTOR` and the shim terminates the offending +//! thread. //! -//! `scoped` brackets a whole TA entry: arm before `run_thread_arch`, -//! disarm once it fully returns to the VTL1 kernel: i.e., after the TA has -//! left ring 3, and before the VP is handed back to VTL0. The timer stays -//! armed across the TA's syscalls and faults (VTL1's own kernel work is -//! trusted and bounded), so it bounds the *cumulative* time the VP is held -//! in VTL1 per entry, which is what keeps VTL0 from tripping its -//! CPU lockup watchdog. +//! The timer is armed when entering user-mode code (`arm_preemption`) and +//! disarmed at the VTL0-return boundary (`vtl_switch`). Disarming there is +//! the hard invariant: VTL1 never hands the VP back to VTL0 with the timer +//! live. The deadline spans a whole dispatch, bounding the *cumulative* VTL1 +//! residency which touches guest code. The timer stays armed across the +//! guest's syscalls and faults (VTL1's own kernel work is trusted and +//! bounded). This is what keeps VTL0 from tripping its CPU lockup watchdog. //! -//! Direct mode injects `STIMER_VECTOR` straight into the local APIC, so -//! the usual fire path is an ordinary user-mode interrupt (ISR -> -//! exception_callback -> kill) with a rare in-kernel safety net -//! (`interrupts::stimer_handler_impl`). +//! Direct mode injects `STIMER_VECTOR` straight into the local APIC, so the +//! usual fire path is an ordinary user-mode interrupt (ISR -> exception_callback +//! -> kill) with a rare in-kernel safety net (`interrupts::stimer_handler_impl`, +//! which re-arms via `rearm_preemption`). use super::instrs::{rdmsr, wrmsr}; use crate::host::per_cpu_variables::with_per_cpu_variables; @@ -54,12 +55,12 @@ const CPUID_FEATURE_INFO: u32 = 1; const CPUID_FEATURE_INFO_ECX_X2APIC: u32 = 1 << 21; /// Per-entry execution budget in microseconds. 
8 s sits under Linux's default -/// 10 s hard-lockup watchdog, so VTL1 kills a runaway TA and returns the VP +/// 10 s hard-lockup watchdog, so VTL1 kills a runaway guest and returns the VP /// before VTL0 declares its CPU locked, with margin for the kill/return path. #[cfg(not(feature = "preemption_test_quantum"))] const QUANTUM_MICROS: u64 = 8_000_000; // 8 s -/// Tight budget under the `preemption_test_quantum` feature so a runaway-TA +/// Tight budget under the `preemption_test_quantum` feature so a runaway-guest /// kill fires in ~10 ms. Test builds only. #[cfg(feature = "preemption_test_quantum")] const QUANTUM_MICROS: u64 = 10_000; // 10 ms @@ -156,51 +157,59 @@ fn init_stimer() -> bool { true } -/// Arm the preemption timer to fire one quantum from now. Normally driven by -/// [`scoped`]; also re-armed by the kernel-mode-fire safety net -/// (`interrupts::stimer_handler_impl`). No-op if STIMER is not configured. +/// Program STIMER0 to fire one quantum from reference-now (one-shot, direct +/// mode); writes COUNT before CONFIG, which carries the Enable bit. The caller +/// owns the `preemption_armed` flag and the `preemption_timer_enabled` gate. +#[inline] +fn program_stimer_deadline() { + let now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + wrmsr(HV_X64_MSR_STIMER0_COUNT, now.wrapping_add(QUANTUM_100NS)); + let cfg = HV_STIMER_CONFIG_ENABLE + | HV_STIMER_CONFIG_DIRECT_MODE + | (u64::from(STIMER_VECTOR) << HV_STIMER_CONFIG_VECTOR_SHIFT); + wrmsr(HV_X64_MSR_STIMER0_CONFIG, cfg); +} + +/// Arm the preemption timer for a VTL1 residency, one quantum from now. +/// Idempotent: while a residency is already armed (a nested re-entry) it +/// leaves the in-flight deadline in place, so the nested chain shares one +/// quantum. No-op if STIMER is not configured. 
#[inline] pub(crate) fn arm_preemption() { with_per_cpu_variables(|pcv| { - if !pcv.preemption_timer_enabled.get() { + if !pcv.preemption_timer_enabled.get() || pcv.preemption_armed.get() { return; } - // Mark armed *before* touching the MSR: a fire is only possible once the - // MSR is armed, so this guarantees every in-entry fire sees the flag set. + // Mark armed *before* programming the MSR: a fire is only possible once + // the MSR is armed, so every in-residency fire sees the flag set. pcv.preemption_armed.set(true); - // One-shot at reference-now + quantum; write COUNT before CONFIG (Enable). - let now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - wrmsr(HV_X64_MSR_STIMER0_COUNT, now.wrapping_add(QUANTUM_100NS)); - let cfg = HV_STIMER_CONFIG_ENABLE - | HV_STIMER_CONFIG_DIRECT_MODE - | (u64::from(STIMER_VECTOR) << HV_STIMER_CONFIG_VECTOR_SHIFT); - wrmsr(HV_X64_MSR_STIMER0_CONFIG, cfg); + program_stimer_deadline(); }); } -/// Run `f` with the preemption timer armed, disarming when it returns. -/// The single arm/disarm pairing; used to bracket a TA entry (see the module doc). +/// Re-arm after a kernel-mode fire; the one-shot auto-disables on expiry. Only +/// the in-kernel safety net (`interrupts::stimer_handler_impl`) calls this, and +/// only while a residency is armed, to refresh the deadline so the entry/exit +/// prologue the fire landed in can finish. No-op if STIMER is not configured or +/// no residency is armed. #[inline] -pub(crate) fn scoped(f: impl FnOnce() -> R) -> R { - /// Disarms on drop so an early return cannot leave the timer live. - struct Disarm; - impl Drop for Disarm { - fn drop(&mut self) { - disarm_preemption(); +pub(crate) fn rearm_preemption() { + with_per_cpu_variables(|pcv| { + if !pcv.preemption_timer_enabled.get() || !pcv.preemption_armed.get() { + return; } - } - - arm_preemption(); - let _disarm = Disarm; - f() + program_stimer_deadline(); + }); } -/// Disarm the preemption timer (clear STIMER0 CONFIG.Enable). 
Only -/// [`scoped`]'s drop guard disarms. No-op if STIMER is not configured. +/// Disarm the preemption timer (clear STIMER0 CONFIG.Enable) before the VP is +/// handed back to VTL0. Called at the VTL0-return boundary (the `vtl_switch` +/// loop); a dispatch that never armed (HVCI/HEKI) returns without touching the +/// MSR. No-op if STIMER is not configured. #[inline] -fn disarm_preemption() { +pub(crate) fn disarm_preemption() { with_per_cpu_variables(|pcv| { - if !pcv.preemption_timer_enabled.get() { + if !pcv.preemption_timer_enabled.get() || !pcv.preemption_armed.get() { return; } // Clear armed *before* disarming the MSR: a stale fire in this window is diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 2b30204c1..bc16cc1e0 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1633,12 +1633,14 @@ fn run_thread_inner( // `ctx_ptr` is to let `run_thread_arch` easily access `ctx` (i.e., not to deal with // member variable offset calculation in assembly code). // - // Bracket the whole TA entry with the preemption timer. `run_thread_arch` - // returns exactly once, so the deadline spans all of the TA's user execution and - // in-VTL1 syscall/fault handling, bounding cumulative VTL1 occupancy. - crate::arch::timer::scoped(|| unsafe { + // Arm the preemption timer for this TA entry; disarmed at the VTL0-return + // boundary in `vtl_switch`. + crate::arch::timer::arm_preemption(); + // SAFETY: `thread_ctx` and `ctx_ptr` alias the same valid `PtRegs`/shim for + // the duration of the call, and `run_thread_arch` returns exactly once. + unsafe { run_thread_arch(&mut thread_ctx, ctx_ptr, u8::from(reenter)); - }); + } } /// Save callee-saved registers onto the stack. 
diff --git a/litebox_platform_lvbs/src/mshv/vtl_switch.rs b/litebox_platform_lvbs/src/mshv/vtl_switch.rs index cc55ab182..493238b2a 100644 --- a/litebox_platform_lvbs/src/mshv/vtl_switch.rs +++ b/litebox_platform_lvbs/src/mshv/vtl_switch.rs @@ -424,6 +424,8 @@ pub fn vtl_switch(return_value: Option) -> [u64; NUM_VTLCALL_PARAMS] { set_vtl_return_value(value); loop { + // Never hand the VP back to VTL0 with the preemption timer live. + crate::arch::timer::disarm_preemption(); vtl1_vp_exit(); // Note. The below asm block only touches stable memory locations (no on-demand memory // allocation, no permission changes). So, it is safe to exclude the current VP from From 8cd48d5cded5e3e09bb173b327fa30da997f6003 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Fri, 12 Jun 2026 18:19:06 +0000 Subject: [PATCH 5/7] consistency --- litebox_platform_lvbs/src/arch/x86/timer.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/timer.rs b/litebox_platform_lvbs/src/arch/x86/timer.rs index 05627b2ab..6624473a8 100644 --- a/litebox_platform_lvbs/src/arch/x86/timer.rs +++ b/litebox_platform_lvbs/src/arch/x86/timer.rs @@ -32,6 +32,7 @@ use crate::mshv::{ HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_STIMER0_COUNT, HV_X64_MSR_TIME_REF_COUNT, HYPERV_CPUID_FEATURES, HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, HYPERV_HYPERVISOR_PRESENT_BIT, }; +use core::arch::x86_64::__cpuid_count as cpuid_count; /// Vector the preemption timer fires on. Above the 0..31 exception range and /// clear of the Hyper-V SINT vector (0xf3). @@ -87,10 +88,10 @@ const QUANTUM_100NS: u64 = QUANTUM_MICROS * REF_TICKS_PER_MICRO; /// /// Call once per CPU after the IDT is loaded. pub fn init() { - use core::arch::x86_64::__cpuid; - // x2APIC software-enable is needed to EOI the direct-mode STIMER interrupt. 
- if __cpuid(CPUID_FEATURE_INFO).ecx & CPUID_FEATURE_INFO_ECX_X2APIC == 0 || !enable_x2apic() { + if cpuid_count(CPUID_FEATURE_INFO, 0x0).ecx & CPUID_FEATURE_INFO_ECX_X2APIC == 0 + || !enable_x2apic() + { crate::serial_println!("preemption disabled: x2APIC unavailable"); return; } @@ -134,13 +135,12 @@ fn enable_x2apic() -> bool { /// mode), log the raw feature leaf, and leave STIMER0 disabled (armed later via /// [`arm_preemption`]). Returns `false` if any capability is missing. fn init_stimer() -> bool { - use core::arch::x86_64::__cpuid; - if __cpuid(CPUID_FEATURE_INFO).ecx & HYPERV_HYPERVISOR_PRESENT_BIT == 0 - || __cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS).eax < HYPERV_CPUID_FEATURES + if cpuid_count(CPUID_FEATURE_INFO, 0x0).ecx & HYPERV_HYPERVISOR_PRESENT_BIT == 0 + || cpuid_count(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, 0x0).eax < HYPERV_CPUID_FEATURES { return false; } - let feat = __cpuid(HYPERV_CPUID_FEATURES); + let feat = cpuid_count(HYPERV_CPUID_FEATURES, 0x0); crate::debug_serial_println!( "HV feature leaf {HYPERV_CPUID_FEATURES:#x}: eax={:#010x} edx={:#010x}", feat.eax, From e5ee56eaebe93cd3ccde9f3f98390d6ea6f6d3c3 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Mon, 15 Jun 2026 17:35:49 +0000 Subject: [PATCH 6/7] fix potential deadlock --- litebox_platform_lvbs/src/arch/x86/interrupts.rs | 1 - litebox_platform_lvbs/src/arch/x86/timer.rs | 16 ++++++++++++++++ .../src/host/per_cpu_variables.rs | 2 ++ litebox_platform_lvbs/src/lib.rs | 2 +- litebox_platform_lvbs/src/mshv/vtl_switch.rs | 5 +++++ 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index b9f9cae1d..97da7dd5a 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -215,7 +215,6 @@ extern "C" fn stimer_handler_impl(_regs: &PtRegs) { use crate::host::per_cpu_variables::with_per_cpu_variables; super::timer::eoi(); if 
with_per_cpu_variables(|pcv| pcv.preemption_armed.get()) { - crate::debug_serial_println!("preemption timer fired in kernel mode (in scope)"); super::timer::rearm_preemption(); } } diff --git a/litebox_platform_lvbs/src/arch/x86/timer.rs b/litebox_platform_lvbs/src/arch/x86/timer.rs index 6624473a8..56b6d67f7 100644 --- a/litebox_platform_lvbs/src/arch/x86/timer.rs +++ b/litebox_platform_lvbs/src/arch/x86/timer.rs @@ -202,6 +202,22 @@ pub(crate) fn rearm_preemption() { }); } +/// Record that a preemption timer fire killed user-mode code. +#[inline] +pub(crate) fn mark_user_timeout_kill() { + with_per_cpu_variables(|pcv| pcv.preemption_timeout_killed_user.set(true)); +} + +/// Consume a pending user-timeout kill notification. +#[inline] +pub(crate) fn take_user_timeout_kill() -> bool { + with_per_cpu_variables(|pcv| { + let killed = pcv.preemption_timeout_killed_user.get(); + pcv.preemption_timeout_killed_user.set(false); + killed + }) +} + /// Disarm the preemption timer (clear STIMER0 CONFIG.Enable) before the VP is /// handed back to VTL0. Called at the VTL0-return boundary (the `vtl_switch` /// loop); a dispatch that never armed (HVCI/HEKI) returns without touching the diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index 0a913d218..8ecaf1503 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -61,6 +61,8 @@ pub struct PerCpuVariables { /// True while the preemption timer is armed (see `arch::timer`). /// Zero-initialized to `false`. pub(crate) preemption_armed: Cell, + /// Set when a preemption timer killed user-mode code. + pub(crate) preemption_timeout_killed_user: Cell, } // These Hyper-V pages must be page-aligned. 
diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index bc16cc1e0..6e795b98f 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -2153,7 +2153,7 @@ unsafe extern "C" fn exception_handler( // through to the shim, which kills the TA with TEE_ERROR_TARGET_DEAD. if !kernel_mode && info.exception.0 == crate::arch::timer::STIMER_VECTOR { crate::arch::timer::eoi(); - crate::serial_println!("TA exceeded its execution quantum; terminating"); + crate::arch::timer::mark_user_timeout_kill(); } match thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)) { ContinueOperation::Resume => { diff --git a/litebox_platform_lvbs/src/mshv/vtl_switch.rs b/litebox_platform_lvbs/src/mshv/vtl_switch.rs index 493238b2a..342a58a77 100644 --- a/litebox_platform_lvbs/src/mshv/vtl_switch.rs +++ b/litebox_platform_lvbs/src/mshv/vtl_switch.rs @@ -426,6 +426,11 @@ pub fn vtl_switch(return_value: Option) -> [u64; NUM_VTLCALL_PARAMS] { loop { // Never hand the VP back to VTL0 with the preemption timer live. crate::arch::timer::disarm_preemption(); + if crate::arch::timer::take_user_timeout_kill() { + crate::serial_println!( + "Terminiated user-mode code which exceeded its execution quantum" + ); + } vtl1_vp_exit(); // Note. The below asm block only touches stable memory locations (no on-demand memory // allocation, no permission changes). 
So, it is safe to exclude the current VP from From 4fa64896a9737cf71826e99f2ba11ec142eea431 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Tue, 16 Jun 2026 18:36:06 +0000 Subject: [PATCH 7/7] typos --- litebox_platform_lvbs/src/lib.rs | 4 ++-- litebox_platform_lvbs/src/mshv/vtl_switch.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 6e795b98f..4cb99cd9a 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1633,8 +1633,8 @@ fn run_thread_inner( // `ctx_ptr` is to let `run_thread_arch` easily access `ctx` (i.e., not to deal with // member variable offset calculation in assembly code). // - // Arm the preemption timer for this TA entry; disarmed at the VTL0-return - // boundary in `vtl_switch`. + // Arm the preemption timer for this user-thread execution. This function is + // idempotent, so `reenter` does not change the timeout. crate::arch::timer::arm_preemption(); // SAFETY: `thread_ctx` and `ctx_ptr` alias the same valid `PtRegs`/shim for // the duration of the call, and `run_thread_arch` returns exactly once. diff --git a/litebox_platform_lvbs/src/mshv/vtl_switch.rs b/litebox_platform_lvbs/src/mshv/vtl_switch.rs index 342a58a77..815665162 100644 --- a/litebox_platform_lvbs/src/mshv/vtl_switch.rs +++ b/litebox_platform_lvbs/src/mshv/vtl_switch.rs @@ -428,7 +428,7 @@ pub fn vtl_switch(return_value: Option) -> [u64; NUM_VTLCALL_PARAMS] { crate::arch::timer::disarm_preemption(); if crate::arch::timer::take_user_timeout_kill() { crate::serial_println!( - "Terminiated user-mode code which exceeded its execution quantum" + "Terminated user-mode code which exceeded its execution quantum" ); } vtl1_vp_exit();