⚠️ VeridianOS Kernel Documentation — low-level `no_std` kernel code. All functions are unsafe unless explicitly marked otherwise.

veridian_kernel/virt/hypervisor/
nested.rs

1//! Nested Virtualization
2//!
3//! L2 VMCS shadowing with field forwarding for nested hypervisor support.
4
5#[cfg(feature = "alloc")]
6use alloc::collections::BTreeMap;
7
8use super::GuestRegisters;
9use crate::virt::{vmx::VmcsFields, VmError};
10
11// ---------------------------------------------------------------------------
12// 1. Nested Virtualization
13// ---------------------------------------------------------------------------
14
/// Shadow VMCS for the L2 (nested) guest.
///
/// Caches VMCS field values as seen from L1's perspective; L1 VMREAD/VMWRITE
/// are serviced from this cache rather than from a hardware VMCS.
#[cfg(feature = "alloc")]
pub struct ShadowVmcs {
    /// Cached field values from L1's perspective, keyed by VMCS field encoding.
    fields: BTreeMap<u32, u64>,
    /// Whether the shadow VMCS is active (set on nested VM entry, cleared on exit).
    active: bool,
    /// L1 VMCS link pointer (set to the shadow VMCS physical address when
    /// active; all-ones means "no shadow VMCS linked").
    link_pointer: u64,
}
25
#[cfg(feature = "alloc")]
impl Default for ShadowVmcs {
    /// Equivalent to [`ShadowVmcs::new`]: empty field cache, inactive,
    /// link pointer set to the invalid all-ones value.
    fn default() -> Self {
        ShadowVmcs::new()
    }
}
32
// NOTE: this impl must carry the same `alloc` gate as the `ShadowVmcs`
// struct; without it, a no-alloc build fails to compile (neither the
// struct nor `BTreeMap` exists in that configuration).
#[cfg(feature = "alloc")]
impl ShadowVmcs {
    /// Sentinel VMCS link pointer meaning "no shadow VMCS linked"
    /// (all-ones, the VMX convention for an invalid link pointer).
    const INVALID_LINK_POINTER: u64 = u64::MAX;

    /// Create an inactive shadow VMCS with no cached fields.
    pub fn new() -> Self {
        Self {
            fields: BTreeMap::new(),
            active: false,
            link_pointer: Self::INVALID_LINK_POINTER,
        }
    }

    /// Write a field into the shadow VMCS cache, replacing any prior value.
    pub fn write_field(&mut self, field: u32, value: u64) {
        self.fields.insert(field, value);
    }

    /// Read a field from the shadow VMCS cache; `None` if it was never written.
    pub fn read_field(&self, field: u32) -> Option<u64> {
        self.fields.get(&field).copied()
    }

    /// Activate the shadow VMCS for nested operation, recording the address
    /// to expose as the L1 VMCS link pointer.
    pub fn activate(&mut self, link_pointer: u64) {
        self.active = true;
        self.link_pointer = link_pointer;
    }

    /// Deactivate the shadow VMCS and reset the link pointer to invalid.
    pub fn deactivate(&mut self) {
        self.active = false;
        self.link_pointer = Self::INVALID_LINK_POINTER;
    }

    /// Whether the shadow VMCS is currently active.
    pub fn is_active(&self) -> bool {
        self.active
    }

    /// Current L1 VMCS link pointer (all-ones when no shadow is linked).
    pub fn link_pointer(&self) -> u64 {
        self.link_pointer
    }

    /// Number of fields currently cached.
    pub fn field_count(&self) -> usize {
        self.fields.len()
    }

    /// Clear all cached fields and return to the inactive state.
    pub fn clear(&mut self) {
        self.fields.clear();
        self.active = false;
        self.link_pointer = Self::INVALID_LINK_POINTER;
    }
}
83
/// Nested virtualization state for L1/L2 management.
///
/// Tracks which layer of the virtualization stack is currently executing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NestingLevel {
    /// Running at L0 (host hypervisor) — the default state.
    #[default]
    L0,
    /// Running at L1 (guest hypervisor).
    L1,
    /// Running at L2 (nested guest).
    L2,
}
95
/// Nested VM entry/exit reason for L1<->L2 transitions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NestedExitReason {
    /// L2 executed VMCALL -- forward to L1.
    Vmcall,
    /// L2 EPT violation -- may need L1 handling.
    EptViolation,
    /// L2 I/O instruction -- check L1's I/O bitmap.
    IoInstruction,
    /// L2 MSR access -- check L1's MSR bitmap.
    MsrAccess,
    /// L2 CPUID -- emulate or forward.
    Cpuid,
    /// L2 executed a VMX instruction -- must forward to L1.
    VmxInstruction,
    /// L2 external interrupt -- may deliver to L1.
    ExternalInterrupt,
    /// L2 triple fault.
    TripleFault,
    /// L2 HLT.
    Hlt,
    /// Any other exit, carrying the raw exit-reason code.
    Other(u32),
}
120
/// VMCS field forwarding: determines how L1 reads/writes to the shadow VMCS
/// map onto actual hardware VMCS fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldForwardPolicy {
    /// Field is directly passed through to hardware.
    Passthrough,
    /// Field is intercepted and emulated by L0.
    Emulated,
    /// Field is read-only from L1 (VMWRITE rejected).
    ReadOnly,
    /// Field is hidden from L1 (both VMREAD and VMWRITE rejected).
    Hidden,
}
134
/// Nested virtualization controller.
///
/// Tracks the current nesting level, mediates L1 VMREAD/VMWRITE against the
/// shadow VMCS, and saves/restores L1 register state across L2 execution.
#[cfg(feature = "alloc")]
pub struct NestedVirtController {
    /// Current nesting level (L0/L1/L2).
    level: NestingLevel,
    /// Shadow VMCS for L2.
    pub(crate) shadow_vmcs: ShadowVmcs,
    /// L1 guest register state saved during L2 execution.
    l1_saved_state: GuestRegisters,
    /// Field forwarding policies, keyed by VMCS field encoding; fields with
    /// no entry are treated as passthrough.
    field_policies: BTreeMap<u32, FieldForwardPolicy>,
    /// Whether nested VMX is enabled for the guest.
    nested_vmx_enabled: bool,
}
149
#[cfg(feature = "alloc")]
impl Default for NestedVirtController {
    /// Equivalent to [`NestedVirtController::new`]: L0 level, default
    /// policy table, nested VMX disabled.
    fn default() -> Self {
        NestedVirtController::new()
    }
}
156
// NOTE: this impl must carry the same `alloc` gate as the struct; without
// it, a no-alloc build fails to compile (the struct and `BTreeMap` do not
// exist in that configuration).
#[cfg(feature = "alloc")]
impl NestedVirtController {
    /// Build a controller at L0 with nested VMX disabled and the default
    /// field-forwarding policy table installed.
    pub fn new() -> Self {
        let mut policies = BTreeMap::new();
        // Guest RIP/RSP/RFLAGS always emulated (L0 controls actual execution).
        policies.insert(VmcsFields::GUEST_RIP, FieldForwardPolicy::Emulated);
        policies.insert(VmcsFields::GUEST_RSP, FieldForwardPolicy::Emulated);
        policies.insert(VmcsFields::GUEST_RFLAGS, FieldForwardPolicy::Emulated);
        // Control registers pass through to hardware.
        policies.insert(VmcsFields::GUEST_CR0, FieldForwardPolicy::Passthrough);
        policies.insert(VmcsFields::GUEST_CR3, FieldForwardPolicy::Passthrough);
        policies.insert(VmcsFields::GUEST_CR4, FieldForwardPolicy::Passthrough);
        // Exit reason is read-only from L1's perspective.
        policies.insert(VmcsFields::VM_EXIT_REASON, FieldForwardPolicy::ReadOnly);
        // Host state must never be visible to an L1 VMREAD.
        policies.insert(VmcsFields::HOST_RIP, FieldForwardPolicy::Hidden);
        policies.insert(VmcsFields::HOST_RSP, FieldForwardPolicy::Hidden);

        Self {
            level: NestingLevel::L0,
            shadow_vmcs: ShadowVmcs::new(),
            l1_saved_state: GuestRegisters::default(),
            field_policies: policies,
            nested_vmx_enabled: false,
        }
    }

    /// Enable nested VMX support for the guest.
    pub fn enable_nested_vmx(&mut self) {
        self.nested_vmx_enabled = true;
    }

    /// Handle an L1 VMWRITE to the shadow VMCS.
    ///
    /// Fields with no configured policy default to passthrough.
    ///
    /// # Errors
    /// Returns [`VmError::VmcsFieldError`] for read-only or hidden fields.
    pub fn handle_l1_vmwrite(&mut self, field: u32, value: u64) -> Result<(), VmError> {
        let policy = self
            .field_policies
            .get(&field)
            .copied()
            .unwrap_or(FieldForwardPolicy::Passthrough);

        match policy {
            FieldForwardPolicy::Passthrough | FieldForwardPolicy::Emulated => {
                self.shadow_vmcs.write_field(field, value);
                Ok(())
            }
            FieldForwardPolicy::ReadOnly | FieldForwardPolicy::Hidden => {
                Err(VmError::VmcsFieldError)
            }
        }
    }

    /// Handle an L1 VMREAD from the shadow VMCS.
    ///
    /// Fields with no configured policy default to passthrough.
    ///
    /// # Errors
    /// Returns [`VmError::VmcsFieldError`] for hidden fields and for fields
    /// that have never been written.
    pub fn handle_l1_vmread(&self, field: u32) -> Result<u64, VmError> {
        let policy = self
            .field_policies
            .get(&field)
            .copied()
            .unwrap_or(FieldForwardPolicy::Passthrough);

        match policy {
            FieldForwardPolicy::Hidden => Err(VmError::VmcsFieldError),
            _ => self
                .shadow_vmcs
                .read_field(field)
                .ok_or(VmError::VmcsFieldError),
        }
    }

    /// Enter L2 from L1 (nested VM entry).
    ///
    /// Saves `l1_state` so it can be restored by the matching [`Self::exit_l2`],
    /// then activates the shadow VMCS.
    ///
    /// # Errors
    /// - [`VmError::VmxNotSupported`] if nested VMX has not been enabled.
    /// - [`VmError::InvalidVmState`] if already running at L2 (re-entry
    ///   without an intervening exit would clobber the saved L1 state).
    /// - [`VmError::InvalidGuestState`] if the shadow VMCS lacks `GUEST_RIP`.
    pub fn enter_l2(&mut self, l1_state: &GuestRegisters) -> Result<(), VmError> {
        if !self.nested_vmx_enabled {
            return Err(VmError::VmxNotSupported);
        }
        match self.level {
            // L0 -> L1 is implicit; allow L0 to reach L2 "through" L1.
            NestingLevel::L0 => self.level = NestingLevel::L1,
            NestingLevel::L1 => {}
            // BUG FIX: the original fell through here and silently allowed
            // L2 -> L2 entry, overwriting the saved L1 state.
            NestingLevel::L2 => return Err(VmError::InvalidVmState),
        }

        // Save L1 state for restoration at nested VM exit.
        self.l1_saved_state = *l1_state;

        // Validate that the shadow VMCS has the minimum required fields.
        if self.shadow_vmcs.read_field(VmcsFields::GUEST_RIP).is_none() {
            return Err(VmError::InvalidGuestState);
        }

        self.level = NestingLevel::L2;
        self.shadow_vmcs.activate(0);
        Ok(())
    }

    /// Exit from L2 back to L1 (nested VM exit).
    ///
    /// Records the exit reason in the shadow VMCS (for L1 to VMREAD),
    /// deactivates the shadow VMCS, and returns the saved L1 state.
    ///
    /// # Errors
    /// Returns [`VmError::InvalidVmState`] if not currently running at L2.
    pub fn exit_l2(&mut self, exit_reason: NestedExitReason) -> Result<GuestRegisters, VmError> {
        if self.level != NestingLevel::L2 {
            return Err(VmError::InvalidVmState);
        }

        // Map to basic exit-reason codes for L1 consumption.
        let reason_code = match exit_reason {
            NestedExitReason::Vmcall => 18,
            NestedExitReason::EptViolation => 48,
            NestedExitReason::IoInstruction => 30,
            NestedExitReason::MsrAccess => 31,
            NestedExitReason::Cpuid => 10,
            // NOTE(review): 18 is VMCALL's code; the VMX instructions have
            // distinct codes (19-27 in the Intel SDM). Value kept as-is for
            // compatibility -- confirm whether L1 needs per-instruction codes.
            NestedExitReason::VmxInstruction => 18,
            NestedExitReason::ExternalInterrupt => 1,
            NestedExitReason::TripleFault => 2,
            NestedExitReason::Hlt => 12,
            NestedExitReason::Other(code) => code,
        };
        self.shadow_vmcs
            .write_field(VmcsFields::VM_EXIT_REASON, u64::from(reason_code));

        self.level = NestingLevel::L1;
        self.shadow_vmcs.deactivate();

        // Return the L1 register state saved at nested VM entry.
        Ok(self.l1_saved_state)
    }

    /// Current nesting level.
    pub fn nesting_level(&self) -> NestingLevel {
        self.level
    }

    /// Whether nested VMX has been enabled for the guest.
    pub fn is_nested_vmx_enabled(&self) -> bool {
        self.nested_vmx_enabled
    }

    /// Check if a VM exit from L2 should be forwarded to L1 rather than
    /// handled entirely by L0.
    pub fn should_forward_to_l1(&self, exit_reason: NestedExitReason) -> bool {
        // Exhaustive match so adding a variant forces a decision here.
        match exit_reason {
            // VMX instructions executed in L2 always go to L1.
            NestedExitReason::VmxInstruction => true,
            // VMCALL is always forwarded.
            NestedExitReason::Vmcall => true,
            // Triple fault is always forwarded.
            NestedExitReason::TripleFault => true,
            // EPT violations may be handled by L0 or forwarded.
            NestedExitReason::EptViolation => true,
            // I/O depends on L1's bitmap; forward conservatively.
            NestedExitReason::IoInstruction => true,
            // External interrupts go to L0 first.
            NestedExitReason::ExternalInterrupt => false,
            // Everything else is forwarded by default.
            NestedExitReason::MsrAccess
            | NestedExitReason::Cpuid
            | NestedExitReason::Hlt
            | NestedExitReason::Other(_) => true,
        }
    }
}