⚠️ VeridianOS Kernel — `veridian_kernel/arch/x86_64/context.rs`

Low-level `no_std` kernel code implementing x86_64 context switching.
Privileged operations are confined to documented `unsafe` blocks and
`unsafe fn`s; treat every routine here as sensitive to register and
struct-layout invariants.
1//! x86_64 context switching implementation
2
3use core::arch::asm;
4
5use crate::sched::task::TaskContext;
6
/// x86_64 CPU context
///
/// Layout is ABI-critical: `#[repr(C)]` field order defines the byte
/// offsets hard-coded in the assembly of `context_switch` and
/// `load_context`. Do NOT reorder, insert, or remove fields without
/// updating every offset in those routines (offsets noted per field).
#[repr(C)]
#[derive(Debug, Clone)]
pub struct X86_64Context {
    /// General purpose registers (offsets 0x00..=0x70)
    pub r15: u64, // 0x00
    pub r14: u64, // 0x08
    pub r13: u64, // 0x10
    pub r12: u64, // 0x18
    pub r11: u64, // 0x20
    pub r10: u64, // 0x28
    pub r9: u64,  // 0x30
    pub r8: u64,  // 0x38
    pub rdi: u64, // 0x40
    pub rsi: u64, // 0x48
    pub rbp: u64, // 0x50
    pub rbx: u64, // 0x58
    pub rdx: u64, // 0x60
    pub rcx: u64, // 0x68
    pub rax: u64, // 0x70

    /// Stack pointer
    pub rsp: u64, // 0x78

    /// Instruction pointer
    pub rip: u64, // 0x80

    /// CPU flags
    pub rflags: u64, // 0x88

    /// Segment registers (u16 each: 0x90..0x9C, then 4 bytes of
    /// padding before the next u64 field)
    pub cs: u16, // 0x90
    pub ss: u16, // 0x92
    pub ds: u16, // 0x94 (decimal 148, used by load_context)
    pub es: u16, // 0x96 (decimal 150, used by load_context)
    pub fs: u16, // 0x98
    pub gs: u16, // 0x9A

    /// Control registers
    pub cr3: u64, // 0xA0 -- page table base

    /// Floating point state pointer (null until FPU state is allocated)
    pub fpu_state: *mut FpuState, // 0xA8

    /// TLS base (FS base for user)
    pub tls_base: u64, // 0xB0 -- saved/restored via MSR 0xC0000100
}
54
/// x86_64 FPU/SSE/AVX state save area.
///
/// `align(64)`: FXSAVE/FXRSTOR require 16-byte alignment and
/// XSAVE/XRSTOR require 64-byte alignment, so 64 satisfies both.
#[repr(C, align(64))]
pub struct FpuState {
    /// FXSAVE area (512 bytes) -- legacy x87/MMX/SSE state
    pub fxsave: [u8; 512],
    /// Extended state (AVX, etc.)
    /// NOTE(review): a fixed 2048 bytes may be too small for larger
    /// XSAVE components (e.g. AVX-512); the required size should come
    /// from CPUID.(EAX=0Dh) -- confirm before enabling XSAVE paths.
    pub xsave: [u8; 2048],
}
63
64impl X86_64Context {
65    /// Create new context for a task
66    pub fn new(entry_point: usize, stack_pointer: usize) -> Self {
67        // Adjust stack pointer to leave room for a fake return address
68        // This prevents issues if the called function tries to access stack arguments
69        let adjusted_sp = (stack_pointer - 8) as u64;
70
71        Self {
72            // Clear all general purpose registers
73            r15: 0,
74            r14: 0,
75            r13: 0,
76            r12: 0,
77            r11: 0,
78            r10: 0,
79            r9: 0,
80            r8: 0,
81            rdi: 0,
82            rsi: 0,
83            rbp: 0,
84            rbx: 0,
85            rdx: 0,
86            rcx: 0,
87            rax: 0,
88
89            // Set stack pointer with adjustment
90            rsp: adjusted_sp,
91
92            // Set instruction pointer to entry point
93            rip: entry_point as u64,
94
95            // Default RFLAGS (interrupts disabled for now)
96            rflags: 0x002,
97
98            // Kernel segments
99            cs: 0x08, // Kernel code segment
100            ss: 0x10, // Kernel data segment
101            ds: 0x10,
102            es: 0x10,
103            fs: 0x00,
104            gs: 0x00,
105
106            // Initialize with current CR3
107            // SAFETY: Reading CR3 is always valid in kernel mode. It returns
108            // the current page table base address. No side effects.
109            cr3: unsafe {
110                let mut cr3: u64;
111                asm!("mov {}, cr3", out(reg) cr3);
112                cr3
113            },
114
115            // Will be allocated if FPU is used
116            fpu_state: core::ptr::null_mut(),
117
118            tls_base: 0,
119        }
120    }
121
122    /// Create a user-mode context for a process.
123    ///
124    /// Uses Ring 3 segment selectors (CS=0x33, SS/DS/ES=0x2B) and enables
125    /// interrupts (RFLAGS IF bit). The CR3 is set to the current page table
126    /// so the process initially shares the kernel's address space (with
127    /// user-accessible mappings added separately).
128    #[allow(dead_code)] // User-space process creation API
129    pub fn new_user(entry_point: usize, stack_pointer: usize) -> Self {
130        Self {
131            // Clear all general purpose registers
132            r15: 0,
133            r14: 0,
134            r13: 0,
135            r12: 0,
136            r11: 0,
137            r10: 0,
138            r9: 0,
139            r8: 0,
140            rdi: 0,
141            rsi: 0,
142            rbp: 0,
143            rbx: 0,
144            rdx: 0,
145            rcx: 0,
146            rax: 0,
147
148            // Set stack pointer
149            rsp: stack_pointer as u64,
150
151            // Set instruction pointer to user entry point
152            rip: entry_point as u64,
153
154            // RFLAGS with IF set (interrupts enabled in user mode)
155            rflags: 0x202,
156
157            // Ring 3 segment selectors
158            cs: 0x33, // User code segment (GDT offset 0x30 + RPL 3)
159            ss: 0x2B, // User data segment (GDT offset 0x28 + RPL 3)
160            ds: 0x2B,
161            es: 0x2B,
162            fs: 0x00,
163            gs: 0x00,
164
165            // Use current page table (caller must ensure user mappings exist)
166            // SAFETY: Reading CR3 is always valid in kernel mode. It returns
167            // the current page table base address. No side effects.
168            cr3: unsafe {
169                let mut cr3: u64;
170                asm!("mov {}, cr3", out(reg) cr3);
171                cr3
172            },
173
174            // No FPU state initially
175            fpu_state: core::ptr::null_mut(),
176            tls_base: 0,
177        }
178    }
179}
180
181impl crate::arch::context::ThreadContext for X86_64Context {
182    fn new() -> Self {
183        Self::default()
184    }
185
186    fn init(&mut self, entry_point: usize, stack_pointer: usize, _kernel_stack: usize) {
187        self.rip = entry_point as u64;
188        self.rsp = stack_pointer as u64;
189        // NOTE: TSS RSP0 is NOT updated here. The scheduler updates TSS RSP0
190        // during context switch (scheduler.rs). Setting it here is wrong for
191        // non-scheduled threads (e.g., inline-executed user programs from the
192        // shell) because the kernel stack virtual address may not be mapped
193        // in the process's page tables, causing a Double Fault when hardware
194        // interrupts fire in user mode.
195    }
196
197    fn get_instruction_pointer(&self) -> usize {
198        self.rip as usize
199    }
200
201    fn set_instruction_pointer(&mut self, ip: usize) {
202        self.rip = ip as u64;
203    }
204
205    fn get_stack_pointer(&self) -> usize {
206        self.rsp as usize
207    }
208
209    fn set_stack_pointer(&mut self, sp: usize) {
210        self.rsp = sp as u64;
211    }
212
213    fn set_tls_base(&mut self, base: u64) {
214        self.tls_base = base;
215    }
216
217    fn tls_base(&self) -> u64 {
218        self.tls_base
219    }
220
221    fn get_kernel_stack(&self) -> usize {
222        crate::arch::x86_64::gdt::get_kernel_stack() as usize
223    }
224
225    fn set_kernel_stack(&mut self, sp: usize) {
226        crate::arch::x86_64::gdt::set_kernel_stack(sp as u64);
227    }
228
229    fn set_return_value(&mut self, value: usize) {
230        self.rax = value as u64;
231    }
232
233    fn clone_from(&mut self, other: &Self) {
234        *self = other.clone();
235    }
236
237    fn to_task_context(&self) -> TaskContext {
238        TaskContext::X86_64(self.clone())
239    }
240}
241
/// Switch from current context to new context
///
/// Saves the running task's CPU state into `current`, then restores
/// `next` and transfers control to its saved RIP via `ret`.
///
/// # Safety
/// This function manipulates CPU state directly and must be called
/// with interrupts disabled. `current` must be valid for writes and
/// `next` valid for reads of a fully initialized `X86_64Context`.
///
/// NOTE(review): save and restore live in two separate `asm!` blocks,
/// so the compiler may use registers/stack between them, and the
/// "return address" read from `[rsp]` in the save block assumes no
/// prologue pushes -- consider merging into one naked function.
/// Also: DS/ES/GS and FPU state are not switched here, and the value
/// stored for `rdi` is the `current` argument pointer (rdi is the
/// first-argument register), not the caller's original rdi.
#[no_mangle]
pub unsafe extern "C" fn context_switch(current: *mut X86_64Context, next: *const X86_64Context) {
    // Save current context
    asm!(
        // Save general purpose registers (offsets match X86_64Context)
        "mov [rdi + 0x00], r15",
        "mov [rdi + 0x08], r14",
        "mov [rdi + 0x10], r13",
        "mov [rdi + 0x18], r12",
        "mov [rdi + 0x20], r11",
        "mov [rdi + 0x28], r10",
        "mov [rdi + 0x30], r9",
        "mov [rdi + 0x38], r8",
        // NOTE(review): rdi here holds `current`, not the caller's rdi
        "mov [rdi + 0x40], rdi",
        "mov [rdi + 0x48], rsi",
        "mov [rdi + 0x50], rbp",
        "mov [rdi + 0x58], rbx",
        "mov [rdi + 0x60], rdx",
        "mov [rdi + 0x68], rcx",
        "mov [rdi + 0x70], rax",

        // Save stack pointer
        "mov [rdi + 0x78], rsp",

        // Save FS base (TLS) to tls_base field at offset 0xB0 (176)
        // via RDMSR of IA32_FS_BASE (MSR 0xC0000100); result comes back
        // split across EDX:EAX and is recombined into a single u64
        "mov ecx, 0xC0000100",
        "rdmsr",
        "shl rdx, 32",
        "or rax, rdx",
        "mov [rdi + 0xB0], rax",

        // Save return address as RIP (assumes [rsp] is the return
        // address of this call -- see NOTE(review) above)
        "mov rax, [rsp]",
        "mov [rdi + 0x80], rax",

        // Save RFLAGS
        "pushfq",
        "pop rax",
        "mov [rdi + 0x88], rax",

        in("rdi") current,
        in("rsi") next,
        lateout("rax") _,
        lateout("rcx") _,
        lateout("rdx") _,
    );

    // Load new context
    asm!(
        // Load new CR3 (offset 0xA0 = 160 = cr3 field) if different,
        // to avoid an unnecessary TLB flush
        "mov rax, [rsi + 0xA0]",
        "mov rcx, cr3",
        "cmp rax, rcx",
        "je 2f",
        "mov cr3, rax",
        "2:",

        // Load general purpose registers
        "mov r15, [rsi + 0x00]",
        "mov r14, [rsi + 0x08]",
        "mov r13, [rsi + 0x10]",
        "mov r12, [rsi + 0x18]",
        "mov r11, [rsi + 0x20]",
        "mov r10, [rsi + 0x28]",
        "mov r9,  [rsi + 0x30]",
        "mov r8,  [rsi + 0x38]",
        "mov rdi, [rsi + 0x40]",
        // Skip rsi for now -- it is still the context base pointer
        "mov rbp, [rsi + 0x50]",
        "mov rbx, [rsi + 0x58]",
        "mov rdx, [rsi + 0x60]",
        "mov rcx, [rsi + 0x68]",
        "mov rax, [rsi + 0x70]",

        // Load RFLAGS (NOTE(review): if IF is set in the saved flags,
        // interrupts are enabled before rsp/rsi are restored below)
        "push qword ptr [rsi + 0x88]",
        "popfq",

        // Load stack pointer
        "mov rsp, [rsi + 0x78]",

        // Push return address
        "push qword ptr [rsi + 0x80]",

        // Restore FS base (TLS) from tls_base field at offset 0xB0 (176)
        // via WRMSR of IA32_FS_BASE (EDX:EAX split of the u64)
        "mov ecx, 0xC0000100",
        "mov rax, [rsi + 0xB0]",
        "mov rdx, rax",
        "shr rdx, 32",
        "wrmsr",

        // Finally load rsi (the context pointer is no longer needed)
        "mov rsi, [rsi + 0x48]",

        // Return to new context (jumps to the pushed RIP; control never
        // falls out of this asm block)
        "ret",

        in("rsi") next,
        lateout("rax") _,
        lateout("rcx") _,
        lateout("rdx") _,
        lateout("r8") _,
        lateout("r9") _,
        lateout("r10") _,
        lateout("r11") _,
        lateout("r12") _,
        lateout("r13") _,
        lateout("r14") _,
        lateout("r15") _,
    );
}
358
359/// Switch context using the ThreadContext interface.
360/// Called from `crate::arch::context::switch_context()`.
361pub fn switch_context(from: &mut X86_64Context, to: &X86_64Context) {
362    // SAFETY: Both `from` and `to` are valid references to X86_64Context
363    // structs. context_switch is an assembly routine that saves the current
364    // CPU state into `from` and restores state from `to`. Both contexts
365    // must have valid register values for safe execution.
366    unsafe {
367        context_switch(from as *mut _, to as *const _);
368    }
369}
370
371/// Save FPU state. Called from `crate::arch::context::save_fpu_state()`.
372pub fn save_fpu_state(state: &mut FpuState) {
373    // SAFETY: `state` is a valid mutable reference to a FpuState struct.
374    // The FXSAVE instruction stores the FPU/SSE state into the provided
375    // 512-byte aligned memory region. FpuState is repr(C, align(16)).
376    unsafe {
377        asm!("fxsave [{}]", in(reg) state as *mut FpuState);
378    }
379}
380
381/// Restore FPU state. Called from `crate::arch::context::restore_fpu_state()`.
382pub fn restore_fpu_state(state: &FpuState) {
383    // SAFETY: `state` is a valid reference to a FpuState struct containing
384    // previously saved FPU/SSE state. The FXRSTOR instruction restores the
385    // FPU/SSE state from the provided memory region.
386    unsafe {
387        asm!("fxrstor [{}]", in(reg) state as *const FpuState);
388    }
389}
390
/// Initialize FPU for current CPU. Called from
/// `crate::arch::context::init_fpu()`.
pub fn init_fpu() {
    // SAFETY: FPU initialization modifies CR0 and CR4 control registers to
    // enable floating point and SSE support. This must only be called once
    // during early kernel initialization. The register modifications are
    // standard x86_64 FPU setup: clear CR0.EM (bit 2), set CR0.MP (bit 1);
    // set CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10).
    // NOTE(review): CR0.NE (bit 5, native FPU error reporting) is NOT set
    // by this code even though it is normally part of FPU setup -- confirm
    // it is set elsewhere (e.g. boot code) or should be added here.
    unsafe {
        // Enable FPU
        asm!(
            "mov rax, cr0",
            "and ax, 0xFFFB",  // Clear EM bit
            "or ax, 0x2",      // Set MP bit
            "mov cr0, rax",

            // Enable SSE
            "mov rax, cr4",
            "or ax, 0x600",    // Set OSFXSR and OSXMMEXCPT
            "mov cr4, rax",

            // Initialize FPU state (reset x87 to power-on defaults)
            "fninit",
            out("rax") _,
        );
    }
}
418
/// Check if CPU supports XSAVE.
///
/// Reads CPUID leaf 1 and tests ECX bit 26 (the XSAVE feature flag).
/// Uses the `core::arch` CPUID intrinsic instead of hand-written asm:
/// the intrinsic handles the RBX save/restore that inline `cpuid`
/// otherwise requires, shrinking the unsafe surface.
#[allow(dead_code)] // CPU feature detection API -- used when extended state mgmt is enabled
pub fn has_xsave() -> bool {
    // SAFETY: CPUID with leaf 1 is supported on every x86_64 CPU and has
    // no side effects beyond writing the result registers.
    let ecx = unsafe { core::arch::x86_64::__cpuid(1).ecx };
    (ecx & (1 << 26)) != 0
}
441
442/// Enable XSAVE if supported
443#[allow(dead_code)] // CPU feature detection API -- used when extended state mgmt is enabled
444pub fn enable_xsave() {
445    if has_xsave() {
446        // SAFETY: XSAVE support was verified by has_xsave() above.
447        // Setting the OSXSAVE bit (bit 18) in CR4 enables the OS to
448        // use XSAVE/XRSTOR instructions for extended state management.
449        unsafe {
450            asm!(
451                "mov rax, cr4",
452                "or rax, 0x40000",  // Set OSXSAVE bit
453                "mov cr4, rax",
454                out("rax") _,
455            );
456        }
457    }
458}
459
460impl Default for X86_64Context {
461    fn default() -> Self {
462        Self {
463            r15: 0,
464            r14: 0,
465            r13: 0,
466            r12: 0,
467            r11: 0,
468            r10: 0,
469            r9: 0,
470            r8: 0,
471            rdi: 0,
472            rsi: 0,
473            rbp: 0,
474            rbx: 0,
475            rdx: 0,
476            rcx: 0,
477            rax: 0,
478            rsp: 0,
479            rip: 0,
480            rflags: 0x202,
481            cs: 0x08,
482            ss: 0x10,
483            ds: 0x10,
484            es: 0x10,
485            fs: 0x00,
486            gs: 0x00,
487            cr3: 0,
488            fpu_state: core::ptr::null_mut(),
489            tls_base: 0,
490        }
491    }
492}
493
// SAFETY: X86_64Context can be safely sent between threads: every field is
// a plain integer except `fpu_state`, which is either null or points to
// thread-local data owned by the task this context belongs to.
// NOTE(review): the Sync impl additionally claims &X86_64Context may be
// shared across threads; that is sound only if no thread mutates through
// the raw `fpu_state` pointer while others read -- confirm callers uphold
// this (e.g. contexts are only touched by the scheduler with the task
// off-CPU).
unsafe impl Send for X86_64Context {}
unsafe impl Sync for X86_64Context {}
498
/// Load context for first time (no previous context to save)
///
/// Restores `context` and jumps to its saved RIP; never returns.
///
/// # Safety
/// This function manipulates CPU state directly and must be called
/// with interrupts disabled. `context` must point to a fully
/// initialized `X86_64Context` whose RIP/RSP describe runnable code
/// and a mapped stack.
///
/// NOTE(review): CS/SS are not reloaded (kernel-to-kernel only), and
/// FS/GS plus the FS-base MSR (TLS) are not restored here -- confirm
/// callers never use this for contexts that rely on tls_base. Also,
/// `popfq` runs before the general purpose registers are loaded, so if
/// the saved RFLAGS has IF set, an interrupt can arrive mid-restore
/// (rsp is already valid at that point, so the frame lands on the new
/// stack).
#[no_mangle]
pub unsafe extern "C" fn load_context(context: *const X86_64Context) {
    // Load context directly using inline assembly
    // For kernel-to-kernel context switch, we can use a simpler approach
    // (decimal offsets below correspond to X86_64Context field layout)
    asm!(
        // rdi = context pointer

        // Load CR3 (page table) first if not zero
        "mov rax, [rdi + 160]", // cr3 (0xA0)
        "test rax, rax",
        "jz 2f",
        "mov cr3, rax",
        "2:",

        // Load segment registers (data segments only; CS/SS unchanged)
        "mov ax, [rdi + 148]", // ds (0x94)
        "mov ds, ax",
        "mov ax, [rdi + 150]", // es (0x96)
        "mov es, ax",

        // Load stack pointer and push return address
        "mov rsp, [rdi + 120]", // rsp (0x78)
        "push qword ptr [rdi + 128]", // Push RIP (0x80) as return address

        // Load RFLAGS (0x88) -- see NOTE(review) about the IF window
        "push qword ptr [rdi + 136]",
        "popfq",

        // Load general purpose registers
        "mov r15, [rdi]",
        "mov r14, [rdi + 8]",
        "mov r13, [rdi + 16]",
        "mov r12, [rdi + 24]",
        "mov r11, [rdi + 32]",
        "mov r10, [rdi + 40]",
        "mov r9,  [rdi + 48]",
        "mov r8,  [rdi + 56]",
        "mov rsi, [rdi + 72]",
        "mov rbp, [rdi + 80]",
        "mov rbx, [rdi + 88]",
        "mov rdx, [rdi + 96]",
        "mov rcx, [rdi + 104]",
        "mov rax, [rdi + 112]",

        // Load final register (rdi last -- it held the context pointer)
        "mov rdi, [rdi + 64]",

        // Return to loaded context (RIP was pushed earlier)
        "ret",
        in("rdi") context,
        options(noreturn)
    );
}