⚠️ VeridianOS Kernel Documentation - This is low-level `no_std` kernel code. All functions are unsafe unless explicitly marked otherwise.

veridian_kernel/userspace/loader.rs

1//! User-space program loader
2//!
3//! This module handles loading user-space programs from the filesystem
4//! and creating processes to execute them.
5
6#![allow(clippy::slow_vector_initialization, clippy::explicit_auto_deref)]
7
8#[cfg(feature = "alloc")]
9extern crate alloc;
10
11#[cfg(feature = "alloc")]
12use alloc::{string::String, vec, vec::Vec};
13
14#[allow(unused_imports)]
15use crate::{
16    elf::ElfLoader,
17    error::KernelError,
18    fs::get_vfs,
19    println,
20    process::{lifecycle, ProcessId},
21};
22
/// Locate and launch the first user-space process (`init`).
///
/// Every candidate path is passed to [`load_user_program`] in order, with
/// empty argument and environment lists. The first candidate that loads wins:
/// a `[LOADER]` line is printed and its PID is returned. Errors from
/// individual candidates are discarded without being logged.
///
/// # Errors
///
/// When no candidate loads, the result of `create_minimal_init` is returned
/// unchanged, including any error it reports.
#[cfg(feature = "alloc")]
pub fn load_init_process() -> Result<ProcessId, KernelError> {
    // Probe order matters: dedicated init binaries first, the shell last.
    const CANDIDATES: [&str; 5] = [
        "/sbin/init",
        "/bin/init",
        "/usr/sbin/init",
        "/usr/bin/init",
        "/bin/sh", // Fallback to shell if no init found
    ];

    for &candidate in CANDIDATES.iter() {
        // A failed candidate is not fatal; move on to the next one.
        if let Ok(pid) = load_user_program(candidate, &[], &[]) {
            println!("[LOADER] init from {} (PID {})", candidate, pid.0);
            return Ok(pid);
        }
    }

    // Nothing on disk could be loaded: fall back to the built-in stub.
    create_minimal_init()
}
50
/// Load a user program from the filesystem and create a process for it.
///
/// Steps, in order:
/// 1. Open `path` through the VFS and read the whole file into a kernel
///    buffer.
/// 2. Parse the buffer as an ELF binary.
/// 3. Create a process named after the last path component, using the ELF
///    entry point, `argv` and `envp`.
/// 4. Fill fds 0/1/2 of the new process with `/dev/console`, or with a
///    [`SerialConsoleNode`] when that path cannot be resolved.
/// 5. On every architecture except RISC-V: load the ELF segments into the
///    process's memory space and, for dynamic binaries that name an
///    interpreter, try to load it and point the main thread at its entry.
///
/// # Errors
///
/// * `KernelError::NotFound` if `path` cannot be opened.
/// * `KernelError::FsError(IoError)` if metadata or file contents cannot be
///   read, or fewer bytes than the reported size were read.
/// * `KernelError::InvalidArgument` if the file does not parse as ELF.
/// * `KernelError::InvalidState` if the loader's entry point differs from the
///   parsed one.
/// * Any error returned by `lifecycle::create_process_with_options` or
///   `ElfLoader::load`.
///
/// Failing to load the dynamic linker is *not* an error: a warning is printed
/// and the process keeps the main binary's entry point.
///
/// NOTE(review): errors raised after the process has been created return
/// without tearing that process down; callers that probe several paths may
/// accumulate half-initialised processes. Confirm whether a cleanup API
/// exists and should be called here.
#[cfg(feature = "alloc")]
pub fn load_user_program(
    path: &str,
    argv: &[&str],
    envp: &[&str],
) -> Result<ProcessId, KernelError> {
    // Open the file. Every VFS failure is reported as "not found".
    let file_node = get_vfs()
        .read()
        .open(path, crate::fs::file::OpenFlags::read_only())
        .map_err(|_| KernelError::NotFound {
            resource: "program file",
            id: 0,
        })?;

    // Get file size
    let metadata = file_node
        .metadata()
        .map_err(|_| KernelError::FsError(crate::error::FsError::IoError))?;
    let file_size = metadata.size;

    // Read the entire file into a zero-initialised buffer.
    let mut buffer = vec![0u8; file_size];

    let bytes_read = file_node
        .read(0, &mut buffer)
        .map_err(|_| KernelError::FsError(crate::error::FsError::IoError))?;

    // A short read would leave trailing zeros that the ELF parser could
    // misinterpret, so treat it as an I/O error.
    if bytes_read != file_size {
        return Err(KernelError::FsError(crate::error::FsError::IoError));
    }

    // Create an ELF loader and parse the binary
    let loader = ElfLoader::new();
    let binary = loader
        .parse(&buffer)
        .map_err(|_| KernelError::InvalidArgument {
            name: "elf_binary",
            value: "failed to parse ELF",
        })?;

    // Get the entry point
    let entry_point = binary.entry_point as usize;

    // Extract program name from path (text after the last '/').
    let name: String = path.rsplit('/').next().unwrap_or("unknown").into();

    // Convert arguments to owned strings
    let argv_vec: Vec<String> = argv.iter().map(|s| String::from(*s)).collect();
    let envp_vec: Vec<String> = envp.iter().map(|s| String::from(*s)).collect();

    // Create process with ELF entry point
    let options = lifecycle::ProcessCreateOptions {
        name,
        parent: None,
        priority: crate::process::ProcessPriority::Normal,
        entry_point,
        argv: argv_vec,
        envp: envp_vec,
        user_stack_size: lifecycle::DEFAULT_USER_STACK_SIZE,
        kernel_stack_size: lifecycle::DEFAULT_KERNEL_STACK_SIZE,
    };

    let pid = lifecycle::create_process_with_options(options)?;

    #[cfg(target_arch = "x86_64")]
    // SAFETY: raw_serial_str writes to the COM1 I/O port for diagnostic output.
    unsafe {
        crate::arch::x86_64::idt::raw_serial_str(b"[LOADER] pid created, opening fds\n");
    }

    // Open /dev/console for stdin(0), stdout(1), stderr(2).
    // Try VFS first; fall back to a direct serial console node if /dev/console
    // is not yet mounted (ensures fds 0/1/2 are always occupied so that
    // pipe()/open() don't claim those slots).
    if let Some(process) = crate::process::get_process(pid) {
        use alloc::sync::Arc;

        use crate::fs::file::{File, OpenFlags};

        // The VFS read guard lives only inside this block expression.
        let console_node: Arc<dyn crate::fs::VfsNode> = {
            let vfs = get_vfs().read();
            match vfs.resolve_path("/dev/console") {
                Ok(node) => node,
                Err(_) => Arc::new(SerialConsoleNode),
            }
        };

        let ft = process.file_table.lock();

        // The three opens are best-effort: their results are intentionally
        // ignored, matching the "always try to occupy 0/1/2" goal above.

        // fd 0 = stdin (read-only)
        let stdin_file = Arc::new(File::new_with_path(
            console_node.clone(),
            OpenFlags::read_only(),
            String::from("/dev/console"),
        ));
        let _ = ft.open(stdin_file);

        // fd 1 = stdout (write-only)
        let stdout_file = Arc::new(File::new_with_path(
            console_node.clone(),
            OpenFlags::write_only(),
            String::from("/dev/console"),
        ));
        let _ = ft.open(stdout_file);

        // fd 2 = stderr (write-only)
        let stderr_file = Arc::new(File::new_with_path(
            console_node,
            OpenFlags::write_only(),
            String::from("/dev/console"),
        ));
        let _ = ft.open(stderr_file);
    }

    // Load the ELF segments into the process's address space.
    //
    // On RISC-V, the MMU is not enabled (satp = Bare mode), so ELF load
    // addresses (e.g. 0x400000) map directly to physical addresses that
    // are not valid RAM on the QEMU virt machine (RAM starts at
    // 0x80000000). Writing to those addresses causes a store access fault
    // and a CPU reset. Skip segment loading on RISC-V; the process PCB
    // is still created for bookkeeping.
    #[cfg(not(target_arch = "riscv64"))]
    if let Some(process) = crate::process::get_process(pid) {
        let mut memory_space = process.memory_space.lock();

        #[cfg(target_arch = "x86_64")]
        // SAFETY: raw_serial_str writes to the COM1 I/O port for diagnostic output.
        unsafe {
            crate::arch::x86_64::idt::raw_serial_str(b"[LOADER] loading ELF segments\n");
        }

        // Use the ELF loader to load the binary into the process's address space
        let entry = ElfLoader::load(&buffer, &mut *memory_space)?;

        // Release the memory-space lock now. `load_dynamic_linker` below
        // acquires `process.memory_space` itself; keeping this guard alive
        // across that call would self-deadlock on a non-reentrant lock.
        drop(memory_space);

        // Verify the entry point matches
        if entry != binary.entry_point {
            return Err(KernelError::InvalidState {
                expected: "matching entry point",
                actual: "entry point mismatch after loading",
            });
        }

        // Handle dynamic linking if needed
        if binary.dynamic {
            if let Some(interpreter) = &binary.interpreter {
                match load_dynamic_linker(process, interpreter, &binary) {
                    Ok(interp_entry) => {
                        // Update process entry point to the interpreter.
                        // The dynamic linker will initialize GOT/PLT then jump
                        // to the main binary's entry point.
                        if let Some(main_tid) = process.get_main_thread_id() {
                            if let Some(thread) = process.get_thread(main_tid) {
                                use crate::arch::context::ThreadContext;
                                let mut ctx = thread.context.lock();
                                ctx.set_instruction_pointer(interp_entry as usize);
                            }
                        }
                    }
                    Err(_e) => {
                        // No dynamic linker available in rootfs -- warn and
                        // fall through to the main entry point. The binary
                        // will likely GP fault due to uninitialized GOT/PLT.
                        println!(
                            "[LOADER] WARNING: dynamic binary requires interpreter '{}' but it \
                             could not be loaded; proceeding with main entry (expect GP fault)",
                            interpreter
                        );
                    }
                }
            } else {
                println!(
                    "[LOADER] WARNING: binary is dynamically linked but has no interpreter set"
                );
            }
        }
    }

    #[cfg(target_arch = "x86_64")]
    // SAFETY: raw_serial_str writes to the COM1 I/O port for diagnostic output.
    unsafe {
        crate::arch::x86_64::idt::raw_serial_str(b"[LOADER] load complete, returning pid\n");
    }

    Ok(pid)
}
240
/// Load the dynamic linker/interpreter for a dynamically linked binary.
///
/// Reads `interpreter_path` from the VFS, parses it as ELF, and for every
/// `Load` segment maps pages at `0x7F00_0000_0000 + segment.virtual_addr`,
/// copies the file-backed bytes and zeroes the remainder (BSS). Finally calls
/// `setup_auxiliary_vector` and returns the interpreter's relocated entry
/// point.
///
/// This function acquires `process.memory_space`; the caller must not hold
/// that lock when calling it.
///
/// # Errors
///
/// * `KernelError::NotFound` if the interpreter cannot be opened.
/// * `KernelError::FsError(IoError)` if metadata or contents cannot be read,
///   or the read returns fewer bytes than the reported file size.
/// * `KernelError::InvalidArgument` if the file does not parse as ELF, or a
///   `Load` segment is malformed (file range outside the image, file size
///   larger than memory size, or address arithmetic overflow).
/// * Any error from `map_page` or `setup_auxiliary_vector`.
///
/// NOTE(review): segment bytes are written through `adjusted_vaddr` as a raw
/// pointer in whatever address space is active when this runs. This is only
/// correct if `map_page` makes those addresses writable in the current
/// address space -- confirm against the memory-space implementation.
#[cfg(all(feature = "alloc", not(target_arch = "riscv64")))]
fn load_dynamic_linker(
    process: &crate::process::Process,
    interpreter_path: &str,
    _main_binary: &crate::elf::ElfBinary,
) -> Result<u64, KernelError> {
    use crate::mm::PageFlags;

    const PAGE_SIZE: u64 = 0x1000;
    const PAGE_MASK: u64 = PAGE_SIZE - 1;

    /// Error used for every structurally invalid interpreter segment.
    fn malformed_segment() -> KernelError {
        KernelError::InvalidArgument {
            name: "interpreter_elf",
            value: "malformed load segment",
        }
    }

    // Read the interpreter from filesystem
    let file_node = get_vfs()
        .read()
        .open(interpreter_path, crate::fs::file::OpenFlags::read_only())
        .map_err(|_| KernelError::NotFound {
            resource: "interpreter",
            id: 0,
        })?;

    let metadata = file_node
        .metadata()
        .map_err(|_| KernelError::FsError(crate::error::FsError::IoError))?;
    let file_size = metadata.size;

    let mut buffer = vec![0u8; file_size];

    let bytes_read = file_node
        .read(0, &mut buffer)
        .map_err(|_| KernelError::FsError(crate::error::FsError::IoError))?;

    // Same rule as for the main binary: a truncated image is an I/O error,
    // not something to hand to the ELF parser.
    if bytes_read != file_size {
        return Err(KernelError::FsError(crate::error::FsError::IoError));
    }

    // Parse the interpreter ELF
    let loader = ElfLoader::new();
    let interp_binary = loader
        .parse(&buffer)
        .map_err(|_| KernelError::InvalidArgument {
            name: "interpreter_elf",
            value: "failed to parse interpreter ELF",
        })?;

    // Load interpreter at a high address to avoid collision with main binary
    // Standard Linux ld.so loads at 0x7f00_0000_0000 region
    let interp_base = 0x7F00_0000_0000_u64;

    let mut memory_space = process.memory_space.lock();

    // Map and load each segment of the interpreter
    for segment in &interp_binary.segments {
        if segment.segment_type != crate::elf::SegmentType::Load {
            continue;
        }

        // The ELF format does not allow the file image of a segment to be
        // larger than its memory image; copying it would run past the pages
        // mapped below.
        if segment.file_size > segment.memory_size {
            return Err(malformed_segment());
        }

        // Validate the file-backed range *before* mapping anything, so the
        // raw copy below can never read outside `buffer`.
        let file_bytes: &[u8] = if segment.file_size > 0 {
            let start = segment.file_offset as usize;
            let len = segment.file_size as usize;
            start
                .checked_add(len)
                .and_then(|end| buffer.get(start..end))
                .ok_or_else(malformed_segment)?
        } else {
            &[]
        };

        // Calculate adjusted virtual address, rejecting wrap-around.
        let adjusted_vaddr = interp_base
            .checked_add(segment.virtual_addr)
            .ok_or_else(malformed_segment)?;
        let rounded_end = adjusted_vaddr
            .checked_add(segment.memory_size)
            .and_then(|end| end.checked_add(PAGE_MASK))
            .ok_or_else(malformed_segment)?;
        let page_start = adjusted_vaddr & !PAGE_MASK;
        let page_end = rounded_end & !PAGE_MASK;
        let num_pages = ((page_end - page_start) / PAGE_SIZE) as usize;

        // Determine page flags
        let mut flags = PageFlags::USER | PageFlags::PRESENT;
        if (segment.flags & 0x2) != 0 {
            // PF_W
            flags |= PageFlags::WRITABLE;
        }
        if (segment.flags & 0x1) == 0 {
            // PF_X not set
            flags |= PageFlags::NO_EXECUTE;
        }

        // Map pages for this segment
        for i in 0..num_pages {
            let addr = page_start + (i as u64 * PAGE_SIZE);
            memory_space.map_page(addr as usize, flags)?;
        }

        // Copy segment data
        if !file_bytes.is_empty() {
            // SAFETY: `file_bytes` is a bounds-checked sub-slice of `buffer`,
            // so the source range is valid for `file_bytes.len()` bytes.
            // `dest` starts at adjusted_vaddr, inside the pages mapped in the
            // loop above, and `file_size <= memory_size` was checked, so the
            // destination range stays within the mapped region. The mapped
            // virtual pages and the heap buffer do not overlap.
            unsafe {
                let dest = adjusted_vaddr as *mut u8;
                core::ptr::copy_nonoverlapping(file_bytes.as_ptr(), dest, file_bytes.len());
            }
        }

        // Zero BSS
        if segment.memory_size > segment.file_size {
            // SAFETY: bss_start is within the mapped page range (pages were
            // mapped for the full memory_size above, and adjusted_vaddr +
            // memory_size was checked not to overflow). bss_size is the
            // difference between memory_size and file_size, so write_bytes
            // stays within the mapped region. Zeroing BSS is required by
            // the ELF specification.
            unsafe {
                let bss_start = (adjusted_vaddr + segment.file_size) as *mut u8;
                let bss_size = (segment.memory_size - segment.file_size) as usize;
                core::ptr::write_bytes(bss_start, 0, bss_size);
            }
        }
    }

    // Calculate interpreter entry point (adjusted for base address)
    let interp_entry = interp_base
        .checked_add(interp_binary.entry_point)
        .ok_or(KernelError::InvalidArgument {
            name: "interpreter_elf",
            value: "entry point out of range",
        })?;

    // Set up auxiliary vector (auxv) for the interpreter
    // This provides information about the main program to the dynamic linker
    setup_auxiliary_vector(process, _main_binary, interp_base)?;

    Ok(interp_entry)
}
353
/// Prepare the auxiliary vector (auxv) describing the main program to the
/// dynamic linker.
///
/// At present this only *builds* the list of `(tag, value)` pairs and then
/// drops it; nothing is written to the process stack and `_process` is not
/// touched. The function therefore always returns `Ok(())`.
///
/// NOTE(review): `AT_PHDR` is filled with `main_binary.load_base` and
/// `AT_PHNUM` with `main_binary.segments.len()`. Whether those equal the
/// in-memory program-header address and the ELF header's program-header count
/// cannot be told from here -- verify before the vector is actually consumed.
#[cfg(all(feature = "alloc", not(target_arch = "riscv64")))]
fn setup_auxiliary_vector(
    _process: &crate::process::Process,
    main_binary: &crate::elf::ElfBinary,
    interp_base: u64,
) -> Result<(), KernelError> {
    // Auxiliary vector tags (numbering from Linux elf.h).
    const AT_NULL: u64 = 0; // terminator
    const AT_PHDR: u64 = 3; // address of the program headers
    const AT_PHENT: u64 = 4; // size of one program header entry
    const AT_PHNUM: u64 = 5; // number of program headers
    const AT_PAGESZ: u64 = 6; // system page size
    const AT_BASE: u64 = 7; // interpreter base address
    const AT_ENTRY: u64 = 9; // program entry point
    const AT_UID: u64 = 11; // real user ID
    const AT_EUID: u64 = 12; // effective user ID
    const AT_GID: u64 = 13; // real group ID
    const AT_EGID: u64 = 14; // effective group ID

    let program_header_count = main_binary.segments.len() as u64;

    // Entries in the order they would be emitted; all IDs are 0 (root).
    let entries: [(u64, u64); 11] = [
        (AT_PAGESZ, 0x1000),
        (AT_BASE, interp_base),
        (AT_ENTRY, main_binary.entry_point),
        (AT_PHNUM, program_header_count),
        (AT_PHENT, 56), // size of an Elf64_Phdr
        (AT_PHDR, main_binary.load_base),
        (AT_UID, 0),
        (AT_EUID, 0),
        (AT_GID, 0),
        (AT_EGID, 0),
        (AT_NULL, 0),
    ];

    let mut auxv: Vec<(u64, u64)> = Vec::with_capacity(entries.len());
    auxv.extend_from_slice(&entries);

    // The vector would normally be pushed onto the user stack after the
    // environment pointers. That step is not performed here; the data is
    // only assembled and then dropped when this function returns.

    Ok(())
}
395
/// Create a placeholder `init` process when no init binary could be loaded.
///
/// A process named `init` is created with `System` priority, entry point
/// `0x200000`, a 64 KiB user stack and a 16 KiB kernel stack.
///
/// What happens at the entry address depends on the target:
/// * x86_64 and RISC-V: nothing is mapped or written; only the process
///   record is created.
/// * AArch64: one page is mapped at the entry address and a single
///   branch-to-self instruction is stored there.
///
/// # Errors
///
/// Propagates errors from `lifecycle::create_process_with_options` and, on
/// AArch64, from `map_page`.
#[cfg(feature = "alloc")]
fn create_minimal_init() -> Result<ProcessId, KernelError> {
    // User-space address at which the stub would start executing.
    let init_entry = 0x200000;

    let pid = lifecycle::create_process_with_options(lifecycle::ProcessCreateOptions {
        name: String::from("init"),
        parent: None,
        priority: crate::process::ProcessPriority::System,
        entry_point: init_entry,
        argv: vec![String::from("init")],
        envp: Vec::new(),
        user_stack_size: 64 * 1024, // smaller stack for the minimal init
        kernel_stack_size: 16 * 1024,
    })?;

    // x86_64 (bootloader 0.11+): user-space addresses cannot be written
    // directly from here, so no code is placed at the entry point. Real
    // user-space execution goes through the ELF loader + iretq path.

    #[cfg(target_arch = "aarch64")]
    {
        // Encoding of `b .` (branch to self).
        const BRANCH_TO_SELF: u32 = 0x14000000;

        if let Some(process) = crate::process::get_process(pid) {
            let mut memory_space = process.memory_space.lock();
            let page_flags = crate::mm::PageFlags::PRESENT | crate::mm::PageFlags::USER;
            memory_space.map_page(init_entry, page_flags)?;

            // SAFETY: the entry page was mapped with USER | PRESENT just
            // above. Storing the AArch64 "b ." instruction there yields a
            // minimal init that spins in place.
            unsafe {
                (init_entry as *mut u32).write(BRANCH_TO_SELF);
            }
        }
    }

    // RISC-V: nothing is written either. During early boot map_page() only
    // records the mapping in a BTreeMap without creating hardware page table
    // entries. With SATP in Bare mode, 0x200000 is physical 0x200000, which
    // is not RAM on the QEMU virt machine (RAM starts at 0x80000000); a store
    // there faults and, with no trap handler (stvec) configured, the CPU
    // reboots via OpenSBI. The PCB exists for bookkeeping only until page
    // tables are activated in a later phase.

    Ok(pid)
}
460
/// Start an interactive shell process.
///
/// Candidate shells are tried in a fixed order via [`load_user_program`];
/// each is given its own path as `argv[0]` and a small default environment.
/// The PID of the first shell that loads is returned. Errors from individual
/// candidates are discarded without being logged.
///
/// # Errors
///
/// If no candidate loads, the result of `create_minimal_shell` is returned
/// unchanged.
#[cfg(feature = "alloc")]
pub fn load_shell() -> Result<ProcessId, KernelError> {
    // Preferred shell first.
    const SHELLS: [&str; 4] = [
        "/bin/vsh",  // VeridianOS shell
        "/bin/sh",   // Standard shell
        "/bin/bash", // Bash
        "/bin/ash",  // Ash (busybox)
    ];
    // Environment handed to whichever shell is started.
    const SHELL_ENV: [&str; 3] = ["PATH=/bin:/usr/bin", "HOME=/", "TERM=veridian"];

    for &shell in SHELLS.iter() {
        let argv = [shell];
        // A shell that fails to load is skipped.
        if let Ok(pid) = load_user_program(shell, &argv, &SHELL_ENV) {
            return Ok(pid);
        }
    }

    // No shell binary available: create the placeholder shell process.
    create_minimal_shell()
}
490
/// Create a placeholder shell process when no shell binary could be loaded.
///
/// The process is named `vsh`, is registered as a child of PID 1, runs at
/// `Normal` priority with the default stack sizes, and has its entry point
/// set to `0x300000`. No code is mapped or written at that address by this
/// function.
///
/// # Errors
///
/// Returns whatever `lifecycle::create_process_with_options` returns.
#[cfg(feature = "alloc")]
fn create_minimal_shell() -> Result<ProcessId, KernelError> {
    let shell_name = String::from("vsh");
    let shell_argv = vec![String::from("vsh")];
    let shell_env: Vec<String> = ["PATH=/bin", "HOME=/"]
        .iter()
        .map(|entry| String::from(*entry))
        .collect();

    lifecycle::create_process_with_options(lifecycle::ProcessCreateOptions {
        name: shell_name,
        parent: Some(ProcessId(1)), // child of init
        priority: crate::process::ProcessPriority::Normal,
        entry_point: 0x300000,
        argv: shell_argv,
        envp: shell_env,
        user_stack_size: lifecycle::DEFAULT_USER_STACK_SIZE,
        kernel_stack_size: lifecycle::DEFAULT_KERNEL_STACK_SIZE,
    })
}
510
/// Lightweight serial console VFS node used as a fallback when /dev/console
/// is not available. Provides serial UART I/O for stdin/stdout/stderr so
/// that fds 0/1/2 are always occupied in the process file table.
///
/// The node is a stateless character device: offsets are ignored, and every
/// directory-style or size-changing operation is rejected.
#[cfg(feature = "alloc")]
struct SerialConsoleNode;

#[cfg(feature = "alloc")]
impl crate::fs::VfsNode for SerialConsoleNode {
    /// Always reports a character device.
    fn node_type(&self) -> crate::fs::NodeType {
        crate::fs::NodeType::CharDevice
    }

    /// Read bytes from the serial port into `buffer`.
    ///
    /// On x86_64 this polls COM1 and does not return until **every** slot of
    /// `buffer` has been filled, so the returned count equals `buffer.len()`.
    /// On other architectures nothing is read and `Ok(0)` (EOF) is returned.
    /// `_offset` is ignored.
    fn read(&self, _offset: usize, buffer: &mut [u8]) -> Result<usize, KernelError> {
        // Blocking read from serial (same logic as sys_read stdin fallback)
        #[cfg(target_arch = "x86_64")]
        {
            let mut count = 0;
            for slot in buffer.iter_mut() {
                // Spin until the UART reports a received byte.
                loop {
                    let status: u8;
                    // SAFETY: Reading COM1 line status register (0x3FD) via x86 port I/O.
                    unsafe {
                        core::arch::asm!("in al, dx", out("al") status, in("dx") 0x3FDu16);
                    }
                    // Bit 0 of the line status register = data ready.
                    if (status & 1) != 0 {
                        let byte: u8;
                        // SAFETY: Reading COM1 data register (0x3F8) after confirming data ready.
                        unsafe {
                            core::arch::asm!("in al, dx", out("al") byte, in("dx") 0x3F8u16);
                        }
                        *slot = byte;
                        count += 1;
                        break;
                    }
                    core::hint::spin_loop();
                }
            }
            Ok(count)
        }
        #[cfg(not(target_arch = "x86_64"))]
        {
            // Non-x86: no serial polling available, return EOF
            let _ = buffer;
            Ok(0)
        }
    }

    /// Write `data` to the kernel console via `print!`.
    ///
    /// Valid UTF-8 runs are printed as text in a single call, so multi-byte
    /// characters reach the console unchanged. (Printing each byte with
    /// `byte as char` would re-encode every byte >= 0x80 as a separate
    /// two-byte sequence and corrupt such output.) Bytes that are not part of
    /// valid UTF-8 keep the previous byte-to-`char` rendering. `_offset` is
    /// ignored and the full length of `data` is always reported as written.
    fn write(&self, _offset: usize, data: &[u8]) -> Result<usize, KernelError> {
        let mut pending = data;
        while !pending.is_empty() {
            match core::str::from_utf8(pending) {
                Ok(text) => {
                    crate::print!("{}", text);
                    break;
                }
                Err(err) => {
                    // Everything before `valid_up_to` is known-good UTF-8.
                    let (valid, rest) = pending.split_at(err.valid_up_to());
                    if !valid.is_empty() {
                        if let Ok(text) = core::str::from_utf8(valid) {
                            crate::print!("{}", text);
                        }
                    }

                    // `error_len` is `None` when the input ends inside a
                    // multi-byte sequence; then the whole tail is emitted
                    // byte by byte.
                    let bad_len = err.error_len().unwrap_or(rest.len());
                    let (bad, tail) = rest.split_at(bad_len);
                    for &byte in bad {
                        crate::print!("{}", byte as char);
                    }
                    pending = tail;
                }
            }
        }
        Ok(data.len())
    }

    /// Fixed metadata: zero-sized character device, mode `0o666`, owned by
    /// uid/gid 0, with all timestamps and the inode number set to 0.
    fn metadata(&self) -> Result<crate::fs::Metadata, KernelError> {
        Ok(crate::fs::Metadata {
            node_type: crate::fs::NodeType::CharDevice,
            size: 0,
            permissions: crate::fs::Permissions::from_mode(0o666),
            uid: 0,
            gid: 0,
            created: 0,
            modified: 0,
            accessed: 0,
            inode: 0,
        })
    }

    /// Not a directory: always fails with `FsError::NotADirectory`.
    fn readdir(&self) -> Result<Vec<crate::fs::DirEntry>, KernelError> {
        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
    }

    /// Not a directory: always fails with `FsError::NotADirectory`.
    fn lookup(&self, _name: &str) -> Result<alloc::sync::Arc<dyn crate::fs::VfsNode>, KernelError> {
        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
    }

    /// Unsupported: always fails with `OperationNotSupported`.
    fn create(
        &self,
        _name: &str,
        _permissions: crate::fs::Permissions,
    ) -> Result<alloc::sync::Arc<dyn crate::fs::VfsNode>, KernelError> {
        Err(KernelError::OperationNotSupported {
            operation: "create on serial console",
        })
    }

    /// Unsupported: always fails with `OperationNotSupported`.
    fn mkdir(
        &self,
        _name: &str,
        _permissions: crate::fs::Permissions,
    ) -> Result<alloc::sync::Arc<dyn crate::fs::VfsNode>, KernelError> {
        Err(KernelError::OperationNotSupported {
            operation: "mkdir on serial console",
        })
    }

    /// Unsupported: always fails with `OperationNotSupported`.
    fn unlink(&self, _name: &str) -> Result<(), KernelError> {
        Err(KernelError::OperationNotSupported {
            operation: "unlink on serial console",
        })
    }

    /// Unsupported: always fails with `OperationNotSupported`.
    fn truncate(&self, _size: usize) -> Result<(), KernelError> {
        Err(KernelError::OperationNotSupported {
            operation: "truncate on serial console",
        })
    }
}