⚠️ VeridianOS Kernel Source — low-level `no_std` kernel code. Functions that are `unsafe` are explicitly marked and document their safety contracts; all other functions are safe to call.

veridian_kernel/process/
creation.rs

1//! Process creation and setup
2//!
3//! Handles creating new processes from scratch and replacing process images
4//! via the exec system call. Includes argument/environment stack setup for
5//! newly executed programs.
6
7#[cfg(feature = "alloc")]
8extern crate alloc;
9
10#[cfg(feature = "alloc")]
11use alloc::{format, string::String, vec::Vec};
12
13use super::{
14    lifecycle::create_scheduler_task,
15    pcb::{Process, ProcessBuilder, ProcessState},
16    table,
17    thread::ThreadBuilder,
18    ProcessId, ProcessPriority,
19};
20#[allow(unused_imports)]
21use crate::{arch::context::ThreadContext, error::KernelError};
22
/// Default stack sizes for newly created processes.
///
/// The user stack is only the *initial* mapping; it can grow beyond this via
/// page-fault handling (see trailing comment).
pub const DEFAULT_USER_STACK_SIZE: usize = 256 * 1024; // 256KB initial (grows via page faults)
/// Default kernel stack size for a new thread.
pub const DEFAULT_KERNEL_STACK_SIZE: usize = 64 * 1024; // 64KB
26
/// Process creation options
///
/// Bundles everything needed by [`create_process_with_options`] to construct
/// a new process and its main thread.
#[cfg(feature = "alloc")]
pub struct ProcessCreateOptions {
    /// Human-readable process name; also used to derive the main thread's
    /// name (`"<name>-main"`).
    pub name: String,
    /// Parent process, or `None` (recorded as `ProcessId(0)`).
    pub parent: Option<ProcessId>,
    /// Scheduling priority for the new process.
    pub priority: ProcessPriority,
    /// Virtual address where the main thread begins execution.
    pub entry_point: usize,
    /// Program arguments, copied onto the new user stack as argv.
    pub argv: Vec<String>,
    /// Environment strings, copied onto the new user stack as envp.
    pub envp: Vec<String>,
    /// Initial user stack size in bytes (may grow via page faults).
    pub user_stack_size: usize,
    /// Kernel stack size in bytes for the main thread.
    pub kernel_stack_size: usize,
}
39
#[cfg(feature = "alloc")]
impl Default for ProcessCreateOptions {
    /// Baseline options: an unnamed, parentless, normal-priority process
    /// with an empty argv/envp, a zero entry point, and the default stack
    /// sizes.
    fn default() -> Self {
        ProcessCreateOptions {
            name: "unnamed".into(),
            parent: None,
            priority: ProcessPriority::Normal,
            entry_point: 0,
            argv: Vec::new(),
            envp: Vec::new(),
            user_stack_size: DEFAULT_USER_STACK_SIZE,
            kernel_stack_size: DEFAULT_KERNEL_STACK_SIZE,
        }
    }
}
55
/// Create a new process
///
/// Convenience wrapper around [`create_process_with_options`]: uses the
/// default options with only the name and entry point overridden.
#[cfg(feature = "alloc")]
pub fn create_process(name: String, entry_point: usize) -> Result<ProcessId, KernelError> {
    create_process_with_options(ProcessCreateOptions {
        name,
        entry_point,
        ..Default::default()
    })
}
67
/// Create a new process with options
///
/// Builds the PCB, initializes its address space, creates the main thread
/// and maps its user stack, writes argv/envp onto that stack, registers the
/// process in the global table, and hands the main thread to the scheduler.
///
/// # Errors
///
/// Fails when the process table is full, or when address-space, thread, or
/// stack setup fails.
#[cfg(feature = "alloc")]
pub fn create_process_with_options(
    options: ProcessCreateOptions,
) -> Result<ProcessId, KernelError> {
    // Enforce process count limit before allocating resources.
    // NOTE(review): this is check-then-act on a shared table; a concurrent
    // create on another CPU could push the count past MAX_PROCESSES between
    // this check and add_process() below -- confirm add_process() re-checks.
    let current_count = table::PROCESS_TABLE.count();
    if current_count >= super::MAX_PROCESSES {
        return Err(KernelError::ResourceExhausted {
            resource: "process table",
        });
    }

    // Create the process. A missing parent is recorded as PID 0.
    let process = ProcessBuilder::new(options.name.clone())
        .parent(options.parent.unwrap_or(ProcessId(0)))
        .priority(options.priority)
        .build();

    let pid = process.pid;

    // Set up the process's address space
    {
        let mut memory_space = process.memory_space.lock();
        // init() already maps kernel space, so we don't need to call map_kernel_space()
        // again
        memory_space.init()?;
    }

    // Create the main thread, named "<process-name>-main".
    // NOTE(review): on any `?` failure from here on, the partially-built
    // process is dropped before reaching the table -- confirm Drop reclaims
    // its address space and PID.
    let main_thread =
        ThreadBuilder::new(pid, format!("{}-main", options.name), options.entry_point)
            .user_stack_size(options.user_stack_size)
            .kernel_stack_size(options.kernel_stack_size)
            .build()?;

    let tid = main_thread.tid;

    // Map the user stack pages into the process's VAS page tables.
    // ThreadBuilder::build() allocates physical frames for the user stack
    // but does not map them. We call vas.map_page() for each page, which
    // allocates new physical frames and creates the PTE entries.
    {
        let user_base = main_thread.user_stack.base;
        let user_size = main_thread.user_stack.size;
        let num_pages = user_size / 4096;

        let mut memory_space = process.memory_space.lock();

        // Stack pages: user-accessible, writable, never executable.
        let stack_flags = crate::mm::PageFlags::PRESENT
            | crate::mm::PageFlags::USER
            | crate::mm::PageFlags::WRITABLE
            | crate::mm::PageFlags::NO_EXECUTE;
        for i in 0..num_pages {
            let vaddr = user_base + i * 4096;
            memory_space.map_page(vaddr, stack_flags)?;
        }

        // Update VAS stack_top to match the main thread's actual allocated stack
        // This ensures setup_exec_stack() uses the correct stack range
        memory_space.set_stack_top(user_base + user_size);
        memory_space.set_stack_size(user_size);
    }

    // Add thread to process
    process.add_thread(main_thread)?;

    // Setup user stack with arguments and environment
    // Convert String vectors to &str slices for setup_exec_stack
    let argv_refs: Vec<&str> = options.argv.iter().map(|s| s.as_str()).collect();
    let envp_refs: Vec<&str> = options.envp.iter().map(|s| s.as_str()).collect();

    // Get the process before adding to table so we can set up the stack
    let stack_top = setup_exec_stack(&process, &argv_refs, &envp_refs, None)?;

    // Update the thread context with the adjusted stack pointer
    if let Some(thread) = process.get_thread(tid) {
        let mut ctx = thread.context.lock();
        ctx.set_stack_pointer(stack_top);
    }

    // Add process to process table
    table::add_process(process)?;

    // Mark process as ready
    if let Some(process) = table::get_process(pid) {
        process.set_state(ProcessState::Ready);

        // Add main thread to scheduler
        if let Some(thread) = process.get_thread(tid) {
            create_scheduler_task(process, thread)?;
        }
    }

    // Memory hardening: stack canary + guard page for new process.
    // Only on x86_64 which has a proper LockedHeap allocator and trap
    // handler. AArch64 hangs on spin::Mutex in the RNG, and RISC-V has
    // no stvec trap handler so any fault during RNG init causes a reboot.
    // NOTE(review): both values below are bound to `_`-prefixed locals and
    // dropped at the end of this block -- whether the canary/guard has any
    // lasting effect depends on their Drop impls; confirm intentional.
    // NOTE(review): GuardPage::new's first argument here is a stack *size*,
    // not an address -- verify against GuardPage::new's signature.
    #[cfg(target_arch = "x86_64")]
    {
        use crate::security::memory_protection::{GuardPage, StackCanary};

        // Create stack canary for the main thread
        let _canary = StackCanary::new();

        // Set up guard page below kernel stack to detect overflow
        let _guard = GuardPage::new(
            options.kernel_stack_size, // guard at bottom of stack region
            4096,                      // one 4KB guard page
        );
    }

    // Audit log: process creation
    // NOTE(review): the trailing (0, 0) arguments are presumably uid/gid-style
    // fields -- confirm against log_process_create's signature.
    crate::security::audit::log_process_create(pid.0, 0, 0);

    Ok(pid)
}
185
186/// Parse a shebang (#!) line from the beginning of a file
187///
188/// If the data starts with `#!`, extracts the interpreter path and optional
189/// argument from the first line (up to 256 bytes or first newline).
190///
191/// # Examples
192/// - `#!/bin/sh\n`        -> Some(("/bin/sh", None))
193/// - `#!/bin/sh -e\n`     -> Some(("/bin/sh", Some("-e")))
194/// - `#!/usr/bin/env python3\n` -> Some(("/usr/bin/env", Some("python3")))
195/// - `\x7fELF...`         -> None (not a shebang)
196#[cfg(feature = "alloc")]
197pub fn parse_shebang(data: &[u8]) -> Option<(String, Option<String>)> {
198    // Must start with #!
199    if data.len() < 2 || data[0] != b'#' || data[1] != b'!' {
200        return None;
201    }
202
203    // Find end of first line, capped at 256 bytes
204    let max_len = data.len().min(256);
205    let line_end = data[2..max_len]
206        .iter()
207        .position(|&b| b == b'\n')
208        .map(|pos| pos + 2)
209        .unwrap_or(max_len);
210
211    // Extract the shebang line content (after #!)
212    let line = core::str::from_utf8(&data[2..line_end]).ok()?;
213    let line = line.trim();
214
215    if line.is_empty() {
216        return None;
217    }
218
219    // Split into interpreter and optional argument
220    // Only split on the first whitespace -- the rest is a single argument
221    if let Some(space_pos) = line.find([' ', '\t']) {
222        let interpreter = line[..space_pos].trim();
223        let arg = line[space_pos + 1..].trim();
224        if interpreter.is_empty() {
225            return None;
226        }
227        let opt_arg = if arg.is_empty() {
228            None
229        } else {
230            Some(String::from(arg))
231        };
232        Some((String::from(interpreter), opt_arg))
233    } else {
234        Some((String::from(line), None))
235    }
236}
237
/// Search for an executable by name in PATH directories
///
/// If `name` contains a `/`, it is treated as an explicit path and returned
/// as-is (if it exists in the VFS). Otherwise, the function first checks the
/// current process's `env_vars` for a `PATH` entry (colon-separated list of
/// directories). If no `PATH` environment variable is set, it falls back to
/// the default search directories: `/bin`, `/usr/bin`, `/usr/local/bin`.
#[cfg(feature = "alloc")]
pub fn search_path(name: &str) -> Option<String> {
    use crate::fs;

    // Explicit paths (anything containing '/') bypass the PATH search.
    if name.contains('/') {
        return fs::file_exists(name).then(|| String::from(name));
    }

    // Probe a single directory: does "<dir>/<name>" exist in the VFS?
    let probe = |dir: &str| -> Option<String> {
        let candidate = format!("{}/{}", dir, name);
        fs::file_exists(&candidate).then_some(candidate)
    };

    // Prefer the current process's PATH environment variable, if any.
    let path_env: Option<String> = super::current_process().and_then(|proc| {
        let env = proc.env_vars.lock();
        env.get("PATH").cloned()
    });

    match path_env {
        // Search each non-empty, colon-separated directory in PATH.
        Some(path_val) => path_val
            .split(':')
            .filter(|dir| !dir.is_empty())
            .find_map(|dir| probe(dir)),
        // Fallback: standard search directories when no PATH env is set.
        None => ["/bin", "/usr/bin", "/usr/local/bin"]
            .iter()
            .copied()
            .find_map(|dir| probe(dir)),
    }
}
288
/// Execute a new program in current process
///
/// Replaces the current process image with a new program.
/// This function does not return on success - the new program begins execution.
///
/// Supports shebang (`#!`) scripts: if the file starts with `#!`, the
/// interpreter specified on the shebang line is executed instead, with the
/// script path prepended to the argument list. Also supports PATH search --
/// if the path does not start with `/`, standard directories are searched.
///
/// # Errors
///
/// Fails if there is no current process/thread, the file cannot be found or
/// read, or the ELF image cannot be parsed or loaded.
#[cfg(feature = "alloc")]
pub fn exec_process(path: &str, argv: &[&str], envp: &[&str]) -> Result<(), KernelError> {
    use crate::{elf::ElfLoader, fs};

    // Exec only makes sense in the context of a running process and thread.
    let process = super::current_process().ok_or(KernelError::ProcessNotFound { pid: 0 })?;
    let current_thread = super::current_thread().ok_or(KernelError::ThreadNotFound { tid: 0 })?;

    // Resolve path via PATH search if it doesn't start with '/'
    let resolved_path = if !path.starts_with('/') {
        search_path(path).ok_or(KernelError::FsError(crate::error::FsError::NotFound))?
    } else {
        String::from(path)
    };

    // Step 1: Load new program from filesystem
    let file_data = fs::read_file(&resolved_path)
        .map_err(|_| KernelError::FsError(crate::error::FsError::NotFound))?;

    // Step 1b: Check for shebang (#!) and delegate to interpreter if found
    // NOTE(review): this recursion has no depth limit -- a shebang script
    // whose interpreter is itself a shebang script (or a cycle) can recurse
    // until the kernel stack is exhausted. Consider a nesting cap.
    if let Some((interpreter, opt_arg)) = parse_shebang(&file_data) {
        // Build new argv: [interpreter, opt_arg?, script_path, original_argv[1..]]
        let mut new_argv: Vec<&str> = Vec::new();
        let interp_ref: &str = &interpreter;
        new_argv.push(interp_ref);

        // Borrow opt_arg for the lifetime of this block
        let opt_arg_string;
        if let Some(ref arg) = opt_arg {
            opt_arg_string = arg.clone();
            new_argv.push(&opt_arg_string);
        }

        let resolved_ref: &str = &resolved_path;
        new_argv.push(resolved_ref);

        // Append original argv[1..] (skip argv[0] which was the script name)
        if argv.len() > 1 {
            new_argv.extend_from_slice(&argv[1..]);
        }

        // Recursively exec the interpreter
        return exec_process(&interpreter, &new_argv, envp);
    }

    // Step 2: Clear current address space and load new program
    // NOTE(review): clear() destroys the old image BEFORE the new ELF has
    // been validated. If ElfLoader::load fails below, the error is returned
    // to a process that no longer has a runnable image -- confirm callers
    // terminate the process on exec failure.
    let entry_point = {
        let mut memory_space = process.memory_space.lock();

        // Clear existing mappings before loading new program
        memory_space.clear();

        // Reinitialize the address space for the new program
        memory_space.init()?;

        // Re-map the main thread's user stack into the fresh VAS. `clear()`
        // removed all user mappings; without this, the new image would return
        // to an unmapped stack (the /bin/sh crash).
        if let Some(main_tid) = process.get_main_thread_id() {
            if let Some(main_thread) = process.get_thread(main_tid) {
                let user_base = main_thread.user_stack.base;
                let user_size = main_thread.user_stack.size;
                let flags = crate::mm::PageFlags::PRESENT
                    | crate::mm::PageFlags::USER
                    | crate::mm::PageFlags::WRITABLE
                    | crate::mm::PageFlags::NO_EXECUTE;
                let pages = user_size / 4096;
                for i in 0..pages {
                    let vaddr = user_base + i * 4096;
                    memory_space.map_page(vaddr, flags)?;
                }
                memory_space.set_stack_top(user_base + user_size);
                memory_space.set_stack_size(user_size);
            }
        }

        // Load ELF segments into address space and get entry point
        ElfLoader::load(&file_data, &mut memory_space)?
    };

    // Step 2b: Check for dynamic linking
    // (This parses `file_data` a second time; ElfLoader::load above already
    // parsed it once. Redundant but keeps the loading path simple.)
    let (final_entry, aux_vector) = {
        let loader = ElfLoader::new();
        let elf_binary = loader
            .parse(&file_data)
            .map_err(|_| KernelError::InvalidArgument {
                name: "elf",
                value: "failed to parse ELF for dynamic linking check",
            })?;

        if elf_binary.dynamic && elf_binary.interpreter.is_some() {
            // Dynamically linked -- load interpreter and build aux vector
            let dyn_info = crate::elf::dynamic::prepare_dynamic_linking(
                &file_data,
                &elf_binary,
                elf_binary.load_base,
            )?
            .ok_or(KernelError::InvalidArgument {
                name: "dynamic",
                value: "binary has interpreter but prepare_dynamic_linking returned None",
            })?;

            // Load interpreter LOAD segments into the process address space.
            // The interpreter is a separate ELF loaded at its own base address
            // (distinct from the main binary) to avoid overlap.
            let interp_data = fs::read_file(&dyn_info.interp_path)
                .map_err(|_| KernelError::FsError(crate::error::FsError::NotFound))?;
            {
                let mut memory_space = process.memory_space.lock();
                let _interp_entry = ElfLoader::load(&interp_data, &mut memory_space)?;
            }

            // Entry point is the interpreter, not the main binary
            (dyn_info.interp_entry, Some(dyn_info.aux_vector))
        } else {
            // Statically linked -- use binary entry directly, no aux vector
            (entry_point, None)
        }
    };

    // Step 2c: Set up TLS (Thread-Local Storage) if the ELF has a PT_TLS segment.
    //
    // x86_64 uses TLS variant II: %fs points to the Thread Control Block (TCB)
    // at the END of the TLS block. TLS variables are at negative offsets from %fs.
    // Layout: [tls_data | tls_bss | TCB_self_pointer]
    //                                ^--- %fs base points here
    //
    // We allocate the TLS block via mmap in the process's VAS, copy the TLS
    // template from the already-mapped PT_TLS segment, write a self-pointer
    // at the TCB, and store the FS_BASE for the syscall/enter_usermode path.
    #[cfg(target_arch = "x86_64")]
    {
        let loader = ElfLoader::new();
        // .ok() is fine here: Step 2b already parsed this data successfully,
        // so a parse failure at this point is effectively unreachable.
        let elf_binary = loader.parse(&file_data).ok();
        if let Some(ref binary) = elf_binary {
            if let Some(tls_seg) = binary
                .segments
                .iter()
                .find(|s| s.segment_type == crate::elf::types::SegmentType::Tls)
            {
                let tls_memsz = tls_seg.memory_size as usize;
                let tls_filesz = tls_seg.file_size as usize;
                // The TLS block needs: tls_memsz (data+bss) + 8 (TCB self-pointer),
                // aligned up to 16 bytes.
                let tcb_size = 8usize; // self-pointer
                let tls_block_size = ((tls_memsz + tcb_size) + 15) & !15;

                // Allocate user-space memory for TLS via mmap
                let memory_space = process.memory_space.lock();
                let tls_alloc =
                    memory_space.mmap(tls_block_size, crate::mm::vas::MappingType::Data);
                // NOTE(review): an mmap failure here silently skips TLS setup;
                // a TLS-using binary would then fault on first %fs access.
                if let Ok(tls_base_vaddr) = tls_alloc {
                    let tls_base = tls_base_vaddr.as_usize();

                    // The TCB (and %fs) points to: tls_base + tls_memsz
                    let tcb_addr = tls_base + tls_memsz;

                    // Copy TLS init data from the already-loaded PT_TLS segment.
                    // The template lives at tls_vaddr in the process's VAS (already
                    // mapped by the LOAD segment that contains the TLS section).
                    // We read from the ELF file data and write to the new TLS block.
                    if tls_filesz > 0 {
                        let tls_file_offset = tls_seg.file_offset as usize;
                        if tls_file_offset + tls_filesz <= file_data.len() {
                            let tls_init =
                                &file_data[tls_file_offset..tls_file_offset + tls_filesz];
                            let _ = crate::elf::write_to_user_pages(
                                &memory_space,
                                tls_base as u64,
                                tls_init,
                            );
                        }
                    }
                    // BSS portion (tls_memsz - tls_filesz) is already zero from mmap

                    // Write TCB self-pointer: *(u64*)tcb_addr = tcb_addr
                    // This is needed because %fs:0 must return the TCB address itself.
                    let self_ptr_bytes = (tcb_addr as u64).to_le_bytes();
                    let _ = crate::elf::write_to_user_pages(
                        &memory_space,
                        tcb_addr as u64,
                        &self_ptr_bytes,
                    );

                    drop(memory_space);

                    // Store FS_BASE in the process for later use by enter_usermode
                    process
                        .tls_fs_base
                        .store(tcb_addr as u64, core::sync::atomic::Ordering::Release);
                }
            }
        }
    }

    // Step 3: Setup new stack with arguments, environment, and aux vector
    let stack_top = setup_exec_stack(process, argv, envp, aux_vector.as_deref())?;

    // Step 3b: Populate the process's env_vars BTreeMap from envp.
    // This makes environment variables available to kernel-side lookups
    // (e.g. PATH resolution in search_path()) without reading user memory.
    {
        let mut env_map = process.env_vars.lock();
        env_map.clear();
        for &env_str in envp {
            // Entries without '=' are silently ignored.
            if let Some(eq_pos) = env_str.find('=') {
                let key = String::from(&env_str[..eq_pos]);
                let value = String::from(&env_str[eq_pos + 1..]);
                env_map.insert(key, value);
            }
        }
    }

    // Step 4: Reset thread context to new entry point
    {
        let mut ctx = current_thread.context.lock();

        // Set new instruction pointer to program entry (interpreter entry
        // for dynamically linked binaries, binary entry for static)
        ctx.set_instruction_pointer(final_entry as usize);

        // Set stack pointer to new stack top
        ctx.set_stack_pointer(stack_top);

        // Clear return value (argc is passed differently)
        ctx.set_return_value(0);
    }

    // Step 4b: Sync scheduler Task context with the updated thread context.
    // The scheduler has its own TaskContext (set at task creation) which must
    // match the thread's new entry point/stack, otherwise the scheduler will
    // resume at the old (pre-exec) address.
    {
        let sched = crate::sched::scheduler::current_scheduler().lock();
        if let Some(task_ptr) = sched.current() {
            // SAFETY: We are the currently running task and hold the scheduler
            // lock, so no other CPU will modify this Task concurrently.
            let task = unsafe { &mut *task_ptr.as_ptr() };
            task.context = crate::sched::task::TaskContext::new(final_entry as usize, stack_top);
        }
    }

    // Step 5: Close file descriptors marked close-on-exec
    {
        let file_table = process.file_table.lock();

        file_table.close_on_exec();
    }

    // Step 6: Reset signal handlers to defaults
    process.reset_signal_handlers();

    // The actual execution resumes when we return to user mode
    // The modified thread context will cause execution at the new entry point
    Ok(())
}
553
/// Stub used when the `alloc` feature is disabled.
///
/// Exec requires heap allocation (argv/envp handling, path resolution), so
/// it is unconditionally unavailable without `alloc`.
#[cfg(not(feature = "alloc"))]
pub fn exec_process(_path: &str, _argv: &[&str], _envp: &[&str]) -> Result<(), KernelError> {
    Err(KernelError::NotImplemented {
        feature: "exec (requires alloc)",
    })
}
560
/// Write a single `usize` to a user-space stack address via the physical
/// memory window.
///
/// The process's page tables map `vaddr` to a physical frame; the byte-wise
/// helper performs the lookup and writes through the identity-mapped
/// physical address.
///
/// # Safety
///
/// `vaddr` must be a valid mapped address in the process's VAS with write
/// permissions. The caller must ensure no concurrent access to this memory.
#[cfg(feature = "alloc")]
unsafe fn write_to_user_stack(
    memory_space: &crate::mm::VirtualAddressSpace,
    vaddr: usize,
    value: usize,
) {
    // Forward to the byte-wise writer, which splits the write across page
    // boundaries when needed, so the value's alignment does not matter.
    // SAFETY: the caller upholds the mapped-and-writable contract for the
    // full native-endian byte range of `value`.
    unsafe { write_bytes_to_user_stack(memory_space, vaddr, &value.to_ne_bytes()) }
}
585
/// Write a byte slice to a user-space stack address via the physical memory
/// window.  Handles writes that cross page boundaries by translating each
/// page separately and copying only the bytes within that page.
///
/// Silently does nothing if the address space has no page-table root, and
/// silently skips any page whose translation fails (see NOTE below).
///
/// # Safety
///
/// `vaddr` through `vaddr+data.len()-1` must be valid mapped addresses in the
/// process's VAS with write permissions.
#[cfg(feature = "alloc")]
unsafe fn write_bytes_to_user_stack(
    memory_space: &crate::mm::VirtualAddressSpace,
    vaddr: usize,
    data: &[u8],
) {
    use crate::mm::VirtualAddress;

    // No page-table root yet: there is nothing to write into.
    let pt_root = memory_space.get_page_table();
    if pt_root == 0 {
        return;
    }

    let mapper = unsafe { super::super::mm::vas::create_mapper_from_root_pub(pt_root) };

    // Write in page-sized chunks to handle data that crosses page boundaries.
    // Each virtual page may map to a non-contiguous physical frame, so we must
    // translate each page separately and copy only the bytes within that page.
    let mut offset = 0usize;
    while offset < data.len() {
        let cur_vaddr = vaddr + offset;
        let page_offset = cur_vaddr & 0xFFF;
        let bytes_in_page = core::cmp::min(0x1000 - page_offset, data.len() - offset);

        // NOTE(review): a failed translation silently drops this chunk --
        // callers cannot detect the partial write. Consider returning a
        // Result instead.
        if let Ok((frame, _flags)) = mapper.translate_page(VirtualAddress(cur_vaddr as u64)) {
            // frame.as_u64() is treated as a 4 KiB frame *number* (hence the
            // << 12) -- confirm against the frame type's as_u64 semantics.
            let phys_addr = (frame.as_u64() << 12) + page_offset as u64;
            // SAFETY: phys_addr is converted to a kernel-accessible virtual
            // address via phys_to_virt_addr. We copy exactly bytes_in_page
            // bytes, which does not exceed the page boundary.
            unsafe {
                let virt = crate::mm::phys_to_virt_addr(phys_addr);
                core::ptr::copy_nonoverlapping(
                    data.as_ptr().add(offset),
                    virt as *mut u8,
                    bytes_in_page,
                );
            }
        }

        offset += bytes_in_page;
    }
}
636
/// Setup stack for exec with arguments, environment, and optional auxiliary
/// vector.
///
/// Writes the full argc/argv/envp/auxv layout to the user stack via the
/// physical memory window. The layout (growing downward from stack_top) is:
///
/// ```text
/// [high addresses]
///   envp strings (null-terminated)
///   argv strings (null-terminated)
///   padding (16-byte alignment)
///   AT_NULL (0, 0)           <- auxv terminator (if present)
///   auxv[N-1] (type, value)
///   ...
///   auxv[0] (type, value)
///   NULL                     <- envp[N]
///   envp[N-1] pointer
///   ...
///   envp[0] pointer
///   NULL                     <- argv[argc]
///   argv[argc-1] pointer
///   ...
///   argv[0] pointer
///   argc (usize)             <- SP (returned)
/// [low addresses]
/// ```
///
/// # Errors
///
/// Returns `KernelError::OutOfMemory` when the combined strings, pointer
/// arrays, and aux vector do not fit in the user stack region.
#[cfg(feature = "alloc")]
fn setup_exec_stack(
    process: &Process,
    argv: &[&str],
    envp: &[&str],
    aux_vector: Option<&[crate::elf::dynamic::AuxVecEntry]>,
) -> Result<usize, KernelError> {
    let memory_space = process.memory_space.lock();

    // Get stack region
    let stack_base = memory_space.user_stack_base();
    let stack_size = memory_space.user_stack_size();
    let stack_top = stack_base + stack_size;

    // ---- Phase 0: Pre-flight bounds check ----
    // All writes below go through write_bytes_to_user_stack, which silently
    // skips unmapped pages. Previously the only bounds check ran AFTER the
    // strings were already written, so an oversized argv/envp would write
    // (or silently drop) data below stack_base before the error was noticed.
    // Reject oversized requests before touching the stack at all.
    let string_bytes: usize = argv
        .iter()
        .chain(envp.iter())
        .map(|s| s.len() + 1) // +1 for each NUL terminator
        .sum();
    // Each auxv entry is 2 usizes (type, value)
    let auxv_slots = aux_vector.map(|v| v.len() * 2).unwrap_or(0);
    // argc + argv ptrs + NULL + envp ptrs + NULL + auxv entries
    let ptrs_needed = 1 + argv.len() + 1 + envp.len() + 1 + auxv_slots;
    // 32 bytes of slack covers the two 16-byte alignment round-downs below.
    let needed = string_bytes + ptrs_needed * core::mem::size_of::<usize>() + 32;
    if needed > stack_size {
        return Err(KernelError::OutOfMemory {
            requested: needed,
            available: stack_size,
        });
    }

    // ---- Phase 1: Write strings from the top of the stack downward ----
    let mut string_sp = stack_top;

    // Write envp strings and record their user-space addresses
    let mut envp_addrs: Vec<usize> = Vec::with_capacity(envp.len());
    for &env in envp.iter().rev() {
        let bytes = env.as_bytes();
        string_sp -= bytes.len() + 1; // +1 for null terminator
        // SAFETY: Phase 0 verified all strings fit, so string_sp stays within
        // the stack mapping. We write the string bytes then a NUL terminator.
        unsafe {
            write_bytes_to_user_stack(&memory_space, string_sp, bytes);
            write_bytes_to_user_stack(&memory_space, string_sp + bytes.len(), &[0]);
        }
        envp_addrs.push(string_sp);
    }
    envp_addrs.reverse();

    // Write argv strings and record their user-space addresses
    let mut argv_addrs: Vec<usize> = Vec::with_capacity(argv.len());
    for &arg in argv.iter().rev() {
        let bytes = arg.as_bytes();
        string_sp -= bytes.len() + 1;
        // SAFETY: string_sp is within the stack mapping (checked in Phase 0).
        unsafe {
            write_bytes_to_user_stack(&memory_space, string_sp, bytes);
            write_bytes_to_user_stack(&memory_space, string_sp + bytes.len(), &[0]);
        }
        argv_addrs.push(string_sp);
    }
    argv_addrs.reverse();

    // ---- Phase 2: Align and write pointer arrays ----
    // Align to 16 bytes
    let mut sp = string_sp & !0xF;

    // Reserve the pointer area sized in Phase 0 (ptrs_needed slots).
    sp -= ptrs_needed * core::mem::size_of::<usize>();
    // Re-align to 16 bytes (ABI requirement: argc must sit at a 16-byte
    // aligned address when the new program starts)
    sp &= !0xF;

    // Defense in depth: Phase 0 should make this unreachable, but keep the
    // diagnostic in case the stack accounting ever drifts.
    if sp < stack_base {
        crate::kprintln!(
            "[STACK_SETUP] OVERFLOW! sp={:#x} < stack_base={:#x}, need {} bytes",
            sp,
            stack_base,
            stack_top - sp
        );
        return Err(KernelError::OutOfMemory {
            requested: stack_top - sp,
            available: stack_size,
        });
    }

    let mut write_pos = sp;

    // Write argc
    // SAFETY: write_pos is within the stack region (bounds checked above).
    unsafe {
        write_to_user_stack(&memory_space, write_pos, argv.len());
    }
    write_pos += core::mem::size_of::<usize>();

    // Write argv pointers
    for &addr in &argv_addrs {
        // SAFETY: write_pos is within the stack region.
        unsafe {
            write_to_user_stack(&memory_space, write_pos, addr);
        }
        write_pos += core::mem::size_of::<usize>();
    }
    // NULL terminator for argv
    // SAFETY: write_pos is within the stack region.
    unsafe {
        write_to_user_stack(&memory_space, write_pos, 0);
    }
    write_pos += core::mem::size_of::<usize>();

    // Write envp pointers
    for &addr in &envp_addrs {
        // SAFETY: write_pos is within the stack region.
        unsafe {
            write_to_user_stack(&memory_space, write_pos, addr);
        }
        write_pos += core::mem::size_of::<usize>();
    }
    // NULL terminator for envp
    // SAFETY: write_pos is within the stack region.
    unsafe {
        write_to_user_stack(&memory_space, write_pos, 0);
    }
    write_pos += core::mem::size_of::<usize>();

    // Write auxiliary vector (if present, for dynamically linked binaries)
    // NOTE(review): no explicit AT_NULL (0, 0) pair is written here; this
    // assumes `aux_vector` already ends with AT_NULL -- confirm in
    // prepare_dynamic_linking().
    if let Some(auxv) = aux_vector {
        for entry in auxv {
            // Each aux entry is two usize values: type, value
            // SAFETY: write_pos is within the stack region, reserved in
            // ptrs_needed calculation above.
            unsafe {
                write_to_user_stack(&memory_space, write_pos, entry.type_id as usize);
            }
            write_pos += core::mem::size_of::<usize>();
            // SAFETY: write_pos is within the stack region.
            unsafe {
                write_to_user_stack(&memory_space, write_pos, entry.value as usize);
            }
            write_pos += core::mem::size_of::<usize>();
        }
    }

    Ok(sp)
}