⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/process/
fork.rs

//! Process forking (copy-on-write)
//!
//! Implements the fork system call which creates a child process as a copy
//! of the current process. Provides two variants, `fork_process` and
//! `cow_fork`; both currently mark all user-space pages as copy-on-write,
//! sharing frames between parent and child and deferring physical copies
//! to the page fault handler when either process writes.
8
9#[cfg(feature = "alloc")]
10extern crate alloc;
11
12#[cfg(feature = "alloc")]
13use alloc::format;
14#[cfg(feature = "alloc")]
15use alloc::vec::Vec;
16
17use super::{
18    lifecycle::create_scheduler_task,
19    pcb::{ProcessBuilder, ProcessState},
20    table,
21    thread::ThreadBuilder,
22    ProcessId,
23};
24#[allow(unused_imports)]
25use crate::{arch::context::ThreadContext, error::KernelError, println};
26
/// Fork the current process.
///
/// Creates a child process as a copy of the caller. Although the module
/// historically distinguished a "full-copy" fork from [`cow_fork`], this
/// path also shares memory copy-on-write: the address space metadata is
/// cloned and every user-space frame is registered in the demand-paging
/// COW table with ref_count = 2 (parent + child); physical copies are
/// deferred to the page fault handler on first write by either side.
///
/// The child inherits capabilities, the open-file table, environment
/// variables, container membership, and pgid/sid from the parent. On
/// x86_64 the child's initial register state is captured from the live
/// syscall frame, so it resumes at the instruction after `fork()` with a
/// return value of 0.
///
/// # Errors
///
/// * `KernelError::ResourceExhausted` when the process table (including
///   zombies awaiting reap) is already at `MAX_PROCESSES`.
/// * `KernelError::ProcessNotFound` / `KernelError::ThreadNotFound` when
///   called outside a process/thread context.
/// * Propagates failures from address-space/capability cloning, thread
///   creation, process-table insertion, and scheduler registration.
#[cfg(feature = "alloc")]
pub fn fork_process() -> Result<ProcessId, KernelError> {
    // Enforce process count limit (includes zombies awaiting reap).
    // This prevents unbounded process table growth during workloads
    // like BusyBox native compilation (213+ sequential fork+exec+wait).
    let current_count = table::PROCESS_TABLE.count();
    if current_count >= super::MAX_PROCESSES {
        println!(
            "[PROCESS] fork: process limit reached ({}/{})",
            current_count,
            super::MAX_PROCESSES
        );
        return Err(KernelError::ResourceExhausted {
            resource: "process table",
        });
    }

    let current_process =
        super::current_process().ok_or(KernelError::ProcessNotFound { pid: 0 })?;

    let current_thread = super::current_thread().ok_or(KernelError::ThreadNotFound { tid: 0 })?;

    // Create new process as copy of current
    let new_process = ProcessBuilder::new(format!("{}-fork", current_process.name))
        .parent(current_process.pid)
        .priority(*current_process.priority.lock())
        .build();

    let new_pid = new_process.pid;

    // Clone address space with COW (copy-on-write) optimization.
    // Pages are shared between parent and child; physical copies are
    // deferred to the page fault handler when either process writes.
    {
        let current_space = current_process.memory_space.lock();
        let mut new_space = new_process.memory_space.lock();

        // Clone page tables and mapping metadata
        new_space.clone_from(&current_space)?;

        // Register user-space frames in the COW table. Iterating an empty
        // collection is a no-op, so no emptiness pre-check is needed.
        //
        // NOTE(review): only the COW bookkeeping table is updated here; no
        // PTE write-protection is applied in this function. Presumably
        // clone_from() or the fault path enforces read-only mappings —
        // confirm.
        let user_pages = collect_user_pages(&current_space);
        for &(_vaddr, frame) in &user_pages {
            crate::mm::demand_paging::with_manager_mut(|mgr| {
                mgr.add_cow_entry(frame.as_u64() as usize, frame);
                // Set ref_count to 2 (parent + child).
                // NOTE(review): this unconditionally overwrites any prior
                // count; if the frame was already COW-shared from an
                // earlier fork, sharers may be undercounted — confirm
                // add_cow_entry semantics.
                if let Some(entry) = mgr.cow_table.entries.get(&(frame.as_u64() as usize)) {
                    entry
                        .ref_count
                        .store(2, core::sync::atomic::Ordering::Release);
                }
            });
        }
    }

    // Clone capabilities
    {
        let current_caps = current_process.capability_space.lock();
        let new_caps = new_process.capability_space.lock();

        // Clone capability space so child has same capabilities as parent
        new_caps.clone_from(&current_caps)?;
    }

    // Clone file table so child inherits stdin/stdout/stderr and pipes
    {
        let parent_ft = current_process.file_table.lock();
        let child_ft = parent_ft.clone_for_fork();
        *new_process.file_table.lock() = child_ft;
    }

    // Inherit environment variables from parent
    #[cfg(feature = "alloc")]
    {
        let parent_env = current_process.env_vars.lock();
        let mut child_env = new_process.env_vars.lock();
        for (key, value) in parent_env.iter() {
            child_env.insert(key.clone(), value.clone());
        }
    }

    // Inherit container membership from parent so forked children
    // stay inside the same container namespace.
    {
        let cid = current_process
            .container_id
            .load(core::sync::atomic::Ordering::Acquire);
        new_process
            .container_id
            .store(cid, core::sync::atomic::Ordering::Release);
        // cid == 0 means "no container"; only registered containers get
        // PID-namespace bookkeeping.
        if cid != 0 {
            // Register child in the container's PID namespace
            #[cfg(target_arch = "x86_64")]
            {
                let mut mgr = crate::virt::container::CONTAINER_MGR.lock();
                if let Some(ref mut mgr) = *mgr {
                    if let Some(container) = mgr.get_mut(cid) {
                        container.namespaces.pid.add_process(new_pid);
                    }
                }
            }
        }
    }

    // Inherit uid, gid, pgid, sid from parent
    // (ProcessBuilder doesn't copy these, so do it manually)
    // SAFETY: new_process is not yet added to the process table, so no
    // other thread can access it concurrently.
    {
        // pgid and sid are inherited from parent per POSIX
        let parent_pgid = current_process
            .pgid
            .load(core::sync::atomic::Ordering::Acquire);
        let parent_sid = current_process
            .sid
            .load(core::sync::atomic::Ordering::Acquire);
        new_process
            .pgid
            .store(parent_pgid, core::sync::atomic::Ordering::Release);
        new_process
            .sid
            .store(parent_sid, core::sync::atomic::Ordering::Release);
    }

    // Create thread in new process matching current thread
    let new_thread = {
        let ctx = current_thread.context.lock();
        let thread = ThreadBuilder::new(
            new_pid,
            current_thread.name.clone(),
            ctx.get_instruction_pointer(),
        )
        .user_stack_size(current_thread.user_stack.size)
        .kernel_stack_size(current_thread.kernel_stack.size)
        .priority(current_thread.priority)
        .cpu_affinity(current_thread.get_affinity())
        .build()?;

        // Copy thread context for child process.
        //
        // On x86_64, we capture the LIVE register state from the syscall
        // frame on the kernel stack (saved by syscall_entry assembly). This
        // gives the child the parent's actual CPU registers at the moment of
        // fork(), so the child resumes at the instruction after fork() with
        // RAX=0 (fork return value), not from main().
        //
        // On other architectures (or if no syscall frame is available), we
        // fall back to cloning the parent's ThreadContext from exec/load time.
        {
            let mut new_ctx = thread.context.lock();

            #[cfg(target_arch = "x86_64")]
            {
                use crate::arch::x86_64::syscall::{get_saved_user_rsp, get_syscall_frame};

                if let Some(frame) = get_syscall_frame() {
                    // Populate child context from live parent registers.
                    // Start with a clone for fields not in the frame (cr3, segments, etc.)
                    *new_ctx = (*ctx).clone();

                    // User RIP: RCX was clobbered by SYSCALL to hold the return address
                    new_ctx.set_instruction_pointer(frame.rcx as usize);

                    // User RSP: saved to per-CPU data by syscall_entry
                    new_ctx.set_stack_pointer(get_saved_user_rsp() as usize);

                    // Return value: fork returns 0 in child
                    new_ctx.set_return_value(0);

                    // Copy all general-purpose registers from the live frame.
                    // The X86_64Context fields are accessed directly since we
                    // know the concrete type on x86_64.
                    new_ctx.rbx = frame.rbx;
                    new_ctx.rbp = frame.rbp;
                    new_ctx.r12 = frame.r12;
                    new_ctx.r13 = frame.r13;
                    new_ctx.r14 = frame.r14;
                    new_ctx.r15 = frame.r15;
                    new_ctx.rdi = frame.rdi;
                    new_ctx.rsi = frame.rsi;
                    new_ctx.rdx = frame.rdx;
                    new_ctx.r8 = frame.r8;
                    new_ctx.r9 = frame.r9;
                    new_ctx.r10 = frame.r10;

                    // User RFLAGS: R11 was clobbered by SYSCALL to hold RFLAGS
                    new_ctx.r11 = frame.r11;
                    new_ctx.rflags = frame.r11;

                    // RCX holds user RIP (already set via set_instruction_pointer)
                    new_ctx.rcx = frame.rcx;
                } else {
                    // No syscall frame (called outside syscall context).
                    // Fall back to cloning parent's stored context.
                    *new_ctx = (*ctx).clone();
                    new_ctx.set_return_value(0);
                }
            }

            #[cfg(not(target_arch = "x86_64"))]
            {
                *new_ctx = (*ctx).clone();
                new_ctx.set_return_value(0);
            }
        } // Drop lock here

        thread
    };

    let new_tid = new_thread.tid;
    new_process.add_thread(new_thread)?;

    // Add to parent's children list
    #[cfg(feature = "alloc")]
    {
        current_process.children.lock().push(new_pid);
    }

    // Add process to table
    table::add_process(new_process)?;

    // Mark as ready and add to scheduler
    if let Some(process) = table::get_process(new_pid) {
        process.set_state(ProcessState::Ready);

        if let Some(thread) = process.get_thread(new_tid) {
            create_scheduler_task(process, thread)?;
        }
    }

    // Return child PID to parent
    Ok(new_pid)
}
264
/// Fork with copy-on-write semantics.
///
/// Instead of physically copying all user pages, this function:
/// 1. Clones the address space metadata (full copy of page tables).
/// 2. Registers shared frames in the COW table with ref_count = 2.
///
/// When either process writes to a COW page, the page fault handler
/// allocates a private copy and decrements the COW ref count.
///
/// The child inherits capabilities, the open-file table, environment
/// variables, and pgid/sid from the parent. On x86_64 the child's
/// register state is captured from the live syscall frame so it resumes
/// at the instruction after the fork syscall with a return value of 0.
///
/// # Errors
///
/// * `KernelError::ResourceExhausted` when the process table (including
///   zombies awaiting reap) is already at `MAX_PROCESSES`.
/// * `KernelError::ProcessNotFound` / `KernelError::ThreadNotFound` when
///   called outside a process/thread context.
/// * Propagates failures from address-space/capability cloning, thread
///   creation, process-table insertion, and scheduler registration.
#[cfg(feature = "alloc")]
pub fn cow_fork() -> Result<ProcessId, KernelError> {
    // Enforce process count limit up front (mirrors fork_process).
    let current_count = table::PROCESS_TABLE.count();
    if current_count >= super::MAX_PROCESSES {
        println!(
            "[PROCESS] cow_fork: process limit reached ({}/{})",
            current_count,
            super::MAX_PROCESSES
        );
        return Err(KernelError::ResourceExhausted {
            resource: "process table",
        });
    }

    let current_process =
        super::current_process().ok_or(KernelError::ProcessNotFound { pid: 0 })?;
    let current_thread = super::current_thread().ok_or(KernelError::ThreadNotFound { tid: 0 })?;

    // Child starts as a near-copy of the parent: same priority, parent
    // link set, name suffixed "-cow" for debugging visibility.
    let new_process = ProcessBuilder::new(format!("{}-cow", current_process.name))
        .parent(current_process.pid)
        .priority(*current_process.priority.lock())
        .build();

    let new_pid = new_process.pid;

    // COW address space setup
    // (scoped so both memory_space locks drop before later steps)
    {
        let current_space = current_process.memory_space.lock();
        let mut new_space = new_process.memory_space.lock();

        // Clone the address space (copies page tables and mapping metadata)
        new_space.clone_from(&current_space)?;

        // Collect user-space pages for COW marking
        let user_pages = collect_user_pages(&current_space);

        if !user_pages.is_empty() {
            println!(
                "[PROCESS] cow_fork: marking {} user pages as COW",
                user_pages.len()
            );

            // Register shared frames in the demand paging COW table.
            //
            // NOTE(review): only the COW bookkeeping table is updated here;
            // no PTE write-protection is applied in this function.
            // Presumably clone_from() or the fault path enforces read-only
            // mappings — confirm.
            for &(_vaddr, frame) in &user_pages {
                crate::mm::demand_paging::with_manager_mut(|mgr| {
                    mgr.add_cow_entry(frame.as_u64() as usize, frame);
                    // Set ref_count to 2 (parent + child)
                    // NOTE(review): this unconditionally overwrites any
                    // prior count; if the frame was already COW-shared from
                    // an earlier fork, sharers may be undercounted —
                    // confirm add_cow_entry semantics.
                    if let Some(entry) = mgr.cow_table.entries.get(&(frame.as_u64() as usize)) {
                        entry
                            .ref_count
                            .store(2, core::sync::atomic::Ordering::Release);
                    }
                });
            }
        }
    }

    // Clone capabilities so the child holds the same rights as the parent
    {
        let current_caps = current_process.capability_space.lock();
        let new_caps = new_process.capability_space.lock();
        new_caps.clone_from(&current_caps)?;
    }

    // Clone file table (child inherits stdin/stdout/stderr and pipes)
    {
        let parent_ft = current_process.file_table.lock();
        let child_ft = parent_ft.clone_for_fork();
        *new_process.file_table.lock() = child_ft;
    }

    // Inherit environment variables
    #[cfg(feature = "alloc")]
    {
        let parent_env = current_process.env_vars.lock();
        let mut child_env = new_process.env_vars.lock();
        for (key, value) in parent_env.iter() {
            child_env.insert(key.clone(), value.clone());
        }
    }

    // Inherit pgid, sid (process group and session per POSIX fork rules).
    // new_process is not yet in the process table, so no other thread can
    // observe these stores mid-update.
    {
        let parent_pgid = current_process
            .pgid
            .load(core::sync::atomic::Ordering::Acquire);
        let parent_sid = current_process
            .sid
            .load(core::sync::atomic::Ordering::Acquire);
        new_process
            .pgid
            .store(parent_pgid, core::sync::atomic::Ordering::Release);
        new_process
            .sid
            .store(parent_sid, core::sync::atomic::Ordering::Release);
    }

    // Create thread in new process matching current thread
    let new_thread = {
        let ctx = current_thread.context.lock();
        let thread = ThreadBuilder::new(
            new_pid,
            current_thread.name.clone(),
            ctx.get_instruction_pointer(),
        )
        .user_stack_size(current_thread.user_stack.size)
        .kernel_stack_size(current_thread.kernel_stack.size)
        .priority(current_thread.priority)
        .cpu_affinity(current_thread.get_affinity())
        .build()?;

        // Populate the child's thread context. On x86_64, prefer the LIVE
        // register state from the syscall frame (saved by syscall_entry)
        // so the child resumes right after the fork syscall; otherwise
        // fall back to the parent's stored context.
        {
            let mut new_ctx = thread.context.lock();

            #[cfg(target_arch = "x86_64")]
            {
                use crate::arch::x86_64::syscall::{get_saved_user_rsp, get_syscall_frame};

                if let Some(frame) = get_syscall_frame() {
                    // Clone first to pick up fields not present in the
                    // frame (cr3, segments, etc.), then overlay live state.
                    *new_ctx = (*ctx).clone();
                    // SYSCALL clobbers RCX with the user return RIP.
                    new_ctx.set_instruction_pointer(frame.rcx as usize);
                    // User RSP is stashed in per-CPU data by syscall_entry.
                    new_ctx.set_stack_pointer(get_saved_user_rsp() as usize);
                    // fork returns 0 in the child.
                    new_ctx.set_return_value(0);
                    new_ctx.rbx = frame.rbx;
                    new_ctx.rbp = frame.rbp;
                    new_ctx.r12 = frame.r12;
                    new_ctx.r13 = frame.r13;
                    new_ctx.r14 = frame.r14;
                    new_ctx.r15 = frame.r15;
                    new_ctx.rdi = frame.rdi;
                    new_ctx.rsi = frame.rsi;
                    new_ctx.rdx = frame.rdx;
                    new_ctx.r8 = frame.r8;
                    new_ctx.r9 = frame.r9;
                    new_ctx.r10 = frame.r10;
                    // SYSCALL clobbers R11 with the user RFLAGS.
                    new_ctx.r11 = frame.r11;
                    new_ctx.rflags = frame.r11;
                    new_ctx.rcx = frame.rcx;
                } else {
                    // No syscall frame (called outside syscall context).
                    *new_ctx = (*ctx).clone();
                    new_ctx.set_return_value(0);
                }
            }

            #[cfg(not(target_arch = "x86_64"))]
            {
                *new_ctx = (*ctx).clone();
                new_ctx.set_return_value(0);
            }
        }

        thread
    };

    let new_tid = new_thread.tid;
    new_process.add_thread(new_thread)?;

    // Record the child in the parent's children list
    #[cfg(feature = "alloc")]
    {
        current_process.children.lock().push(new_pid);
    }

    table::add_process(new_process)?;

    // Mark ready and hand to the scheduler
    if let Some(process) = table::get_process(new_pid) {
        process.set_state(ProcessState::Ready);
        if let Some(thread) = process.get_thread(new_tid) {
            create_scheduler_task(process, thread)?;
        }
    }

    println!("[PROCESS] cow_fork: created child PID {}", new_pid);
    Ok(new_pid)
}
447
/// Collect user-space page mappings from a VAS for COW marking.
///
/// Returns a vector of (virtual_address, frame_number) pairs for all
/// user-space pages (addresses below 0x0000_8000_0000_0000). Frames with
/// a zero physical address are skipped.
#[cfg(feature = "alloc")]
fn collect_user_pages(
    vas: &crate::mm::vas::VirtualAddressSpace,
) -> Vec<(usize, crate::mm::FrameNumber)> {
    // Canonical user/kernel split: anything at or above this is kernel.
    const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
    const PAGE_SIZE: usize = 4096;

    let mappings = vas.mappings_ref().lock();
    mappings
        .iter()
        // Only user-space mappings participate in COW.
        .filter(|(vaddr, _)| vaddr.as_u64() < USER_SPACE_END)
        .flat_map(|(vaddr, mapping)| {
            let base = vaddr.as_u64() as usize;
            mapping
                .physical_frames
                .iter()
                .enumerate()
                // Zero frames are unbacked slots; the page index still
                // advances so the vaddr arithmetic stays correct.
                .filter(|(_, frame)| frame.as_u64() != 0)
                .map(move |(i, &frame)| (base + i * PAGE_SIZE, frame))
        })
        .collect()
}