⚠️ VeridianOS Kernel Documentation: low-level `no_std` kernel code. All functions are unsafe unless explicitly marked otherwise.

veridian_kernel/process/thread.rs

//! Thread management implementation
//!
//! Threads are the unit of execution within a process. Each thread has its own
//! stack and CPU context but shares memory and other resources with its
//! process.

use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};

#[cfg(feature = "alloc")]
extern crate alloc;

#[cfg(feature = "alloc")]
use alloc::{string::String, sync::Arc};
use core::ptr::NonNull;

use spin::Mutex;

use super::ProcessId;
use crate::{
    arch::context::{ArchThreadContext, ThreadContext},
    error::KernelError,
    mm::{FRAME_ALLOCATOR, FRAME_SIZE},
    sched::task::Task,
};

/// Per-thread filesystem state for CLONE_FS support.
///
/// Each thread has a reference-counted `ThreadFs` that holds the thread's
/// current working directory (cwd) and file creation mask (umask). When
/// `CLONE_FS` is set during `clone()`, the parent and child share the same
/// `Arc<ThreadFs>`, so changes to cwd or umask in one thread are visible
/// to the other. When `CLONE_FS` is not set, the child receives an
/// independent copy (via [`clone_copy`](Self::clone_copy)).
///
/// This mirrors the Linux kernel's `struct fs_struct` semantics.
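///
/// # Example
///
/// A minimal sketch of the two `clone()` paths (the `parent` handle is
/// illustrative):
///
/// ```ignore
/// // CLONE_FS: parent and child see the same cwd/umask.
/// let child_fs = ThreadFs::clone_shared(&parent.fs);
///
/// // No CLONE_FS: child gets an independent snapshot.
/// let child_fs = ThreadFs::clone_copy(&parent.fs);
/// child_fs.umask.store(0o077, Ordering::Release); // parent unaffected
/// ```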
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct ThreadFs {
    /// Current working directory path. Protected by a spinlock because
    /// it can be read from syscall paths (getcwd) and modified from
    /// others (chdir) concurrently.
    pub cwd: Mutex<alloc::string::String>,
    /// File creation mask (umask). Atomic because it can be read/written
    /// from concurrent syscall paths without holding a lock.
    pub umask: AtomicU32,
}

#[cfg(feature = "alloc")]
impl ThreadFs {
    /// Create a new root filesystem state with cwd="/" and umask=0o022.
    ///
    /// Used for the initial thread of a new process.
    pub fn new_root() -> Arc<Self> {
        Arc::new(Self {
            cwd: Mutex::new(alloc::string::String::from("/")),
            umask: AtomicU32::new(0o022),
        })
    }

    /// Share the filesystem state (CLONE_FS semantics).
    ///
    /// Returns a clone of the `Arc`, so parent and child reference the
    /// same underlying `ThreadFs`. Changes to cwd or umask in either
    /// thread are visible to the other.
    pub fn clone_shared(src: &Arc<Self>) -> Arc<Self> {
        src.clone()
    }

    /// Copy the filesystem state (non-CLONE_FS semantics).
    ///
    /// Creates a new independent `ThreadFs` with the same cwd and umask
    /// values. Subsequent changes in either thread are isolated.
    pub fn clone_copy(src: &Arc<Self>) -> Arc<Self> {
        Arc::new(Self {
            cwd: Mutex::new(src.cwd.lock().clone()),
            umask: AtomicU32::new(src.umask.load(Ordering::Acquire)),
        })
    }
}

/// Default kernel stack size: 64KB (16 pages)
pub const DEFAULT_KERNEL_STACK_PAGES: usize = 16;

/// Default user stack size: 64KB (16 pages) for kernel-created threads.
/// User processes use the value from ProcessCreateOptions instead.
pub const DEFAULT_USER_STACK_PAGES: usize = 16;

/// Default TLS region size: 4KB (1 page)
pub const DEFAULT_TLS_PAGES: usize = 1;

/// Guard page count (1 page below each stack to detect overflow)
pub const GUARD_PAGE_COUNT: usize = 1;

/// Base virtual address for kernel thread stacks.
/// Each thread gets its own region at
/// KERNEL_STACK_REGION_BASE - ((thread_index + 1) * region_size).
const KERNEL_STACK_REGION_BASE: usize = 0xFFFF_E000_0000_0000;

/// Base virtual address for user thread stacks.
/// Grows downward from near the top of user address space.
const USER_STACK_REGION_BASE: usize = 0x0000_7FFE_0000_0000;
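
// Illustrative region layout, assuming 4 KiB frames and the 16-page default:
// each kernel stack region spans (16 + GUARD_PAGE_COUNT) pages = 0x11000
// bytes, so tid 0 occupies 0xFFFF_DFFF_FFFE_F000..0xFFFF_E000_0000_0000
// (guard page at the bottom, 64 KiB usable above it), tid 1 the 0x11000-byte
// region below that, and so on. User stacks follow the same scheme growing
// down from USER_STACK_REGION_BASE.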

/// Base virtual address for TLS regions.
#[allow(dead_code)] // TLS mapping base address -- used when TLS is wired
const TLS_REGION_BASE: usize = 0x0000_7000_0000_0000;

/// Allocate physical frames for a stack region and return the base physical
/// address.
///
/// Allocates `page_count` contiguous physical frames from the frame allocator.
/// Callers zero-fill the returned frames as needed (see
/// `ThreadLocalStorage::allocate()` and `ThreadBuilder::build()`). Returns the
/// frame number of the first allocated frame.
fn allocate_stack_frames(page_count: usize) -> Result<crate::mm::FrameNumber, KernelError> {
    let allocator = FRAME_ALLOCATOR.lock();
    allocator
        .allocate_frames(page_count, None)
        .map_err(|_| KernelError::OutOfMemory {
            requested: page_count * FRAME_SIZE,
            available: 0,
        })
}

/// Free previously allocated stack frames.
#[allow(dead_code)] // Stack cleanup helper -- used in thread teardown path
fn free_stack_frames(frame: crate::mm::FrameNumber, page_count: usize) {
    let allocator = FRAME_ALLOCATOR.lock();
    let _ = allocator.free_frames(frame, page_count);
}

/// Safe wrapper for task pointer that implements Send + Sync
///
/// Safety: Task pointers are only accessed from within the scheduler
/// which has its own synchronization mechanisms.
#[derive(Debug)]
pub struct TaskPtr(Option<NonNull<Task>>);

unsafe impl Send for TaskPtr {}
unsafe impl Sync for TaskPtr {}

/// Thread ID type
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadId(pub u64);

impl core::fmt::Display for ThreadId {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "{}", self.0)
    }
}

/// Thread state
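///
/// Typical lifecycle: `Creating` -> `Ready` -> `Running`, moving through
/// `Blocked` or `Sleeping` and back to `Ready` while waiting, then ending
/// as `Zombie` (exited, awaiting cleanup) and finally `Dead`.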
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ThreadState {
    /// Thread is being created
    Creating = 0,
    /// Thread is ready to run
    Ready = 1,
    /// Thread is currently running
    Running = 2,
    /// Thread is blocked waiting
    Blocked = 3,
    /// Thread is sleeping
    Sleeping = 4,
    /// Thread has exited but not yet cleaned up (zombie)
    Zombie = 5,
    /// Thread is completely dead and can be cleaned up
    Dead = 6,
}

/// Thread Local Storage (TLS) data
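///
/// # Example
///
/// A sketch of the key-value API (requires the `alloc` feature):
///
/// ```ignore
/// let mut tls = ThreadLocalStorage::new();
/// tls.set_value(1, 0xDEAD_BEEF);
/// assert_eq!(tls.get_value(1), Some(0xDEAD_BEEF));
/// assert_eq!(tls.remove_value(1), Some(0xDEAD_BEEF));
/// assert_eq!(tls.get_value(1), None);
/// ```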
pub struct ThreadLocalStorage {
    /// TLS base address
    pub base: usize,
    /// TLS size in bytes
    pub size: usize,
    /// TLS data pointer (architecture-specific)
    pub data_ptr: usize,
    /// TLS key-value data storage
    #[cfg(feature = "alloc")]
    pub data: alloc::collections::BTreeMap<u64, u64>,
}

impl ThreadLocalStorage {
    /// Create new TLS area
    pub fn new() -> Self {
        Self {
            base: 0,
            size: 0,
            data_ptr: 0,
            #[cfg(feature = "alloc")]
            data: alloc::collections::BTreeMap::new(),
        }
    }

    /// Allocate TLS area backed by real physical frames.
    ///
    /// Allocates enough physical frames to cover `size` bytes. The TLS base
    /// address is set to the physical frame address (reachable in kernel
    /// space through the kernel's physical-memory mapping). The allocated
    /// memory is zero-filled (`.tbss` equivalent).
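    ///
    /// # Example
    ///
    /// A sketch of the rounding behavior (assumes 4 KiB frames):
    ///
    /// ```ignore
    /// let mut tls = ThreadLocalStorage::new();
    /// tls.allocate(6000)?;              // rounds up to 2 frames
    /// assert_eq!(tls.size, 2 * FRAME_SIZE);
    /// ```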
    pub fn allocate(&mut self, size: usize) -> Result<(), KernelError> {
        if size == 0 {
            return Ok(());
        }

        let page_count = size.div_ceil(FRAME_SIZE);
        let frame = allocate_stack_frames(page_count)?;
        let phys_addr = frame.as_addr().as_usize();

        // Zero-fill the TLS region (for .tbss equivalent)
        // SAFETY: `phys_addr` is the physical address of frames we just
        // allocated. On x86_64, physical memory is mapped at a dynamic
        // offset, so we convert via phys_to_virt_addr(). We write zeroes
        // to `page_count * FRAME_SIZE` bytes, exactly what we allocated.
        // No other code references these frames yet.
        unsafe {
            let virt = crate::mm::phys_to_virt_addr(phys_addr as u64);
            core::ptr::write_bytes(virt as *mut u8, 0, page_count * FRAME_SIZE);
        }

        self.base = phys_addr;
        self.size = page_count * FRAME_SIZE;
        self.data_ptr = phys_addr;
        Ok(())
    }

    /// Install a user-provided TLS base (arch_prctl/TPIDR_EL0/tp).
    pub fn install_base(&mut self, base: usize) {
        self.base = base;
        self.data_ptr = base;
        // size is unknown when provided by user; leave as-is
    }

    /// Set architecture TLS base register value (for user mode)
    pub fn set_tls_base(&mut self, base: usize) {
        self.base = base;
        self.data_ptr = base;
    }

    /// Get TLS base register value
    pub fn tls_base(&self) -> usize {
        self.base
    }

    /// Set TLS value for key
    #[cfg(feature = "alloc")]
    pub fn set_value(&mut self, key: u64, value: u64) {
        self.data.insert(key, value);
    }

    /// Get TLS value for key
    #[cfg(feature = "alloc")]
    pub fn get_value(&self, key: u64) -> Option<u64> {
        self.data.get(&key).copied()
    }

    /// Remove TLS value for key
    #[cfg(feature = "alloc")]
    pub fn remove_value(&mut self, key: u64) -> Option<u64> {
        self.data.remove(&key)
    }

    /// Get all TLS keys
    #[cfg(feature = "alloc")]
    pub fn keys(&self) -> impl Iterator<Item = &u64> {
        self.data.keys()
    }

    /// Set the architecture-specific TLS base register.
    ///
    /// On x86_64, sets FS base (via WRFSBASE or MSR).
    /// On AArch64, sets TPIDR_EL0.
    /// On RISC-V, sets the `tp` register.
    ///
    /// This should be called during context switch or thread initialization
    /// to point the hardware TLS register to this thread's TLS area.
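    ///
    /// # Example
    ///
    /// A sketch of installing a user-provided TLS base (address illustrative):
    ///
    /// ```ignore
    /// let mut tls = ThreadLocalStorage::new();
    /// tls.set_tls_base(0x7000_0000_1000);
    /// tls.activate_tls_register(); // writes FS base / TPIDR_EL0 / tp
    /// ```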
    pub fn activate_tls_register(&self) {
        if self.base == 0 {
            return;
        }

        #[cfg(target_arch = "x86_64")]
        {
            // Set FS base via MSR (IA32_FS_BASE = 0xC0000100)
            // SAFETY: Writing MSR 0xC0000100 (IA32_FS_BASE) sets the base address
            // for the FS segment register. `self.base` is a valid address obtained
            // from the frame allocator via `allocate()`. This is a privileged
            // operation executed in kernel mode (ring 0). The value is split into
            // low 32 bits (EAX) and high 32 bits (EDX) as required by WRMSR.
            unsafe {
                let base = self.base as u64;
                core::arch::asm!(
                    "wrmsr",
                    in("ecx") 0xC000_0100u32,
                    in("eax") (base & 0xFFFF_FFFF) as u32,
                    in("edx") (base >> 32) as u32,
                );
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            // Set TPIDR_EL0 (Thread Pointer ID Register for EL0)
            // SAFETY: Writing TPIDR_EL0 sets the user-space thread pointer
            // register. `self.base` is a valid address from the frame allocator.
            // This is accessible from EL1 (kernel mode) and will be readable
            // from EL0 (user mode) for TLS access. No side effects beyond
            // setting the register value.
            unsafe {
                core::arch::asm!("msr tpidr_el0, {}", in(reg) self.base);
            }
        }

        #[cfg(target_arch = "riscv64")]
        {
            // Set tp (thread pointer) register
            // SAFETY: Writing the `tp` register sets the thread pointer used for
            // TLS access. `self.base` is a valid address from the frame allocator.
            // The `tp` register is a general-purpose register designated by the
            // RISC-V ABI for TLS, accessible in both S-mode and U-mode.
            unsafe {
                core::arch::asm!("mv tp, {}", in(reg) self.base);
            }
        }
    }
}

impl Default for ThreadLocalStorage {
    fn default() -> Self {
        Self::new()
    }
}

/// Thread control block
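///
/// # Example
///
/// A sketch of scheduler-facing state changes (the `thread` handle and
/// `futex_id` are illustrative):
///
/// ```ignore
/// thread.set_ready();                 // Ready, mirrored into the scheduler task
/// thread.set_running(0);              // Running on CPU 0
/// thread.set_blocked(Some(futex_id)); // Blocked, records the reason
/// thread.set_exited(0);               // Zombie until reaped
/// ```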
pub struct Thread {
    /// Thread ID
    pub tid: ThreadId,

    /// Parent process ID
    pub process: ProcessId,

    /// Thread name
    #[cfg(feature = "alloc")]
    pub name: String,

    /// Thread state
    pub state: AtomicU32,

    /// CPU context (registers, etc.)
    pub context: Mutex<ArchThreadContext>,

    /// User stack
    pub user_stack: Stack,

    /// Kernel stack
    pub kernel_stack: Stack,

    /// Thread-local storage
    pub tls: Mutex<ThreadLocalStorage>,

    /// CPU affinity mask
    pub cpu_affinity: AtomicUsize,

    /// Current CPU (if running)
    pub current_cpu: AtomicU32,

    /// Time slice remaining
    pub time_slice: AtomicU32,

    /// Total CPU time used (microseconds)
    pub cpu_time: AtomicU64,

    /// Wake up time (for sleeping threads)
    pub wake_time: AtomicU64,

    /// Exit code
    pub exit_code: AtomicU32,

    /// Thread priority (inherited from process)
    pub priority: u8,

    /// Floating point state saved flag
    pub fpu_used: AtomicU32,

    /// Scheduler task pointer (if scheduled)
    pub task_ptr: Mutex<TaskPtr>,

    /// clear_tid pointer for CLONE_CHILD_CLEARTID
    pub clear_tid: AtomicUsize,
    /// Detached flag (pthread_detach)
    pub detached: AtomicBool,
    /// Filesystem view (cwd, umask)
    #[cfg(feature = "alloc")]
    pub fs: Arc<ThreadFs>,
}

/// Stack information
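///
/// # Example
///
/// A sketch of the address checks (base and size illustrative):
///
/// ```ignore
/// let stack = Stack::new(0x1000_0000, 64 * 1024);
/// assert_eq!(stack.top(), 0x1001_0000);  // initial SP; the stack grows down
/// assert!(stack.contains(0x1000_8000));
/// assert!(!stack.contains(0x1001_0000)); // top is exclusive
/// ```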
#[derive(Debug)]
pub struct Stack {
    /// Base address (lowest address, virtual)
    pub base: usize,
    /// Size in bytes
    pub size: usize,
    /// Current stack pointer
    pub sp: AtomicUsize,
    /// Physical frame number of the contiguous allocation from the frame
    /// allocator. Stored so cleanup can free the frames without walking
    /// page tables. Zero means no physical frames are owned by this Stack
    /// (e.g., user stacks whose frames are managed by the VAS).
    pub phys_frame: AtomicU64,
    /// Number of physical pages owned by this stack (for `free_frames`).
    pub phys_page_count: AtomicUsize,
}

impl Stack {
    /// Create a new stack
    pub fn new(base: usize, size: usize) -> Self {
        Self {
            base,
            size,
            sp: AtomicUsize::new(base + size), // Stack grows down
            phys_frame: AtomicU64::new(0),
            phys_page_count: AtomicUsize::new(0),
        }
    }

    /// Get stack top (initial SP)
    pub fn top(&self) -> usize {
        self.base + self.size
    }

    /// Check if address is within stack
    pub fn contains(&self, addr: usize) -> bool {
        addr >= self.base && addr < self.base + self.size
    }

    /// Get current stack pointer
    pub fn get_sp(&self) -> usize {
        self.sp.load(Ordering::Acquire)
    }

    /// Set stack pointer
    pub fn set_sp(&self, sp: usize) {
        self.sp.store(sp, Ordering::Release);
    }
}

/// Thread creation parameters
#[cfg(feature = "alloc")]
pub struct ThreadParams {
    pub tid: ThreadId,
    pub process: ProcessId,
    pub name: String,
    pub entry_point: usize,
    pub user_stack_base: usize,
    pub user_stack_size: usize,
    pub kernel_stack_base: usize,
    pub kernel_stack_size: usize,
}

impl Thread {
    /// Create a new thread
    #[cfg(feature = "alloc")]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        tid: ThreadId,
        process: ProcessId,
        name: String,
        entry_point: usize,
        user_stack_base: usize,
        user_stack_size: usize,
        kernel_stack_base: usize,
        kernel_stack_size: usize,
        fs: Arc<ThreadFs>,
    ) -> Self {
        // Create context using ThreadContext trait
        let mut context = <ArchThreadContext as ThreadContext>::new();
        context.init(
            entry_point,
            user_stack_base + user_stack_size,
            kernel_stack_base + kernel_stack_size,
        );

        Self {
            tid,
            process,
            name,
            state: AtomicU32::new(ThreadState::Creating as u32),
            context: Mutex::new(context),
            user_stack: Stack::new(user_stack_base, user_stack_size),
            kernel_stack: Stack::new(kernel_stack_base, kernel_stack_size),
            tls: Mutex::new(ThreadLocalStorage::new()),
            cpu_affinity: AtomicUsize::new(usize::MAX), // All CPUs
            current_cpu: AtomicU32::new(u32::MAX),
            time_slice: AtomicU32::new(10), // Default time slice
            cpu_time: AtomicU64::new(0),
            wake_time: AtomicU64::new(0),
            exit_code: AtomicU32::new(0),
            priority: 2, // Normal priority
            fpu_used: AtomicU32::new(0),
            task_ptr: Mutex::new(TaskPtr(None)),
            clear_tid: AtomicUsize::new(0),
            detached: AtomicBool::new(false),
            fs,
        }
    }

    /// Get thread state
    pub fn get_state(&self) -> ThreadState {
        match self.state.load(Ordering::Acquire) {
            0 => ThreadState::Creating,
            1 => ThreadState::Ready,
            2 => ThreadState::Running,
            3 => ThreadState::Blocked,
            4 => ThreadState::Sleeping,
            5 => ThreadState::Zombie,
            6 => ThreadState::Dead,
            _ => ThreadState::Dead,
        }
    }

    /// Set thread state
    pub fn set_state(&self, state: ThreadState) {
        self.state.store(state as u32, Ordering::Release);
    }

    /// Check if thread is runnable
    pub fn is_runnable(&self) -> bool {
        matches!(self.get_state(), ThreadState::Ready | ThreadState::Running)
    }

    /// Set CPU affinity
    pub fn set_affinity(&self, mask: usize) {
        self.cpu_affinity.store(mask, Ordering::Release);
    }

    /// Get CPU affinity
    pub fn get_affinity(&self) -> usize {
        self.cpu_affinity.load(Ordering::Acquire)
    }

    /// Check if thread can run on CPU
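    ///
    /// The mask is a bitmap indexed by CPU number; e.g. a mask of `0b101`
    /// allows CPUs 0 and 2 and excludes CPU 1.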
    pub fn can_run_on_cpu(&self, cpu: u8) -> bool {
        let mask = self.get_affinity();
        (mask & (1 << cpu)) != 0
    }

    /// Mark thread as using FPU
    pub fn mark_fpu_used(&self) {
        self.fpu_used.store(1, Ordering::Release);
    }

    /// Check if thread uses FPU
    pub fn uses_fpu(&self) -> bool {
        self.fpu_used.load(Ordering::Acquire) != 0
    }

    /// Sleep thread until specified time
    pub fn sleep_until(&self, wake_time: u64) {
        self.wake_time.store(wake_time, Ordering::Release);
        self.set_state(ThreadState::Sleeping);
    }

    /// Wake up thread if it's time
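    ///
    /// Returns `true` if the thread transitioned from `Sleeping` to `Ready`.
    ///
    /// # Example
    ///
    /// A sketch of a timer-tick sweep; the `threads` iterator and the
    /// `timer`/`scheduler` hooks are hypothetical:
    ///
    /// ```ignore
    /// let now = timer::current_time_us();
    /// for thread in threads.iter() {
    ///     if thread.check_wake(now) {
    ///         scheduler::enqueue(thread); // hypothetical re-queue hook
    ///     }
    /// }
    /// ```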
    pub fn check_wake(&self, current_time: u64) -> bool {
        if self.get_state() == ThreadState::Sleeping {
            let wake_time = self.wake_time.load(Ordering::Acquire);
            if current_time >= wake_time {
                self.set_state(ThreadState::Ready);
                return true;
            }
        }
        false
    }

    /// Update CPU time
    pub fn add_cpu_time(&self, microseconds: u64) {
        self.cpu_time.fetch_add(microseconds, Ordering::Relaxed);
    }

    /// Set scheduler task pointer
    pub fn set_task_ptr(&self, task: Option<NonNull<Task>>) {
        self.task_ptr.lock().0 = task;
    }

    /// Get scheduler task pointer
    pub fn get_task_ptr(&self) -> Option<NonNull<Task>> {
        self.task_ptr.lock().0
    }

    /// Synchronize state with scheduler task
    pub fn sync_state_with_scheduler(&self, new_state: ThreadState) {
        // Update our state
        self.set_state(new_state);

        // Update scheduler task state if linked
        if let Some(task_ptr) = self.get_task_ptr() {
            // SAFETY: task_ptr is a NonNull<Task> obtained from
            // get_task_ptr(), which returns a valid pointer to the
            // scheduler's task. Writing the state field synchronizes the
            // thread state with the scheduler's view. The pointer is valid
            // because the task is not freed while the thread holds a
            // reference to it.
            unsafe {
                let task = task_ptr.as_ptr();
                (*task).state = match new_state {
                    ThreadState::Creating => crate::process::ProcessState::Creating,
                    ThreadState::Ready => crate::process::ProcessState::Ready,
                    ThreadState::Running => crate::process::ProcessState::Running,
                    ThreadState::Blocked => crate::process::ProcessState::Blocked,
                    ThreadState::Sleeping => crate::process::ProcessState::Sleeping,
                    ThreadState::Zombie => crate::process::ProcessState::Zombie,
                    ThreadState::Dead => crate::process::ProcessState::Dead,
                };
            }
        }
    }

    /// Mark thread as ready to run
    pub fn set_ready(&self) {
        self.sync_state_with_scheduler(ThreadState::Ready);
    }

    /// Mark thread as blocked
    pub fn set_blocked(&self, reason: Option<u64>) {
        self.sync_state_with_scheduler(ThreadState::Blocked);

        // Update scheduler task blocked_on field if linked
        if let Some(task_ptr) = self.get_task_ptr() {
            // SAFETY: task_ptr is a NonNull<Task> from get_task_ptr(),
            // pointing to the scheduler's task entry. Writing blocked_on
            // records the blocking reason. The pointer remains valid because
            // the task is not freed while the thread references it.
            unsafe {
                let task = task_ptr.as_ptr();
                (*task).blocked_on = reason;
            }
        }
    }

    /// Mark thread as running on CPU
    pub fn set_running(&self, cpu: u8) {
        self.current_cpu.store(cpu as u32, Ordering::Release);
        self.sync_state_with_scheduler(ThreadState::Running);
    }

    /// Mark thread as exited
    pub fn set_exited(&self, exit_code: i32) {
        self.exit_code.store(exit_code as u32, Ordering::Release);
        self.sync_state_with_scheduler(ThreadState::Zombie);
    }

    /// Get total CPU time
    pub fn get_cpu_time(&self) -> u64 {
        self.cpu_time.load(Ordering::Relaxed)
    }

    /// Set TLS value for this thread
    #[cfg(feature = "alloc")]
    pub fn set_tls_value(&self, key: u64, value: u64) {
        self.tls.lock().set_value(key, value);
    }

    /// Get TLS value for this thread
    #[cfg(feature = "alloc")]
    pub fn get_tls_value(&self, key: u64) -> Option<u64> {
        self.tls.lock().get_value(key)
    }

    /// Remove TLS value for this thread
    #[cfg(feature = "alloc")]
    pub fn remove_tls_value(&self, key: u64) -> Option<u64> {
        self.tls.lock().remove_value(key)
    }

    /// Get all TLS keys for this thread
    #[cfg(feature = "alloc")]
    pub fn get_tls_keys(&self) -> alloc::vec::Vec<u64> {
        self.tls.lock().keys().copied().collect()
    }

    /// Set thread entry point
    pub fn set_entry_point(&mut self, entry: usize) {
        self.context.get_mut().set_instruction_pointer(entry);
    }

    /// Reset thread context for exec
    pub fn reset_context(&mut self) {
        // Reset to initial state
        *self.context.get_mut() = ArchThreadContext::default();
        self.state
            .store(ThreadState::Ready as u32, Ordering::Release);
        self.cpu_time.store(0, Ordering::Relaxed);
        self.time_slice.store(10, Ordering::Relaxed); // Default time slice
    }

    /// Get filesystem view (cwd/umask)
    #[cfg(feature = "alloc")]
    pub fn fs(&self) -> Arc<ThreadFs> {
        self.fs.clone()
    }
}

/// Builder for creating new threads with specific configurations.
///
/// `ThreadBuilder` follows the builder pattern to construct a `Thread`
/// with custom stack sizes, priority, CPU affinity, TLS base, and
/// filesystem state. The [`build`](Self::build) method allocates physical
/// frames for the kernel stack, reserves virtual address regions with
/// guard pages for both stacks, and initializes the thread's CPU context.
/// User stack frames are mapped later through the VAS; see
/// [`build`](Self::build) for the rationale.
///
/// # Example
///
/// ```ignore
/// let thread = ThreadBuilder::new(pid, "worker".into(), entry_fn as usize)
///     .user_stack_size(2 * 1024 * 1024)  // 2 MB user stack
///     .priority(3)
///     .cpu_affinity(0x3)                  // CPUs 0 and 1
///     .build()?;
/// ```
///
/// # Defaults
///
/// - User stack: 1 MB
/// - Kernel stack: 64 KB
/// - Priority: 2 (normal)
/// - CPU affinity: all CPUs (usize::MAX)
/// - TLS base: None (no TLS)
/// - Filesystem state: new root ("/", umask 0o022)
#[cfg(feature = "alloc")]
pub struct ThreadBuilder {
    process: ProcessId,
    name: String,
    entry_point: usize,
    user_stack_size: usize,
    kernel_stack_size: usize,
    priority: u8,
    cpu_affinity: usize,
    clear_tid: usize,
    tls_base: Option<usize>,
    fs: Option<Arc<ThreadFs>>,
}

#[cfg(feature = "alloc")]
impl ThreadBuilder {
    /// Create a new thread builder with required parameters.
    ///
    /// # Arguments
    /// - `process`: The parent process ID that owns this thread.
    /// - `name`: Human-readable thread name (for debugging/logging).
    /// - `entry_point`: Virtual address where thread execution begins.
    pub fn new(process: ProcessId, name: String, entry_point: usize) -> Self {
        Self {
            process,
            name,
            entry_point,
            user_stack_size: 1024 * 1024, // 1MB default
            kernel_stack_size: 64 * 1024, // 64KB default
            priority: 2,
            cpu_affinity: usize::MAX,
            clear_tid: 0,
            tls_base: None,
            fs: None,
        }
    }

    /// Set user stack size
    pub fn user_stack_size(mut self, size: usize) -> Self {
        self.user_stack_size = size;
        self
    }

    /// Set kernel stack size
    pub fn kernel_stack_size(mut self, size: usize) -> Self {
        self.kernel_stack_size = size;
        self
    }

    /// Set priority
    pub fn priority(mut self, priority: u8) -> Self {
        self.priority = priority;
        self
    }

    /// Set CPU affinity
    pub fn cpu_affinity(mut self, mask: usize) -> Self {
        self.cpu_affinity = mask;
        self
    }

    /// Set clear_tid pointer for CLONE_CHILD_CLEARTID
    pub fn clear_tid(mut self, ptr: usize) -> Self {
        self.clear_tid = ptr;
        self
    }

    /// Set the filesystem view (cwd/umask) for the new thread.
    ///
    /// When creating a thread via `clone()` with `CLONE_FS`, pass a
    /// shared `Arc<ThreadFs>` from the parent. Without `CLONE_FS`, pass
    /// a copy via [`ThreadFs::clone_copy`]. If not set, the builder
    /// defaults to a new root filesystem state.
    pub fn fs(mut self, fs: Arc<ThreadFs>) -> Self {
        self.fs = Some(fs);
        self
    }

    /// Set the TLS (Thread-Local Storage) base address for the new thread.
    ///
    /// This corresponds to `CLONE_SETTLS` on Linux or `arch_prctl(ARCH_SET_FS)`
    /// on x86_64. The base address is written into the architecture-specific
    /// TLS register (FS base on x86_64, TPIDR_EL0 on AArch64, tp on RISC-V)
    /// when the thread is first scheduled.
    pub fn tls_base(mut self, base: usize) -> Self {
        self.tls_base = Some(base);
        self
    }

    /// Build the thread with real stack allocation.
    ///
    /// Allocates physical frames for the **kernel** stack via the global frame
    /// allocator. User stack frames are NOT allocated here because every caller
    /// immediately maps user stack pages through the VAS (via `map_page()`),
    /// which allocates its own tracked frames. Allocating frames here would
    /// orphan them (never freed, never used), leaking 64 pages (256 KB) per
    /// thread -- the root cause of the native-compilation OOM.
    ///
    /// Kernel stack frames are stored in `Stack::phys_frame` so that
    /// `cleanup_process()` can free them without walking page tables.
    ///
    /// Each stack gets a guard page (unmapped) below it to detect overflow.
    /// Stack pointers are set to the top of each allocated region since stacks
    /// grow downward on all supported architectures (x86_64, AArch64, RISC-V).
    pub fn build(self) -> Result<Thread, KernelError> {
        let tid = super::alloc_tid();

        // Calculate page counts for stacks
        let user_stack_pages = self.user_stack_size.div_ceil(FRAME_SIZE);
        let kernel_stack_pages = self.kernel_stack_size.div_ceil(FRAME_SIZE);

        // NOTE: User stack frames are NOT allocated here. Every code path that
        // calls build() subsequently maps user stack pages through the VAS:
        //   - create_process_with_options(): calls map_page() for each page
        //   - exec_process(): calls map_page() for re-mapped stack
        //   - fork_process(): clone_from() deep-copies parent's VAS pages
        // Those VAS-managed frames are the ones actually used and properly
        // freed by VAS::clear(). Allocating frames here would orphan them.

        // Allocate physical frames for kernel stack
        let kernel_frame = allocate_stack_frames(kernel_stack_pages).inspect_err(|_| {
            crate::println!(
                "[THREAD] Failed to allocate {} kernel stack frames for tid {}",
                kernel_stack_pages,
                tid.0
            );
        })?;
        let kernel_stack_phys = kernel_frame.as_addr().as_usize();

        // Compute virtual addresses for stacks.
        // Use the thread index (tid) to space stacks apart so each thread
        // gets a unique region. Each region includes a guard page below.
        let thread_index = tid.0 as usize;

        // Kernel stack virtual address: each thread gets
        // (kernel_stack_pages + GUARD_PAGE_COUNT) pages of virtual space
        let kernel_region_size = (kernel_stack_pages + GUARD_PAGE_COUNT) * FRAME_SIZE;
        let kernel_stack_base =
            KERNEL_STACK_REGION_BASE - ((thread_index + 1) * kernel_region_size);
        // Skip guard page at the bottom
        let kernel_stack_usable_base = kernel_stack_base + (GUARD_PAGE_COUNT * FRAME_SIZE);

        // User stack virtual address: similar layout in user space
        let user_region_size = (user_stack_pages + GUARD_PAGE_COUNT) * FRAME_SIZE;
        let user_stack_base = USER_STACK_REGION_BASE - ((thread_index + 1) * user_region_size);
        let user_stack_usable_base = user_stack_base + (GUARD_PAGE_COUNT * FRAME_SIZE);

        // Calculate actual stack sizes based on full pages
        let user_stack_size = user_stack_pages * FRAME_SIZE;
        let kernel_stack_size = kernel_stack_pages * FRAME_SIZE;

        // Zero the kernel stack region for safety
        // SAFETY: `kernel_stack_phys` is the physical address of frames we
        // just allocated from the frame allocator. On x86_64 with bootloader
        // 0.11, physical memory is mapped at a dynamic offset (not identity-
        // mapped), so we must convert via phys_to_virt_addr(). We write
        // zeroes to exactly `kernel_stack_size` bytes. No other code
        // references these frames yet.
        unsafe {
            let virt = crate::mm::phys_to_virt_addr(kernel_stack_phys as u64);
            core::ptr::write_bytes(virt as *mut u8, 0, kernel_stack_size);
        }

        let mut thread = Thread::new(
            tid,
            self.process,
            self.name,
            self.entry_point,
            user_stack_usable_base,
            user_stack_size,
            kernel_stack_usable_base,
            kernel_stack_size,
            self.fs.unwrap_or_else(ThreadFs::new_root),
        );

        // Store kernel stack physical frame info for cleanup_process() to free.
        thread
            .kernel_stack
            .phys_frame
            .store(kernel_frame.as_u64(), Ordering::Release);
        thread
            .kernel_stack
            .phys_page_count
            .store(kernel_stack_pages, Ordering::Release);

        thread.priority = self.priority;
        thread.set_affinity(self.cpu_affinity);
        thread.clear_tid.store(self.clear_tid, Ordering::Release);
        if let Some(base) = self.tls_base {
            let mut tls = thread.tls.lock();
            tls.set_tls_base(base);
            // Seed the arch context with the TLS base so the first user entry sees it
            thread.context.lock().set_tls_base(base as u64);
        }

        crate::println!(
            "[THREAD] Allocated stacks for tid {}: user={:#x}..{:#x}, kernel={:#x}..{:#x} \
             (phys={:#x}), guard pages installed",
            tid.0,
            user_stack_usable_base,
            user_stack_usable_base + user_stack_size,
            kernel_stack_usable_base,
            kernel_stack_usable_base + kernel_stack_size,
            kernel_stack_phys,
        );

        #[cfg(target_arch = "x86_64")]
        // SAFETY: raw_serial_str writes directly to the COM1 serial port (0x3F8).
        // This is safe to call at any point during kernel execution.
        unsafe {
            crate::arch::x86_64::idt::raw_serial_str(b"[THREAD] build() returning Ok\n");
        }

        Ok(thread)
    }
}
945}