// veridian_kernel/process/thread.rs
1//! Thread management implementation
2//!
3//! Threads are the unit of execution within a process. Each thread has its own
4//! stack and CPU context but shares memory and other resources with its
5//! process.
6
7use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};
8
9#[cfg(feature = "alloc")]
10extern crate alloc;
11
12#[cfg(feature = "alloc")]
13use alloc::{string::String, sync::Arc};
14use core::ptr::NonNull;
15
16use spin::Mutex;
17
18use super::ProcessId;
19use crate::{
20 arch::context::{ArchThreadContext, ThreadContext},
21 error::KernelError,
22 mm::{FRAME_ALLOCATOR, FRAME_SIZE},
23 sched::task::Task,
24};
25
/// Per-thread filesystem state for CLONE_FS support.
///
/// Each thread has a reference-counted `ThreadFs` that holds the thread's
/// current working directory (cwd) and file creation mask (umask). When
/// `CLONE_FS` is set during `clone()`, the parent and child share the same
/// `Arc<ThreadFs>`, so changes to cwd or umask in one thread are visible
/// to the other. When `CLONE_FS` is not set, the child receives an
/// independent copy (via [`clone_copy`](Self::clone_copy)).
///
/// This mirrors the Linux kernel's `struct fs_struct` semantics.
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct ThreadFs {
    /// Current working directory path. Protected by a spinlock because
    /// it can be read from syscall paths (getcwd) and modified from
    /// others (chdir) concurrently.
    pub cwd: Mutex<String>,
    /// File creation mask (umask). Atomic because it can be read/written
    /// from concurrent syscall paths without holding a lock.
    pub umask: AtomicU32,
}
47
#[cfg(feature = "alloc")]
impl ThreadFs {
    /// Create the initial filesystem state: cwd = "/" and umask = 0o022.
    ///
    /// Used for the initial thread of a new process.
    pub fn new_root() -> Arc<Self> {
        let cwd = Mutex::new(alloc::string::String::from("/"));
        let umask = AtomicU32::new(0o022);
        Arc::new(Self { cwd, umask })
    }

    /// Share the filesystem state (CLONE_FS semantics).
    ///
    /// Parent and child end up referencing the same underlying `ThreadFs`,
    /// so a cwd or umask change made by one is observed by the other.
    pub fn clone_shared(src: &Arc<Self>) -> Arc<Self> {
        Arc::clone(src)
    }

    /// Copy the filesystem state (non-CLONE_FS semantics).
    ///
    /// Snapshots the source's cwd and umask into a brand-new, independent
    /// `ThreadFs`; later changes on either side are not shared.
    pub fn clone_copy(src: &Arc<Self>) -> Arc<Self> {
        let cwd_snapshot = src.cwd.lock().clone();
        let umask_snapshot = src.umask.load(Ordering::Acquire);
        Arc::new(Self {
            cwd: Mutex::new(cwd_snapshot),
            umask: AtomicU32::new(umask_snapshot),
        })
    }
}
80
/// Default kernel stack size: 64KB (16 pages)
// NOTE(review): the "64KB" figures here assume FRAME_SIZE == 4 KiB — confirm
// against the mm module's FRAME_SIZE definition.
pub const DEFAULT_KERNEL_STACK_PAGES: usize = 16;

/// Default user stack size: 64KB (16 pages) for kernel-created threads
/// User processes use the value from ProcessCreateOptions instead.
pub const DEFAULT_USER_STACK_PAGES: usize = 16;

/// Default TLS region size: 4KB (1 page)
pub const DEFAULT_TLS_PAGES: usize = 1;

/// Guard page count (1 page below each stack to detect overflow)
pub const GUARD_PAGE_COUNT: usize = 1;

/// Base virtual address for kernel thread stacks.
/// Each thread gets its own region at KERNEL_STACK_REGION_BASE - (thread_index
/// * region_size).
const KERNEL_STACK_REGION_BASE: usize = 0xFFFF_E000_0000_0000;

/// Base virtual address for user thread stacks.
/// Grows downward from near the top of user address space.
const USER_STACK_REGION_BASE: usize = 0x0000_7FFE_0000_0000;

/// Base virtual address for TLS regions.
#[allow(dead_code)] // TLS mapping base address -- used when TLS is wired
const TLS_REGION_BASE: usize = 0x0000_7000_0000_0000;
106
107/// Allocate physical frames for a stack region and return the base physical
108/// address.
109///
110/// Allocates `page_count` contiguous physical frames from the frame allocator.
111/// The frames are zero-filled by the allocator. Returns the frame number of the
112/// first allocated frame.
113fn allocate_stack_frames(page_count: usize) -> Result<crate::mm::FrameNumber, KernelError> {
114 let allocator = FRAME_ALLOCATOR.lock();
115 allocator
116 .allocate_frames(page_count, None)
117 .map_err(|_| KernelError::OutOfMemory {
118 requested: page_count * FRAME_SIZE,
119 available: 0,
120 })
121}
122
123/// Free previously allocated stack frames.
124#[allow(dead_code)] // Stack cleanup helper -- used in thread teardown path
125fn free_stack_frames(frame: crate::mm::FrameNumber, page_count: usize) {
126 let allocator = FRAME_ALLOCATOR.lock();
127 let _ = allocator.free_frames(frame, page_count);
128}
129
/// Safe wrapper for task pointer that implements Send + Sync
///
/// Wraps an optional raw pointer to the scheduler's `Task` entry for this
/// thread; `None` means the thread is not currently linked to a task.
///
/// Safety: Task pointers are only accessed from within the scheduler
/// which has its own synchronization mechanisms.
#[derive(Debug)]
pub struct TaskPtr(Option<NonNull<Task>>);

// SAFETY: see the struct-level note above — the wrapped pointer is only
// dereferenced under the scheduler's own synchronization, so moving or
// sharing the wrapper across threads is sound.
unsafe impl Send for TaskPtr {}
unsafe impl Sync for TaskPtr {}
139
/// Thread ID type
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadId(pub u64);

impl core::fmt::Display for ThreadId {
    /// Render the thread ID as its bare numeric value.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        core::fmt::Display::fmt(&self.0, f)
    }
}
149
/// Thread state
///
/// The discriminant values are what `Thread::state` stores (as a `u32`)
/// and what `Thread::get_state` decodes — keep the numbering in sync
/// with that match.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ThreadState {
    /// Thread is being created
    Creating = 0,
    /// Thread is ready to run
    Ready = 1,
    /// Thread is currently running
    Running = 2,
    /// Thread is blocked waiting
    Blocked = 3,
    /// Thread is sleeping
    Sleeping = 4,
    /// Thread has exited but not yet cleaned up (zombie)
    Zombie = 5,
    /// Thread is completely dead and can be cleaned up
    Dead = 6,
}
169
/// Thread Local Storage (TLS) data
///
/// A zeroed instance (`base == 0`) represents "no TLS installed"; see
/// `allocate` / `install_base` for how the fields get populated.
pub struct ThreadLocalStorage {
    /// TLS base address (physical when self-allocated via `allocate`,
    /// user-supplied virtual when set via `install_base` — see those methods)
    pub base: usize,
    /// TLS size in bytes (0 when the size is unknown, e.g. user-provided base)
    pub size: usize,
    /// TLS data pointer (architecture-specific); kept equal to `base` by
    /// all setters in this file
    pub data_ptr: usize,
    /// TLS key-value data storage
    #[cfg(feature = "alloc")]
    pub data: alloc::collections::BTreeMap<u64, u64>,
}
182
impl ThreadLocalStorage {
    /// Create new TLS area
    ///
    /// All addresses start at zero and no backing memory is allocated;
    /// call [`allocate`](Self::allocate) or
    /// [`install_base`](Self::install_base) before use.
    pub fn new() -> Self {
        Self {
            base: 0,
            size: 0,
            data_ptr: 0,
            #[cfg(feature = "alloc")]
            data: alloc::collections::BTreeMap::new(),
        }
    }

    /// Allocate TLS area backed by real physical frames.
    ///
    /// Allocates enough physical frames to cover `size` bytes. The TLS base
    /// address is set to the physical frame address (which is identity-mapped
    /// in kernel space). The allocated memory is logically zero-filled
    /// (`.tbss` equivalent).
    ///
    /// A `size` of 0 is a no-op returning `Ok(())`.
    pub fn allocate(&mut self, size: usize) -> Result<(), KernelError> {
        if size == 0 {
            return Ok(());
        }

        // Round the request up to whole frames.
        let page_count = size.div_ceil(FRAME_SIZE);
        let frame = allocate_stack_frames(page_count)?;
        let phys_addr = frame.as_addr().as_usize();

        // Zero-fill the TLS region (for .tbss equivalent)
        // SAFETY: `phys_addr` is the physical address of frames we just
        // allocated. On x86_64, physical memory is mapped at a dynamic
        // offset, so we convert via phys_to_virt_addr(). We write zeroes
        // to `page_count * FRAME_SIZE` bytes, exactly what we allocated.
        // No other code references these frames yet.
        unsafe {
            let virt = crate::mm::phys_to_virt_addr(phys_addr as u64);
            core::ptr::write_bytes(virt as *mut u8, 0, page_count * FRAME_SIZE);
        }

        // Record the rounded-up size; base/data_ptr hold the physical address.
        self.base = phys_addr;
        self.size = page_count * FRAME_SIZE;
        self.data_ptr = phys_addr;
        Ok(())
    }

    /// Install a user-provided TLS base (arch_prctl/TPIDR_EL0/tp).
    ///
    /// NOTE(review): identical in effect to `set_tls_base` below — both are
    /// kept as public API; confirm whether one can be deprecated.
    pub fn install_base(&mut self, base: usize) {
        self.base = base;
        self.data_ptr = base;
        // size is unknown when provided by user; leave as-is
    }

    /// Set architecture TLS base register value (for user mode)
    pub fn set_tls_base(&mut self, base: usize) {
        self.base = base;
        self.data_ptr = base;
    }

    /// Get TLS base register value
    pub fn tls_base(&self) -> usize {
        self.base
    }

    /// Set TLS value for key
    #[cfg(feature = "alloc")]
    pub fn set_value(&mut self, key: u64, value: u64) {
        self.data.insert(key, value);
    }

    /// Get TLS value for key
    #[cfg(feature = "alloc")]
    pub fn get_value(&self, key: u64) -> Option<u64> {
        self.data.get(&key).copied()
    }

    /// Remove TLS value for key, returning the previous value if any
    #[cfg(feature = "alloc")]
    pub fn remove_value(&mut self, key: u64) -> Option<u64> {
        self.data.remove(&key)
    }

    /// Get all TLS keys
    #[cfg(feature = "alloc")]
    pub fn keys(&self) -> impl Iterator<Item = &u64> {
        self.data.keys()
    }

    /// Set the architecture-specific TLS base register.
    ///
    /// On x86_64, sets FS base (via WRFSBASE or MSR).
    /// On AArch64, sets TPIDR_EL0.
    /// On RISC-V, sets the `tp` register.
    ///
    /// This should be called during context switch or thread initialization
    /// to point the hardware TLS register to this thread's TLS area.
    ///
    /// Does nothing when no TLS base has been installed (`base == 0`).
    pub fn activate_tls_register(&self) {
        if self.base == 0 {
            return;
        }

        #[cfg(target_arch = "x86_64")]
        {
            // Set FS base via MSR (IA32_FS_BASE = 0xC0000100)
            // SAFETY: Writing MSR 0xC0000100 (IA32_FS_BASE) sets the base address
            // for the FS segment register. `self.base` is a valid address obtained
            // from the frame allocator via `allocate()`. This is a privileged
            // operation executed in kernel mode (ring 0). The value is split into
            // low 32 bits (EAX) and high 32 bits (EDX) as required by WRMSR.
            unsafe {
                let base = self.base as u64;
                core::arch::asm!(
                    "wrmsr",
                    in("ecx") 0xC000_0100u32,
                    in("eax") (base & 0xFFFF_FFFF) as u32,
                    in("edx") (base >> 32) as u32,
                );
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            // Set TPIDR_EL0 (Thread Pointer ID Register for EL0)
            // SAFETY: Writing TPIDR_EL0 sets the user-space thread pointer
            // register. `self.base` is a valid address from the frame allocator.
            // This is accessible from EL1 (kernel mode) and will be readable
            // from EL0 (user mode) for TLS access. No side effects beyond
            // setting the register value.
            unsafe {
                core::arch::asm!("msr tpidr_el0, {}", in(reg) self.base);
            }
        }

        #[cfg(target_arch = "riscv64")]
        {
            // Set tp (thread pointer) register
            // SAFETY: Writing the `tp` register sets the thread pointer used for
            // TLS access. `self.base` is a valid address from the frame allocator.
            // The `tp` register is a general-purpose register designated by the
            // RISC-V ABI for TLS, accessible in both S-mode and U-mode.
            unsafe {
                core::arch::asm!("mv tp, {}", in(reg) self.base);
            }
        }
    }
}
327
328impl Default for ThreadLocalStorage {
329 fn default() -> Self {
330 Self::new()
331 }
332}
333
/// Thread control block
///
/// Mutable fields that are touched from concurrent paths (state, CPU
/// bookkeeping, wake time, exit code, …) are atomics; compound state
/// (CPU context, TLS, scheduler task pointer) sits behind spinlocks.
pub struct Thread {
    /// Thread ID
    pub tid: ThreadId,

    /// Parent process ID
    pub process: ProcessId,

    /// Thread name
    #[cfg(feature = "alloc")]
    pub name: String,

    /// Thread state (stores a `ThreadState` discriminant as u32;
    /// decoded by `get_state`)
    pub state: AtomicU32,

    /// CPU context (registers, etc.)
    pub context: Mutex<ArchThreadContext>,

    /// User stack
    pub user_stack: Stack,

    /// Kernel stack
    pub kernel_stack: Stack,

    /// Thread-local storage
    pub tls: Mutex<ThreadLocalStorage>,

    /// CPU affinity mask (bit N set = may run on CPU N; usize::MAX = all CPUs)
    pub cpu_affinity: AtomicUsize,

    /// Current CPU (if running; u32::MAX when not on any CPU)
    pub current_cpu: AtomicU32,

    /// Time slice remaining
    pub time_slice: AtomicU32,

    /// Total CPU time used (microseconds)
    pub cpu_time: AtomicU64,

    /// Wake up time (for sleeping threads)
    pub wake_time: AtomicU64,

    /// Exit code (an i32 stored via `as u32` cast in `set_exited`)
    pub exit_code: AtomicU32,

    /// Thread priority (inherited from process)
    pub priority: u8,

    /// Floating point state saved flag (non-zero = FPU was used)
    pub fpu_used: AtomicU32,

    /// Scheduler task pointer (if scheduled)
    pub task_ptr: Mutex<TaskPtr>,

    /// clear_tid pointer for CLONE_CHILD_CLEARTID (0 = unset)
    pub clear_tid: AtomicUsize,
    /// Detached flag (pthread_detach)
    pub detached: AtomicBool,
    /// Filesystem view (cwd, umask)
    #[cfg(feature = "alloc")]
    pub fs: Arc<ThreadFs>,
}
396
/// Stack information
#[derive(Debug)]
pub struct Stack {
    /// Base address (lowest address, virtual)
    pub base: usize,
    /// Size in bytes
    pub size: usize,
    /// Current stack pointer
    pub sp: AtomicUsize,
    /// Physical frame number of the contiguous allocation from the frame
    /// allocator. Stored so cleanup can free the frames without walking
    /// page tables. Zero means no physical frames are owned by this Stack
    /// (e.g., user stacks whose frames are managed by the VAS).
    pub phys_frame: AtomicU64,
    /// Number of physical pages owned by this stack (for `free_frames`).
    pub phys_page_count: AtomicUsize,
}

impl Stack {
    /// Construct a stack covering `[base, base + size)`.
    ///
    /// The stack pointer starts at the top of the region because stacks
    /// grow downward; no physical frames are owned initially.
    pub fn new(base: usize, size: usize) -> Self {
        let initial_sp = base + size;
        Self {
            base,
            size,
            sp: AtomicUsize::new(initial_sp),
            phys_frame: AtomicU64::new(0),
            phys_page_count: AtomicUsize::new(0),
        }
    }

    /// Address one past the highest byte of the stack — the initial SP.
    pub fn top(&self) -> usize {
        self.base + self.size
    }

    /// Whether `addr` falls inside the half-open range `[base, top)`.
    pub fn contains(&self, addr: usize) -> bool {
        (self.base..self.base + self.size).contains(&addr)
    }

    /// Read the current stack pointer.
    pub fn get_sp(&self) -> usize {
        self.sp.load(Ordering::Acquire)
    }

    /// Record a new stack pointer value.
    pub fn set_sp(&self, sp: usize) {
        self.sp.store(sp, Ordering::Release);
    }
}
447
/// Thread creation parameters
///
/// Bundles the positional arguments of [`Thread::new`] into one struct.
/// NOTE(review): not consumed by `Thread::new` itself in this file —
/// confirm which callers construct it.
#[cfg(feature = "alloc")]
pub struct ThreadParams {
    // Thread ID assigned by the TID allocator.
    pub tid: ThreadId,
    // Owning process.
    pub process: ProcessId,
    // Human-readable thread name.
    pub name: String,
    // Virtual address where execution begins.
    pub entry_point: usize,
    // User stack region: lowest usable address and byte length.
    pub user_stack_base: usize,
    pub user_stack_size: usize,
    // Kernel stack region: lowest usable address and byte length.
    pub kernel_stack_base: usize,
    pub kernel_stack_size: usize,
}
460
impl Thread {
    /// Create a new thread
    ///
    /// Initializes the CPU context so execution starts at `entry_point`
    /// with both stack pointers at the top of their regions (stacks grow
    /// downward). The thread begins in `Creating` state with default
    /// affinity (all CPUs), priority 2 and a 10-unit time slice; it must
    /// be marked ready before the scheduler will run it.
    #[cfg(feature = "alloc")]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        tid: ThreadId,
        process: ProcessId,
        name: String,
        entry_point: usize,
        user_stack_base: usize,
        user_stack_size: usize,
        kernel_stack_base: usize,
        kernel_stack_size: usize,
        fs: Arc<ThreadFs>,
    ) -> Self {
        // Create context using ThreadContext trait
        let mut context = <ArchThreadContext as ThreadContext>::new();
        context.init(
            entry_point,
            user_stack_base + user_stack_size,
            kernel_stack_base + kernel_stack_size,
        );

        Self {
            tid,
            process,
            name,
            state: AtomicU32::new(ThreadState::Creating as u32),
            context: Mutex::new(context),
            user_stack: Stack::new(user_stack_base, user_stack_size),
            kernel_stack: Stack::new(kernel_stack_base, kernel_stack_size),
            tls: Mutex::new(ThreadLocalStorage::new()),
            cpu_affinity: AtomicUsize::new(usize::MAX), // All CPUs
            current_cpu: AtomicU32::new(u32::MAX),
            time_slice: AtomicU32::new(10), // Default time slice
            cpu_time: AtomicU64::new(0),
            wake_time: AtomicU64::new(0),
            exit_code: AtomicU32::new(0),
            priority: 2, // Normal priority
            fpu_used: AtomicU32::new(0),
            task_ptr: Mutex::new(TaskPtr(None)),
            clear_tid: AtomicUsize::new(0),
            detached: AtomicBool::new(false),
            fs,
        }
    }

    /// Get thread state
    ///
    /// Decodes the stored `ThreadState` discriminant; any unknown value
    /// (should not occur) is reported as `Dead`.
    pub fn get_state(&self) -> ThreadState {
        match self.state.load(Ordering::Acquire) {
            0 => ThreadState::Creating,
            1 => ThreadState::Ready,
            2 => ThreadState::Running,
            3 => ThreadState::Blocked,
            4 => ThreadState::Sleeping,
            5 => ThreadState::Zombie,
            6 => ThreadState::Dead,
            _ => ThreadState::Dead,
        }
    }

    /// Set thread state (does NOT notify the scheduler — see
    /// `sync_state_with_scheduler` for that)
    pub fn set_state(&self, state: ThreadState) {
        self.state.store(state as u32, Ordering::Release);
    }

    /// Check if thread is runnable (Ready or Running)
    pub fn is_runnable(&self) -> bool {
        matches!(self.get_state(), ThreadState::Ready | ThreadState::Running)
    }

    /// Set CPU affinity
    pub fn set_affinity(&self, mask: usize) {
        self.cpu_affinity.store(mask, Ordering::Release);
    }

    /// Get CPU affinity
    pub fn get_affinity(&self) -> usize {
        self.cpu_affinity.load(Ordering::Acquire)
    }

    /// Check if thread can run on CPU
    ///
    /// NOTE(review): `1 << cpu` overflows for cpu >= usize::BITS (64) —
    /// panics in debug builds. Callers appear to pass small CPU indices;
    /// confirm the maximum supported CPU count.
    pub fn can_run_on_cpu(&self, cpu: u8) -> bool {
        let mask = self.get_affinity();
        (mask & (1 << cpu)) != 0
    }

    /// Mark thread as using FPU (so its FP state must be saved/restored)
    pub fn mark_fpu_used(&self) {
        self.fpu_used.store(1, Ordering::Release);
    }

    /// Check if thread uses FPU
    pub fn uses_fpu(&self) -> bool {
        self.fpu_used.load(Ordering::Acquire) != 0
    }

    /// Sleep thread until specified time
    ///
    /// Stores the wake deadline first, then flips the state, so a
    /// concurrent `check_wake` never sees Sleeping with a stale deadline.
    pub fn sleep_until(&self, wake_time: u64) {
        self.wake_time.store(wake_time, Ordering::Release);
        self.set_state(ThreadState::Sleeping);
    }

    /// Wake up thread if it's time
    ///
    /// Returns true when the thread transitioned Sleeping -> Ready.
    pub fn check_wake(&self, current_time: u64) -> bool {
        if self.get_state() == ThreadState::Sleeping {
            let wake_time = self.wake_time.load(Ordering::Acquire);
            if current_time >= wake_time {
                self.set_state(ThreadState::Ready);
                return true;
            }
        }
        false
    }

    /// Update CPU time (accumulates; Relaxed is fine for a statistic)
    pub fn add_cpu_time(&self, microseconds: u64) {
        self.cpu_time.fetch_add(microseconds, Ordering::Relaxed);
    }

    /// Set scheduler task pointer (None unlinks the thread from its task)
    pub fn set_task_ptr(&self, task: Option<NonNull<Task>>) {
        self.task_ptr.lock().0 = task;
    }

    /// Get scheduler task pointer
    pub fn get_task_ptr(&self) -> Option<NonNull<Task>> {
        self.task_ptr.lock().0
    }

    /// Synchronize state with scheduler task
    ///
    /// Updates this thread's state and mirrors it into the linked
    /// scheduler `Task` (if any), mapping each `ThreadState` to the
    /// corresponding `ProcessState`.
    pub fn sync_state_with_scheduler(&self, new_state: ThreadState) {
        // Update our state
        self.set_state(new_state);

        // Update scheduler task state if linked
        if let Some(task_ptr) = self.get_task_ptr() {
            // SAFETY: task_ptr is a NonNull<Task> obtained from
            // get_task_ptr(), which returns a valid pointer to the
            // scheduler's task. Writing the state field synchronizes the
            // thread state with the scheduler's view. The pointer is valid
            // because the task is not freed while the thread holds a
            // reference to it.
            unsafe {
                let task = task_ptr.as_ptr();
                (*task).state = match new_state {
                    ThreadState::Creating => crate::process::ProcessState::Creating,
                    ThreadState::Ready => crate::process::ProcessState::Ready,
                    ThreadState::Running => crate::process::ProcessState::Running,
                    ThreadState::Blocked => crate::process::ProcessState::Blocked,
                    ThreadState::Sleeping => crate::process::ProcessState::Sleeping,
                    ThreadState::Zombie => crate::process::ProcessState::Zombie,
                    ThreadState::Dead => crate::process::ProcessState::Dead,
                };
            }
        }
    }

    /// Mark thread as ready to run
    pub fn set_ready(&self) {
        self.sync_state_with_scheduler(ThreadState::Ready);
    }

    /// Mark thread as blocked, recording an optional blocking reason in
    /// the scheduler task
    pub fn set_blocked(&self, reason: Option<u64>) {
        self.sync_state_with_scheduler(ThreadState::Blocked);

        // Update scheduler task blocked_on field if linked
        if let Some(task_ptr) = self.get_task_ptr() {
            // SAFETY: task_ptr is a NonNull<Task> from get_task_ptr(),
            // pointing to the scheduler's task entry. Writing blocked_on
            // records the blocking reason. The pointer remains valid because
            // the task is not freed while the thread references it.
            unsafe {
                let task = task_ptr.as_ptr();
                (*task).blocked_on = reason;
            }
        }
    }

    /// Mark thread as running on CPU
    pub fn set_running(&self, cpu: u8) {
        self.current_cpu.store(cpu as u32, Ordering::Release);
        self.sync_state_with_scheduler(ThreadState::Running);
    }

    /// Mark thread as exited
    ///
    /// The i32 exit code is stored bit-for-bit as u32 (`as` cast); readers
    /// must cast back to recover negative codes.
    pub fn set_exited(&self, exit_code: i32) {
        self.exit_code.store(exit_code as u32, Ordering::Release);
        self.sync_state_with_scheduler(ThreadState::Zombie);
    }

    /// Get total CPU time
    pub fn get_cpu_time(&self) -> u64 {
        self.cpu_time.load(Ordering::Relaxed)
    }

    /// Set TLS value for this thread
    #[cfg(feature = "alloc")]
    pub fn set_tls_value(&self, key: u64, value: u64) {
        self.tls.lock().set_value(key, value);
    }

    /// Get TLS value for this thread
    #[cfg(feature = "alloc")]
    pub fn get_tls_value(&self, key: u64) -> Option<u64> {
        self.tls.lock().get_value(key)
    }

    /// Remove TLS value for this thread
    #[cfg(feature = "alloc")]
    pub fn remove_tls_value(&self, key: u64) -> Option<u64> {
        self.tls.lock().remove_value(key)
    }

    /// Get all TLS keys for this thread (collected so the lock is not
    /// held by the returned value)
    #[cfg(feature = "alloc")]
    pub fn get_tls_keys(&self) -> alloc::vec::Vec<u64> {
        self.tls.lock().keys().copied().collect()
    }

    /// Set thread entry point (requires exclusive access; uses the
    /// lock-free `get_mut` path)
    pub fn set_entry_point(&mut self, entry: usize) {
        self.context.get_mut().set_instruction_pointer(entry);
    }

    /// Reset thread context for exec
    ///
    /// Replaces the CPU context with the architecture default, marks the
    /// thread Ready and clears its accounting counters.
    pub fn reset_context(&mut self) {
        // Reset to initial state
        *self.context.get_mut() = ArchThreadContext::default();
        self.state
            .store(ThreadState::Ready as u32, Ordering::Release);
        self.cpu_time.store(0, Ordering::Relaxed);
        self.time_slice.store(10, Ordering::Relaxed); // Default time slice
    }

    /// Get filesystem view (cwd/umask); returns a shared handle, not a copy
    #[cfg(feature = "alloc")]
    pub fn fs(&self) -> Arc<ThreadFs> {
        self.fs.clone()
    }
}
703
/// Builder for creating new threads with specific configurations.
///
/// `ThreadBuilder` follows the builder pattern to construct a `Thread`
/// with custom stack sizes, priority, CPU affinity, TLS base, and
/// filesystem state. The [`build`](Self::build) method allocates physical
/// frames for both user and kernel stacks, assigns virtual address regions
/// with guard pages, and initializes the thread's CPU context.
///
/// # Example
///
/// ```ignore
/// let thread = ThreadBuilder::new(pid, "worker".into(), entry_fn as usize)
///     .user_stack_size(2 * 1024 * 1024) // 2 MB user stack
///     .priority(3)
///     .cpu_affinity(0x3) // CPUs 0 and 1
///     .build()?;
/// ```
///
/// # Defaults
///
/// - User stack: 1 MB
/// - Kernel stack: 64 KB
/// - Priority: 2 (normal)
/// - CPU affinity: all CPUs (usize::MAX)
/// - TLS base: None (no TLS)
/// - Filesystem state: new root ("/", umask 0o022)
#[cfg(feature = "alloc")]
pub struct ThreadBuilder {
    // Required: owning process, thread name, entry address.
    process: ProcessId,
    name: String,
    entry_point: usize,
    // Stack sizes in bytes; rounded up to whole frames by build().
    user_stack_size: usize,
    kernel_stack_size: usize,
    priority: u8,
    cpu_affinity: usize,
    // CLONE_CHILD_CLEARTID pointer (0 = unset).
    clear_tid: usize,
    // CLONE_SETTLS base address, if any.
    tls_base: Option<usize>,
    // Filesystem view; None means build() creates a fresh root.
    fs: Option<Arc<ThreadFs>>,
}
743
#[cfg(feature = "alloc")]
impl ThreadBuilder {
    /// Create a new thread builder with required parameters.
    ///
    /// # Arguments
    /// - `process`: The parent process ID that owns this thread.
    /// - `name`: Human-readable thread name (for debugging/logging).
    /// - `entry_point`: Virtual address where thread execution begins.
    pub fn new(process: ProcessId, name: String, entry_point: usize) -> Self {
        Self {
            process,
            name,
            entry_point,
            user_stack_size: 1024 * 1024, // 1MB default
            kernel_stack_size: 64 * 1024, // 64KB default
            priority: 2,
            cpu_affinity: usize::MAX,
            clear_tid: 0,
            tls_base: None,
            fs: None,
        }
    }

    /// Set user stack size (bytes; rounded up to whole frames by `build`)
    pub fn user_stack_size(mut self, size: usize) -> Self {
        self.user_stack_size = size;
        self
    }

    /// Set kernel stack size (bytes; rounded up to whole frames by `build`)
    pub fn kernel_stack_size(mut self, size: usize) -> Self {
        self.kernel_stack_size = size;
        self
    }

    /// Set priority
    pub fn priority(mut self, priority: u8) -> Self {
        self.priority = priority;
        self
    }

    /// Set CPU affinity
    pub fn cpu_affinity(mut self, mask: usize) -> Self {
        self.cpu_affinity = mask;
        self
    }

    /// Set clear_tid pointer for CLONE_CHILD_CLEARTID
    pub fn clear_tid(mut self, ptr: usize) -> Self {
        self.clear_tid = ptr;
        self
    }

    /// Set the filesystem view (cwd/umask) for the new thread.
    ///
    /// When creating a thread via `clone()` with `CLONE_FS`, pass a
    /// shared `Arc<ThreadFs>` from the parent. Without `CLONE_FS`, pass
    /// a copy via [`ThreadFs::clone_copy`]. If not set, the builder
    /// defaults to a new root filesystem state.
    pub fn fs(mut self, fs: Arc<ThreadFs>) -> Self {
        self.fs = Some(fs);
        self
    }

    /// Set the TLS (Thread-Local Storage) base address for the new thread.
    ///
    /// This corresponds to `CLONE_SETTLS` on Linux or `arch_prctl(ARCH_SET_FS)`
    /// on x86_64. The base address is written into the architecture-specific
    /// TLS register (FS base on x86_64, TPIDR_EL0 on AArch64, tp on RISC-V)
    /// when the thread is first scheduled.
    pub fn tls_base(mut self, base: usize) -> Self {
        self.tls_base = Some(base);
        self
    }

    /// Build the thread with real stack allocation.
    ///
    /// Allocates physical frames for the **kernel** stack via the global frame
    /// allocator. User stack frames are NOT allocated here because every caller
    /// immediately maps user stack pages through the VAS (via `map_page()`),
    /// which allocates its own tracked frames. Allocating frames here would
    /// orphan them (never freed, never used), leaking 64 pages (256 KB) per
    /// thread -- the root cause of the native-compilation OOM.
    ///
    /// Kernel stack frames are stored in `Stack::phys_frame` so that
    /// `cleanup_process()` can free them without walking page tables.
    ///
    /// Each stack gets a guard page (unmapped) below it to detect overflow.
    /// Stack pointers are set to the top of each allocated region since stacks
    /// grow downward on all supported architectures (x86_64, AArch64, RISC-V).
    pub fn build(self) -> Result<Thread, KernelError> {
        let tid = super::alloc_tid();

        // Calculate page counts for stacks
        let user_stack_pages = self.user_stack_size.div_ceil(FRAME_SIZE);
        let kernel_stack_pages = self.kernel_stack_size.div_ceil(FRAME_SIZE);

        // NOTE: User stack frames are NOT allocated here. Every code path that
        // calls build() subsequently maps user stack pages through the VAS:
        //   - create_process_with_options(): calls map_page() for each page
        //   - exec_process(): calls map_page() for re-mapped stack
        //   - fork_process(): clone_from() deep-copies parent's VAS pages
        // Those VAS-managed frames are the ones actually used and properly
        // freed by VAS::clear(). Allocating frames here would orphan them.

        // Allocate physical frames for kernel stack
        let kernel_frame = allocate_stack_frames(kernel_stack_pages).inspect_err(|_| {
            crate::println!(
                "[THREAD] Failed to allocate {} kernel stack frames for tid {}",
                kernel_stack_pages,
                tid.0
            );
        })?;
        let kernel_stack_phys = kernel_frame.as_addr().as_usize();

        // Compute virtual addresses for stacks.
        // Use the thread index (tid) to space stacks apart so each thread
        // gets a unique region. Each region includes a guard page below.
        //
        // NOTE(review): the region stride is derived from THIS thread's
        // stack size, so threads built with different stack sizes would
        // compute overlapping regions. Confirm all callers use uniform
        // sizes, or derive the stride from a fixed maximum.
        let thread_index = tid.0 as usize;

        // Kernel stack virtual address: each thread gets
        // (kernel_stack_pages + GUARD_PAGE_COUNT) pages of virtual space
        let kernel_region_size = (kernel_stack_pages + GUARD_PAGE_COUNT) * FRAME_SIZE;
        let kernel_stack_base =
            KERNEL_STACK_REGION_BASE - ((thread_index + 1) * kernel_region_size);
        // Skip guard page at the bottom
        let kernel_stack_usable_base = kernel_stack_base + (GUARD_PAGE_COUNT * FRAME_SIZE);

        // User stack virtual address: similar layout in user space
        let user_region_size = (user_stack_pages + GUARD_PAGE_COUNT) * FRAME_SIZE;
        let user_stack_base = USER_STACK_REGION_BASE - ((thread_index + 1) * user_region_size);
        let user_stack_usable_base = user_stack_base + (GUARD_PAGE_COUNT * FRAME_SIZE);

        // Calculate actual stack sizes based on full pages
        let user_stack_size = user_stack_pages * FRAME_SIZE;
        let kernel_stack_size = kernel_stack_pages * FRAME_SIZE;

        // Zero the kernel stack region for safety
        // SAFETY: `kernel_stack_phys` is the physical address of frames we
        // just allocated from the frame allocator. On x86_64 with bootloader
        // 0.11, physical memory is mapped at a dynamic offset (not identity-
        // mapped), so we must convert via phys_to_virt_addr(). We write
        // zeroes to exactly `kernel_stack_size` bytes. No other code
        // references these frames yet.
        unsafe {
            let virt = crate::mm::phys_to_virt_addr(kernel_stack_phys as u64);
            core::ptr::write_bytes(virt as *mut u8, 0, kernel_stack_size);
        }

        let mut thread = Thread::new(
            tid,
            self.process,
            self.name,
            self.entry_point,
            user_stack_usable_base,
            user_stack_size,
            kernel_stack_usable_base,
            kernel_stack_size,
            self.fs.unwrap_or_else(ThreadFs::new_root),
        );

        // Store kernel stack physical frame info for cleanup_process() to free.
        thread
            .kernel_stack
            .phys_frame
            .store(kernel_frame.as_u64(), Ordering::Release);
        thread
            .kernel_stack
            .phys_page_count
            .store(kernel_stack_pages, Ordering::Release);

        thread.priority = self.priority;
        thread.set_affinity(self.cpu_affinity);
        thread.clear_tid.store(self.clear_tid, Ordering::Release);
        if let Some(base) = self.tls_base {
            let mut tls = thread.tls.lock();
            tls.set_tls_base(base);
            // Seed the arch context with the TLS base so the first user entry sees it
            thread.context.lock().set_tls_base(base as u64);
        }

        crate::println!(
            "[THREAD] Allocated stacks for tid {}: user={:#x}..{:#x}, kernel={:#x}..{:#x} \
             (phys={:#x}), guard pages installed",
            tid.0,
            user_stack_usable_base,
            user_stack_usable_base + user_stack_size,
            kernel_stack_usable_base,
            kernel_stack_usable_base + kernel_stack_size,
            kernel_stack_phys,
        );

        #[cfg(target_arch = "x86_64")]
        // SAFETY: raw_serial_str writes directly to the COM1 serial port (0x3F8).
        // This is safe to call at any point during kernel execution.
        unsafe {
            crate::arch::x86_64::idt::raw_serial_str(b"[THREAD] build() returning Ok\n");
        }

        Ok(thread)
    }
}
945}