veridian_kernel/sched/load_balance.rs
//! Load balancing and task migration between CPUs
//!
//! Implements periodic load balancing across CPUs, task migration from
//! overloaded to underloaded CPUs, and deferred cleanup of dead tasks.

use core::sync::atomic::Ordering;

use super::{metrics, smp, task::Task};

/// Wrapper that makes `NonNull<Task>` Send/Sync for load-balancing data
/// structures.
///
/// # Safety
///
/// `TaskPtr` instances in load balancing are only accessed under appropriate
/// locks (the cleanup-queue mutex or CPU ready-queue locks). Task memory is
/// managed by the kernel allocator.
#[derive(Clone, Copy)]
struct TaskPtr(core::ptr::NonNull<Task>);

// SAFETY: TaskPtr is only accessed under mutex locks in the cleanup queue or
// during load balancing with CPU ready-queue locks held. No unsynchronized
// concurrent access occurs.
unsafe impl Send for TaskPtr {}
// SAFETY: Same as Send -- all access is synchronized via mutexes.
unsafe impl Sync for TaskPtr {}

#[cfg(feature = "alloc")]
extern crate alloc;

/// Dead tasks awaiting deferred deallocation, paired with the tick at which
/// they become safe to free.
///
/// This lives at module scope (rather than inside `cleanup_dead_tasks`) so
/// that `exit_task` can actually push dying tasks onto it, as the SAFETY
/// reasoning below relies on; a function-local static would be unreachable
/// from the rest of the scheduler.
#[cfg(feature = "alloc")]
static CLEANUP_QUEUE: spin::Lazy<spin::Mutex<alloc::vec::Vec<(TaskPtr, u64)>>> =
    spin::Lazy::new(|| spin::Mutex::new(alloc::vec::Vec::new()));

/// Clean up dead tasks that have been marked for deferred deallocation
#[cfg(feature = "alloc")]
pub fn cleanup_dead_tasks() {
    use alloc::boxed::Box;

    let current_tick = crate::arch::timer::get_ticks();
    let mut queue = CLEANUP_QUEUE.lock();

    // Free every task whose cleanup deadline has passed
    let mut i = 0;
    while i < queue.len() {
        let (TaskPtr(task_ptr), cleanup_tick) = queue[i];

        if current_tick >= cleanup_tick {
            // Remove from the queue. `swap_remove` moves the last element
            // into slot `i`, so the next iteration re-examines index `i`.
            queue.swap_remove(i);

            // SAFETY: This task pointer was placed in the cleanup queue by
            // `exit_task` after being removed from the scheduler. We waited
            // at least 100 ticks (the cleanup delay) to ensure no other CPU
            // holds a reference to this task. The pointer was originally
            // created via `Box::leak` and is valid to reconstruct.
            unsafe {
                drop(Box::from_raw(task_ptr.as_ptr()));
            }

            kprintln!("[SCHED] Cleaned up dead task");
        } else {
            i += 1;
        }
    }
}
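
/// Sketch of the enqueue side referenced by the SAFETY comment above, which
/// is not shown in this file. This assumes a 100-tick cleanup delay (the
/// value the SAFETY comment relies on) and that dying tasks were allocated
/// via `Box::leak`; the name `defer_task_cleanup` and the constant are
/// illustrative, not an existing kernel API.
#[cfg(feature = "alloc")]
#[allow(dead_code)]
fn defer_task_cleanup(task: core::ptr::NonNull<Task>) {
    // Assumed cleanup delay, matching the "at least 100 ticks" guarantee
    // that `cleanup_dead_tasks` depends on.
    const CLEANUP_DELAY_TICKS: u64 = 100;

    let cleanup_tick = crate::arch::timer::get_ticks() + CLEANUP_DELAY_TICKS;
    CLEANUP_QUEUE.lock().push((TaskPtr(task), cleanup_tick));
}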

/// Perform load balancing across CPUs
#[cfg(feature = "alloc")]
pub fn balance_load() {
    // Minimum load gap (in percentage points) between the busiest and the
    // idlest CPU before any migration is attempted.
    const IMBALANCE_THRESHOLD: u8 = 20;

    // Find the most loaded and least loaded online CPUs. `Ordering` comes
    // from the module-level import above.
    let mut max_load = 0u8;
    let mut min_load = 100u8;
    let mut busiest_cpu = 0u8;
    let mut idlest_cpu = 0u8;

    for cpu_id in 0..smp::MAX_CPUS as u8 {
        if let Some(cpu_data) = smp::per_cpu(cpu_id) {
            if cpu_data.cpu_info.is_online() {
                let load = cpu_data.cpu_info.load.load(Ordering::Relaxed);

                if load > max_load {
                    max_load = load;
                    busiest_cpu = cpu_id;
                }

                if load < min_load {
                    min_load = load;
                    idlest_cpu = cpu_id;
                }
            }
        }
    }

    // If the imbalance is significant, migrate tasks
    let imbalance = max_load.saturating_sub(min_load);
    if imbalance > IMBALANCE_THRESHOLD {
        // Migrate one task per threshold's worth of imbalance, capped at 3
        // per pass. Since `imbalance` exceeds the threshold, this is always
        // at least 1.
        let tasks_to_migrate = u32::from(imbalance / IMBALANCE_THRESHOLD).min(3);

        kprintln!("[SCHED] Load balancing: migrating tasks");

        // Record the load-balance metric
        metrics::SCHEDULER_METRICS.record_load_balance();

        // Perform the actual task migration
        migrate_tasks(busiest_cpu, idlest_cpu, tasks_to_migrate);
    }
}
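
/// Sketch of how a periodic scheduler tick might drive the routines in this
/// file. The interval constants are illustrative assumptions, not values
/// taken from elsewhere in the kernel.
#[cfg(feature = "alloc")]
#[allow(dead_code)]
fn balance_on_tick(tick: u64) {
    const BALANCE_INTERVAL_TICKS: u64 = 64; // assumed rebalance period
    const CLEANUP_INTERVAL_TICKS: u64 = 256; // assumed cleanup period

    if tick % BALANCE_INTERVAL_TICKS == 0 {
        balance_load();
    }
    if tick % CLEANUP_INTERVAL_TICKS == 0 {
        cleanup_dead_tasks();
    }
}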

/// Migrate tasks from a source CPU to a target CPU
#[cfg(feature = "alloc")]
fn migrate_tasks(source_cpu: u8, target_cpu: u8, count: u32) {
    use alloc::vec::Vec;

    // Resolve the target CPU before touching the source queue. If the
    // target lookup failed after tasks had been dequeued, those tasks would
    // be leaked with no ready queue holding them.
    let Some(target_cpu_data) = smp::per_cpu(target_cpu) else {
        return;
    };

    let mut migrated = 0u32;

    // Try to get tasks from the source CPU's ready queue
    if let Some(source_cpu_data) = smp::per_cpu(source_cpu) {
        // Collect tasks to migrate
        let mut tasks_to_migrate = Vec::new();

        {
            let mut queue = source_cpu_data.cpu_info.ready_queue.lock();

            // Try to dequeue tasks that can run on the target CPU
            for _ in 0..count {
                if let Some(task_ptr) = queue.dequeue() {
                    // SAFETY: `task_ptr` is a valid NonNull<Task> returned by
                    // `queue.dequeue()`. We hold the queue lock, so the task
                    // is not concurrently modified. We read `can_run_on` to
                    // check affinity.
                    unsafe {
                        let task = task_ptr.as_ref();
                        if task.can_run_on(target_cpu) {
                            tasks_to_migrate.push(task_ptr);
                        } else {
                            // Put it back if it can't run on the target. This
                            // bounded loop may dequeue it again on a later
                            // iteration, depending on the queue's ordering.
                            queue.enqueue(task_ptr);
                        }
                    }
                }
            }

            // Update the source CPU's load
            source_cpu_data
                .cpu_info
                .nr_running
                .fetch_sub(tasks_to_migrate.len() as u32, Ordering::Relaxed);
            source_cpu_data.cpu_info.update_load();
        }

        // Migrate the collected tasks to the target CPU
        {
            let mut target_queue = target_cpu_data.cpu_info.ready_queue.lock();

            for task_ptr in tasks_to_migrate {
                // SAFETY: `task_ptr` is a valid NonNull<Task> that was just
                // dequeued from the source CPU. We hold the target queue lock
                // and update the task's migration-tracking fields before
                // enqueuing it on the target CPU.
                unsafe {
                    let task_mut = task_ptr.as_ptr();

                    // Record the CPU the task came from and count the move
                    (*task_mut).last_cpu = Some(source_cpu);
                    (*task_mut).migrations += 1;

                    // Enqueue on the target CPU
                    target_queue.enqueue(task_ptr);
                    migrated += 1;
                }
            }

            // Update the target CPU's load
            target_cpu_data
                .cpu_info
                .nr_running
                .fetch_add(migrated, Ordering::Relaxed);
            target_cpu_data.cpu_info.update_load();

            // Wake up the target CPU if it is idle
            if target_cpu_data.cpu_info.is_idle() {
                smp::send_ipi(target_cpu, 0);
            }
        }

        if migrated > 0 {
            kprintln!("[SCHED] Successfully migrated tasks");

            // Record migration metrics
            for _ in 0..migrated {
                metrics::SCHEDULER_METRICS.record_migration();
            }
        }
    }
}