⚠️ VeridianOS Kernel Documentation - This is low-level `no_std` kernel code. All functions are unsafe unless explicitly marked otherwise.

veridian_kernel/sched/
load_balance.rs

1//! Load balancing and task migration between CPUs
2//!
3//! Implements periodic load balancing across CPUs, task migration from
4//! overloaded to underloaded CPUs, and deferred cleanup of dead tasks.
5
6use core::sync::atomic::Ordering;
7
8use super::{metrics, smp, task::Task};
9
/// Wrapper to make NonNull<Task> Send/Sync for load balancing data structures.
///
/// # Safety
///
/// TaskPtr instances in load balancing are only accessed under appropriate
/// locks (cleanup queue mutex or CPU ready queue locks). Task memory is
/// managed by the kernel allocator.
// NOTE(review): `Copy` means duplicate TaskPtrs to the same task can coexist.
// Soundness of the eventual `Box::from_raw` in cleanup relies on each task
// being enqueued for cleanup exactly once — confirm at the enqueue site.
#[derive(Clone, Copy)]
struct TaskPtr(core::ptr::NonNull<Task>);

// SAFETY: TaskPtr is only accessed under mutex locks in the cleanup queue or
// during load balancing with CPU ready queue locks held. No unsynchronized
// concurrent access occurs.
unsafe impl Send for TaskPtr {}
// SAFETY: Same as Send -- all access is synchronized via mutexes.
unsafe impl Sync for TaskPtr {}
26
/// Pending deferred deallocations: (task pointer, earliest tick at which the
/// task may be freed).
///
/// BUG FIX: this static previously lived *inside* `cleanup_dead_tasks`, so no
/// other function — including the `exit_task` that the safety argument below
/// relies on — could ever enqueue a task into it, making the cleanup loop
/// unreachable dead code. It is now module-scoped and paired with
/// `defer_task_cleanup` so producers can actually push entries.
#[cfg(feature = "alloc")]
static CLEANUP_QUEUE: spin::Lazy<spin::Mutex<alloc::vec::Vec<(TaskPtr, u64)>>> =
    spin::Lazy::new(|| spin::Mutex::new(alloc::vec::Vec::new()));

/// Enqueue a task for deferred deallocation, `delay_ticks` from now.
///
/// # Safety
///
/// `task` must point to a `Task` originally created via `Box::leak`, must
/// already be removed from every scheduler queue, and must not be enqueued
/// here more than once — it will eventually be freed with `Box::from_raw`.
#[cfg(feature = "alloc")]
pub unsafe fn defer_task_cleanup(task: core::ptr::NonNull<Task>, delay_ticks: u64) {
    let cleanup_tick = crate::arch::timer::get_ticks().saturating_add(delay_ticks);
    CLEANUP_QUEUE.lock().push((TaskPtr(task), cleanup_tick));
}

/// Clean up dead tasks that have been marked for deferred deallocation.
///
/// Walks the cleanup queue and frees every task whose cleanup tick is at or
/// before the current timer tick; entries that are not yet due remain queued
/// for a later pass.
#[cfg(feature = "alloc")]
pub fn cleanup_dead_tasks() {
    use alloc::boxed::Box;

    let current_tick = crate::arch::timer::get_ticks();
    let mut queue = CLEANUP_QUEUE.lock();

    // Index-based sweep: `swap_remove` moves the last element into slot `i`,
    // so `i` is only advanced when the current entry is kept.
    let mut i = 0;
    while i < queue.len() {
        let (TaskPtr(task_ptr), cleanup_tick) = queue[i];

        if current_tick >= cleanup_tick {
            // Remove from queue before freeing so the entry is consumed
            // exactly once.
            queue.swap_remove(i);

            // SAFETY: the pointer was enqueued via `defer_task_cleanup`,
            // whose contract requires it to come from `Box::leak` and to be
            // off every scheduler queue, enqueued at most once. Its cleanup
            // delay has elapsed, so reconstructing and dropping the Box is
            // sound and happens exactly once.
            unsafe {
                drop(Box::from_raw(task_ptr.as_ptr()));
            }

            kprintln!("[SCHED] Cleaned up dead task");
        } else {
            i += 1;
        }
    }
}
66
/// Perform load balancing across CPUs.
///
/// Scans every online CPU's load and, if the spread between the busiest and
/// idlest CPU exceeds 20 (in the units reported by `cpu_info.load`), migrates
/// up to 3 tasks from the busiest CPU to the idlest.
///
/// Fixes over the previous version:
/// - `min_load` was initialized to 100, silently assuming loads never exceed
///   100; `Option` tracking removes that assumption.
/// - With zero or one online CPU, the old code could attempt a "migration"
///   from a CPU to itself (or to a never-validated default CPU 0); we now
///   bail out unless two distinct online CPUs were found.
/// - Removed the function-local `use core::sync::atomic::Ordering;` that
///   shadowed the identical module-level import.
#[cfg(feature = "alloc")]
pub fn balance_load() {
    // Busiest/idlest online CPU found so far, as (cpu_id, load).
    let mut busiest: Option<(u8, u8)> = None;
    let mut idlest: Option<(u8, u8)> = None;

    for cpu_id in 0..smp::MAX_CPUS as u8 {
        if let Some(cpu_data) = smp::per_cpu(cpu_id) {
            if cpu_data.cpu_info.is_online() {
                let load = cpu_data.cpu_info.load.load(Ordering::Relaxed);

                if busiest.map_or(true, |(_, max)| load > max) {
                    busiest = Some((cpu_id, load));
                }
                if idlest.map_or(true, |(_, min)| load < min) {
                    idlest = Some((cpu_id, load));
                }
            }
        }
    }

    // No online CPUs found: nothing to balance.
    let (busiest_cpu, max_load) = match busiest {
        Some(found) => found,
        None => return,
    };
    let (idlest_cpu, min_load) = match idlest {
        Some(found) => found,
        None => return,
    };

    // A single online CPU is both busiest and idlest; nothing to balance.
    if busiest_cpu == idlest_cpu {
        return;
    }

    // If imbalance is significant, migrate tasks.
    let imbalance = max_load.saturating_sub(min_load);
    if imbalance > 20 {
        // One task per 20 units of imbalance, capped at 3 per pass. Since
        // `imbalance > 20`, this is always at least 1.
        let tasks_to_migrate = ((imbalance / 20) as u32).min(3);

        kprintln!("[SCHED] Load balancing: migrating tasks");

        // Record load balance metric.
        metrics::SCHEDULER_METRICS.record_load_balance();

        // Perform actual task migration.
        migrate_tasks(busiest_cpu, idlest_cpu, tasks_to_migrate);
    }
}
113
/// Migrate up to `count` tasks from `source_cpu`'s ready queue to
/// `target_cpu`'s ready queue.
///
/// Only tasks whose affinity permits running on `target_cpu` are moved;
/// incompatible tasks are returned to the source queue.
///
/// BUG FIX: if the target CPU's per-CPU data was unavailable, the previous
/// version silently dropped the already-dequeued task pointers — the tasks
/// vanished from scheduling, their memory leaked, and the source CPU's
/// `nr_running` remained permanently understated. They are now returned to
/// the source queue with accounting restored.
#[cfg(feature = "alloc")]
fn migrate_tasks(source_cpu: u8, target_cpu: u8, count: u32) {
    use alloc::vec::Vec;

    let source_cpu_data = match smp::per_cpu(source_cpu) {
        Some(data) => data,
        None => return,
    };

    // Phase 1: pull up to `count` affinity-compatible tasks off the source
    // queue, under the source queue lock.
    let mut pulled = Vec::new();
    {
        let mut queue = source_cpu_data.cpu_info.ready_queue.lock();

        for _ in 0..count {
            if let Some(task_ptr) = queue.dequeue() {
                // SAFETY: `task_ptr` is a valid NonNull<Task> returned by
                // `queue.dequeue()`. We hold the queue lock so the task is
                // not concurrently modified; we only read `can_run_on`.
                unsafe {
                    if task_ptr.as_ref().can_run_on(target_cpu) {
                        pulled.push(task_ptr);
                    } else {
                        // Incompatible affinity: put it back.
                        // NOTE(review): this re-enqueues at the back, so on a
                        // short queue a later iteration of this bounded loop
                        // may examine the same task again.
                        queue.enqueue(task_ptr);
                    }
                }
            }
        }

        // Account for the tasks leaving this CPU.
        source_cpu_data
            .cpu_info
            .nr_running
            .fetch_sub(pulled.len() as u32, Ordering::Relaxed);
        source_cpu_data.cpu_info.update_load();
    }

    if pulled.is_empty() {
        return;
    }

    // Phase 2: hand the pulled tasks to the target CPU, or restore them to
    // the source queue if the target's per-CPU data cannot be obtained.
    let target_cpu_data = match smp::per_cpu(target_cpu) {
        Some(data) => data,
        None => {
            let restored = pulled.len() as u32;
            let mut queue = source_cpu_data.cpu_info.ready_queue.lock();
            for task_ptr in pulled {
                // SAFETY: each pointer was just dequeued from this queue and
                // is a valid task not reachable from any other queue;
                // re-enqueueing under the source queue lock restores the
                // original state.
                unsafe {
                    queue.enqueue(task_ptr);
                }
            }
            source_cpu_data
                .cpu_info
                .nr_running
                .fetch_add(restored, Ordering::Relaxed);
            source_cpu_data.cpu_info.update_load();
            return;
        }
    };

    // Every pulled task is enqueued below, so the migrated count is known
    // up front.
    let migrated = pulled.len() as u32;
    {
        let mut target_queue = target_cpu_data.cpu_info.ready_queue.lock();

        for task_ptr in pulled {
            // SAFETY: `task_ptr` was dequeued from the source CPU and is not
            // reachable from any queue, so we have exclusive access. We hold
            // the target queue lock while updating migration bookkeeping and
            // enqueueing.
            unsafe {
                let task_mut = task_ptr.as_ptr();

                // Update task's CPU assignment tracking.
                (*task_mut).last_cpu = Some(source_cpu);
                (*task_mut).migrations += 1;

                // Enqueue on target CPU.
                target_queue.enqueue(task_ptr);
            }
        }
    }

    // Account for the tasks arriving on the target CPU.
    target_cpu_data
        .cpu_info
        .nr_running
        .fetch_add(migrated, Ordering::Relaxed);
    target_cpu_data.cpu_info.update_load();

    // Wake up target CPU if idle so it notices the new work.
    if target_cpu_data.cpu_info.is_idle() {
        smp::send_ipi(target_cpu, 0);
    }

    kprintln!("[SCHED] Successfully migrated tasks");

    // Record migration metrics.
    for _ in 0..migrated {
        metrics::SCHEDULER_METRICS.record_migration();
    }
}
199}