use alloc::{collections::BTreeMap, vec::Vec};
use core::sync::atomic::{AtomicU64, AtomicUsize, Ordering};

use spin::RwLock;

use crate::sync::once_lock::OnceLock;

/// Identifier for a NUMA node (ACPI proximity domain).
pub type NodeId = u32;

/// Identifier for a logical CPU.
pub type CpuId = u32;

/// A parsed entry from the ACPI SRAT (System Resource Affinity Table).
#[derive(Debug, Clone)]
pub enum SratEntry {
    /// Associates a local APIC ID with a proximity domain.
    ProcessorAffinity {
        apic_id: u32,
        domain: u32,
        flags: u32,
    },
    /// Associates a physical memory range with a proximity domain.
    MemoryAffinity {
        domain: u32,
        base: u64,
        length: u64,
        flags: u32,
    },
}

/// Parse raw SRAT bytes into affinity entries.
///
/// Returns an empty list when the table is truncated or malformed.
pub fn parse_srat(srat_data: &[u8]) -> Vec<SratEntry> {
    let mut entries = Vec::new();
    // SRAT layout: 36-byte standard ACPI header plus 12 reserved bytes.
    let header_size = 48;
    if srat_data.len() < header_size {
        return entries;
    }

    let mut offset = header_size;
    while offset + 2 <= srat_data.len() {
        let entry_type = srat_data[offset];
        let entry_len = srat_data[offset + 1] as usize;

        if entry_len < 2 || offset + entry_len > srat_data.len() {
            break;
        }

        match entry_type {
            // Type 0: Processor Local APIC/SAPIC Affinity.
            0 => {
                if entry_len >= 16 && offset + 16 <= srat_data.len() {
                    // The proximity domain is split across the entry:
                    // bits 0-7 live at offset 2, bits 8-31 at offsets 9-11.
                    let domain_low = srat_data[offset + 2] as u32;
                    let domain_high = (srat_data[offset + 9] as u32)
                        | ((srat_data[offset + 10] as u32) << 8)
                        | ((srat_data[offset + 11] as u32) << 16);
                    let domain = domain_low | (domain_high << 8);
                    let apic_id = srat_data[offset + 3] as u32;
                    let flags = u32::from_le_bytes([
                        srat_data[offset + 4],
                        srat_data[offset + 5],
                        srat_data[offset + 6],
                        srat_data[offset + 7],
                    ]);
                    entries.push(SratEntry::ProcessorAffinity {
                        apic_id,
                        domain,
                        flags,
                    });
                }
            }
            // Type 1: Memory Affinity.
            1 => {
                if entry_len >= 40 && offset + 40 <= srat_data.len() {
                    let domain = u32::from_le_bytes([
                        srat_data[offset + 2],
                        srat_data[offset + 3],
                        srat_data[offset + 4],
                        srat_data[offset + 5],
                    ]);
                    let base = u64::from_le_bytes([
                        srat_data[offset + 8],
                        srat_data[offset + 9],
                        srat_data[offset + 10],
                        srat_data[offset + 11],
                        srat_data[offset + 12],
                        srat_data[offset + 13],
                        srat_data[offset + 14],
                        srat_data[offset + 15],
                    ]);
                    let length = u64::from_le_bytes([
                        srat_data[offset + 16],
                        srat_data[offset + 17],
                        srat_data[offset + 18],
                        srat_data[offset + 19],
                        srat_data[offset + 20],
                        srat_data[offset + 21],
                        srat_data[offset + 22],
                        srat_data[offset + 23],
                    ]);
                    let flags = u32::from_le_bytes([
                        srat_data[offset + 28],
                        srat_data[offset + 29],
                        srat_data[offset + 30],
                        srat_data[offset + 31],
                    ]);
                    entries.push(SratEntry::MemoryAffinity {
                        domain,
                        base,
                        length,
                        flags,
                    });
                }
            }
            _ => {
                // Skip entry types we do not handle (e.g. x2APIC affinity).
            }
        }

        offset += entry_len;
    }

    entries
}

/// Node-to-node distances parsed from the ACPI SLIT
/// (System Locality Information Table).
#[derive(Debug, Clone)]
pub struct SlitEntry {
    /// Row-major distance matrix: `distances[from][to]`.
    pub distances: Vec<Vec<u8>>,
}

/// Parse raw SLIT bytes into a distance matrix.
///
/// Returns an empty matrix when the table is truncated or inconsistent.
pub fn parse_slit(slit_data: &[u8]) -> SlitEntry {
    // Standard 36-byte ACPI header, followed by a u64 locality count.
    let header_size = 36;
    if slit_data.len() < header_size + 8 {
        return SlitEntry {
            distances: Vec::new(),
        };
    }

    let num_localities = u64::from_le_bytes([
        slit_data[header_size],
        slit_data[header_size + 1],
        slit_data[header_size + 2],
        slit_data[header_size + 3],
        slit_data[header_size + 4],
        slit_data[header_size + 5],
        slit_data[header_size + 6],
        slit_data[header_size + 7],
    ]) as usize;

    let matrix_start = header_size + 8;
    // Guard against overflow from a corrupt locality count.
    let matrix_size = match num_localities.checked_mul(num_localities) {
        Some(size) => size,
        None => {
            return SlitEntry {
                distances: Vec::new(),
            }
        }
    };

    if slit_data.len() - matrix_start < matrix_size {
        return SlitEntry {
            distances: Vec::new(),
        };
    }

    let mut distances = Vec::with_capacity(num_localities);
    for from in 0..num_localities {
        let mut row = Vec::with_capacity(num_localities);
        for to in 0..num_localities {
            row.push(slit_data[matrix_start + from * num_localities + to]);
        }
        distances.push(row);
    }

    SlitEntry { distances }
}

/// Look up the SLIT distance between two nodes.
pub fn get_distance(slit: &SlitEntry, from_node: u32, to_node: u32) -> u8 {
    let from = from_node as usize;
    let to = to_node as usize;
    if from < slit.distances.len() && to < slit.distances[from].len() {
        slit.distances[from][to]
    } else {
        // Nodes outside the matrix are treated as maximally distant.
        255
    }
}

/// A CPU discovered via the ACPI MADT.
#[derive(Debug, Clone)]
pub struct CpuInfo {
    /// Local APIC ID.
    pub apic_id: u32,
    /// ACPI processor ID.
    pub processor_id: u32,
    /// Whether the CPU is enabled and usable.
    pub enabled: bool,
}

/// Enumerate CPUs from the MADT, returning an empty list on
/// architectures or systems without one.
pub fn parse_madt_topology() -> Vec<CpuInfo> {
    #[cfg(target_arch = "x86_64")]
    {
        if let Some(cpus) = crate::arch::x86_64::acpi::find_madt_cpus() {
            return cpus
                .into_iter()
                .map(|(apic_id, proc_id, usable)| CpuInfo {
                    apic_id,
                    processor_id: proc_id,
                    enabled: usable,
                })
                .collect();
        }
    }
    Vec::new()
}

/// A NUMA node assembled from SRAT affinity entries.
#[derive(Debug, Clone)]
pub struct NumaNode {
    /// ACPI proximity domain ID.
    pub domain_id: u32,
    /// APIC IDs of the CPUs on this node.
    pub cpus: Vec<u32>,
    /// Base physical address of the node's first memory range.
    pub memory_base: u64,
    /// Total memory attached to this node, in bytes.
    pub memory_size: u64,
}

/// Combine SRAT and SLIT data into a `NumaTopology`.
pub fn build_topology(srat: &[SratEntry], slit: &SlitEntry) -> NumaTopology {
    let mut nodes: BTreeMap<u32, NumaNode> = BTreeMap::new();

    for entry in srat {
        match entry {
            SratEntry::ProcessorAffinity {
                apic_id,
                domain,
                flags,
            } => {
                // Bit 0 of the flags marks the entry as enabled.
                if flags & 1 == 0 {
                    continue;
                }
                let node = nodes.entry(*domain).or_insert_with(|| NumaNode {
                    domain_id: *domain,
                    cpus: Vec::new(),
                    memory_base: 0,
                    memory_size: 0,
                });
                node.cpus.push(*apic_id);
            }
            SratEntry::MemoryAffinity {
                domain,
                base,
                length,
                flags,
            } => {
                if flags & 1 == 0 {
                    continue;
                }
                let node = nodes.entry(*domain).or_insert_with(|| NumaNode {
                    domain_id: *domain,
                    cpus: Vec::new(),
                    memory_base: 0,
                    memory_size: 0,
                });
                // Record the base of the first range; sum all range sizes.
                if node.memory_size == 0 {
                    node.memory_base = *base;
                }
                node.memory_size += *length;
            }
        }
    }

    let node_count = nodes.len().max(1);
    let mut cpus_per_node = Vec::with_capacity(node_count);
    let mut memory_per_node = Vec::with_capacity(node_count);

    for node in nodes.values() {
        cpus_per_node.push(node.cpus.clone());
        memory_per_node.push(node.memory_size);
    }

    let mut distance_matrix = Vec::with_capacity(node_count);
    for from in 0..node_count {
        let mut row = Vec::with_capacity(node_count);
        for to in 0..node_count {
            let dist = get_distance(slit, from as u32, to as u32);
            row.push(dist as u32);
        }
        distance_matrix.push(row);
    }

    // Without SLIT data every lookup above returns 255, so fall back to
    // the conventional ACPI defaults: 10 for local access, 20 for remote.
    if slit.distances.is_empty() {
        distance_matrix.clear();
        for i in 0..node_count {
            let mut row = Vec::with_capacity(node_count);
            for j in 0..node_count {
                row.push(if i == j { 10 } else { 20 });
            }
            distance_matrix.push(row);
        }
    }

    NumaTopology {
        node_count,
        cpus_per_node,
        memory_per_node,
        distance_matrix,
    }
}

/// System NUMA topology used by the scheduler.
#[derive(Debug, Clone)]
pub struct NumaTopology {
    /// Number of NUMA nodes (at least 1).
    pub node_count: usize,
    /// CPUs belonging to each node, indexed by node.
    pub cpus_per_node: Vec<Vec<CpuId>>,
    /// Memory per node in bytes, indexed by node.
    pub memory_per_node: Vec<u64>,
    /// Node-to-node distances, indexed `[from][to]`.
    pub distance_matrix: Vec<Vec<u32>>,
}

impl NumaTopology {
    /// Create an empty single-node topology; callers fill in the vectors.
    pub fn new() -> Self {
        Self {
            node_count: 1,
            cpus_per_node: Vec::new(),
            memory_per_node: Vec::new(),
            distance_matrix: Vec::new(),
        }
    }

    /// Detect the topology from ACPI tables, falling back to a single
    /// UMA node when no usable SRAT is present.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        {
            if let Some(topo) = detect_from_acpi() {
                return topo;
            }
        }

        detect_uma_fallback()
    }

    /// Return the node that owns the given CPU, if any.
    pub fn cpu_to_node(&self, cpu: CpuId) -> Option<NodeId> {
        for (node_id, cpus) in self.cpus_per_node.iter().enumerate() {
            if cpus.contains(&cpu) {
                return Some(node_id as NodeId);
            }
        }
        None
    }

    /// Distance between two nodes; `u32::MAX` for out-of-range nodes.
    pub fn distance(&self, from: NodeId, to: NodeId) -> u32 {
        if from as usize >= self.node_count || to as usize >= self.node_count {
            return u32::MAX;
        }
        self.distance_matrix[from as usize][to as usize]
    }

    /// Whether two CPUs live on the same node.
    pub fn same_node(&self, cpu1: CpuId, cpu2: CpuId) -> bool {
        match (self.cpu_to_node(cpu1), self.cpu_to_node(cpu2)) {
            (Some(n1), Some(n2)) => n1 == n2,
            _ => false,
        }
    }
}

impl Default for NumaTopology {
    fn default() -> Self {
        Self::detect()
    }
}

#[cfg(target_arch = "x86_64")]
fn detect_from_acpi() -> Option<NumaTopology> {
    let srat_data = crate::arch::x86_64::acpi::find_srat()?;
    let srat_entries = parse_srat(srat_data);
    if srat_entries.is_empty() {
        return None;
    }

    // The SLIT is optional; an empty matrix selects the default distances.
    let slit = if let Some(slit_data) = crate::arch::x86_64::acpi::find_slit() {
        parse_slit(slit_data)
    } else {
        SlitEntry {
            distances: Vec::new(),
        }
    };

    let topo = build_topology(&srat_entries, &slit);
    if topo.cpus_per_node.is_empty() {
        return None;
    }

    crate::println!(
        "[NUMA] ACPI topology: {} nodes, {} total CPUs",
        topo.node_count,
        topo.cpus_per_node.iter().map(|c| c.len()).sum::<usize>()
    );

    Some(topo)
}

/// Build a single-node topology covering all detected CPUs and memory.
fn detect_uma_fallback() -> NumaTopology {
    let mut topo = NumaTopology::new();
    let cpu_count = detect_cpu_count();

    let cpus: Vec<CpuId> = (0..cpu_count).collect();
    topo.cpus_per_node.push(cpus);

    let mem_stats = crate::mm::get_memory_stats();
    let total_bytes = (mem_stats.total_frames as u64) * (crate::mm::FRAME_SIZE as u64);
    // Assume 256 MiB when the memory manager reports nothing yet.
    let node_memory = if total_bytes > 0 {
        total_bytes
    } else {
        256 * 1024 * 1024
    };
    topo.memory_per_node.push(node_memory);

    // A single node is always local to itself.
    topo.distance_matrix.push(alloc::vec![10]);

    topo
}

/// Per-node load statistics, updated atomically by the scheduler.
#[derive(Debug)]
pub struct NodeLoad {
    /// Number of processes currently assigned to this node.
    pub process_count: AtomicUsize,
    /// Recent CPU utilization estimate for this node.
    pub cpu_utilization: AtomicU64,
    /// Memory pressure estimate for this node.
    pub memory_pressure: AtomicU64,
    /// Combined run-queue depth across the node's CPUs.
    pub queue_depth: AtomicUsize,
}

impl NodeLoad {
    pub const fn new() -> Self {
        Self {
            process_count: AtomicUsize::new(0),
            cpu_utilization: AtomicU64::new(0),
            memory_pressure: AtomicU64::new(0),
            queue_depth: AtomicUsize::new(0),
        }
    }

    /// Record a process being assigned to this node.
    pub fn add_process(&self) {
        self.process_count.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a process leaving this node.
    pub fn remove_process(&self) {
        self.process_count.fetch_sub(1, Ordering::Relaxed);
    }

    /// Weighted load heuristic combining process count, CPU utilization,
    /// and memory pressure; lower means less loaded.
    pub fn load_factor(&self) -> u64 {
        let proc_count = self.process_count.load(Ordering::Relaxed) as u64;
        let cpu_util = self.cpu_utilization.load(Ordering::Relaxed);
        let mem_pressure = self.memory_pressure.load(Ordering::Relaxed);

        (proc_count * 1000 + cpu_util * 40 + mem_pressure * 20) / 100
    }
}

impl Default for NodeLoad {
    fn default() -> Self {
        Self::new()
    }
}

/// NUMA-aware CPU selection and process placement.
pub struct NumaScheduler {
    /// Detected system topology.
    topology: NumaTopology,
    /// Load statistics, one entry per node.
    node_loads: Vec<NodeLoad>,
    /// Which node each process is currently assigned to.
    process_nodes: RwLock<BTreeMap<u64, NodeId>>,
}

impl NumaScheduler {
    pub fn new(topology: NumaTopology) -> Self {
        let node_count = topology.node_count;

        let mut node_loads = Vec::with_capacity(node_count);
        for _ in 0..node_count {
            node_loads.push(NodeLoad::new());
        }

        Self {
            topology,
            node_loads,
            process_nodes: RwLock::new(BTreeMap::new()),
        }
    }

    /// Pick a CPU for a process, preferring the node that holds its
    /// memory and otherwise the least-loaded node.
    pub fn select_cpu(&self, process_id: u64, memory_node: Option<NodeId>) -> CpuId {
        let target_node = if let Some(mem_node) = memory_node {
            mem_node
        } else {
            self.find_least_loaded_node()
        };

        self.process_nodes.write().insert(process_id, target_node);
        self.node_loads[target_node as usize].add_process();

        self.select_cpu_in_node(target_node)
    }

    fn find_least_loaded_node(&self) -> NodeId {
        let mut min_load = u64::MAX;
        let mut best_node = 0;

        for (node_id, load) in self.node_loads.iter().enumerate() {
            let load_factor = load.load_factor();
            if load_factor < min_load {
                min_load = load_factor;
                best_node = node_id as NodeId;
            }
        }

        best_node
    }

    /// Pick the CPU with the shortest run queue within a node, falling
    /// back to round-robin when no per-CPU data is available yet.
    fn select_cpu_in_node(&self, node: NodeId) -> CpuId {
        let cpus = &self.topology.cpus_per_node[node as usize];

        if cpus.is_empty() {
            return 0;
        }

        let mut best_cpu = cpus[0];
        let mut min_queue = u32::MAX;

        for &cpu in cpus {
            if let Some(cpu_data) = super::smp::per_cpu(cpu as u8) {
                let queue_len = cpu_data
                    .cpu_info
                    .nr_running
                    .load(core::sync::atomic::Ordering::Relaxed);
                if queue_len < min_queue {
                    min_queue = queue_len;
                    best_cpu = cpu;
                }
            }
        }

        // No per-CPU data yet (early boot): rotate through the node's CPUs.
        if min_queue == u32::MAX {
            static RR_COUNTER: AtomicU64 = AtomicU64::new(0);
            let idx = RR_COUNTER.fetch_add(1, Ordering::Relaxed) as usize % cpus.len();
            return cpus[idx];
        }

        best_cpu
    }

    /// Suggest a migration target when another node's load falls below
    /// 70% of the current node's; the margin provides hysteresis
    /// against ping-ponging.
    pub fn should_migrate(&self, process_id: u64) -> Option<NodeId> {
        let current_node = self.process_nodes.read().get(&process_id).copied()?;
        let current_load = self.node_loads[current_node as usize].load_factor();

        for (node_id, load) in self.node_loads.iter().enumerate() {
            if node_id == current_node as usize {
                continue;
            }

            let other_load = load.load_factor();

            if other_load < current_load * 70 / 100 {
                return Some(node_id as NodeId);
            }
        }

        None
    }

    /// Move a process's assignment to a new node, updating load counters.
    pub fn migrate_process(&self, process_id: u64, new_node: NodeId) {
        if let Some(old_node) = self.process_nodes.write().insert(process_id, new_node) {
            self.node_loads[old_node as usize].remove_process();
        }
        self.node_loads[new_node as usize].add_process();
    }

    pub fn topology(&self) -> &NumaTopology {
        &self.topology
    }

    pub fn node_load(&self, node: NodeId) -> Option<&NodeLoad> {
        self.node_loads.get(node as usize)
    }
}

/// Count usable CPUs, preferring the MADT and falling back to probing
/// per-CPU data; always reports at least one CPU.
fn detect_cpu_count() -> u32 {
    let madt_cpus = parse_madt_topology();
    if !madt_cpus.is_empty() {
        let enabled_count = madt_cpus.iter().filter(|c| c.enabled).count() as u32;
        if enabled_count > 0 {
            return enabled_count;
        }
    }

    let mut count: u32 = 0;
    for cpu_id in 0..super::smp::MAX_CPUS as u8 {
        if super::smp::per_cpu(cpu_id).is_some() {
            count += 1;
        }
    }
    if count == 0 {
        1
    } else {
        count
    }
}

static NUMA_SCHEDULER: OnceLock<NumaScheduler> = OnceLock::new();

/// Detect the topology and install the global NUMA scheduler.
pub fn init() {
    let topology = NumaTopology::detect();
    let scheduler = NumaScheduler::new(topology);

    if NUMA_SCHEDULER.set(scheduler).is_err() {
        crate::kprintln!("[NUMA] Warning: NUMA scheduler already initialized, skipping");
        return;
    }

    crate::println!("[NUMA] Initialized NUMA-aware scheduler");
}

/// Access the global NUMA scheduler, if `init` has run.
pub fn get_numa_scheduler() -> Option<&'static NumaScheduler> {
    NUMA_SCHEDULER.get()
}
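
// Illustrative sketch only, not part of the module's wiring: how a caller
// might pick a CPU for a new process once `init` has run. `numa_spawn_hint`
// is a hypothetical helper name, not an API this kernel defines elsewhere.
#[allow(dead_code)]
fn numa_spawn_hint(process_id: u64, preferred_node: Option<NodeId>) -> CpuId {
    match get_numa_scheduler() {
        Some(sched) => sched.select_cpu(process_id, preferred_node),
        // Before init runs, fall back to the boot CPU.
        None => 0,
    }
}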

#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use alloc::vec;

    use super::*;

    #[test]
    fn test_topology_detection() {
        let topo = NumaTopology::detect();
        assert!(topo.node_count > 0);
        assert!(!topo.cpus_per_node.is_empty());
    }

    #[test]
    fn test_node_load() {
        let load = NodeLoad::new();
        assert_eq!(load.process_count.load(Ordering::Relaxed), 0);

        load.add_process();
        assert_eq!(load.process_count.load(Ordering::Relaxed), 1);

        load.remove_process();
        assert_eq!(load.process_count.load(Ordering::Relaxed), 0);
    }
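
    // Added check (sketch): pins down the load_factor weighting with
    // concrete numbers: (2 * 1000 + 50 * 40 + 10 * 20) / 100 = 42.
    #[test]
    fn test_node_load_factor_weighting() {
        let load = NodeLoad::new();
        load.add_process();
        load.add_process();
        load.cpu_utilization.store(50, Ordering::Relaxed);
        load.memory_pressure.store(10, Ordering::Relaxed);
        assert_eq!(load.load_factor(), 42);
    }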

    #[test]
    fn test_numa_scheduler() {
        let topo = NumaTopology::detect();
        let scheduler = NumaScheduler::new(topo);

        let cpu = scheduler.select_cpu(1, None);
        assert!(cpu < 8);
    }
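
    // Added sketch: exercises cpu_to_node, same_node, and distance on a
    // hand-built two-node topology rather than whatever the host reports.
    #[test]
    fn test_cpu_to_node_and_same_node() {
        let topo = NumaTopology {
            node_count: 2,
            cpus_per_node: vec![vec![0, 1], vec![2, 3]],
            memory_per_node: vec![0, 0],
            distance_matrix: vec![vec![10, 20], vec![20, 10]],
        };
        assert_eq!(topo.cpu_to_node(0), Some(0));
        assert_eq!(topo.cpu_to_node(3), Some(1));
        assert_eq!(topo.cpu_to_node(9), None);
        assert!(topo.same_node(0, 1));
        assert!(!topo.same_node(1, 2));
        assert_eq!(topo.distance(0, 1), 20);
        assert_eq!(topo.distance(5, 0), u32::MAX);
    }

    // Added sketch: checks that migrate_process keeps per-node process
    // counts consistent; asserts only invariants that hold for any
    // detected node count.
    #[test]
    fn test_migrate_process_bookkeeping() {
        let topo = NumaTopology::detect();
        let scheduler = NumaScheduler::new(topo);

        scheduler.select_cpu(42, Some(0));
        let count = |n: NodeId| {
            scheduler
                .node_load(n)
                .unwrap()
                .process_count
                .load(Ordering::Relaxed)
        };
        assert_eq!(count(0), 1);

        // Re-assigning to the same node removes the old entry first.
        scheduler.migrate_process(42, 0);
        assert_eq!(count(0), 1);
    }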

    #[test]
    fn test_parse_srat_empty() {
        let entries = parse_srat(&[0u8; 48]);
        assert!(entries.is_empty());
    }
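
    // Added sketch: a synthetic SRAT with one processor and one memory
    // affinity entry; the byte layout mirrors the offsets parse_srat
    // reads, not a dump from real firmware.
    #[test]
    fn test_parse_srat_entries() {
        let mut data = vec![0u8; 48 + 16 + 40];

        // Processor affinity: type 0, length 16, domain 1, APIC ID 5, enabled.
        data[48] = 0;
        data[49] = 16;
        data[50] = 1; // proximity domain, bits 0-7
        data[51] = 5; // APIC ID
        data[52] = 1; // flags: enabled

        // Memory affinity: type 1, length 40, base 0x1000, length 0x2000.
        let m = 48 + 16;
        data[m] = 1;
        data[m + 1] = 40;
        data[m + 2] = 1; // proximity domain (little-endian u32)
        data[m + 9] = 0x10; // base = 0x1000 (little-endian u64)
        data[m + 17] = 0x20; // length = 0x2000
        data[m + 28] = 1; // flags: enabled

        let entries = parse_srat(&data);
        assert_eq!(entries.len(), 2);
        assert!(matches!(
            entries[0],
            SratEntry::ProcessorAffinity {
                apic_id: 5,
                domain: 1,
                flags: 1,
            }
        ));
        assert!(matches!(
            entries[1],
            SratEntry::MemoryAffinity {
                domain: 1,
                base: 0x1000,
                length: 0x2000,
                flags: 1,
            }
        ));
    }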

    #[test]
    fn test_parse_slit_basic() {
        let mut data = vec![0u8; 36 + 8 + 4];
        // Locality count (low byte of the u64 at offset 36).
        data[36] = 2;
        // 2x2 distance matrix: local 10, remote 20.
        data[44] = 10;
        data[45] = 20;
        data[46] = 20;
        data[47] = 10;

        let slit = parse_slit(&data);
        assert_eq!(slit.distances.len(), 2);
        assert_eq!(get_distance(&slit, 0, 0), 10);
        assert_eq!(get_distance(&slit, 0, 1), 20);
        assert_eq!(get_distance(&slit, 1, 0), 20);
        assert_eq!(get_distance(&slit, 1, 1), 10);
    }
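
    // Added sketch: out-of-range lookups fall back to 255, the
    // "maximally distant" sentinel used by get_distance.
    #[test]
    fn test_get_distance_out_of_range() {
        let slit = SlitEntry {
            distances: vec![vec![10]],
        };
        assert_eq!(get_distance(&slit, 0, 0), 10);
        assert_eq!(get_distance(&slit, 0, 1), 255);
        assert_eq!(get_distance(&slit, 1, 0), 255);
    }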

    #[test]
    fn test_build_topology_basic() {
        let srat = vec![
            SratEntry::ProcessorAffinity {
                apic_id: 0,
                domain: 0,
                flags: 1,
            },
            SratEntry::ProcessorAffinity {
                apic_id: 1,
                domain: 0,
                flags: 1,
            },
            SratEntry::MemoryAffinity {
                domain: 0,
                base: 0,
                length: 1024 * 1024 * 1024,
                flags: 1,
            },
        ];
        let slit = SlitEntry {
            distances: Vec::new(),
        };
        let topo = build_topology(&srat, &slit);
        assert_eq!(topo.node_count, 1);
        assert_eq!(topo.cpus_per_node[0].len(), 2);
        assert_eq!(topo.memory_per_node[0], 1024 * 1024 * 1024);
    }
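
    // Added sketch: with no SLIT data, build_topology falls back to the
    // conventional ACPI defaults (10 local, 20 remote); this exercises
    // that fallback path directly on a two-node SRAT.
    #[test]
    fn test_build_topology_default_distances() {
        let srat = vec![
            SratEntry::ProcessorAffinity {
                apic_id: 0,
                domain: 0,
                flags: 1,
            },
            SratEntry::ProcessorAffinity {
                apic_id: 1,
                domain: 1,
                flags: 1,
            },
        ];
        let slit = SlitEntry {
            distances: Vec::new(),
        };
        let topo = build_topology(&srat, &slit);
        assert_eq!(topo.node_count, 2);
        assert_eq!(topo.distance_matrix[0][0], 10);
        assert_eq!(topo.distance_matrix[0][1], 20);
        assert_eq!(topo.distance_matrix[1][0], 20);
        assert_eq!(topo.distance_matrix[1][1], 10);
    }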

    #[test]
    fn test_disabled_entries_skipped() {
        let srat = vec![
            SratEntry::ProcessorAffinity {
                apic_id: 0,
                domain: 0,
                flags: 0, // disabled: bit 0 clear
            },
            SratEntry::ProcessorAffinity {
                apic_id: 1,
                domain: 0,
                flags: 1, // enabled
            },
        ];
        let slit = SlitEntry {
            distances: Vec::new(),
        };
        let topo = build_topology(&srat, &slit);
        assert_eq!(topo.cpus_per_node[0].len(), 1);
        assert_eq!(topo.cpus_per_node[0][0], 1);
    }
}
859}