⚠️ VeridianOS Kernel Documentation - Low-level `no_std` kernel code. Most functions here are safe Rust; operations that require `unsafe` (e.g. inline assembly) are explicitly marked and carry SAFETY comments.

veridian_kernel/mm/
cache_topology.rs

1//! Cache topology detection and cache-aware memory allocation
2//!
3//! Detects CPU cache hierarchy (L1/L2/L3) using architecture-specific
4//! mechanisms:
5//! - x86_64: CPUID leaf 4 (Intel) and leaf 0x8000001D (AMD)
6//! - AArch64: hardcoded defaults (Cortex-A72)
7//! - RISC-V: hardcoded defaults
8//!
9//! Provides cache coloring support for NUMA-aware, cache-friendly page
10//! allocation.
11
12#![allow(dead_code)]
13
14use core::sync::atomic::{AtomicBool, Ordering};
15
16use spin::RwLock;
17
18use crate::error::KernelError;
19
/// Maximum number of cache entries we track. Sized generously (8) so the
/// common hierarchy (L1D, L1I, L2, L3) fits with headroom for extra
/// levels a CPU may report (e.g. L4 / victim caches).
const MAX_CACHE_LEVELS: usize = 8;
22
/// Classification of a cache by what it stores.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheType {
    /// Data cache (L1D)
    Data,
    /// Instruction cache (L1I)
    Instruction,
    /// Unified cache (L2, L3)
    Unified,
}

/// Position of a cache in the hierarchy.
///
/// The derived ordering follows distance from the core: `L1 < L2 < L3`,
/// which lets callers pick the last-level cache with a simple comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum CacheLevel {
    L1,
    L2,
    L3,
}

/// Geometry of a single detected cache.
#[derive(Debug, Clone, Copy)]
pub struct CacheInfo {
    /// Cache level (L1, L2, L3)
    pub level: CacheLevel,
    /// Cache type (data, instruction, unified)
    pub cache_type: CacheType,
    /// Cache line size in bytes
    pub line_size: u32,
    /// Number of ways of associativity
    pub ways: u32,
    /// Number of sets
    pub sets: u32,
    /// Total cache size in bytes (line_size * ways * sets)
    pub total_size: u64,
}

impl CacheInfo {
    /// Build a `CacheInfo`, deriving `total_size` from the geometry
    /// (`line_size * ways * sets`).
    pub const fn new(
        level: CacheLevel,
        cache_type: CacheType,
        line_size: u32,
        ways: u32,
        sets: u32,
    ) -> Self {
        let total_size = (line_size as u64) * (ways as u64) * (sets as u64);
        Self {
            level,
            cache_type,
            line_size,
            ways,
            sets,
            total_size,
        }
    }
}
78
/// Complete cache topology for the current CPU.
///
/// Populated once at boot by `init()` via the per-architecture detection
/// routines, then used to derive page-coloring parameters.
#[derive(Debug)]
pub struct CacheTopology {
    /// Cache levels detected (fixed capacity; only the first `count`
    /// entries are valid)
    caches: [Option<CacheInfo>; MAX_CACHE_LEVELS],
    /// Number of valid cache entries in `caches`
    count: usize,
    /// Number of cache colors for page coloring (>= 1; 1 means coloring
    /// is effectively disabled)
    num_colors: u16,
    /// Page size in bytes used for color computation
    page_size: u64,
}
91
92impl CacheTopology {
93    /// Create an empty topology
94    const fn new() -> Self {
95        Self {
96            caches: [None; MAX_CACHE_LEVELS],
97            count: 0,
98            num_colors: 1,
99            page_size: 4096,
100        }
101    }
102
103    /// Add a cache level to the topology
104    fn add_cache(&mut self, info: CacheInfo) {
105        if self.count < MAX_CACHE_LEVELS {
106            self.caches[self.count] = Some(info);
107            self.count += 1;
108        }
109    }
110
111    /// Get the number of detected cache levels
112    pub fn cache_count(&self) -> usize {
113        self.count
114    }
115
116    /// Get cache info by index
117    pub fn get(&self, index: usize) -> Option<&CacheInfo> {
118        if index < self.count {
119            self.caches[index].as_ref()
120        } else {
121            None
122        }
123    }
124
125    /// Find the last-level cache (LLC), typically L3 or L2
126    pub fn last_level_cache(&self) -> Option<&CacheInfo> {
127        let mut best: Option<&CacheInfo> = None;
128        for i in 0..self.count {
129            if let Some(ref info) = self.caches[i] {
130                if info.cache_type != CacheType::Instruction {
131                    match best {
132                        None => best = Some(info),
133                        Some(b) if info.level > b.level => best = Some(info),
134                        _ => {}
135                    }
136                }
137            }
138        }
139        best
140    }
141
142    /// Find L1 data cache
143    pub fn l1_data(&self) -> Option<&CacheInfo> {
144        for i in 0..self.count {
145            if let Some(ref info) = self.caches[i] {
146                if info.level == CacheLevel::L1
147                    && (info.cache_type == CacheType::Data || info.cache_type == CacheType::Unified)
148                {
149                    return Some(info);
150                }
151            }
152        }
153        None
154    }
155
156    /// Find L2 cache
157    pub fn l2_cache(&self) -> Option<&CacheInfo> {
158        for i in 0..self.count {
159            if let Some(ref info) = self.caches[i] {
160                if info.level == CacheLevel::L2 {
161                    return Some(info);
162                }
163            }
164        }
165        None
166    }
167
168    /// Find L3 cache
169    pub fn l3_cache(&self) -> Option<&CacheInfo> {
170        for i in 0..self.count {
171            if let Some(ref info) = self.caches[i] {
172                if info.level == CacheLevel::L3 {
173                    return Some(info);
174                }
175            }
176        }
177        None
178    }
179
180    /// Get the number of cache colors
181    pub fn num_colors(&self) -> u16 {
182        self.num_colors
183    }
184
185    /// Compute cache colors based on LLC parameters
186    ///
187    /// Number of colors = LLC_size / (page_size * associativity)
188    /// This determines how many distinct "color bins" exist in the LLC.
189    fn compute_colors(&mut self) {
190        if let Some(llc) = self.last_level_cache() {
191            let page_size = self.page_size;
192            let assoc = llc.ways as u64;
193            if assoc > 0 && page_size > 0 {
194                let colors = llc.total_size / (page_size * assoc);
195                // Clamp to reasonable range [1, 4096]
196                self.num_colors = if colors == 0 {
197                    1
198                } else if colors > 4096 {
199                    4096
200                } else {
201                    colors as u16
202                };
203            }
204        }
205    }
206}
207
/// Global cache topology (populated once at boot by `init()`).
///
/// Readers before `init()` completes observe the `CacheTopology::new()`
/// defaults: no caches and a single color.
static CACHE_TOPOLOGY: RwLock<CacheTopology> = RwLock::new(CacheTopology::new());

/// Whether init() has completed. Stored with `Release` ordering so a
/// `true` observed via `Acquire` happens-after topology population.
static INITIALIZED: AtomicBool = AtomicBool::new(false);
213
214// ---------------------------------------------------------------------------
215// x86_64 CPUID-based detection
216// ---------------------------------------------------------------------------
217
/// Execute the CPUID instruction for the given leaf (EAX) and subleaf
/// (ECX), returning the raw `(EAX, EBX, ECX, EDX)` results.
///
/// # Safety
///
/// CPUID is unprivileged and available on every x86_64 CPU, so executing
/// it is always sound; the function is `unsafe` only because it contains
/// inline assembly.
#[cfg(target_arch = "x86_64")]
unsafe fn cpuid(leaf: u32, subleaf: u32) -> (u32, u32, u32, u32) {
    let (eax, ecx, edx): (u32, u32, u32);
    let ebx: u64;
    // SAFETY: LLVM reserves RBX, so it cannot be named as an asm operand.
    // Save it in a scratch register and restore it with XCHG, which also
    // moves the CPUID result out of RBX into the output. (The previous
    // push/pop approach wrote below RSP, which can clobber the red zone
    // if the kernel is ever built without -C no-redzone, and its comment
    // wrongly called RBX the frame pointer.) CPUID reads/writes only
    // registers, hence `nomem, nostack`.
    core::arch::asm!(
        "mov {tmp}, rbx",
        "cpuid",
        "xchg {tmp}, rbx",
        tmp = out(reg) ebx,
        inout("eax") leaf => eax,
        inout("ecx") subleaf => ecx,
        out("edx") edx,
        options(nomem, nostack),
    );
    (eax, ebx as u32, ecx, edx)
}
236
237/// Detect cache topology on x86_64 using CPUID
238#[cfg(target_arch = "x86_64")]
239fn detect_x86_64(topology: &mut CacheTopology) {
240    // Try Intel deterministic cache parameters (leaf 4) first
241    detect_cpuid_leaf4(topology);
242
243    // If no caches found, try AMD extended topology (leaf 0x8000001D)
244    if topology.count == 0 {
245        detect_cpuid_amd(topology);
246    }
247
248    // If still no caches found, use sensible defaults
249    if topology.count == 0 {
250        apply_x86_defaults(topology);
251    }
252}
253
254/// Parse CPUID leaf 4 (Intel deterministic cache parameters)
255#[cfg(target_arch = "x86_64")]
256fn detect_cpuid_leaf4(topology: &mut CacheTopology) {
257    // Check max supported leaf
258    let (max_leaf, _, _, _) = unsafe { cpuid(0, 0) };
259    if max_leaf < 4 {
260        return;
261    }
262
263    for subleaf in 0..MAX_CACHE_LEVELS as u32 {
264        let (eax, ebx, ecx, _edx) = unsafe { cpuid(4, subleaf) };
265
266        // Cache type in EAX[4:0]: 0=no more, 1=data, 2=instruction, 3=unified
267        let cache_type_raw = eax & 0x1F;
268        if cache_type_raw == 0 {
269            break; // No more cache levels
270        }
271
272        let cache_type = match cache_type_raw {
273            1 => CacheType::Data,
274            2 => CacheType::Instruction,
275            3 => CacheType::Unified,
276            _ => continue,
277        };
278
279        // Cache level in EAX[7:5]
280        let level_raw = (eax >> 5) & 0x7;
281        let level = match level_raw {
282            1 => CacheLevel::L1,
283            2 => CacheLevel::L2,
284            3 => CacheLevel::L3,
285            _ => continue,
286        };
287
288        // EBX: line_size = EBX[11:0] + 1, partitions = EBX[21:12] + 1, ways =
289        // EBX[31:22] + 1
290        let line_size = (ebx & 0xFFF) + 1;
291        let partitions = ((ebx >> 12) & 0x3FF) + 1;
292        let ways = ((ebx >> 22) & 0x3FF) + 1;
293
294        // ECX: sets = ECX + 1
295        let sets = ecx + 1;
296
297        // Total size = ways * partitions * line_size * sets
298        let total_size = ways as u64 * partitions as u64 * line_size as u64 * sets as u64;
299
300        topology.add_cache(CacheInfo {
301            level,
302            cache_type,
303            line_size,
304            ways,
305            sets,
306            total_size,
307        });
308    }
309}
310
311/// Parse CPUID leaf 0x8000001D (AMD cache topology)
312#[cfg(target_arch = "x86_64")]
313fn detect_cpuid_amd(topology: &mut CacheTopology) {
314    // Check max extended leaf
315    let (max_ext, _, _, _) = unsafe { cpuid(0x80000000, 0) };
316    if max_ext < 0x8000001D {
317        return;
318    }
319
320    for subleaf in 0..MAX_CACHE_LEVELS as u32 {
321        let (eax, ebx, ecx, _edx) = unsafe { cpuid(0x8000001D, subleaf) };
322
323        // Same encoding as leaf 4
324        let cache_type_raw = eax & 0x1F;
325        if cache_type_raw == 0 {
326            break;
327        }
328
329        let cache_type = match cache_type_raw {
330            1 => CacheType::Data,
331            2 => CacheType::Instruction,
332            3 => CacheType::Unified,
333            _ => continue,
334        };
335
336        let level_raw = (eax >> 5) & 0x7;
337        let level = match level_raw {
338            1 => CacheLevel::L1,
339            2 => CacheLevel::L2,
340            3 => CacheLevel::L3,
341            _ => continue,
342        };
343
344        let line_size = (ebx & 0xFFF) + 1;
345        let partitions = ((ebx >> 12) & 0x3FF) + 1;
346        let ways = ((ebx >> 22) & 0x3FF) + 1;
347        let sets = ecx + 1;
348
349        let total_size = ways as u64 * partitions as u64 * line_size as u64 * sets as u64;
350
351        topology.add_cache(CacheInfo {
352            level,
353            cache_type,
354            line_size,
355            ways,
356            sets,
357            total_size,
358        });
359    }
360}
361
362/// Apply sensible x86_64 defaults when CPUID detection fails
363#[cfg(target_arch = "x86_64")]
364fn apply_x86_defaults(topology: &mut CacheTopology) {
365    // Generic modern x86_64 defaults
366    topology.add_cache(CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64)); // 32KB
367    topology.add_cache(CacheInfo::new(
368        CacheLevel::L1,
369        CacheType::Instruction,
370        64,
371        8,
372        64,
373    )); // 32KB
374    topology.add_cache(CacheInfo::new(
375        CacheLevel::L2,
376        CacheType::Unified,
377        64,
378        4,
379        1024,
380    )); // 256KB
381    topology.add_cache(CacheInfo::new(
382        CacheLevel::L3,
383        CacheType::Unified,
384        64,
385        16,
386        16384,
387    )); // 16MB
388}
389
390// ---------------------------------------------------------------------------
391// AArch64 hardcoded defaults (Cortex-A72)
392// ---------------------------------------------------------------------------
393
#[cfg(target_arch = "aarch64")]
fn detect_aarch64(topology: &mut CacheTopology) {
    // No runtime detection yet; assume Cortex-A72 geometry.
    // For each entry: sets = size / (ways * line_size).
    let cortex_a72 = [
        CacheInfo::new(CacheLevel::L1, CacheType::Instruction, 64, 3, 256), // 48KB
        CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 2, 256),        // 32KB
        CacheInfo::new(CacheLevel::L2, CacheType::Unified, 64, 16, 1024),   // 1MB
    ];
    for info in cortex_a72 {
        topology.add_cache(info);
    }
}
418
419// ---------------------------------------------------------------------------
420// RISC-V hardcoded defaults
421// ---------------------------------------------------------------------------
422
#[cfg(target_arch = "riscv64")]
fn detect_riscv64(topology: &mut CacheTopology) {
    // No runtime detection yet; assume SiFive U74-like geometry.
    // For each entry: sets = size / (ways * line_size).
    let u74_like = [
        CacheInfo::new(CacheLevel::L1, CacheType::Instruction, 64, 4, 128), // 32KB
        CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64),         // 32KB
        CacheInfo::new(CacheLevel::L2, CacheType::Unified, 64, 16, 2048),   // 2MB
    ];
    for info in u74_like {
        topology.add_cache(info);
    }
}
447
448// ---------------------------------------------------------------------------
449// Cache coloring
450// ---------------------------------------------------------------------------
451
452/// Determine which cache color a physical frame belongs to.
453///
454/// The color is derived from the physical address bits that index into
455/// the last-level cache. Frames with the same color compete for the
456/// same cache sets.
457pub fn frame_color(phys_addr: u64) -> u16 {
458    let topology = CACHE_TOPOLOGY.read();
459    let num_colors = topology.num_colors;
460    if num_colors <= 1 {
461        return 0;
462    }
463
464    // Color = (phys_addr / page_size) % num_colors
465    // This selects the LLC set group that this page maps to
466    let page_index = phys_addr / topology.page_size;
467    (page_index % num_colors as u64) as u16
468}
469
470/// Suggest a preferred cache color for a given process.
471///
472/// Distributes processes across colors to minimize LLC contention.
473/// Uses a simple modulo mapping: process_id % num_colors.
474pub fn preferred_color(process_id: u64) -> u16 {
475    let topology = CACHE_TOPOLOGY.read();
476    let num_colors = topology.num_colors;
477    if num_colors <= 1 {
478        return 0;
479    }
480    (process_id % num_colors as u64) as u16
481}
482
483/// Get the total number of cache colors available
484pub fn num_colors() -> u16 {
485    CACHE_TOPOLOGY.read().num_colors
486}
487
488// ---------------------------------------------------------------------------
489// Initialization and accessors
490// ---------------------------------------------------------------------------
491
492/// Initialize the cache topology subsystem.
493///
494/// Detects CPU cache hierarchy and computes cache coloring parameters.
495/// Must be called once during early kernel boot.
496pub fn init() -> Result<(), KernelError> {
497    if INITIALIZED.load(Ordering::Acquire) {
498        return Err(KernelError::AlreadyExists {
499            resource: "cache_topology",
500            id: 0,
501        });
502    }
503
504    let mut topology = CACHE_TOPOLOGY.write();
505
506    #[cfg(target_arch = "x86_64")]
507    detect_x86_64(&mut topology);
508
509    #[cfg(target_arch = "aarch64")]
510    detect_aarch64(&mut topology);
511
512    #[cfg(target_arch = "riscv64")]
513    detect_riscv64(&mut topology);
514
515    topology.compute_colors();
516
517    // Log detected topology
518    #[cfg(target_arch = "x86_64")]
519    {
520        kprintln!(
521            "[CACHE] Detected {} cache levels, {} colors",
522            topology.count,
523            topology.num_colors
524        );
525        for i in 0..topology.count {
526            if let Some(ref info) = topology.caches[i] {
527                kprintln!(
528                    "[CACHE]   {:?} {:?}: {}KB, {}B line, {}-way, {} sets",
529                    info.level,
530                    info.cache_type,
531                    info.total_size / 1024,
532                    info.line_size,
533                    info.ways,
534                    info.sets
535                );
536            }
537        }
538    }
539
540    drop(topology);
541    INITIALIZED.store(true, Ordering::Release);
542    Ok(())
543}
544
/// Access the global cache topology (read-only).
///
/// Returns a read guard to the topology. This never panics: if called
/// before `init()`, the guard exposes the empty default topology
/// (no caches, a single color).
pub fn get_cache_info() -> spin::RwLockReadGuard<'static, CacheTopology> {
    CACHE_TOPOLOGY.read()
}
551
/// Check if cache topology has been initialized.
///
/// The `Acquire` load pairs with the `Release` store in `init()`, so a
/// `true` result happens-after the topology was fully populated.
pub fn is_initialized() -> bool {
    INITIALIZED.load(Ordering::Acquire)
}
556
557// ---------------------------------------------------------------------------
558// Unit tests
559// ---------------------------------------------------------------------------
560
#[cfg(test)]
mod tests {
    use super::*;

    // total_size must be derived from the geometry passed to new().
    #[test]
    fn test_cache_info_new() {
        let info = CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64);
        assert_eq!(info.line_size, 64);
        assert_eq!(info.ways, 8);
        assert_eq!(info.sets, 64);
        assert_eq!(info.total_size, 64 * 8 * 64); // 32KB
        assert_eq!(info.level, CacheLevel::L1);
        assert_eq!(info.cache_type, CacheType::Data);
    }

    #[test]
    fn test_cache_info_l3() {
        let info = CacheInfo::new(CacheLevel::L3, CacheType::Unified, 64, 16, 16384);
        assert_eq!(info.total_size, 64 * 16 * 16384); // 16MB
    }

    // add_cache appends; get() is bounded by the number of valid entries.
    #[test]
    fn test_topology_add_and_get() {
        let mut topo = CacheTopology::new();
        assert_eq!(topo.cache_count(), 0);
        assert!(topo.get(0).is_none());

        topo.add_cache(CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64));
        assert_eq!(topo.cache_count(), 1);
        assert!(topo.get(0).is_some());
        assert_eq!(topo.get(0).unwrap().level, CacheLevel::L1);
        assert!(topo.get(1).is_none());
    }

    // The LLC is the deepest non-instruction cache (here, L3).
    #[test]
    fn test_topology_last_level_cache() {
        let mut topo = CacheTopology::new();
        topo.add_cache(CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64));
        topo.add_cache(CacheInfo::new(
            CacheLevel::L1,
            CacheType::Instruction,
            64,
            8,
            64,
        ));
        topo.add_cache(CacheInfo::new(
            CacheLevel::L2,
            CacheType::Unified,
            64,
            4,
            1024,
        ));
        topo.add_cache(CacheInfo::new(
            CacheLevel::L3,
            CacheType::Unified,
            64,
            16,
            16384,
        ));

        let llc = topo.last_level_cache().unwrap();
        assert_eq!(llc.level, CacheLevel::L3);
    }

    // Instruction caches must never be chosen as the LLC.
    #[test]
    fn test_topology_llc_skips_instruction_cache() {
        let mut topo = CacheTopology::new();
        topo.add_cache(CacheInfo::new(
            CacheLevel::L1,
            CacheType::Instruction,
            64,
            8,
            64,
        ));
        topo.add_cache(CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64));

        let llc = topo.last_level_cache().unwrap();
        assert_eq!(llc.cache_type, CacheType::Data);
    }

    // l1_data() returns the L1 data cache, skipping the instruction cache.
    #[test]
    fn test_topology_l1_data() {
        let mut topo = CacheTopology::new();
        topo.add_cache(CacheInfo::new(
            CacheLevel::L1,
            CacheType::Instruction,
            64,
            8,
            64,
        ));
        topo.add_cache(CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64));
        topo.add_cache(CacheInfo::new(
            CacheLevel::L2,
            CacheType::Unified,
            64,
            4,
            1024,
        ));

        let l1d = topo.l1_data().unwrap();
        assert_eq!(l1d.cache_type, CacheType::Data);
        assert_eq!(l1d.level, CacheLevel::L1);
    }

    // colors = LLC_size / (page_size * ways).
    #[test]
    fn test_color_computation() {
        let mut topo = CacheTopology::new();
        // L3: 16MB, 16-way, 64B line, 16384 sets
        topo.add_cache(CacheInfo::new(
            CacheLevel::L3,
            CacheType::Unified,
            64,
            16,
            16384,
        ));
        topo.page_size = 4096;
        topo.compute_colors();

        // num_colors = 16MB / (4096 * 16) = 16777216 / 65536 = 256
        assert_eq!(topo.num_colors, 256);
    }

    #[test]
    fn test_color_computation_small_cache() {
        let mut topo = CacheTopology::new();
        // L2: 256KB, 4-way, 64B line, 1024 sets
        topo.add_cache(CacheInfo::new(
            CacheLevel::L2,
            CacheType::Unified,
            64,
            4,
            1024,
        ));
        topo.page_size = 4096;
        topo.compute_colors();

        // num_colors = 256KB / (4096 * 4) = 262144 / 16384 = 16
        assert_eq!(topo.num_colors, 16);
    }

    // With no detected caches, coloring stays disabled (1 color).
    #[test]
    fn test_color_computation_no_cache() {
        let mut topo = CacheTopology::new();
        topo.compute_colors();
        assert_eq!(topo.num_colors, 1);
    }

    // NOTE(review): this test re-derives the color formula locally and
    // never calls frame_color() (which reads the uninitialized global
    // topology in unit tests), so it only validates the arithmetic,
    // not the function itself.
    #[test]
    fn test_frame_color_distribution() {
        // Simulate color computation with known parameters
        // With 256 colors and 4KB pages:
        // Page 0 -> color 0, Page 1 -> color 1, ..., Page 255 -> color 255, Page 256 ->
        // color 0
        let page_size: u64 = 4096;
        let num_colors: u64 = 256;

        let color0 = ((0 * page_size) / page_size) % num_colors;
        let color1 = ((1 * page_size) / page_size) % num_colors;
        let color255 = ((255 * page_size) / page_size) % num_colors;
        let color256 = ((256 * page_size) / page_size) % num_colors;

        assert_eq!(color0, 0);
        assert_eq!(color1, 1);
        assert_eq!(color255, 255);
        assert_eq!(color256, 0); // Wraps around
    }

    // NOTE(review): like the test above, this checks the modulo mapping
    // directly rather than calling preferred_color().
    #[test]
    fn test_preferred_color_distribution() {
        // Processes should distribute across colors
        let num_colors: u64 = 16;
        for pid in 0..32u64 {
            let color = pid % num_colors;
            assert!(color < num_colors);
        }
        // Different PIDs get different colors (within num_colors range)
        assert_ne!(0u64 % num_colors, 1u64 % num_colors);
        assert_eq!(0u64 % num_colors, 16u64 % num_colors);
    }

    // The derived Ord on CacheLevel follows declaration order.
    #[test]
    fn test_cache_level_ordering() {
        assert!(CacheLevel::L1 < CacheLevel::L2);
        assert!(CacheLevel::L2 < CacheLevel::L3);
    }

    // add_cache silently drops entries beyond the fixed capacity.
    #[test]
    fn test_topology_max_capacity() {
        let mut topo = CacheTopology::new();
        for _ in 0..MAX_CACHE_LEVELS + 2 {
            topo.add_cache(CacheInfo::new(CacheLevel::L1, CacheType::Data, 64, 8, 64));
        }
        // Should cap at MAX_CACHE_LEVELS
        assert_eq!(topo.cache_count(), MAX_CACHE_LEVELS);
    }
}