// veridian_kernel/perf/pmu.rs
1//! Hardware Performance Monitoring Unit (PMU) Driver
2//!
3//! Provides access to hardware performance counters for profiling and
4//! optimization. Supports x86_64 (IA32_PERFEVTSELx / IA32_PMCx MSRs),
5//! AArch64 (PMCR_EL0, PMCNTENSET_EL0), and RISC-V (mcycle, minstret).
6
7#![allow(dead_code)]
8//! Performance events that can be counted:
9//! - Instructions retired
10//! - CPU cycles
11//! - Cache misses (L1, L2, LLC)
12//! - Branch mispredictions
13//! - TLB misses
14
15use core::sync::atomic::{AtomicBool, AtomicU8, Ordering};
16
/// Whether the PMU has been initialized.
///
/// Set exactly once by [`init`] with releasing semantics; readers use
/// Acquire (see [`is_initialized`]) so a `true` observation also orders
/// the detection results written during init.
static PMU_INITIALIZED: AtomicBool = AtomicBool::new(false);

/// Number of available general-purpose performance counters.
///
/// 0 until [`init`] runs (and remains 0 on hardware with no counters);
/// written once during arch-specific detection, then read-only.
static NUM_COUNTERS: AtomicU8 = AtomicU8::new(0);
22
23// ---------------------------------------------------------------------------
24// Performance Event Selectors
25// ---------------------------------------------------------------------------
26
/// Performance events that can be monitored.
///
/// Architecture-neutral event names; per-architecture encodings are
/// produced by conversion methods such as [`PmuEvent::to_x86_evtsel`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PmuEvent {
    /// CPU cycles (unhalted core cycles).
    Cycles,
    /// Instructions retired.
    InstructionsRetired,
    /// L1 data cache misses.
    L1DCacheMisses,
    /// L2 cache misses (unified).
    L2CacheMisses,
    /// Last-level cache misses.
    LlcMisses,
    /// Branch mispredictions.
    BranchMispredicts,
    /// Instruction TLB misses.
    ITlbMisses,
    /// Data TLB misses.
    DTlbMisses,
}
47
impl PmuEvent {
    /// Convert to an x86_64 performance event selector.
    ///
    /// Returns `(event_select, unit_mask)` for IA32_PERFEVTSELx programming.
    ///
    /// NOTE(review): the previous comment called all of these Intel
    /// *Architectural* Performance Events (CPUID leaf 0x0A). Only
    /// 0x3C/0x00 (cycles), 0xC0/0x00 (instructions), 0x2E/0x41 (LLC miss)
    /// and 0xC5/0x00 (branch mispredict) are in the architectural set;
    /// the 0xCB MEM_LOAD_RETIRED and TLB *_MISS_CAUSES_A_WALK encodings
    /// are model-specific — verify against the SDM event tables for the
    /// target microarchitecture before relying on them.
    #[cfg(target_arch = "x86_64")]
    fn to_x86_evtsel(self) -> (u8, u8) {
        match self {
            Self::Cycles => (0x3C, 0x00), // UnHalted Core Cycles
            Self::InstructionsRetired => (0xC0, 0x00), // Instructions Retired
            Self::L1DCacheMisses => (0xCB, 0x01), // MEM_LOAD_RETIRED.L1_MISS
            Self::L2CacheMisses => (0xCB, 0x04), // MEM_LOAD_RETIRED.L2_MISS
            Self::LlcMisses => (0x2E, 0x41), // LONGEST_LAT_CACHE.MISS
            Self::BranchMispredicts => (0xC5, 0x00), // BR_MISP_RETIRED.ALL_BRANCHES
            Self::ITlbMisses => (0x85, 0x01), // ITLB_MISSES.MISS_CAUSES_A_WALK
            Self::DTlbMisses => (0x08, 0x01), // DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK
        }
    }
}
67
/// A PMU counter configuration.
///
/// Bookkeeping record pairing a hardware counter slot with the event it
/// was programmed to count. NOTE(review): nothing in this file reads or
/// writes `active`; presumably higher-level profiler code manages these
/// records — confirm against the callers.
#[derive(Debug, Clone)]
pub(crate) struct PmuCounter {
    /// Counter index (0..NUM_COUNTERS-1).
    pub(crate) index: u8,
    /// The event being counted.
    pub(crate) event: PmuEvent,
    /// Whether this counter is currently active.
    pub(crate) active: bool,
}
78
/// PMU counter sample (snapshot of a single counter).
#[derive(Debug, Clone, Copy)]
pub(crate) struct PmuSample {
    /// The event type.
    pub(crate) event: PmuEvent,
    /// Counter value (raw count; effective width depends on the
    /// hardware counter width reported at init).
    pub(crate) count: u64,
}
87
88// ---------------------------------------------------------------------------
89// Initialization
90// ---------------------------------------------------------------------------
91
92/// Initialize the PMU subsystem.
93///
94/// Detects the number of available performance counters and their
95/// capabilities via CPUID (x86_64) or system register reads (ARM/RISC-V).
96pub(crate) fn init() {
97 if PMU_INITIALIZED.load(Ordering::Acquire) {
98 return;
99 }
100
101 #[cfg(target_arch = "x86_64")]
102 {
103 init_x86_64();
104 }
105
106 #[cfg(target_arch = "aarch64")]
107 {
108 init_aarch64();
109 }
110
111 #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
112 {
113 init_riscv();
114 }
115
116 PMU_INITIALIZED.store(true, Ordering::Release);
117}
118
/// Check if PMU is initialized.
///
/// The Acquire load pairs with the releasing store in [`init`], so a
/// `true` result also makes the init-time detection results visible.
pub(crate) fn is_initialized() -> bool {
    PMU_INITIALIZED.load(Ordering::Acquire)
}
123
/// Get the number of general-purpose performance counters.
///
/// Returns 0 until [`init`] has run (or when the hardware exposes no
/// counters). Relaxed suffices: the value is a plain integer written
/// once during init with no dependent data of its own.
pub(crate) fn num_counters() -> u8 {
    NUM_COUNTERS.load(Ordering::Relaxed)
}
128
129// ---------------------------------------------------------------------------
130// x86_64 PMU
131// ---------------------------------------------------------------------------
132
133#[cfg(target_arch = "x86_64")]
134fn init_x86_64() {
135 // CPUID leaf 0x0A: Architectural Performance Monitoring.
136 // SAFETY: CPUID is a read-only instruction with no side effects.
137 let cpuid = unsafe { core::arch::x86_64::__cpuid(0x0A) };
138
139 let version_id = cpuid.eax & 0xFF;
140 let num_gp_counters = ((cpuid.eax >> 8) & 0xFF) as u8;
141 let counter_width = ((cpuid.eax >> 16) & 0xFF) as u8;
142
143 NUM_COUNTERS.store(num_gp_counters, Ordering::Relaxed);
144
145 println!(
146 "[PMU] x86_64: version={}, counters={}, width={} bits",
147 version_id, num_gp_counters, counter_width
148 );
149}
150
151/// Configure a performance counter on x86_64.
152///
153/// Programs IA32_PERFEVTSELx with the event selector, unit mask,
154/// and enable bits. The counter starts counting immediately.
155#[cfg(target_arch = "x86_64")]
156pub(crate) fn configure_counter(counter: u8, event: PmuEvent) -> bool {
157 let num = num_counters();
158 if counter >= num {
159 return false;
160 }
161
162 let (evt_sel, umask) = event.to_x86_evtsel();
163
164 // IA32_PERFEVTSELx MSR: base 0x186 + counter index.
165 // Bits: [7:0] = EventSelect, [15:8] = UMask, [16] = USR, [17] = OS,
166 // [22] = EN (enable).
167 let evtsel_msr = 0x186 + counter as u32;
168 let value: u64 = (evt_sel as u64)
169 | ((umask as u64) << 8)
170 | (1 << 16) // Count in user mode
171 | (1 << 17) // Count in kernel mode
172 | (1 << 22); // Enable counter
173
174 // Clear the counter first.
175 let pmc_msr = 0xC1 + counter as u32; // IA32_PMCx
176 crate::arch::x86_64::msr::wrmsr(pmc_msr, 0);
177
178 // Program the event selector.
179 crate::arch::x86_64::msr::wrmsr(evtsel_msr, value);
180
181 true
182}
183
184/// Read a performance counter value on x86_64.
185#[cfg(target_arch = "x86_64")]
186pub(crate) fn read_counter(counter: u8) -> u64 {
187 if counter >= num_counters() {
188 return 0;
189 }
190 let pmc_msr = 0xC1 + counter as u32;
191 crate::arch::x86_64::msr::rdmsr(pmc_msr)
192}
193
194/// Stop (disable) a performance counter on x86_64.
195#[cfg(target_arch = "x86_64")]
196pub(crate) fn stop_counter(counter: u8) {
197 if counter >= num_counters() {
198 return;
199 }
200 let evtsel_msr = 0x186 + counter as u32;
201 crate::arch::x86_64::msr::wrmsr(evtsel_msr, 0);
202}
203
204// ---------------------------------------------------------------------------
205// AArch64 PMU
206// ---------------------------------------------------------------------------
207
#[cfg(target_arch = "aarch64")]
fn init_aarch64() {
    // Read PMCR_EL0 to get the number of event counters: PMCR_EL0.N
    // (bits [15:11]) holds the implemented event-counter count; the
    // dedicated cycle counter (PMCCNTR_EL0) is separate.
    let pmcr: u64;
    // SAFETY: reading PMCR_EL0 has no side effects. NOTE(review): PMCR_EL0
    // is a read/write register (not read-only as previously stated); the
    // read is accessible from EL1 provided it is not trapped to a higher
    // EL — confirm the trap configuration for this kernel.
    unsafe {
        core::arch::asm!("mrs {}, PMCR_EL0", out(reg) pmcr);
    }
    let n = ((pmcr >> 11) & 0x1F) as u8;
    NUM_COUNTERS.store(n, Ordering::Relaxed);
    println!("[PMU] AArch64: {} event counters", n);
}
220
/// Read the cycle counter on AArch64 (PMCCNTR_EL0).
///
/// NOTE(review): this only reads the register — it assumes the cycle
/// counter was enabled elsewhere (PMCR_EL0.E plus PMCNTENSET_EL0.C, per
/// the module docs); confirm the enable path before trusting the value.
#[cfg(target_arch = "aarch64")]
pub(crate) fn read_cycle_counter() -> u64 {
    let val: u64;
    // SAFETY: reading PMCCNTR_EL0 has no side effects.
    unsafe {
        core::arch::asm!("mrs {}, PMCCNTR_EL0", out(reg) val);
    }
    val
}
231
232// ---------------------------------------------------------------------------
233// RISC-V PMU
234// ---------------------------------------------------------------------------
235
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
fn init_riscv() {
    // RISC-V has fixed counters: mcycle, minstret, plus optional HPM counters.
    // Only the two architecturally guaranteed counters are advertised here;
    // hpmcounter3..31 discovery is not implemented.
    NUM_COUNTERS.store(2, Ordering::Relaxed);
    println!("[PMU] RISC-V: cycle + instret counters");
}
242
/// Read the cycle counter on RISC-V (`mcycle` CSR).
///
/// On RV64 a single CSR read suffices. On RV32 `mcycle` holds only the
/// low 32 bits, with the high half in `mcycleh` — the previous single
/// `u64` CSR read cannot compile on riscv32 (a GPR is 32 bits wide), so
/// the RV32 path uses the hi/lo/hi re-read sequence recommended by the
/// privileged spec to obtain a value consistent across low-half rollover.
#[cfg(target_arch = "riscv64")]
pub(crate) fn read_cycle_counter() -> u64 {
    let val: u64;
    // SAFETY: reading the mcycle CSR has no side effects.
    unsafe {
        core::arch::asm!("csrr {}, mcycle", out(reg) val);
    }
    val
}

/// Read the cycle counter on RV32 (`mcycle` + `mcycleh`).
#[cfg(target_arch = "riscv32")]
pub(crate) fn read_cycle_counter() -> u64 {
    loop {
        let (hi, lo, hi2): (u32, u32, u32);
        // SAFETY: reading the mcycle/mcycleh CSRs has no side effects.
        unsafe {
            core::arch::asm!(
                "csrr {0}, mcycleh",
                "csrr {1}, mcycle",
                "csrr {2}, mcycleh",
                out(reg) hi,
                out(reg) lo,
                out(reg) hi2,
            );
        }
        // If the high half didn't change, (hi, lo) is a consistent pair.
        if hi == hi2 {
            return (u64::from(hi) << 32) | u64::from(lo);
        }
    }
}
253
/// Read the instructions-retired counter on RISC-V (`minstret` CSR).
///
/// On RV64 a single CSR read suffices. On RV32 `minstret` holds only the
/// low 32 bits, with the high half in `minstreth` — the previous single
/// `u64` CSR read cannot compile on riscv32 (a GPR is 32 bits wide), so
/// the RV32 path uses the hi/lo/hi re-read sequence recommended by the
/// privileged spec to obtain a value consistent across low-half rollover.
#[cfg(target_arch = "riscv64")]
pub(crate) fn read_instret_counter() -> u64 {
    let val: u64;
    // SAFETY: reading the minstret CSR has no side effects.
    unsafe {
        core::arch::asm!("csrr {}, minstret", out(reg) val);
    }
    val
}

/// Read the instructions-retired counter on RV32 (`minstret` + `minstreth`).
#[cfg(target_arch = "riscv32")]
pub(crate) fn read_instret_counter() -> u64 {
    loop {
        let (hi, lo, hi2): (u32, u32, u32);
        // SAFETY: reading the minstret/minstreth CSRs has no side effects.
        unsafe {
            core::arch::asm!(
                "csrr {0}, minstreth",
                "csrr {1}, minstret",
                "csrr {2}, minstreth",
                out(reg) hi,
                out(reg) lo,
                out(reg) hi2,
            );
        }
        // If the high half didn't change, (hi, lo) is a consistent pair.
        if hi == hi2 {
            return (u64::from(hi) << 32) | u64::from(lo);
        }
    }
}
264
265// ---------------------------------------------------------------------------
266// Sampling Profiler
267// ---------------------------------------------------------------------------
268
/// Maximum number of instruction pointer samples per buffer.
pub(crate) const MAX_SAMPLES: usize = 4096;

/// A sample captured by the sampling profiler.
#[derive(Debug, Clone, Copy)]
pub(crate) struct ProfileSample {
    /// Instruction pointer at sample time.
    pub(crate) ip: u64,
    /// CPU ID where the sample was taken.
    pub(crate) cpu: u8,
    /// Process ID (0 for kernel).
    pub(crate) pid: u64,
}

/// Per-CPU sample buffer.
///
/// Fixed-capacity ([`MAX_SAMPLES`]) append-only buffer: `record` refuses
/// samples once full, and `clear` resets the logical length without
/// touching the storage.
pub(crate) struct SampleBuffer {
    /// Sample storage; only the first `count` entries are meaningful.
    pub(crate) samples: [ProfileSample; MAX_SAMPLES],
    /// Number of samples collected.
    pub(crate) count: usize,
    /// Whether sampling is active.
    pub(crate) active: bool,
}

impl SampleBuffer {
    /// Create a new empty, inactive sample buffer.
    ///
    /// Visibility narrowed from `pub` to `pub(crate)` for consistency
    /// with every other item in this module.
    pub(crate) const fn new() -> Self {
        const ZERO: ProfileSample = ProfileSample { ip: 0, cpu: 0, pid: 0 };
        Self {
            samples: [ZERO; MAX_SAMPLES],
            count: 0,
            active: false,
        }
    }

    /// Record a sample. Returns false (dropping the sample) if the
    /// buffer is full.
    pub(crate) fn record(&mut self, ip: u64, cpu: u8, pid: u64) -> bool {
        if self.count >= MAX_SAMPLES {
            return false;
        }
        self.samples[self.count] = ProfileSample { ip, cpu, pid };
        self.count += 1;
        true
    }

    /// Clear the sample buffer (resets the count; storage is untouched).
    pub(crate) fn clear(&mut self) {
        self.count = 0;
    }
}

impl Default for SampleBuffer {
    /// Equivalent to [`SampleBuffer::new`]; provided instead of the
    /// previous `#[allow(clippy::new_without_default)]` suppression.
    fn default() -> Self {
        Self::new()
    }
}