⚠️ VeridianOS Kernel Documentation — This is low-level kernel code built for `no_std` environments. All functions are unsafe unless explicitly marked otherwise.

veridian_kernel/perf/
mod.rs

1//! Performance optimization and monitoring
2//!
3//! Provides tools for profiling, optimization, and performance analysis.
4//! Includes per-CPU run-queue instrumentation and IPC workload stats.
5
6#![allow(dead_code)]
7
8pub mod bench;
9pub mod pmu;
10pub mod trace;
11
12use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};
13
14use crate::{error::KernelError, mm::cache_aligned::CacheAligned};
15
/// Performance counters (snapshot view)
///
/// Plain `u64` copy of the module's atomic counters, produced by `get_stats`.
/// Each field is sampled with an independent relaxed load, so the snapshot is
/// best-effort rather than globally consistent across counters.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct PerfCounters {
    /// System calls counted since the last `reset_stats`.
    pub(crate) syscalls: u64,
    /// Context switches counted since the last reset.
    pub(crate) context_switches: u64,
    /// Page faults counted since the last reset.
    pub(crate) page_faults: u64,
    /// Interrupts counted since the last reset.
    pub(crate) interrupts: u64,
    /// IPC messages delivered (reported by `optimize_ipc`).
    pub(crate) ipc_messages: u64,
}
25
26/// Atomic performance counters for safe concurrent access.
27///
28/// Each counter is wrapped in `CacheAligned` to prevent false sharing
29/// between CPU cores when multiple counters are incremented concurrently
30/// from different cores (e.g., syscall count on CPU 0, interrupt count on CPU
31/// 1).
32static SYSCALL_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
33static CONTEXT_SWITCH_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
34static PAGE_FAULT_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
35static INTERRUPT_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
36static IPC_MESSAGE_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
37
38/// Increment syscall counter
39#[inline(always)]
40pub(crate) fn count_syscall() {
41    SYSCALL_COUNT.fetch_add(1, Ordering::Relaxed);
42}
43
44/// Increment context switch counter
45#[inline(always)]
46pub(crate) fn count_context_switch() {
47    CONTEXT_SWITCH_COUNT.fetch_add(1, Ordering::Relaxed);
48}
49
50/// Increment page fault counter
51#[inline(always)]
52pub(crate) fn count_page_fault() {
53    PAGE_FAULT_COUNT.fetch_add(1, Ordering::Relaxed);
54}
55
56/// Increment interrupt counter
57#[inline(always)]
58pub(crate) fn count_interrupt() {
59    INTERRUPT_COUNT.fetch_add(1, Ordering::Relaxed);
60}
61
62/// Get performance statistics as a point-in-time snapshot
63pub(crate) fn get_stats() -> PerfCounters {
64    PerfCounters {
65        syscalls: SYSCALL_COUNT.load(Ordering::Relaxed),
66        context_switches: CONTEXT_SWITCH_COUNT.load(Ordering::Relaxed),
67        page_faults: PAGE_FAULT_COUNT.load(Ordering::Relaxed),
68        interrupts: INTERRUPT_COUNT.load(Ordering::Relaxed),
69        ipc_messages: IPC_MESSAGE_COUNT.load(Ordering::Relaxed),
70    }
71}
72
73/// Reset performance counters
74pub(crate) fn reset_stats() {
75    SYSCALL_COUNT.store(0, Ordering::Relaxed);
76    CONTEXT_SWITCH_COUNT.store(0, Ordering::Relaxed);
77    PAGE_FAULT_COUNT.store(0, Ordering::Relaxed);
78    INTERRUPT_COUNT.store(0, Ordering::Relaxed);
79    IPC_MESSAGE_COUNT.store(0, Ordering::Relaxed);
80}
81
82/// Performance profiler
83pub(crate) struct Profiler {
84    start_time: u64,
85    /// Read in end() via println! which is a no-op on some architectures.
86    #[cfg_attr(not(target_arch = "x86_64"), allow(dead_code))]
87    name: &'static str,
88}
89
90impl Profiler {
91    /// Start profiling a section
92    pub(crate) fn start(name: &'static str) -> Self {
93        Self {
94            start_time: crate::test_framework::read_timestamp(),
95            name,
96        }
97    }
98
99    /// End profiling and print results
100    pub(crate) fn end(self) {
101        let _elapsed = crate::test_framework::read_timestamp() - self.start_time;
102        println!("[PERF] {} took {} cycles", self.name, _elapsed);
103    }
104}
105
106// ---------------------------------------------------------------------------
107// Per-CPU Run-Queue Instrumentation
108// ---------------------------------------------------------------------------
109
/// Maximum CPUs for run-queue stats tracking.
///
/// NOTE(review): `record_enqueue`/`record_dequeue` silently drop events for
/// `cpu_id >= MAX_RQ_CPUS` — confirm 16 covers the largest supported CPU
/// configuration.
const MAX_RQ_CPUS: usize = 16;
112
/// Per-CPU run-queue statistics.
pub(crate) struct RunQueueStats {
    /// Number of tasks pushed onto this CPU's queue.
    pub(crate) enqueue_count: AtomicU64,
    /// Number of tasks popped from this CPU's queue.
    pub(crate) dequeue_count: AtomicU64,
    /// Largest queue depth observed (high-water mark).
    pub(crate) max_length: AtomicU32,
    /// Sum of wait ticks accumulated over all dequeued tasks.
    pub(crate) total_wait_ticks: AtomicU64,
}

impl RunQueueStats {
    /// Construct a stats record with every counter at zero.
    ///
    /// `const` so it can serve as the initializer of the static per-CPU
    /// array.
    pub const fn new() -> Self {
        RunQueueStats {
            enqueue_count: AtomicU64::new(0),
            dequeue_count: AtomicU64::new(0),
            max_length: AtomicU32::new(0),
            total_wait_ticks: AtomicU64::new(0),
        }
    }
}

impl Default for RunQueueStats {
    /// Equivalent to [`RunQueueStats::new`].
    fn default() -> Self {
        RunQueueStats::new()
    }
}
142
/// Global per-CPU run-queue stats array.
///
/// Indexed by CPU id; ids at or beyond `MAX_RQ_CPUS` have no slot and their
/// events are dropped by the recording functions.
// The `allow` covers the inner `const INIT`: clippy flags consts with
// interior mutability as usually-a-mistake, but here INIT exists only as a
// repeat-initializer for the static array (`[INIT; MAX_RQ_CPUS]`).
#[allow(clippy::declare_interior_mutable_const)]
static RQ_STATS: [RunQueueStats; MAX_RQ_CPUS] = {
    const INIT: RunQueueStats = RunQueueStats::new();
    [INIT; MAX_RQ_CPUS]
};
149
150/// Record an enqueue operation for a CPU's run queue.
151#[inline(always)]
152pub(crate) fn record_enqueue(cpu_id: usize, queue_len: u32) {
153    if cpu_id < MAX_RQ_CPUS {
154        RQ_STATS[cpu_id]
155            .enqueue_count
156            .fetch_add(1, Ordering::Relaxed);
157        // Update high-water mark (best-effort CAS)
158        let current_max = RQ_STATS[cpu_id].max_length.load(Ordering::Relaxed);
159        if queue_len > current_max {
160            let _ = RQ_STATS[cpu_id].max_length.compare_exchange(
161                current_max,
162                queue_len,
163                Ordering::Relaxed,
164                Ordering::Relaxed,
165            );
166        }
167    }
168}
169
170/// Record a dequeue operation with wait time.
171#[inline(always)]
172pub(crate) fn record_dequeue(cpu_id: usize, wait_ticks: u64) {
173    if cpu_id < MAX_RQ_CPUS {
174        RQ_STATS[cpu_id]
175            .dequeue_count
176            .fetch_add(1, Ordering::Relaxed);
177        RQ_STATS[cpu_id]
178            .total_wait_ticks
179            .fetch_add(wait_ticks, Ordering::Relaxed);
180    }
181}
182
/// Aggregated scheduler profile from all CPUs.
///
/// Built by `get_scheduler_stats` from the per-CPU `RQ_STATS` array.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct SchedulerProfile {
    /// Average wait ticks per dequeue across all CPUs
    /// (integer division, truncated; 0 when no dequeues have occurred).
    pub(crate) avg_wait_ticks: u64,
    /// Maximum queue length seen on any CPU.
    pub(crate) max_queue_length: u32,
    /// Total enqueues across all CPUs.
    pub(crate) total_enqueues: u64,
    /// Total dequeues across all CPUs.
    pub(crate) total_dequeues: u64,
}
195
196/// Collect aggregated scheduler stats from all CPUs.
197pub(crate) fn get_scheduler_stats() -> SchedulerProfile {
198    let mut total_enq = 0u64;
199    let mut total_deq = 0u64;
200    let mut total_wait = 0u64;
201    let mut max_len = 0u32;
202
203    for stats in &RQ_STATS {
204        total_enq += stats.enqueue_count.load(Ordering::Relaxed);
205        total_deq += stats.dequeue_count.load(Ordering::Relaxed);
206        total_wait += stats.total_wait_ticks.load(Ordering::Relaxed);
207        let ml = stats.max_length.load(Ordering::Relaxed);
208        if ml > max_len {
209            max_len = ml;
210        }
211    }
212
213    let avg_wait = if total_deq > 0 {
214        total_wait / total_deq
215    } else {
216        0
217    };
218
219    SchedulerProfile {
220        avg_wait_ticks: avg_wait,
221        max_queue_length: max_len,
222        total_enqueues: total_enq,
223        total_dequeues: total_deq,
224    }
225}
226
227// ---------------------------------------------------------------------------
228// IPC Workload Profiling
229// ---------------------------------------------------------------------------
230
/// IPC messages sent via fast path.
static IPC_MESSAGES_SENT: AtomicU64 = AtomicU64::new(0);
/// IPC batches flushed.
static IPC_BATCHES_FLUSHED: AtomicU64 = AtomicU64::new(0);

/// Record an IPC message sent via the fast path.
#[inline(always)]
pub(crate) fn record_ipc_message_sent() {
    IPC_MESSAGES_SENT.fetch_add(1, Ordering::Relaxed);
}

/// Record an IPC batch flush.
#[inline(always)]
pub(crate) fn record_ipc_batch_flushed() {
    IPC_BATCHES_FLUSHED.fetch_add(1, Ordering::Relaxed);
}

/// Get IPC workload stats: (messages_sent, batches_flushed).
///
/// The two loads are independent relaxed reads, so the pair is a
/// best-effort snapshot.
pub(crate) fn get_ipc_workload_stats() -> (u64, u64) {
    let sent = IPC_MESSAGES_SENT.load(Ordering::Relaxed);
    let flushed = IPC_BATCHES_FLUSHED.load(Ordering::Relaxed);
    (sent, flushed)
}
255
256// ---------------------------------------------------------------------------
257// Optimization Reporting
258// ---------------------------------------------------------------------------
259
260/// Optimize memory allocator.
261///
262/// Collects allocation statistics and logs fragmentation metrics.
263pub(crate) fn optimize_memory() {
264    println!("[PERF] Optimizing memory allocator...");
265    let stats = crate::mm::get_memory_stats();
266    let used = stats.total_frames.saturating_sub(stats.free_frames);
267    let utilization = if stats.total_frames > 0 {
268        (used * 100) / stats.total_frames
269    } else {
270        0
271    };
272    println!(
273        "[PERF]   Memory: {} total, {} free, {} cached, {}% used",
274        stats.total_frames, stats.free_frames, stats.cached_frames, utilization
275    );
276}
277
278/// Optimize scheduler.
279///
280/// Reports per-CPU run-queue instrumentation data: average wait time,
281/// max queue depth, and total enqueue/dequeue counts.
282pub(crate) fn optimize_scheduler() {
283    println!("[PERF] Optimizing scheduler...");
284    let counters = get_stats();
285    let sched_profile = get_scheduler_stats();
286    println!(
287        "[PERF]   Scheduler: {} context switches, {} syscalls",
288        counters.context_switches, counters.syscalls
289    );
290    println!(
291        "[PERF]   Run-queue: avg_wait={} ticks, max_depth={}, enq={}, deq={}",
292        sched_profile.avg_wait_ticks,
293        sched_profile.max_queue_length,
294        sched_profile.total_enqueues,
295        sched_profile.total_dequeues
296    );
297}
298
299/// Optimize IPC.
300///
301/// Reports IPC message throughput and batch flush statistics.
302pub(crate) fn optimize_ipc() {
303    println!("[PERF] Optimizing IPC...");
304    let counters = get_stats();
305    let (msgs_sent, batches) = get_ipc_workload_stats();
306    println!("[PERF]   IPC: {} messages delivered", counters.ipc_messages);
307    println!(
308        "[PERF]   IPC workload: {} fast-path sends, {} batch flushes",
309        msgs_sent, batches
310    );
311}
312
/// Initialize performance subsystem
///
/// Clears all counters and runs each optimization/reporting pass once so a
/// baseline report appears in the boot log.
///
/// # Errors
///
/// Currently always returns `Ok(())`; the `Result` signature is presumably
/// kept for parity with other subsystem initializers — confirm before
/// removing.
pub(crate) fn init() -> Result<(), KernelError> {
    println!("[PERF] Initializing performance subsystem...");

    // Start from zeroed counters so earlier boot activity is not counted.
    reset_stats();

    // Apply optimizations
    optimize_memory();
    optimize_scheduler();
    optimize_ipc();

    println!("[PERF] Performance subsystem initialized");
    Ok(())
}
327
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): these tests mutate module-global atomics. The
    // exact-equality asserts in `test_counters` assume no other test
    // increments SYSCALL_COUNT/CONTEXT_SWITCH_COUNT concurrently (none in
    // this module does); the other tests use `>=` so they tolerate prior
    // increments of the shared statics.

    // Verifies the count_* hooks feed the snapshot returned by get_stats.
    #[test]
    fn test_counters() {
        reset_stats();
        count_syscall();
        count_context_switch();
        let stats = get_stats();
        assert_eq!(stats.syscalls, 1);
        assert_eq!(stats.context_switches, 1);
    }

    // Smoke test: start/end must not panic; output goes through println!.
    #[test]
    fn test_profiler() {
        let p = Profiler::start("test");
        // Do some work
        for _ in 0..1000 {
            core::hint::black_box(42);
        }
        p.end();
    }

    // CPU 0 is always < MAX_RQ_CPUS, so these events are recorded.
    #[test]
    fn test_run_queue_stats() {
        record_enqueue(0, 5);
        record_enqueue(0, 10);
        record_dequeue(0, 100);
        let profile = get_scheduler_stats();
        assert!(profile.total_enqueues >= 2);
        assert!(profile.total_dequeues >= 1);
    }

    // Checks the fast-path send / batch-flush counters are wired to the
    // (messages_sent, batches_flushed) getter.
    #[test]
    fn test_ipc_workload_stats() {
        record_ipc_message_sent();
        record_ipc_batch_flushed();
        let (msgs, batches) = get_ipc_workload_stats();
        assert!(msgs >= 1);
        assert!(batches >= 1);
    }
}