#![allow(dead_code)]
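//! Kernel performance instrumentation: global event counters, a
//! cycle-count profiler, per-CPU run-queue statistics, and IPC
//! workload tracking.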

pub mod bench;
pub mod pmu;
pub mod trace;

use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};

use crate::{error::KernelError, mm::cache_aligned::CacheAligned};

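/// Snapshot of the kernel-wide event counters.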
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct PerfCounters {
    pub(crate) syscalls: u64,
    pub(crate) context_switches: u64,
    pub(crate) page_faults: u64,
    pub(crate) interrupts: u64,
    pub(crate) ipc_messages: u64,
}

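// Each hot counter lives on its own cache line so that concurrent
// increments from different CPUs do not false-share.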
static SYSCALL_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
static CONTEXT_SWITCH_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
static PAGE_FAULT_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
static INTERRUPT_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));
static IPC_MESSAGE_COUNT: CacheAligned<AtomicU64> = CacheAligned::new(AtomicU64::new(0));

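/// Count one system call entry.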
#[inline(always)]
pub(crate) fn count_syscall() {
    SYSCALL_COUNT.fetch_add(1, Ordering::Relaxed);
}

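/// Count one context switch.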
#[inline(always)]
pub(crate) fn count_context_switch() {
    CONTEXT_SWITCH_COUNT.fetch_add(1, Ordering::Relaxed);
}

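/// Count one page fault.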
#[inline(always)]
pub(crate) fn count_page_fault() {
    PAGE_FAULT_COUNT.fetch_add(1, Ordering::Relaxed);
}

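/// Count one hardware interrupt.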
#[inline(always)]
pub(crate) fn count_interrupt() {
    INTERRUPT_COUNT.fetch_add(1, Ordering::Relaxed);
}

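/// Count one delivered IPC message.
///
/// `IPC_MESSAGE_COUNT` is module-private and is reported by
/// `get_stats()`, but nothing incremented it; this hook (an assumed
/// fix, mirroring the other counters) is meant to be called from the
/// IPC delivery path.
#[inline(always)]
pub(crate) fn count_ipc_message() {
    IPC_MESSAGE_COUNT.fetch_add(1, Ordering::Relaxed);
}

/// Return a point-in-time snapshot of the event counters.
///
/// Each load is `Relaxed` and independent, so the snapshot is not a
/// consistent cut across counters.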
pub(crate) fn get_stats() -> PerfCounters {
    PerfCounters {
        syscalls: SYSCALL_COUNT.load(Ordering::Relaxed),
        context_switches: CONTEXT_SWITCH_COUNT.load(Ordering::Relaxed),
        page_faults: PAGE_FAULT_COUNT.load(Ordering::Relaxed),
        interrupts: INTERRUPT_COUNT.load(Ordering::Relaxed),
        ipc_messages: IPC_MESSAGE_COUNT.load(Ordering::Relaxed),
    }
}

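/// Reset every event counter to zero.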
pub(crate) fn reset_stats() {
    SYSCALL_COUNT.store(0, Ordering::Relaxed);
    CONTEXT_SWITCH_COUNT.store(0, Ordering::Relaxed);
    PAGE_FAULT_COUNT.store(0, Ordering::Relaxed);
    INTERRUPT_COUNT.store(0, Ordering::Relaxed);
    IPC_MESSAGE_COUNT.store(0, Ordering::Relaxed);
}

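/// Scoped cycle-count profiler: captures a timestamp on `start` and
/// prints the elapsed cycles on `end`.
///
/// Typical usage (hypothetical call site):
/// ```ignore
/// let p = Profiler::start("vm_map");
/// // ... work under measurement ...
/// p.end(); // prints "[PERF] vm_map took N cycles"
/// ```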
pub(crate) struct Profiler {
    start_time: u64,
    #[cfg_attr(not(target_arch = "x86_64"), allow(dead_code))]
    name: &'static str,
}

impl Profiler {
    /// Begin timing the named region.
    pub(crate) fn start(name: &'static str) -> Self {
        Self {
            start_time: crate::test_framework::read_timestamp(),
            name,
        }
    }

    /// Finish timing and print the elapsed cycle count.
    pub(crate) fn end(self) {
        // wrapping_sub avoids a debug-build overflow panic if the
        // timestamp counter wraps or is not strictly monotonic.
        let elapsed = crate::test_framework::read_timestamp().wrapping_sub(self.start_time);
        println!("[PERF] {} took {} cycles", self.name, elapsed);
    }
}

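/// Upper bound on CPUs tracked by the per-CPU run-queue statistics;
/// events for CPU ids at or above this are silently dropped.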
const MAX_RQ_CPUS: usize = 16;

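/// Per-CPU run-queue counters, updated lock-free from the scheduler
/// hot path.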
pub(crate) struct RunQueueStats {
    /// Tasks enqueued on this CPU's run queue.
    pub(crate) enqueue_count: AtomicU64,
    /// Tasks dequeued from this CPU's run queue.
    pub(crate) dequeue_count: AtomicU64,
    /// High-water mark of the queue depth.
    pub(crate) max_length: AtomicU32,
    /// Sum of per-task wait times, in timer ticks.
    pub(crate) total_wait_ticks: AtomicU64,
}

impl RunQueueStats {
    pub(crate) const fn new() -> Self {
        Self {
            enqueue_count: AtomicU64::new(0),
            dequeue_count: AtomicU64::new(0),
            max_length: AtomicU32::new(0),
            total_wait_ticks: AtomicU64::new(0),
        }
    }
}

impl Default for RunQueueStats {
    fn default() -> Self {
        Self::new()
    }
}

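/// Per-CPU run-queue statistics, indexed by CPU id.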
#[allow(clippy::declare_interior_mutable_const)]
static RQ_STATS: [RunQueueStats; MAX_RQ_CPUS] = {
    // A `const` item is needed to repeat a non-`Copy` initializer.
    const INIT: RunQueueStats = RunQueueStats::new();
    [INIT; MAX_RQ_CPUS]
};

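/// Record an enqueue on `cpu_id`'s run queue and raise its depth
/// high-water mark to `queue_len` if needed.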
#[inline(always)]
pub(crate) fn record_enqueue(cpu_id: usize, queue_len: u32) {
    if cpu_id < MAX_RQ_CPUS {
        RQ_STATS[cpu_id]
            .enqueue_count
            .fetch_add(1, Ordering::Relaxed);
        // fetch_max retries internally, so concurrent updates cannot be
        // lost the way a single failed compare_exchange attempt could.
        RQ_STATS[cpu_id]
            .max_length
            .fetch_max(queue_len, Ordering::Relaxed);
    }
}

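/// Record a dequeue from `cpu_id`'s run queue along with how long the
/// task waited, in ticks.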
#[inline(always)]
pub(crate) fn record_dequeue(cpu_id: usize, wait_ticks: u64) {
    if cpu_id < MAX_RQ_CPUS {
        RQ_STATS[cpu_id]
            .dequeue_count
            .fetch_add(1, Ordering::Relaxed);
        RQ_STATS[cpu_id]
            .total_wait_ticks
            .fetch_add(wait_ticks, Ordering::Relaxed);
    }
}

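/// Aggregated scheduler statistics across all tracked CPUs.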
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct SchedulerProfile {
    /// Mean wait per dequeued task, in ticks.
    pub(crate) avg_wait_ticks: u64,
    /// Deepest run queue observed on any CPU.
    pub(crate) max_queue_length: u32,
    /// Total enqueues across all CPUs.
    pub(crate) total_enqueues: u64,
    /// Total dequeues across all CPUs.
    pub(crate) total_dequeues: u64,
}

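/// Fold the per-CPU run-queue stats into a single profile.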
pub(crate) fn get_scheduler_stats() -> SchedulerProfile {
    let mut total_enq = 0u64;
    let mut total_deq = 0u64;
    let mut total_wait = 0u64;
    let mut max_len = 0u32;

    for stats in &RQ_STATS {
        total_enq += stats.enqueue_count.load(Ordering::Relaxed);
        total_deq += stats.dequeue_count.load(Ordering::Relaxed);
        total_wait += stats.total_wait_ticks.load(Ordering::Relaxed);
        max_len = max_len.max(stats.max_length.load(Ordering::Relaxed));
    }

    let avg_wait = if total_deq > 0 {
        total_wait / total_deq
    } else {
        0
    };

    SchedulerProfile {
        avg_wait_ticks: avg_wait,
        max_queue_length: max_len,
        total_enqueues: total_enq,
        total_dequeues: total_deq,
    }
}

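// IPC fast-path workload counters.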
static IPC_MESSAGES_SENT: AtomicU64 = AtomicU64::new(0);
static IPC_BATCHES_FLUSHED: AtomicU64 = AtomicU64::new(0);

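/// Record one fast-path IPC send.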
#[inline(always)]
pub(crate) fn record_ipc_message_sent() {
    IPC_MESSAGES_SENT.fetch_add(1, Ordering::Relaxed);
}

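/// Record one IPC batch flush.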
#[inline(always)]
pub(crate) fn record_ipc_batch_flushed() {
    IPC_BATCHES_FLUSHED.fetch_add(1, Ordering::Relaxed);
}

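/// Return `(messages_sent, batches_flushed)` for the IPC workload.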
pub(crate) fn get_ipc_workload_stats() -> (u64, u64) {
    (
        IPC_MESSAGES_SENT.load(Ordering::Relaxed),
        IPC_BATCHES_FLUSHED.load(Ordering::Relaxed),
    )
}

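/// Report memory allocator utilization.
///
/// Despite the name this is currently diagnostic only: it prints
/// frame-usage figures rather than tuning the allocator.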
pub(crate) fn optimize_memory() {
    println!("[PERF] Optimizing memory allocator...");
    let stats = crate::mm::get_memory_stats();
    let used = stats.total_frames.saturating_sub(stats.free_frames);
    let utilization = if stats.total_frames > 0 {
        (used * 100) / stats.total_frames
    } else {
        0
    };
    println!(
        "[PERF] Memory: {} total, {} free, {} cached, {}% used",
        stats.total_frames, stats.free_frames, stats.cached_frames, utilization
    );
}

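/// Report scheduler activity and run-queue health. Like
/// `optimize_memory`, this is currently diagnostic only.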
pub(crate) fn optimize_scheduler() {
    println!("[PERF] Optimizing scheduler...");
    let counters = get_stats();
    let sched_profile = get_scheduler_stats();
    println!(
        "[PERF] Scheduler: {} context switches, {} syscalls",
        counters.context_switches, counters.syscalls
    );
    println!(
        "[PERF] Run-queue: avg_wait={} ticks, max_depth={}, enq={}, deq={}",
        sched_profile.avg_wait_ticks,
        sched_profile.max_queue_length,
        sched_profile.total_enqueues,
        sched_profile.total_dequeues
    );
}

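/// Report IPC delivery and workload counters (diagnostic only).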
pub(crate) fn optimize_ipc() {
    println!("[PERF] Optimizing IPC...");
    let counters = get_stats();
    let (msgs_sent, batches) = get_ipc_workload_stats();
    println!("[PERF] IPC: {} messages delivered", counters.ipc_messages);
    println!(
        "[PERF] IPC workload: {} fast-path sends, {} batch flushes",
        msgs_sent, batches
    );
}

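/// Initialize the performance subsystem: zero every counter, then
/// print the initial diagnostics for memory, scheduler, and IPC.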
pub(crate) fn init() -> Result<(), KernelError> {
    println!("[PERF] Initializing performance subsystem...");

    reset_stats();

    optimize_memory();
    optimize_scheduler();
    optimize_ipc();

    println!("[PERF] Performance subsystem initialized");
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_counters() {
        reset_stats();
        count_syscall();
        count_context_switch();
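        // Also exercise the IPC counter hook added above.
        count_ipc_message();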
        let stats = get_stats();
        assert_eq!(stats.syscalls, 1);
        assert_eq!(stats.context_switches, 1);
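        assert_eq!(stats.ipc_messages, 1);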
    }

    #[test]
    fn test_profiler() {
        let p = Profiler::start("test");
        for _ in 0..1000 {
            // black_box keeps the loop from being optimized away.
            core::hint::black_box(42);
        }
        p.end();
    }

    #[test]
    fn test_run_queue_stats() {
        record_enqueue(0, 5);
        record_enqueue(0, 10);
        record_dequeue(0, 100);
        let profile = get_scheduler_stats();
        // Stats are global, so only lower bounds can be asserted.
        assert!(profile.total_enqueues >= 2);
        assert!(profile.total_dequeues >= 1);
    }

    #[test]
    fn test_ipc_workload_stats() {
        record_ipc_message_sent();
        record_ipc_batch_flushed();
        let (msgs, batches) = get_ipc_workload_stats();
        assert!(msgs >= 1);
        assert!(batches >= 1);
    }
}