⚠️ VeridianOS Kernel Documentation - This is low-level `no_std` kernel code. All functions are unsafe unless explicitly marked otherwise.

veridian_kernel/ipc/fast_path.rs

//! Fast path IPC implementation for register-based messages
//!
//! Achieves < 1μs latency by using per-task IPC register storage for direct
//! message transfer. When a sender targets a blocked receiver, the message
//! is copied directly into the receiver's `Task::ipc_regs` and the receiver
//! is woken. No intermediate queuing or memory allocation is needed.
//!
//! ## Performance features
//!
//! - **O(log n) PID lookup** via global task registry (no linear scan)
//! - **CapabilityCache** (16-entry direct-mapped) for repeated IPC validation
//! - **Tracepoints** for IpcFastSend / IpcFastReceive / IpcSlowPath events
//!
//! ## Register mapping
//!
//! The IPC register convention maps to architecture registers as follows:
//! - x86_64:  RDI, RSI, RDX, RCX, R8, R9, R10
//! - AArch64: X0, X1, X2, X3, X4, X5, X6
//! - RISC-V:  a0, a1, a2, a3, a4, a5, a6
//!
//! All share the same semantic layout (see `IPC_REG_*` constants below).
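//!
//! ## Example
//!
//! A minimal usage sketch (illustrative only; `cap_token` and `receiver_pid`
//! are placeholders for a previously granted capability and a known PID):
//!
//! ```rust,ignore
//! let msg = SmallMessage {
//!     capability: cap_token, // capability authorizing this send
//!     opcode: 1,             // service-defined operation code
//!     flags: 0,
//!     data: [0; 4],
//! };
//!
//! match fast_send(&msg, receiver_pid) {
//!     Ok(()) => {}                    // delivered into the receiver's ipc_regs
//!     Err(IpcError::WouldBlock) => {} // receiver not blocked: use the queued slow path
//!     Err(e) => return Err(e),        // invalid capability or unknown PID
//! }
//! ```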

// Fast-path IPC -- register-based transfer for <1us latency

use core::sync::atomic::{AtomicU64, Ordering};

use spin::Mutex;

use super::{
    error::{IpcError, Result},
    SmallMessage,
};
use crate::{
    arch::entropy::read_timestamp,
    cap::{space::CapabilityCache, token::CapabilityToken},
    process::pcb::ProcessState,
    sched::current_process,
};

/// Performance counter for fast path operations
static FAST_PATH_COUNT: AtomicU64 = AtomicU64::new(0);
static FAST_PATH_CYCLES: AtomicU64 = AtomicU64::new(0);
/// Counter for slow-path fallbacks (target not blocked)
static SLOW_PATH_FALLBACK_COUNT: AtomicU64 = AtomicU64::new(0);

/// Global capability cache for fast IPC validation.
///
/// 16-entry direct-mapped cache: on cache hit, capability validation
/// is a single hash + comparison (no capability space traversal).
static FAST_CAP_CACHE: Mutex<CapabilityCache> = Mutex::new(CapabilityCache::new());
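
// Direct-mapped lookup sketch (illustrative only; the real implementation is
// `crate::cap::space::CapabilityCache`, whose internals may differ):
//
//     fn cache_index(token: u64) -> usize {
//         (token as usize) & 0xF // 16 slots -> index from the low 4 bits
//     }
//
// Each token maps to exactly one slot, so a lookup is a single index
// computation plus one comparison -- no capability-space traversal.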

// IPC register semantic indices (architecture-neutral)
const IPC_REG_CAP: usize = 0; // Capability token
const IPC_REG_OPCODE: usize = 1; // Operation code
const IPC_REG_FLAGS: usize = 2; // Flags
const IPC_REG_DATA0: usize = 3; // Data word 0
const IPC_REG_DATA1: usize = 4; // Data word 1
const IPC_REG_DATA2: usize = 5; // Data word 2
const IPC_REG_DATA3: usize = 6; // Data word 3

/// Fast path IPC send for small messages
///
/// Copies the message directly into the target task's `ipc_regs` array
/// if the target is blocked waiting for a message. This avoids all
/// intermediate queuing and achieves sub-microsecond latency.
#[inline(always)]
pub fn fast_send(msg: &SmallMessage, target_pid: u64) -> Result<()> {
    let start = read_timestamp();

    // Quick capability validation (cache-accelerated)
    if !validate_capability_fast(msg.capability) {
        return Err(IpcError::InvalidCapability);
    }

    // Find target task via global registry (O(log n) lookup, no scheduler lock)
    #[cfg(feature = "alloc")]
    let target_ptr = {
        // First check current task (most common case for IPC reply)
        let current_match = {
            let sched = crate::sched::scheduler::SCHEDULER.lock();
            if let Some(current) = sched.current() {
                // SAFETY: current is a valid NonNull<Task> from the scheduler.
                unsafe { (*current.as_ptr()).pid.0 == target_pid }
            } else {
                false
            }
        };

        if current_match {
            // Target is self -- unusual but valid (self-IPC)
            let sched = crate::sched::scheduler::SCHEDULER.lock();
            sched.current()
        } else {
            // O(log n) lookup via global PID-to-Task registry
            crate::sched::scheduler::get_task_ptr(target_pid)
        }
    };

    #[cfg(not(feature = "alloc"))]
    let target_ptr = {
        let sched = crate::sched::scheduler::SCHEDULER.lock();
        if let Some(current) = sched.current() {
            unsafe {
                if (*current.as_ptr()).pid.0 == target_pid {
                    Some(current)
                } else {
                    None
                }
            }
        } else {
            None
        }
    };

    let target_ptr = match target_ptr {
        Some(ptr) => ptr,
        None => return Err(IpcError::ProcessNotFound),
    };

    // SAFETY: target_ptr is a valid NonNull<Task> from the task registry.
    // We check its state and, if blocked, write to its ipc_regs array.
    // The target is blocked (not running on any CPU), so there is no
    // concurrent access to ipc_regs.
    unsafe {
        let target = target_ptr.as_ptr();

        if (*target).state == ProcessState::Blocked {
            // Direct transfer: copy message into target's IPC registers
            (*target).ipc_regs[IPC_REG_CAP] = msg.capability;
            (*target).ipc_regs[IPC_REG_OPCODE] = msg.opcode as u64;
            (*target).ipc_regs[IPC_REG_FLAGS] = msg.flags as u64;
            (*target).ipc_regs[IPC_REG_DATA0] = msg.data[0];
            (*target).ipc_regs[IPC_REG_DATA1] = msg.data[1];
            (*target).ipc_regs[IPC_REG_DATA2] = msg.data[2];
            (*target).ipc_regs[IPC_REG_DATA3] = msg.data[3];

            // Wake up receiver via scheduler
            (*target).state = ProcessState::Ready;
            crate::sched::ipc_blocking::wake_up_process(crate::process::ProcessId((*target).pid.0));

            // Update performance counters
            let elapsed = read_timestamp() - start;
            FAST_PATH_COUNT.fetch_add(1, Ordering::Relaxed);
            FAST_PATH_CYCLES.fetch_add(elapsed, Ordering::Relaxed);

            // Cache the capability for future fast lookups
            if let Some(mut cache) = FAST_CAP_CACHE.try_lock() {
                let token = CapabilityToken::from_u64(msg.capability);
                cache.insert(token, crate::cap::Rights::ALL);
            }

            // Trace: IPC fast path send
            crate::trace!(
                crate::perf::trace::TraceEventType::IpcFastSend,
                target_pid,
                msg.capability
            );

            Ok(())
        } else {
            // Target not blocked -- fall back to queuing (slow path)
            SLOW_PATH_FALLBACK_COUNT.fetch_add(1, Ordering::Relaxed);

            // Trace: slow path fallback
            crate::trace!(
                crate::perf::trace::TraceEventType::IpcSlowPath,
                target_pid,
                msg.capability
            );

            Err(IpcError::WouldBlock)
        }
    }
}

/// Fast path IPC receive
///
/// If a message has already been deposited in the current task's `ipc_regs`
/// (by a fast_send while we were blocked), read it directly. Otherwise,
/// check the endpoint's message queue, and if empty, block.
#[inline(always)]
pub fn fast_receive(endpoint: u64, timeout: Option<u64>) -> Result<SmallMessage> {
    let current = current_process();

    // Check if message already waiting in endpoint queue
    if let Some(msg) = check_pending_message(endpoint) {
        // Trace: IPC fast path receive (from queue)
        crate::trace!(
            crate::perf::trace::TraceEventType::IpcFastReceive,
            endpoint,
            msg.capability
        );
        return Ok(msg);
    }

    // Block current process
    current.state = ProcessState::Blocked;
    current.blocked_on = Some(endpoint);

    // Yield CPU and wait for message
    yield_and_wait(timeout)?;

    // When we wake up, check if fast_send deposited data in our ipc_regs.
    // Read from current task's ipc_regs (set by sender's fast_send).
    let msg = read_from_current_task_ipc_regs();
    if msg.capability != 0 || msg.opcode != 0 {
        // Trace: IPC fast path receive (direct register transfer)
        crate::trace!(
            crate::perf::trace::TraceEventType::IpcFastReceive,
            endpoint,
            msg.capability
        );
        return Ok(msg);
    }

    // No fast-path message; re-check endpoint queue (slow path deposited it)
    if let Some(msg) = check_pending_message(endpoint) {
        return Ok(msg);
    }

    // Spurious wake-up or timeout -- return default
    Ok(SmallMessage {
        capability: 0,
        opcode: 0,
        flags: 0,
        data: [0; 4],
    })
}
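
// Receiver-side sketch (illustrative; ENDPOINT_ID is a placeholder for an
// endpoint this task holds a receive capability on):
//
//     let msg = fast_receive(ENDPOINT_ID, None)?; // blocks until a message lands
//     if msg.capability != 0 {
//         dispatch(msg.opcode, &msg.data);        // hypothetical handler
//     }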

/// Fast capability validation using CapabilityCache.
///
/// Checks the 16-entry direct-mapped cache first for O(1) validation.
/// On cache miss, falls back to range validation. Successfully validated
/// capabilities are cached by `fast_send()` after IPC completion.
#[inline(always)]
fn validate_capability_fast(cap: u64) -> bool {
    // Range check: valid capability tokens are in [1, 0x1_0000_0000)
    if cap == 0 || cap >= 0x1_0000_0000 {
        return false;
    }

    // Try capability cache for O(1) validation.
    // Use try_lock to avoid blocking on the fast path.
    if let Some(cache) = FAST_CAP_CACHE.try_lock() {
        let token = CapabilityToken::from_u64(cap);
        if cache.lookup(token).is_some() {
            return true; // Cache hit -- validated
        }
    }

    // Cache miss -- range check passed, treat as valid.
    // The capability will be cached on successful IPC completion.
    true
}

/// Read message from the current task's IPC registers.
fn read_from_current_task_ipc_regs() -> SmallMessage {
    let sched = crate::sched::scheduler::SCHEDULER.lock();
    if let Some(current) = sched.current() {
        // SAFETY: current is our task. We read ipc_regs which were written
        // by fast_send while we were blocked. No concurrent writer now.
        unsafe {
            let task = current.as_ptr();
            let regs = &(*task).ipc_regs;
            let msg = SmallMessage {
                capability: regs[IPC_REG_CAP],
                opcode: regs[IPC_REG_OPCODE] as u32,
                flags: regs[IPC_REG_FLAGS] as u32,
                data: [
                    regs[IPC_REG_DATA0],
                    regs[IPC_REG_DATA1],
                    regs[IPC_REG_DATA2],
                    regs[IPC_REG_DATA3],
                ],
            };
            // Clear ipc_regs after read to prevent stale re-reads
            (*task).ipc_regs = [0; 7];
            msg
        }
    } else {
        SmallMessage {
            capability: 0,
            opcode: 0,
            flags: 0,
            data: [0; 4],
        }
    }
}

/// Check for pending messages without blocking.
///
/// Queries the IPC registry for the endpoint and tries to dequeue a message.
/// Returns None if no message is waiting or the endpoint doesn't exist.
fn check_pending_message(endpoint: u64) -> Option<SmallMessage> {
    #[cfg(feature = "alloc")]
    {
        if let Some(msg) = crate::ipc::registry::try_receive_from_endpoint(endpoint) {
            return Some(match msg {
                super::Message::Small(sm) => sm,
                super::Message::Large(lg) => SmallMessage {
                    capability: lg.header.capability,
                    opcode: lg.header.opcode,
                    flags: lg.header.flags,
                    data: [0; 4],
                },
            });
        }
    }
    let _ = endpoint;
    None
}

/// Yield CPU and wait for message or timeout.
///
/// Blocks the current task via the scheduler. When a message arrives for
/// this endpoint, `wake_up_process()` will resume execution here. The
/// timeout is currently accepted but not enforced.
fn yield_and_wait(_timeout: Option<u64>) -> Result<()> {
    crate::sched::yield_cpu();
    Ok(())
}

/// Get fast-path performance statistics as `(fast_path_count, avg_cycles)`.
///
/// Slow-path fallbacks are reported separately by `get_slow_path_count()`.
pub fn get_fast_path_stats() -> (u64, u64) {
    let count = FAST_PATH_COUNT.load(Ordering::Relaxed);
    let cycles = FAST_PATH_CYCLES.load(Ordering::Relaxed);
    let avg_cycles = if count > 0 { cycles / count } else { 0 };
    (count, avg_cycles)
}

/// Get the number of slow-path fallbacks
pub fn get_slow_path_count() -> u64 {
    SLOW_PATH_FALLBACK_COUNT.load(Ordering::Relaxed)
}
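
// Usage sketch for the counters above (illustrative; the 3 GHz figure is an
// assumed timestamp frequency, not something this API guarantees):
//
//     let (count, avg_cycles) = get_fast_path_stats();
//     let avg_ns = avg_cycles / 3; // ~3 cycles per ns at 3 GHz
//     let total = count + get_slow_path_count();
//     let fast_pct = if total > 0 { count * 100 / total } else { 0 };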

// ---------------------------------------------------------------------------
// IPC Message Batching
// ---------------------------------------------------------------------------

/// Maximum number of messages in a batch before automatic flush.
pub const BATCH_SIZE: usize = 8;

/// IPC message batch for amortizing per-message overhead.
///
/// Collects multiple small messages destined for the same target and
/// delivers them in a single flush operation. This reduces per-message
/// overhead: after the first send, the capability stays warm in the
/// validation cache for the rest of the batch.
pub struct IpcBatch {
    /// Buffered messages.
    messages: [Option<SmallMessage>; BATCH_SIZE],
    /// Number of messages currently in the batch.
    count: usize,
    /// Target PID for all messages in this batch.
    target_pid: u64,
}

impl IpcBatch {
    /// Create a new empty batch targeting a specific process.
    pub fn new(target_pid: u64) -> Self {
        const NONE_MSG: Option<SmallMessage> = None;
        Self {
            messages: [NONE_MSG; BATCH_SIZE],
            count: 0,
            target_pid,
        }
    }

    /// Add a message to the batch.
    ///
    /// Returns `true` if the batch is now full and should be flushed.
    /// Returns `false` if there is still room for more messages. If the
    /// batch is already full, the message is silently dropped, so callers
    /// must flush whenever this returns `true`.
    pub fn add_to_batch(&mut self, msg: SmallMessage) -> bool {
        if self.count < BATCH_SIZE {
            self.messages[self.count] = Some(msg);
            self.count += 1;
        }
        self.count >= BATCH_SIZE
    }

    /// Number of messages currently in the batch.
    pub fn len(&self) -> usize {
        self.count
    }

    /// Whether the batch is empty.
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Flush all buffered messages by sending them via `fast_send`.
    ///
    /// Each message is sent individually via the fast path. Once the first
    /// successful send caches the capability, validation for the remaining
    /// messages is a cache hit rather than a full lookup.
    ///
    /// Returns the number of messages successfully sent. Messages that fail
    /// to send (e.g., with `WouldBlock`) are dropped when the batch is cleared.
    pub fn flush(&mut self) -> usize {
        let mut sent = 0;
        for i in 0..self.count {
            if let Some(ref msg) = self.messages[i] {
                if fast_send(msg, self.target_pid).is_ok() {
                    sent += 1;
                    crate::perf::record_ipc_message_sent();
                }
            }
        }

        if sent > 0 {
            crate::perf::record_ipc_batch_flushed();
        }

        // Clear the batch
        self.count = 0;
        for slot in &mut self.messages {
            *slot = None;
        }

        sent
    }

    /// Get the target PID for this batch.
    pub fn target_pid(&self) -> u64 {
        self.target_pid
    }
}

/// Flush an IPC batch (convenience function for external callers).
pub fn flush_batch(batch: &mut IpcBatch) -> usize {
    batch.flush()
}
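
// Batching usage sketch (illustrative; `receiver_pid` and `pending` are
// placeholders for a known PID and an iterator of SmallMessage values):
//
//     let mut batch = IpcBatch::new(receiver_pid);
//     for msg in pending {
//         if batch.add_to_batch(msg) {
//             batch.flush(); // batch full -- deliver and reset
//         }
//     }
//     batch.flush(); // deliver any remainder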

#[cfg(all(test, not(target_os = "none")))]
mod tests {
    use super::*;

    #[test]
    fn test_fast_path_stats() {
        let (count, avg) = get_fast_path_stats();
        assert_eq!(count, 0);
        assert_eq!(avg, 0);
    }

    #[test]
    fn test_slow_path_count() {
        assert_eq!(get_slow_path_count(), 0);
    }

    #[test]
    fn test_batch_add_and_full() {
        let mut batch = IpcBatch::new(42);
        assert!(batch.is_empty());
        assert_eq!(batch.len(), 0);

        // Add messages until full
        for i in 0..BATCH_SIZE - 1 {
            let msg = SmallMessage {
                capability: (i as u64) + 1,
                opcode: 0,
                flags: 0,
                data: [0; 4],
            };
            assert!(!batch.add_to_batch(msg));
        }

        assert_eq!(batch.len(), BATCH_SIZE - 1);

        // Last one should indicate full
        let msg = SmallMessage {
            capability: 100,
            opcode: 0,
            flags: 0,
            data: [0; 4],
        };
        assert!(batch.add_to_batch(msg));
        assert_eq!(batch.len(), BATCH_SIZE);
    }

    #[test]
    fn test_batch_target_pid() {
        let batch = IpcBatch::new(99);
        assert_eq!(batch.target_pid(), 99);
    }
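
    #[test]
    fn test_flush_empty_batch() {
        // Flushing an empty batch performs no sends and leaves it empty.
        let mut batch = IpcBatch::new(7);
        assert_eq!(batch.flush(), 0);
        assert!(batch.is_empty());
    }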
}