⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/fs/
eventfd.rs

1//! eventfd -- Event notification file descriptor
2//!
3//! Provides a file descriptor for event wait/notify, commonly used by
4//! epoll-based event loops (Qt6, glib). Supports both counter and
5//! semaphore semantics.
6//!
7//! ## Syscall Interface
8//! - `eventfd_create(initval, flags) -> fd`  (syscall 330)
9//! - Read/write via standard `read(2)` / `write(2)` on returned fd
10//!
11//! ## Semantics
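//! - **write**: Adds the 8-byte unsigned integer to the internal counter.
//!   Writing `u64::MAX` is rejected as an invalid argument. If the sum would
//!   exceed `u64::MAX - 1`, the call returns EAGAIN in non-blocking mode;
//!   otherwise it waits (up to 30 s, then EAGAIN) for a read to make room.
//! - **read**: Returns the current counter as an 8-byte unsigned integer and
//!   resets it to zero. In semaphore mode, returns 1 and decrements by 1.
//!   If the counter is zero, the call returns EAGAIN in non-blocking mode;
//!   otherwise it waits (up to 30 s, then EAGAIN) for a write.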
17
18#![allow(dead_code)]
19
20use alloc::collections::BTreeMap;
21use core::sync::atomic::{AtomicU64, Ordering};
22
23use spin::Mutex;
24
25use crate::syscall::{SyscallError, SyscallResult};
26
/// Upper bound on how many eventfd instances may be live at once, system-wide.
const MAX_EVENTFD_INSTANCES: usize = 4 * 1024;

/// `EFD_SEMAPHORE`: a read yields 1 and decrements the counter by one rather
/// than draining it.
pub const EFD_SEMAPHORE: u32 = 0x0000_0001;
/// `EFD_NONBLOCK`: reads/writes that cannot proceed fail with EAGAIN rather
/// than waiting.
pub const EFD_NONBLOCK: u32 = 0x0000_0800;
/// `EFD_CLOEXEC`: close-on-exec request; not enforced by this module.
pub const EFD_CLOEXEC: u32 = 0x0008_0000;
36
/// Per-eventfd state held in the global registry.
struct EventFdInstance {
    /// Semaphore mode: a read takes 1 from the counter instead of draining it.
    semaphore: bool,
    /// Non-blocking mode: operations that cannot proceed return `WouldBlock`
    /// immediately instead of waiting.
    nonblock: bool,
    /// Accumulated event count; writes add to it, reads consume it.
    counter: u64,
    /// PID of the creating process (0 when no current process was available).
    owner_pid: u64,
}
48
49/// Global registry of eventfd instances, keyed by a monotonic ID.
50static EVENTFD_REGISTRY: Mutex<BTreeMap<u32, EventFdInstance>> = Mutex::new(BTreeMap::new());
51
52/// Next ID for eventfd allocation.
53static NEXT_EVENTFD_ID: AtomicU64 = AtomicU64::new(1);
54
55/// Create a new eventfd.
56///
57/// # Arguments
58/// - `initval`: Initial counter value.
59/// - `flags`: Combination of `EFD_SEMAPHORE`, `EFD_NONBLOCK`, `EFD_CLOEXEC`.
60///
61/// # Returns
62/// The eventfd ID (used as a pseudo-fd) on success.
63pub fn eventfd_create(initval: u32, flags: u32) -> SyscallResult {
64    let pid = crate::process::current_process()
65        .map(|p| p.pid.0)
66        .unwrap_or(0);
67
68    let semaphore = (flags & EFD_SEMAPHORE) != 0;
69    let nonblock = (flags & EFD_NONBLOCK) != 0;
70
71    let instance = EventFdInstance {
72        counter: initval as u64,
73        semaphore,
74        nonblock,
75        owner_pid: pid,
76    };
77
78    let id = NEXT_EVENTFD_ID.fetch_add(1, Ordering::Relaxed) as u32;
79
80    let mut registry = EVENTFD_REGISTRY.lock();
81    if registry.len() >= MAX_EVENTFD_INSTANCES {
82        return Err(SyscallError::OutOfMemory);
83    }
84    registry.insert(id, instance);
85    Ok(id as usize)
86}
87
88/// Read from an eventfd. Returns the counter value as a u64.
89///
90/// In normal mode: returns the full counter and resets to 0.
91/// In semaphore mode: returns 1 and decrements by 1.
92/// If counter is 0 and nonblock is set, returns EAGAIN (WouldBlock).
93/// If blocking mode, busy-waits with scheduler yield until counter > 0
94/// (capped at 30s to prevent permanent hangs).
95pub fn eventfd_read(efd_id: u32) -> Result<u64, SyscallError> {
96    let start = crate::timer::get_uptime_ms();
97    const MAX_BLOCK_MS: u64 = 30_000;
98
99    loop {
100        let mut registry = EVENTFD_REGISTRY.lock();
101        let instance = registry
102            .get_mut(&efd_id)
103            .ok_or(SyscallError::BadFileDescriptor)?;
104
105        if instance.counter > 0 {
106            return if instance.semaphore {
107                instance.counter = instance.counter.saturating_sub(1);
108                Ok(1)
109            } else {
110                let val = instance.counter;
111                instance.counter = 0;
112                Ok(val)
113            };
114        }
115
116        if instance.nonblock {
117            return Err(SyscallError::WouldBlock);
118        }
119
120        // Release lock before yielding
121        drop(registry);
122
123        if crate::timer::get_uptime_ms() - start >= MAX_BLOCK_MS {
124            return Err(SyscallError::WouldBlock);
125        }
126
127        crate::sched::yield_cpu();
128    }
129}
130
131/// Write to an eventfd. Adds `value` to the internal counter.
132///
133/// If the addition would overflow `u64::MAX - 1`, returns EAGAIN when
134/// nonblock is set, otherwise busy-waits until a read drains the counter
135/// enough (capped at 30s).
136pub fn eventfd_write(efd_id: u32, value: u64) -> SyscallResult {
137    if value == u64::MAX {
138        return Err(SyscallError::InvalidArgument);
139    }
140
141    let start = crate::timer::get_uptime_ms();
142    const MAX_BLOCK_MS: u64 = 30_000;
143    let max = u64::MAX - 1;
144
145    loop {
146        let mut registry = EVENTFD_REGISTRY.lock();
147        let instance = registry
148            .get_mut(&efd_id)
149            .ok_or(SyscallError::BadFileDescriptor)?;
150
151        if instance.counter <= max - value {
152            instance.counter = instance.counter.saturating_add(value);
153            return Ok(0);
154        }
155
156        if instance.nonblock {
157            return Err(SyscallError::WouldBlock);
158        }
159
160        drop(registry);
161
162        if crate::timer::get_uptime_ms() - start >= MAX_BLOCK_MS {
163            return Err(SyscallError::WouldBlock);
164        }
165
166        crate::sched::yield_cpu();
167    }
168}
169
170/// Query whether an eventfd is readable (counter > 0).
171/// Used by epoll to check readiness without consuming data.
172pub fn is_readable(efd_id: u32) -> bool {
173    let registry = EVENTFD_REGISTRY.lock();
174    registry.get(&efd_id).is_some_and(|i| i.counter > 0)
175}
176
177/// Query whether an eventfd is writable (counter < u64::MAX - 1).
178pub fn is_writable(efd_id: u32) -> bool {
179    let registry = EVENTFD_REGISTRY.lock();
180    registry
181        .get(&efd_id)
182        .is_some_and(|i| i.counter < u64::MAX - 1)
183}
184
185/// Close (destroy) an eventfd instance.
186pub fn eventfd_close(efd_id: u32) -> SyscallResult {
187    let mut registry = EVENTFD_REGISTRY.lock();
188    registry
189        .remove(&efd_id)
190        .ok_or(SyscallError::BadFileDescriptor)?;
191    Ok(0)
192}
193
194// ── VfsNode adapter ────────────────────────────────────────────────────
195
196use alloc::{sync::Arc, vec::Vec};
197
198use super::{DirEntry, Metadata, NodeType, Permissions, VfsNode};
199use crate::error::KernelError;
200
/// Adapter that exposes one eventfd instance through the `VfsNode` interface.
///
/// Wrapping the instance in a node lets it live in a process's file table, so
/// the ordinary read()/write()/close()/epoll paths operate on it. musl's
/// eventfd2() syscall expects a real file descriptor.
pub struct EventFdNode {
    /// Registry ID of the eventfd this node forwards to.
    efd_id: u32,
}

impl EventFdNode {
    /// Build a node that forwards to the eventfd registered under `efd_id`.
    pub fn new(efd_id: u32) -> Self {
        EventFdNode { efd_id }
    }
}
215
216impl VfsNode for EventFdNode {
217    fn node_type(&self) -> NodeType {
218        NodeType::CharDevice
219    }
220
221    fn read(&self, _offset: usize, buffer: &mut [u8]) -> Result<usize, KernelError> {
222        if buffer.len() < 8 {
223            return Err(KernelError::InvalidArgument {
224                name: "buflen",
225                value: "must be at least 8 bytes for eventfd",
226            });
227        }
228        let val = eventfd_read(self.efd_id).map_err(|e| match e {
229            SyscallError::WouldBlock => KernelError::WouldBlock,
230            _ => KernelError::FsError(crate::error::FsError::BadFileDescriptor),
231        })?;
232        buffer[..8].copy_from_slice(&val.to_le_bytes());
233        Ok(8)
234    }
235
236    fn write(&self, _offset: usize, data: &[u8]) -> Result<usize, KernelError> {
237        if data.len() < 8 {
238            return Err(KernelError::InvalidArgument {
239                name: "buflen",
240                value: "must be at least 8 bytes for eventfd",
241            });
242        }
243        let val =
244            u64::from_le_bytes(
245                data[..8]
246                    .try_into()
247                    .map_err(|_| KernelError::InvalidArgument {
248                        name: "data",
249                        value: "invalid byte slice for u64",
250                    })?,
251            );
252        eventfd_write(self.efd_id, val).map_err(|e| match e {
253            SyscallError::WouldBlock => KernelError::WouldBlock,
254            SyscallError::InvalidArgument => KernelError::InvalidArgument {
255                name: "value",
256                value: "u64::MAX is not a valid eventfd value",
257            },
258            _ => KernelError::FsError(crate::error::FsError::BadFileDescriptor),
259        })?;
260        Ok(8)
261    }
262
263    fn poll_readiness(&self) -> u16 {
264        let mut events = 0u16;
265        if is_readable(self.efd_id) {
266            events |= 0x0001; // POLLIN
267        }
268        if is_writable(self.efd_id) {
269            events |= 0x0004; // POLLOUT
270        }
271        events
272    }
273
274    fn metadata(&self) -> Result<Metadata, KernelError> {
275        Ok(Metadata {
276            size: 0,
277            node_type: NodeType::CharDevice,
278            permissions: Permissions::from_mode(0o666),
279            uid: 0,
280            gid: 0,
281            created: 0,
282            modified: 0,
283            accessed: 0,
284            inode: 0,
285        })
286    }
287
288    fn readdir(&self) -> Result<Vec<DirEntry>, KernelError> {
289        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
290    }
291
292    fn lookup(&self, _name: &str) -> Result<Arc<dyn VfsNode>, KernelError> {
293        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
294    }
295
296    fn create(
297        &self,
298        _name: &str,
299        _permissions: Permissions,
300    ) -> Result<Arc<dyn VfsNode>, KernelError> {
301        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
302    }
303
304    fn mkdir(
305        &self,
306        _name: &str,
307        _permissions: Permissions,
308    ) -> Result<Arc<dyn VfsNode>, KernelError> {
309        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
310    }
311
312    fn unlink(&self, _name: &str) -> Result<(), KernelError> {
313        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
314    }
315
316    fn truncate(&self, _size: usize) -> Result<(), KernelError> {
317        Err(KernelError::PermissionDenied {
318            operation: "truncate eventfd",
319        })
320    }
321}
322
323impl Drop for EventFdNode {
324    fn drop(&mut self) {
325        let _ = eventfd_close(self.efd_id);
326    }
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    // Notes on test hygiene:
    // - Tests never clear the global registry. Clearing it would delete
    //   instances that belong to other tests running concurrently; each test
    //   closes only the instance it created.
    // - Any read that is expected to find an empty counter uses EFD_NONBLOCK,
    //   so the test gets `WouldBlock` immediately instead of spinning in the
    //   blocking wait loop.

    /// Create an eventfd and return its ID in the `u32` form the API takes.
    fn create(initval: u32, flags: u32) -> u32 {
        eventfd_create(initval, flags).unwrap() as u32
    }

    /// True when `result` is the `WouldBlock` error.
    fn would_block<T>(result: Result<T, SyscallError>) -> bool {
        matches!(result, Err(SyscallError::WouldBlock))
    }

    #[test]
    fn test_eventfd_create_and_read() {
        let id = create(42, EFD_NONBLOCK);
        assert_eq!(eventfd_read(id).unwrap(), 42);

        // Counter should be 0 after read
        assert!(would_block(eventfd_read(id)));

        eventfd_close(id).unwrap();
    }

    #[test]
    fn test_eventfd_semaphore_mode() {
        let id = create(3, EFD_SEMAPHORE | EFD_NONBLOCK);

        // Each read returns 1 and decrements
        assert_eq!(eventfd_read(id).unwrap(), 1);
        assert_eq!(eventfd_read(id).unwrap(), 1);
        assert_eq!(eventfd_read(id).unwrap(), 1);

        // Now counter is 0
        assert!(would_block(eventfd_read(id)));

        eventfd_close(id).unwrap();
    }

    #[test]
    fn test_eventfd_write_accumulates() {
        let id = create(0, 0);
        eventfd_write(id, 10).unwrap();
        eventfd_write(id, 20).unwrap();

        assert_eq!(eventfd_read(id).unwrap(), 30);

        eventfd_close(id).unwrap();
    }

    #[test]
    fn test_eventfd_close() {
        let id = create(0, 0);
        eventfd_close(id).unwrap();

        // Should fail after close
        assert!(matches!(
            eventfd_read(id),
            Err(SyscallError::BadFileDescriptor)
        ));
        assert!(eventfd_close(id).is_err());
    }

    #[test]
    fn test_eventfd_nonblock_on_empty() {
        let id = create(0, EFD_NONBLOCK);
        assert!(would_block(eventfd_read(id)));

        eventfd_close(id).unwrap();
    }

    #[test]
    fn test_eventfd_write_overflow() {
        let id = create(0, EFD_NONBLOCK);
        // Write near max
        eventfd_write(id, u64::MAX - 2).unwrap();
        // This should fail (would overflow past MAX-1)
        assert!(would_block(eventfd_write(id, 2)));
        // Landing exactly on the ceiling is still allowed
        eventfd_write(id, 1).unwrap();
        assert!(!is_writable(id));

        eventfd_close(id).unwrap();
    }

    #[test]
    fn test_eventfd_write_max_rejected() {
        let id = create(0, 0);
        assert!(matches!(
            eventfd_write(id, u64::MAX),
            Err(SyscallError::InvalidArgument)
        ));

        eventfd_close(id).unwrap();
    }
}