⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/net/
epoll.rs

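//! epoll I/O multiplexing
//!
//! Linux-compatible epoll implementation for event-driven I/O.
//! Used by Rust's mio/tokio for async I/O on VeridianOS.
//!
//! # Architecture
//!
//! Each epoll instance is an fd-based object containing an interest list
//! (the fds being monitored, each with its event mask and user data).
//! There is no persistent ready list: readiness is computed on demand by
//! polling every fd in the interest list each time the instance is waited on.
//!
//! Level-triggered (default), edge-triggered (`EPOLLET`) and one-shot
//! (`EPOLLONESHOT`) registrations are accepted. The `EPOLLET` flag is recorded
//! per entry, but the polling loop does not consult it yet, so edge-triggered
//! entries are currently reported like level-triggered ones.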
13
14extern crate alloc;
15
16use alloc::collections::BTreeMap;
17
18use spin::Mutex;
19
20use crate::error::KernelError;
21
/// System-wide cap on the number of epoll instances that may exist at once.
const MAX_EPOLL_INSTANCES: usize = 1 << 8;

/// Cap on the number of fds a single epoll instance may monitor.
const MAX_EPOLL_FDS: usize = 1 << 10;
27
28// ============================================================================
29// Event flags (matching Linux epoll.h values)
30// ============================================================================
31
/// The fd has data available to read.
pub const EPOLLIN: u32 = 1 << 0;
/// The fd can accept a write.
pub const EPOLLOUT: u32 = 1 << 2;
/// An error condition is pending on the fd.
pub const EPOLLERR: u32 = 1 << 3;
/// Hang up (the peer closed its end of the connection).
pub const EPOLLHUP: u32 = 1 << 4;
/// Request edge-triggered notification for the registration.
pub const EPOLLET: u32 = 1 << 31;
/// Request one-shot notification (the entry is disabled after its first event).
pub const EPOLLONESHOT: u32 = 1 << 30;
44
45// ============================================================================
46// epoll_ctl operations
47// ============================================================================
48
/// `epoll_ctl` operation: register a new fd in the interest list.
pub const EPOLL_CTL_ADD: u32 = 0x1;
/// `epoll_ctl` operation: drop an fd from the interest list.
pub const EPOLL_CTL_DEL: u32 = 0x2;
/// `epoll_ctl` operation: replace the event mask and user data of a registered fd.
pub const EPOLL_CTL_MOD: u32 = 0x3;
55
56// ============================================================================
57// Data structures
58// ============================================================================
59
/// Event record exchanged with user space by the epoll calls.
///
/// The `repr(C)` layout and field order are intended to mirror Linux
/// `struct epoll_event`.
///
/// NOTE(review): Linux and musl declare `struct epoll_event` as packed on
/// x86_64 (12 bytes); plain `repr(C)` pads this type to 16 bytes there.
/// Confirm that the syscall layer converts between the two layouts.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct EpollEvent {
    /// Bitwise OR of `EPOLL*` flags (requested on input, ready on output).
    pub events: u32,
    /// Opaque caller-chosen value returned unchanged with each event
    /// (typically the fd or a pointer).
    pub data: u64,
}
70
/// One registration in an epoll instance's interest list.
#[derive(Clone, Debug)]
struct InterestEntry {
    /// The monitored file descriptor.
    fd: i32,
    /// Readiness bits the caller asked for (mode flags already stripped).
    events: u32,
    /// Caller-supplied value handed back with every event for this fd.
    data: u64,
    /// Set when the registration carried `EPOLLET`.
    edge_triggered: bool,
    /// Set when the registration carried `EPOLLONESHOT`.
    one_shot: bool,
    /// Set once a one-shot entry has delivered its event.
    disabled: bool,
}
87
88/// An epoll instance.
89struct EpollInstance {
90    /// Interest list: fd -> entry.
91    interest: BTreeMap<i32, InterestEntry>,
92    /// ID of this instance (for lookup).
93    _id: u32,
94    /// Owning process PID.
95    _owner_pid: u64,
96}
97
98impl EpollInstance {
99    fn new(id: u32, owner_pid: u64) -> Self {
100        Self {
101            interest: BTreeMap::new(),
102            _id: id,
103            _owner_pid: owner_pid,
104        }
105    }
106
107    /// Add an fd to the interest list.
108    fn ctl_add(&mut self, fd: i32, event: &EpollEvent) -> Result<(), KernelError> {
109        if self.interest.contains_key(&fd) {
110            return Err(KernelError::AlreadyExists {
111                resource: "epoll fd entry",
112                id: fd as u64,
113            });
114        }
115        if self.interest.len() >= MAX_EPOLL_FDS {
116            return Err(KernelError::ResourceExhausted {
117                resource: "epoll interest list",
118            });
119        }
120
121        let entry = InterestEntry {
122            fd,
123            events: event.events & !(EPOLLET | EPOLLONESHOT),
124            data: event.data,
125            edge_triggered: event.events & EPOLLET != 0,
126            one_shot: event.events & EPOLLONESHOT != 0,
127            disabled: false,
128        };
129        self.interest.insert(fd, entry);
130        Ok(())
131    }
132
133    /// Remove an fd from the interest list.
134    fn ctl_del(&mut self, fd: i32) -> Result<(), KernelError> {
135        self.interest.remove(&fd).ok_or(KernelError::NotFound {
136            resource: "epoll fd entry",
137            id: fd as u64,
138        })?;
139        Ok(())
140    }
141
142    /// Modify events for an fd.
143    fn ctl_mod(&mut self, fd: i32, event: &EpollEvent) -> Result<(), KernelError> {
144        let entry = self.interest.get_mut(&fd).ok_or(KernelError::NotFound {
145            resource: "epoll fd entry",
146            id: fd as u64,
147        })?;
148
149        entry.events = event.events & !(EPOLLET | EPOLLONESHOT);
150        entry.data = event.data;
151        entry.edge_triggered = event.events & EPOLLET != 0;
152        entry.one_shot = event.events & EPOLLONESHOT != 0;
153        entry.disabled = false;
154        Ok(())
155    }
156
157    /// Poll for ready events. Returns the number of ready fds.
158    ///
159    /// Checks each fd in the interest list against the current fd state
160    /// (via `poll_fd_readiness`). For level-triggered fds, events fire
161    /// every time the condition is true. For edge-triggered, events fire
162    /// only on state transitions (simplified: fire once then require re-arm
163    /// via EPOLL_CTL_MOD).
164    fn poll_events(&mut self, events: &mut [EpollEvent]) -> usize {
165        let max_events = events.len();
166        let mut count = 0;
167
168        for entry in self.interest.values_mut() {
169            if count >= max_events {
170                break;
171            }
172            if entry.disabled {
173                continue;
174            }
175
176            let ready = poll_fd_readiness(entry.fd);
177            let matched = ready & entry.events;
178
179            if matched != 0 {
180                events[count] = EpollEvent {
181                    events: matched,
182                    data: entry.data,
183                };
184                count += 1;
185
186                if entry.one_shot {
187                    entry.disabled = true;
188                }
189            }
190        }
191
192        count
193    }
194}
195
196// ============================================================================
197// Global epoll registry
198// ============================================================================
199
200/// Global registry of all epoll instances.
201static EPOLL_REGISTRY: Mutex<Option<EpollRegistry>> = Mutex::new(None);
202
203struct EpollRegistry {
204    instances: BTreeMap<u32, EpollInstance>,
205    next_id: u32,
206}
207
208impl EpollRegistry {
209    fn new() -> Self {
210        Self {
211            instances: BTreeMap::new(),
212            next_id: 1,
213        }
214    }
215}
216
217/// Initialize the epoll subsystem.
218pub fn init() -> Result<(), KernelError> {
219    let mut reg = EPOLL_REGISTRY.lock();
220    if reg.is_some() {
221        return Ok(());
222    }
223    *reg = Some(EpollRegistry::new());
224    Ok(())
225}
226
227// ============================================================================
228// Public API (called from syscall handlers)
229// ============================================================================
230
231/// Create a new epoll instance. Returns the epoll ID (used as a pseudo-fd).
232pub fn epoll_create(owner_pid: u64) -> Result<u32, KernelError> {
233    let mut reg_guard = EPOLL_REGISTRY.lock();
234    let reg = reg_guard
235        .as_mut()
236        .ok_or(KernelError::NotInitialized { subsystem: "epoll" })?;
237
238    if reg.instances.len() >= MAX_EPOLL_INSTANCES {
239        return Err(KernelError::ResourceExhausted {
240            resource: "epoll instances",
241        });
242    }
243
244    let id = reg.next_id;
245    reg.next_id += 1;
246    reg.instances.insert(id, EpollInstance::new(id, owner_pid));
247    Ok(id)
248}
249
250/// Perform a control operation on an epoll instance.
251pub fn epoll_ctl(
252    epoll_id: u32,
253    op: u32,
254    fd: i32,
255    event: Option<&EpollEvent>,
256) -> Result<(), KernelError> {
257    let mut reg_guard = EPOLL_REGISTRY.lock();
258    let reg = reg_guard
259        .as_mut()
260        .ok_or(KernelError::NotInitialized { subsystem: "epoll" })?;
261
262    let instance = reg
263        .instances
264        .get_mut(&epoll_id)
265        .ok_or(KernelError::NotFound {
266            resource: "epoll instance",
267            id: epoll_id as u64,
268        })?;
269
270    match op {
271        EPOLL_CTL_ADD => {
272            let ev = event.ok_or(KernelError::InvalidArgument {
273                name: "event",
274                value: "required for EPOLL_CTL_ADD",
275            })?;
276            instance.ctl_add(fd, ev)
277        }
278        EPOLL_CTL_DEL => instance.ctl_del(fd),
279        EPOLL_CTL_MOD => {
280            let ev = event.ok_or(KernelError::InvalidArgument {
281                name: "event",
282                value: "required for EPOLL_CTL_MOD",
283            })?;
284            instance.ctl_mod(fd, ev)
285        }
286        _ => Err(KernelError::InvalidArgument {
287            name: "op",
288            value: "invalid epoll_ctl operation",
289        }),
290    }
291}
292
293/// Wait for events on an epoll instance.
294///
295/// Returns the number of ready events written to `events`.
296/// If `timeout_ms` is 0, returns immediately (non-blocking poll).
297/// If `timeout_ms` is -1, waits up to 30s (capped to prevent permanent hangs).
298/// Otherwise waits up to `timeout_ms` milliseconds.
299pub fn epoll_wait(
300    epoll_id: u32,
301    events: &mut [EpollEvent],
302    timeout_ms: i32,
303) -> Result<usize, KernelError> {
304    let start = crate::timer::get_uptime_ms();
305    // Cap infinite wait to 30 seconds to prevent permanent hangs
306    let max_wait_ms: u64 = if timeout_ms < 0 {
307        30_000
308    } else {
309        timeout_ms as u64
310    };
311
312    loop {
313        let count = {
314            let mut reg_guard = EPOLL_REGISTRY.lock();
315            let reg = reg_guard
316                .as_mut()
317                .ok_or(KernelError::NotInitialized { subsystem: "epoll" })?;
318
319            let instance = reg
320                .instances
321                .get_mut(&epoll_id)
322                .ok_or(KernelError::NotFound {
323                    resource: "epoll instance",
324                    id: epoll_id as u64,
325                })?;
326
327            instance.poll_events(events)
328        }; // Drop lock before yielding
329
330        if count > 0 || timeout_ms == 0 {
331            return Ok(count);
332        }
333
334        if crate::timer::get_uptime_ms() - start >= max_wait_ms {
335            return Ok(0);
336        }
337
338        crate::sched::yield_cpu();
339    }
340}
341
342/// Destroy an epoll instance.
343pub fn epoll_destroy(epoll_id: u32) -> Result<(), KernelError> {
344    let mut reg_guard = EPOLL_REGISTRY.lock();
345    let reg = reg_guard
346        .as_mut()
347        .ok_or(KernelError::NotInitialized { subsystem: "epoll" })?;
348
349    reg.instances
350        .remove(&epoll_id)
351        .ok_or(KernelError::NotFound {
352            resource: "epoll instance",
353            id: epoll_id as u64,
354        })?;
355    Ok(())
356}
357
358// ============================================================================
359// Internal: fd readiness polling
360// ============================================================================
361
362/// Query the readiness of a file descriptor.
363///
364/// Checks the kernel's fd state (pipe buffers, socket receive queues, etc.)
365/// and returns the matching event flags. Also checks special fd types
366/// (eventfd, timerfd, signalfd) which use pseudo-fd IDs from their own
367/// registries.
368fn poll_fd_readiness(fd: i32) -> u32 {
369    // All fd types (eventfd, timerfd, signalfd, pipes, sockets, files) are
370    // now VfsNode-backed in the process file table. poll_readiness() on
371    // each VfsNode handles type-specific readiness checking.
372    let proc = match crate::process::current_process() {
373        Some(p) => p,
374        None => return EPOLLERR,
375    };
376
377    let file_table = proc.file_table.lock();
378    let file = match file_table.get(fd as usize) {
379        Some(f) => f,
380        None => return EPOLLERR | EPOLLHUP,
381    };
382
383    // Use VfsNode::poll_readiness() for actual buffer state checking.
384    // Maps POLL* flags (u16) to EPOLL* flags (u32) -- same bit positions.
385    let readiness = file.node.poll_readiness() as u32;
386    let mut ready = 0u32;
387    if readiness & 0x0001 != 0 {
388        ready |= EPOLLIN;
389    }
390    if readiness & 0x0004 != 0 {
391        ready |= EPOLLOUT;
392    }
393    if readiness & 0x0008 != 0 {
394        ready |= EPOLLERR;
395    }
396    if readiness & 0x0010 != 0 {
397        ready |= EPOLLHUP;
398    }
399
400    ready
401}
402
403// ============================================================================
404// VfsNode adapter -- allows epoll fd to live in process file table
405// ============================================================================
406
407use alloc::{sync::Arc, vec::Vec};
408
409use crate::fs::{DirEntry, Metadata, NodeType, Permissions, VfsNode};
410
/// File-table handle for an epoll instance.
///
/// Wrapping the instance in a `VfsNode` lets epoll_create1() hand out a real
/// file descriptor, which musl expects to be able to close(). read() and
/// write() on the descriptor are rejected.
pub struct EpollNode {
    /// Registry ID of the wrapped epoll instance.
    epoll_id: u32,
}

impl EpollNode {
    /// Wraps the already-created epoll instance identified by `epoll_id`.
    pub fn new(epoll_id: u32) -> Self {
        EpollNode { epoll_id }
    }

    /// Returns the registry ID to pass to `epoll_ctl` / `epoll_wait`.
    pub fn epoll_id(&self) -> u32 {
        let EpollNode { epoll_id } = self;
        *epoll_id
    }
}
429
430impl VfsNode for EpollNode {
431    fn node_type(&self) -> NodeType {
432        NodeType::CharDevice
433    }
434
435    fn read(&self, _offset: usize, _buffer: &mut [u8]) -> Result<usize, KernelError> {
436        Err(KernelError::PermissionDenied {
437            operation: "read epoll",
438        })
439    }
440
441    fn write(&self, _offset: usize, _data: &[u8]) -> Result<usize, KernelError> {
442        Err(KernelError::PermissionDenied {
443            operation: "write epoll",
444        })
445    }
446
447    fn as_any(&self) -> Option<&dyn core::any::Any> {
448        Some(self)
449    }
450
451    fn metadata(&self) -> Result<Metadata, KernelError> {
452        Ok(Metadata {
453            size: 0,
454            node_type: NodeType::CharDevice,
455            permissions: Permissions::from_mode(0o666),
456            uid: 0,
457            gid: 0,
458            created: 0,
459            modified: 0,
460            accessed: 0,
461            inode: 0,
462        })
463    }
464
465    fn readdir(&self) -> Result<Vec<DirEntry>, KernelError> {
466        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
467    }
468
469    fn lookup(&self, _name: &str) -> Result<Arc<dyn VfsNode>, KernelError> {
470        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
471    }
472
473    fn create(
474        &self,
475        _name: &str,
476        _permissions: Permissions,
477    ) -> Result<Arc<dyn VfsNode>, KernelError> {
478        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
479    }
480
481    fn mkdir(
482        &self,
483        _name: &str,
484        _permissions: Permissions,
485    ) -> Result<Arc<dyn VfsNode>, KernelError> {
486        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
487    }
488
489    fn unlink(&self, _name: &str) -> Result<(), KernelError> {
490        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
491    }
492
493    fn truncate(&self, _size: usize) -> Result<(), KernelError> {
494        Err(KernelError::PermissionDenied {
495            operation: "truncate epoll",
496        })
497    }
498}
499
500impl Drop for EpollNode {
501    fn drop(&mut self) {
502        let _ = epoll_destroy(self.epoll_id);
503    }
504}