⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/fs/
timerfd.rs

1//! timerfd -- Timer notification file descriptor
2//!
3//! Provides file descriptors that deliver timer expiration notifications,
4//! integrable with epoll/poll. Used by Qt6 for frame pacing and event
5//! loop timeouts, and by KWin for compositor frame scheduling.
6//!
7//! ## Syscall Interface
8//! - `timerfd_create(clockid, flags) -> fd`     (syscall 331)
9//! - `timerfd_settime(fd, flags, new, old) -> 0` (syscall 332)
10//! - `timerfd_gettime(fd, curr) -> 0`            (syscall 333)
11//! - Read via standard `read(2)` on returned fd
12//!
13//! ## Semantics
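//! - **read**: Returns the number of expirations since the last read as a u64.
//!   Returns EAGAIN if there are no expirations and the fd is non-blocking;
//!   otherwise blocks (yielding the CPU between checks) for at most 30 seconds,
//!   after which it also returns EAGAIN.
//! - Timer resolution is based on kernel uptime (TSC-derived) and is limited
//!   to millisecond granularity, because the uptime counter is read in ms.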
17
18#![allow(dead_code)]
19
20use alloc::collections::BTreeMap;
21use core::sync::atomic::{AtomicU64, Ordering};
22
23use spin::Mutex;
24
25use crate::syscall::{SyscallError, SyscallResult};
26
/// Maximum number of timerfd instances system-wide.
///
/// `timerfd_create` refuses to register a new instance once the registry
/// holds this many entries.
const MAX_TIMERFD_INSTANCES: usize = 4096;

/// Clock ID for the wall clock (subset of POSIX clocks).
///
/// NOTE(review): the clock ID is stored per instance, but every expiry
/// computation in this file uses the uptime-based clock -- confirm whether
/// realtime timers are meant to behave differently.
pub const CLOCK_REALTIME: u32 = 0;
/// Clock ID for the monotonic clock (subset of POSIX clocks).
pub const CLOCK_MONOTONIC: u32 = 1;

/// TFD_NONBLOCK: Return EAGAIN instead of blocking.
pub const TFD_NONBLOCK: u32 = 0x800;
/// TFD_CLOEXEC: Set close-on-exec.
///
/// Nothing in this module implements close-on-exec itself; presumably the
/// file-descriptor table layer handles it -- TODO confirm.
pub const TFD_CLOEXEC: u32 = 0x80000;

/// TFD_TIMER_ABSTIME: Interpret new_value.it_value as absolute time.
///
/// This is a `timerfd_settime` flag, not a `timerfd_create` flag.
pub const TFD_TIMER_ABSTIME: u32 = 1;
41
/// Time specification matching `struct timespec` layout.
///
/// Both fields are signed because the C ABI defines them that way; this
/// module only ever treats the value as a non-negative duration or instant.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Timespec {
    /// Whole seconds.
    pub tv_sec: i64,
    /// Nanoseconds in addition to `tv_sec`.
    pub tv_nsec: i64,
}

impl Timespec {
    /// Converts the value to a total number of nanoseconds.
    ///
    /// The result saturates at `u64::MAX` instead of overflowing. A negative
    /// component contributes zero: a plain `as u64` cast would turn `-1`
    /// into roughly 1.8e19 and produce a deadline that never arrives.
    pub fn to_ns(&self) -> u64 {
        const NANOS_PER_SEC: u64 = 1_000_000_000;

        // After clamping to >= 0 the casts below are lossless.
        let secs = self.tv_sec.max(0) as u64;
        let nanos = self.tv_nsec.max(0) as u64;

        secs.saturating_mul(NANOS_PER_SEC).saturating_add(nanos)
    }

    /// Returns `true` when both fields are exactly zero.
    ///
    /// `timerfd_settime` uses this to recognise a "disarm" request.
    pub fn is_zero(&self) -> bool {
        self.tv_sec == 0 && self.tv_nsec == 0
    }
}
61
/// Timer interval specification matching `struct itimerspec`.
///
/// Field order is part of the `repr(C)` layout and must not be changed.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub struct Itimerspec {
    /// Interval for periodic timer (0 = one-shot).
    pub it_interval: Timespec,
    /// Initial expiration time. An all-zero value disarms the timer when
    /// passed to `timerfd_settime`.
    pub it_value: Timespec,
}
71
/// Internal timerfd state.
///
/// One value is stored per live timerfd in the global registry; all access
/// happens while the registry lock is held.
struct TimerFdInstance {
    /// Clock type (CLOCK_REALTIME or CLOCK_MONOTONIC).
    /// Validated at creation; not read anywhere else in this file.
    clock_id: u32,
    /// Whether non-blocking mode is active (set from `TFD_NONBLOCK`).
    nonblock: bool,
    /// Current timer specification, as last passed to `timerfd_settime`.
    spec: Itimerspec,
    /// Absolute expiration time in nanoseconds (monotonic).
    /// Zero while the timer has never been armed or was disarmed.
    next_expiry_ns: u64,
    /// Number of expirations accumulated since last read.
    /// Reset to zero by a successful read and by `timerfd_settime`.
    expirations: u64,
    /// Whether the timer is armed. Cleared by a disarming `timerfd_settime`
    /// and when a one-shot timer fires during a read.
    armed: bool,
    /// Owner process ID (0 when no current process was available at
    /// creation). Not read anywhere else in this file.
    owner_pid: u64,
}
89
/// Global registry of timerfd instances.
///
/// Keyed by the timerfd ID handed out by `timerfd_create`; protected by a
/// spin lock, so holders must keep critical sections short.
static TIMERFD_REGISTRY: Mutex<BTreeMap<u32, TimerFdInstance>> = Mutex::new(BTreeMap::new());

/// Next ID for timerfd allocation.
///
/// Starts at 1. The counter is 64 bits wide, but its value is narrowed to
/// `u32` when it is used as a registry key.
static NEXT_TIMERFD_ID: AtomicU64 = AtomicU64::new(1);
95
96/// Get current monotonic time in nanoseconds from kernel uptime.
97fn monotonic_now_ns() -> u64 {
98    // Use the kernel's uptime counter (TSC-based on x86_64)
99    let uptime_ms = crate::timer::get_uptime_ms();
100    uptime_ms.saturating_mul(1_000_000)
101}
102
103/// Create a new timerfd.
104///
105/// # Arguments
106/// - `clockid`: `CLOCK_REALTIME` or `CLOCK_MONOTONIC`.
107/// - `flags`: Combination of `TFD_NONBLOCK`, `TFD_CLOEXEC`.
108///
109/// # Returns
110/// The timerfd ID on success.
111pub fn timerfd_create(clockid: u32, flags: u32) -> SyscallResult {
112    if clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC {
113        return Err(SyscallError::InvalidArgument);
114    }
115
116    let pid = crate::process::current_process()
117        .map(|p| p.pid.0)
118        .unwrap_or(0);
119
120    let nonblock = (flags & TFD_NONBLOCK) != 0;
121
122    let instance = TimerFdInstance {
123        clock_id: clockid,
124        nonblock,
125        spec: Itimerspec::default(),
126        next_expiry_ns: 0,
127        expirations: 0,
128        armed: false,
129        owner_pid: pid,
130    };
131
132    let id = NEXT_TIMERFD_ID.fetch_add(1, Ordering::Relaxed) as u32;
133
134    let mut registry = TIMERFD_REGISTRY.lock();
135    if registry.len() >= MAX_TIMERFD_INSTANCES {
136        return Err(SyscallError::OutOfMemory);
137    }
138    registry.insert(id, instance);
139    Ok(id as usize)
140}
141
142/// Arm or disarm a timerfd.
143///
144/// # Arguments
145/// - `tfd_id`: Timer fd ID.
146/// - `flags`: `TFD_TIMER_ABSTIME` for absolute time.
147/// - `new_spec`: New timer specification.
148///
149/// # Returns
150/// The previous timer specification via `old_spec` (if non-null).
151pub fn timerfd_settime(
152    tfd_id: u32,
153    flags: u32,
154    new_spec: &Itimerspec,
155    old_spec: Option<&mut Itimerspec>,
156) -> SyscallResult {
157    let mut registry = TIMERFD_REGISTRY.lock();
158    let instance = registry
159        .get_mut(&tfd_id)
160        .ok_or(SyscallError::BadFileDescriptor)?;
161
162    // Return old value if requested
163    if let Some(old) = old_spec {
164        *old = instance.spec;
165    }
166
167    instance.spec = *new_spec;
168    instance.expirations = 0;
169
170    if new_spec.it_value.is_zero() {
171        // Disarm the timer
172        instance.armed = false;
173        instance.next_expiry_ns = 0;
174    } else {
175        instance.armed = true;
176        let now = monotonic_now_ns();
177
178        if (flags & TFD_TIMER_ABSTIME) != 0 {
179            // Absolute time
180            instance.next_expiry_ns = new_spec.it_value.to_ns();
181        } else {
182            // Relative time
183            instance.next_expiry_ns = now.saturating_add(new_spec.it_value.to_ns());
184        }
185    }
186
187    Ok(0)
188}
189
190/// Get the current timer specification.
191pub fn timerfd_gettime(tfd_id: u32) -> Result<Itimerspec, SyscallError> {
192    let registry = TIMERFD_REGISTRY.lock();
193    let instance = registry
194        .get(&tfd_id)
195        .ok_or(SyscallError::BadFileDescriptor)?;
196
197    if !instance.armed {
198        return Ok(Itimerspec::default());
199    }
200
201    let now = monotonic_now_ns();
202    let remaining_ns = instance.next_expiry_ns.saturating_sub(now);
203
204    Ok(Itimerspec {
205        it_interval: instance.spec.it_interval,
206        it_value: Timespec {
207            tv_sec: (remaining_ns / 1_000_000_000) as i64,
208            tv_nsec: (remaining_ns % 1_000_000_000) as i64,
209        },
210    })
211}
212
213/// Read from a timerfd -- returns number of expirations since last read.
214///
215/// Checks the timer against current time and accumulates expirations.
216/// If nonblock is set, returns EAGAIN immediately when no expirations.
217/// In blocking mode, busy-waits with scheduler yield until the timer
218/// fires (capped at 30s to prevent permanent hangs).
219pub fn timerfd_read(tfd_id: u32) -> Result<u64, SyscallError> {
220    let start = crate::timer::get_uptime_ms();
221    const MAX_BLOCK_MS: u64 = 30_000;
222
223    loop {
224        let mut registry = TIMERFD_REGISTRY.lock();
225        let instance = registry
226            .get_mut(&tfd_id)
227            .ok_or(SyscallError::BadFileDescriptor)?;
228
229        if !instance.armed {
230            if instance.nonblock {
231                return Err(SyscallError::WouldBlock);
232            }
233            // Timer not armed and blocking -- wait for it to be armed
234            drop(registry);
235            if crate::timer::get_uptime_ms() - start >= MAX_BLOCK_MS {
236                return Err(SyscallError::WouldBlock);
237            }
238            crate::sched::yield_cpu();
239            continue;
240        }
241
242        // Check for expirations against current TSC-based time
243        let now = monotonic_now_ns();
244        if now >= instance.next_expiry_ns {
245            let interval_ns = instance.spec.it_interval.to_ns();
246            if interval_ns > 0 {
247                let elapsed = now - instance.next_expiry_ns;
248                let extra_expirations = elapsed / interval_ns;
249                instance.expirations = instance.expirations.saturating_add(1 + extra_expirations);
250                instance.next_expiry_ns = instance
251                    .next_expiry_ns
252                    .saturating_add((1 + extra_expirations) * interval_ns);
253            } else {
254                instance.expirations = instance.expirations.saturating_add(1);
255                instance.armed = false;
256            }
257        }
258
259        if instance.expirations > 0 {
260            let count = instance.expirations;
261            instance.expirations = 0;
262            return Ok(count);
263        }
264
265        if instance.nonblock {
266            return Err(SyscallError::WouldBlock);
267        }
268
269        // Release lock, yield, and retry
270        drop(registry);
271        if crate::timer::get_uptime_ms() - start >= MAX_BLOCK_MS {
272            return Err(SyscallError::WouldBlock);
273        }
274        crate::sched::yield_cpu();
275    }
276}
277
278/// Query whether a timerfd is readable (timer has expired).
279/// Used by epoll to check readiness without consuming data.
280pub fn is_readable(tfd_id: u32) -> bool {
281    let registry = TIMERFD_REGISTRY.lock();
282    let instance = match registry.get(&tfd_id) {
283        Some(i) => i,
284        None => return false,
285    };
286    if !instance.armed {
287        return false;
288    }
289    let now = monotonic_now_ns();
290    now >= instance.next_expiry_ns || instance.expirations > 0
291}
292
293/// Close (destroy) a timerfd instance.
294pub fn timerfd_close(tfd_id: u32) -> SyscallResult {
295    let mut registry = TIMERFD_REGISTRY.lock();
296    registry
297        .remove(&tfd_id)
298        .ok_or(SyscallError::BadFileDescriptor)?;
299    Ok(0)
300}
301
302// ── VfsNode adapter ────────────────────────────────────────────────────
303
304use alloc::{sync::Arc, vec::Vec};
305
306use super::{DirEntry, Metadata, NodeType, Permissions, VfsNode};
307use crate::error::KernelError;
308
/// File-table handle for a timerfd instance.
///
/// Wrapping the registry ID in a VFS node lets a timerfd live in a
/// process's file table, so the ordinary read()/close()/epoll paths apply
/// to it. musl's timerfd_create() expects to get a real file descriptor
/// back from the syscall.
pub struct TimerFdNode {
    /// Key of the wrapped instance in the timerfd registry.
    tfd_id: u32,
}

impl TimerFdNode {
    /// Wraps an existing timerfd ID. The ID is stored as given and is not
    /// checked against the registry.
    pub fn new(tfd_id: u32) -> Self {
        TimerFdNode { tfd_id }
    }

    /// Returns the wrapped timerfd ID, which callers need in order to use
    /// timerfd_settime/timerfd_gettime on this node.
    pub fn tfd_id(&self) -> u32 {
        self.tfd_id
    }
}
328
329impl VfsNode for TimerFdNode {
330    fn node_type(&self) -> NodeType {
331        NodeType::CharDevice
332    }
333
334    fn read(&self, _offset: usize, buffer: &mut [u8]) -> Result<usize, KernelError> {
335        if buffer.len() < 8 {
336            return Err(KernelError::InvalidArgument {
337                name: "buflen",
338                value: "must be at least 8 bytes for timerfd",
339            });
340        }
341        let val = timerfd_read(self.tfd_id).map_err(|e| match e {
342            SyscallError::WouldBlock => KernelError::WouldBlock,
343            _ => KernelError::FsError(crate::error::FsError::BadFileDescriptor),
344        })?;
345        buffer[..8].copy_from_slice(&val.to_le_bytes());
346        Ok(8)
347    }
348
349    fn write(&self, _offset: usize, _data: &[u8]) -> Result<usize, KernelError> {
350        // timerfd is not writable via write(2)
351        Err(KernelError::PermissionDenied {
352            operation: "write timerfd",
353        })
354    }
355
356    fn poll_readiness(&self) -> u16 {
357        let mut events = 0u16;
358        if is_readable(self.tfd_id) {
359            events |= 0x0001; // POLLIN
360        }
361        events
362    }
363
364    fn metadata(&self) -> Result<Metadata, KernelError> {
365        Ok(Metadata {
366            size: 0,
367            node_type: NodeType::CharDevice,
368            permissions: Permissions::from_mode(0o666),
369            uid: 0,
370            gid: 0,
371            created: 0,
372            modified: 0,
373            accessed: 0,
374            inode: 0,
375        })
376    }
377
378    fn readdir(&self) -> Result<Vec<DirEntry>, KernelError> {
379        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
380    }
381
382    fn lookup(&self, _name: &str) -> Result<Arc<dyn VfsNode>, KernelError> {
383        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
384    }
385
386    fn create(
387        &self,
388        _name: &str,
389        _permissions: Permissions,
390    ) -> Result<Arc<dyn VfsNode>, KernelError> {
391        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
392    }
393
394    fn mkdir(
395        &self,
396        _name: &str,
397        _permissions: Permissions,
398    ) -> Result<Arc<dyn VfsNode>, KernelError> {
399        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
400    }
401
402    fn unlink(&self, _name: &str) -> Result<(), KernelError> {
403        Err(KernelError::FsError(crate::error::FsError::NotADirectory))
404    }
405
406    fn as_any(&self) -> Option<&dyn core::any::Any> {
407        Some(self)
408    }
409
410    fn truncate(&self, _size: usize) -> Result<(), KernelError> {
411        Err(KernelError::PermissionDenied {
412            operation: "truncate timerfd",
413        })
414    }
415}
416
417impl Drop for TimerFdNode {
418    fn drop(&mut self) {
419        let _ = timerfd_close(self.tfd_id);
420    }
421}
422
#[cfg(test)]
mod tests {
    use super::*;

    // All tests share the global TIMERFD_REGISTRY, and the standard Rust
    // test harness may run them on several threads at once. A test must
    // therefore never clear the registry: doing so can delete a timer that
    // another test is still using. IDs come from a global counter, so each
    // test works only with the ID it created and closes it when done.

    #[test]
    fn test_timerfd_create_monotonic() {
        let id = timerfd_create(CLOCK_MONOTONIC, 0).unwrap();
        assert!(id > 0);

        timerfd_close(id as u32).unwrap();
    }

    #[test]
    fn test_timerfd_create_invalid_clock() {
        assert!(timerfd_create(99, 0).is_err());
    }

    #[test]
    fn test_timerfd_disarm() {
        let id = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK).unwrap() as u32;

        // Arm with 1 second
        let spec = Itimerspec {
            it_value: Timespec {
                tv_sec: 1,
                tv_nsec: 0,
            },
            it_interval: Timespec::default(),
        };
        timerfd_settime(id, 0, &spec, None).unwrap();

        // Disarm
        let zero = Itimerspec::default();
        timerfd_settime(id, 0, &zero, None).unwrap();

        // Read should fail (disarmed)
        assert!(timerfd_read(id).is_err());

        timerfd_close(id).unwrap();
    }

    #[test]
    fn test_timerfd_gettime_disarmed() {
        let id = timerfd_create(CLOCK_MONOTONIC, 0).unwrap() as u32;
        let current = timerfd_gettime(id).unwrap();
        assert!(current.it_value.is_zero());

        timerfd_close(id).unwrap();
    }

    #[test]
    fn test_timerfd_close() {
        let id = timerfd_create(CLOCK_MONOTONIC, 0).unwrap() as u32;
        timerfd_close(id).unwrap();
        assert!(timerfd_gettime(id).is_err());
    }

    #[test]
    fn test_timespec_to_ns() {
        let ts = Timespec {
            tv_sec: 1,
            tv_nsec: 500_000_000,
        };
        assert_eq!(ts.to_ns(), 1_500_000_000);
    }

    #[test]
    fn test_timespec_zero() {
        let ts = Timespec::default();
        assert!(ts.is_zero());
        assert_eq!(ts.to_ns(), 0);
    }
}