⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/fs/
blockfs.rs

1//! Block-based persistent filesystem (BlockFS)
2//!
3//! A simple ext2-like filesystem with:
4//! - Superblock with metadata
5//! - Inode table for file/directory metadata
6//! - Block allocation bitmap
7//! - Data blocks for file content
8
9// Allow dead code for filesystem methods not yet called from higher layers
10#![allow(
11    dead_code,
12    clippy::manual_div_ceil,
13    clippy::slow_vector_initialization,
14    clippy::manual_saturating_arithmetic,
15    clippy::implicit_saturating_sub
16)]
17
18use alloc::{
19    collections::BTreeSet,
20    string::{String, ToString},
21    sync::Arc,
22    vec,
23    vec::Vec,
24};
25use core::mem::size_of;
26
27use spin::Mutex;
28#[cfg(not(target_arch = "aarch64"))]
29use spin::RwLock;
30
31#[cfg(target_arch = "aarch64")]
32use super::bare_lock::RwLock;
33use super::{DirEntry, Filesystem, Metadata, NodeType, Permissions, VfsNode};
34use crate::error::{FsError, KernelError};
35
/// Block size (4KB)
pub const BLOCK_SIZE: usize = 4096;

/// Number of direct block pointers in a DiskInode
pub const DIRECT_BLOCKS: usize = 12;

/// Number of block pointers that fit in one indirect block (4096 / 4 = 1024)
pub const PTRS_PER_BLOCK: usize = BLOCK_SIZE / size_of::<u32>();

/// Maximum file size addressable via direct blocks only: 12 * 4KB = 48KB
pub const DIRECT_MAX_BLOCKS: usize = DIRECT_BLOCKS;

/// Maximum file size addressable via direct + single indirect:
/// 12 + 1024 = 1036 blocks = ~4MB
pub const SINGLE_INDIRECT_MAX_BLOCKS: usize = DIRECT_BLOCKS + PTRS_PER_BLOCK;

/// Maximum file size addressable via direct + single + double indirect:
/// 12 + 1024 + 1024*1024 = 1_049_612 blocks = ~4GB
pub const DOUBLE_INDIRECT_MAX_BLOCKS: usize =
    DIRECT_BLOCKS + PTRS_PER_BLOCK + PTRS_PER_BLOCK * PTRS_PER_BLOCK;

/// Magic number for BlockFS
pub const BLOCKFS_MAGIC: u32 = 0x424C4B46; // "BLKF"

/// Maximum filename length
pub const MAX_FILENAME_LEN: usize = 255;

/// Number of 512-byte virtio sectors per 4KB BlockFS block
const SECTORS_PER_BLOCK: usize = BLOCK_SIZE / 512;

/// On-disk superblock lives in block 0
const SUPERBLOCK_BLOCK: u32 = 0;

/// Serialized superblock size in bytes (fixed layout, LE).
/// Must stay in sync with `serialize_superblock`/`deserialize_superblock`.
const SUPERBLOCK_SERIALIZED_SIZE: usize = 62;

/// On-disk DiskInode size (96 bytes, repr(C), no padding gaps).
/// Must stay in sync with `serialize_disk_inode`/`deserialize_disk_inode`.
const DISK_INODE_SIZE: usize = 96;

/// Number of DiskInodes that fit in one 4KB block
const INODES_PER_BLOCK: usize = BLOCK_SIZE / DISK_INODE_SIZE; // 42
77
78/// Compute number of blocks needed for the block bitmap.
79/// Each byte covers 8 blocks, each block is 4096 bytes = 32768 bits.
80fn bitmap_blocks(total_blocks: u32) -> u32 {
81    let bits_needed = total_blocks as usize;
82    let bytes_needed = (bits_needed + 7) / 8;
83    ((bytes_needed + BLOCK_SIZE - 1) / BLOCK_SIZE) as u32
84}
85
86/// Compute number of blocks needed for the inode table.
87fn inode_table_blocks(inode_count: u32) -> u32 {
88    ((inode_count as usize * DISK_INODE_SIZE + BLOCK_SIZE - 1) / BLOCK_SIZE) as u32
89}
90
91/// Compute first_data_block from total_blocks and inode_count.
92/// Layout: [superblock(1)] [bitmap(N)] [inode_table(M)] [data...]
93fn computed_first_data_block(total_blocks: u32, inode_count: u32) -> u32 {
94    1 + bitmap_blocks(total_blocks) + inode_table_blocks(inode_count)
95}
96
97// ---------------------------------------------------------------------------
98// Disk backend trait -- abstracts block-level I/O for persistence
99// ---------------------------------------------------------------------------
100
/// Trait for a block-level disk backend that BlockFS can sync to.
///
/// Operates on BlockFS-sized blocks (4KB). Implementations are responsible for
/// translating to the underlying device's sector size (typically 512 bytes).
pub trait DiskBackend: Send + Sync {
    /// Read a single 4KB block from the disk.
    ///
    /// `block_num` is the 0-based BlockFS block index.
    /// `buf` must be at least `BLOCK_SIZE` (4096) bytes.
    fn read_block(&self, block_num: u64, buf: &mut [u8]) -> Result<(), KernelError>;

    /// Write a single 4KB block to the disk.
    ///
    /// `block_num` is the 0-based BlockFS block index.
    /// `data` must be at least `BLOCK_SIZE` (4096) bytes.
    fn write_block(&self, block_num: u64, data: &[u8]) -> Result<(), KernelError>;

    /// Total capacity in BlockFS-sized blocks (4KB each).
    fn block_count(&self) -> u64;

    /// Whether the device is read-only.
    /// `BlockFsInner::sync_to_disk` refuses to write when this is true.
    fn is_read_only(&self) -> bool;
}
124
125/// Adapter that wraps the global virtio-blk device as a `DiskBackend`.
126///
127/// Translates 4KB BlockFS blocks into 512-byte virtio sector reads/writes.
128pub struct VirtioBlockBackend;
129
130impl DiskBackend for VirtioBlockBackend {
131    fn read_block(&self, block_num: u64, buf: &mut [u8]) -> Result<(), KernelError> {
132        if buf.len() < BLOCK_SIZE {
133            return Err(KernelError::InvalidArgument {
134                name: "buf",
135                value: "buffer must be at least 4096 bytes",
136            });
137        }
138
139        let device_lock =
140            crate::drivers::virtio::blk::get_device().ok_or(KernelError::NotInitialized {
141                subsystem: "virtio-blk",
142            })?;
143        let mut device = device_lock.lock();
144
145        let base_sector = block_num * SECTORS_PER_BLOCK as u64;
146        for i in 0..SECTORS_PER_BLOCK {
147            let sector = base_sector + i as u64;
148            let offset = i * 512;
149            device.read_block(sector, &mut buf[offset..offset + 512])?;
150        }
151
152        Ok(())
153    }
154
155    fn write_block(&self, block_num: u64, data: &[u8]) -> Result<(), KernelError> {
156        if data.len() < BLOCK_SIZE {
157            return Err(KernelError::InvalidArgument {
158                name: "data",
159                value: "data must be at least 4096 bytes",
160            });
161        }
162
163        let device_lock =
164            crate::drivers::virtio::blk::get_device().ok_or(KernelError::NotInitialized {
165                subsystem: "virtio-blk",
166            })?;
167        let mut device = device_lock.lock();
168
169        let base_sector = block_num * SECTORS_PER_BLOCK as u64;
170        for i in 0..SECTORS_PER_BLOCK {
171            let sector = base_sector + i as u64;
172            let offset = i * 512;
173            device.write_block(sector, &data[offset..offset + 512])?;
174        }
175
176        Ok(())
177    }
178
179    fn block_count(&self) -> u64 {
180        match crate::drivers::virtio::blk::get_device() {
181            Some(lock) => {
182                let device = lock.lock();
183                device.capacity_sectors() / SECTORS_PER_BLOCK as u64
184            }
185            None => 0,
186        }
187    }
188
189    fn is_read_only(&self) -> bool {
190        match crate::drivers::virtio::blk::get_device() {
191            Some(lock) => {
192                let device = lock.lock();
193                device.is_read_only()
194            }
195            None => true, // No device = effectively read-only
196        }
197    }
198}
199
200/// Superblock structure
201#[repr(C)]
202#[derive(Debug, Clone, Copy)]
203pub struct Superblock {
204    pub magic: u32,
205    pub block_count: u32,
206    pub inode_count: u32,
207    pub free_blocks: u32,
208    pub free_inodes: u32,
209    pub first_data_block: u32,
210    pub block_size: u32,
211    pub inode_size: u16,
212    pub blocks_per_group: u32,
213    pub inodes_per_group: u32,
214    pub mount_time: u64,
215    pub write_time: u64,
216    pub mount_count: u16,
217    pub max_mount_count: u16,
218    pub state: u16,
219    pub errors: u16,
220}
221
222impl Superblock {
223    pub fn new(block_count: u32, inode_count: u32) -> Self {
224        let first_data = computed_first_data_block(block_count, inode_count);
225        Self {
226            magic: BLOCKFS_MAGIC,
227            block_count,
228            inode_count,
229            free_blocks: block_count.saturating_sub(first_data),
230            free_inodes: inode_count - 1, // Reserve root inode
231            first_data_block: first_data,
232            block_size: BLOCK_SIZE as u32,
233            inode_size: size_of::<DiskInode>() as u16,
234            blocks_per_group: 8192,
235            inodes_per_group: 2048,
236            mount_time: 0,
237            write_time: 0,
238            mount_count: 0,
239            max_mount_count: 100,
240            state: 1, // Clean
241            errors: 0,
242        }
243    }
244
245    pub fn is_valid(&self) -> bool {
246        self.magic == BLOCKFS_MAGIC
247    }
248}
249
250/// On-disk inode structure
251#[repr(C)]
252#[derive(Debug, Clone, Copy)]
253pub struct DiskInode {
254    pub mode: u16,
255    pub uid: u16,
256    pub size: u32,
257    pub atime: u32,
258    pub ctime: u32,
259    pub mtime: u32,
260    pub dtime: u32,
261    pub gid: u16,
262    pub links_count: u16,
263    pub blocks: u32,
264    pub flags: u32,
265    pub direct_blocks: [u32; 12],
266    pub indirect_block: u32,
267    pub double_indirect_block: u32,
268    pub triple_indirect_block: u32,
269}
270
271impl DiskInode {
272    pub fn new(mode: u16, uid: u16, gid: u16) -> Self {
273        Self {
274            mode,
275            uid,
276            gid,
277            size: 0,
278            atime: 0,
279            ctime: 0,
280            mtime: 0,
281            dtime: 0,
282            links_count: 1,
283            blocks: 0,
284            flags: 0,
285            direct_blocks: [0; 12],
286            indirect_block: 0,
287            double_indirect_block: 0,
288            triple_indirect_block: 0,
289        }
290    }
291
292    pub fn is_dir(&self) -> bool {
293        (self.mode & 0x4000) != 0
294    }
295
296    pub fn is_file(&self) -> bool {
297        (self.mode & 0x8000) != 0
298    }
299
300    pub fn is_symlink(&self) -> bool {
301        (self.mode & 0xA000) == 0xA000
302    }
303
304    pub fn node_type(&self) -> NodeType {
305        if self.is_dir() {
306            NodeType::Directory
307        } else if self.is_symlink() {
308            NodeType::Symlink
309        } else {
310            NodeType::File
311        }
312    }
313}
314
/// Size of the fixed header in a DiskDirEntry (inode + rec_len + name_len +
/// file_type)
pub const DIR_ENTRY_HEADER_SIZE: usize = 8;

/// On-disk directory entry (ext2-style variable-length record)
///
/// Layout:
///   - inode:     4 bytes (inode number, 0 = deleted entry)
///   - rec_len:   2 bytes (total record length, always 4-byte aligned)
///   - name_len:  1 byte  (actual name length)
///   - file_type: 1 byte  (1=file, 2=directory)
///   - name:      up to 255 bytes
///
/// Only the first `name_len` bytes of `name` are meaningful; the rest of
/// the array is zero filler.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct DiskDirEntry {
    pub inode: u32,
    pub rec_len: u16,
    pub name_len: u8,
    pub file_type: u8,
    pub name: [u8; 255],
}
336
337impl DiskDirEntry {
338    /// File type constant for regular files
339    pub const FT_REG_FILE: u8 = 1;
340    /// File type constant for directories
341    pub const FT_DIR: u8 = 2;
342    /// File type constant for symlinks
343    pub const FT_SYMLINK: u8 = 7;
344
345    /// Create a new directory entry
346    pub fn new(inode: u32, name: &str, file_type: u8) -> Self {
347        let name_bytes = name.as_bytes();
348        let name_len = name_bytes.len().min(MAX_FILENAME_LEN) as u8;
349        let rec_len = align4(DIR_ENTRY_HEADER_SIZE + name_len as usize) as u16;
350
351        let mut entry = Self {
352            inode,
353            rec_len,
354            name_len,
355            file_type,
356            name: [0u8; 255],
357        };
358
359        let copy_len = name_len as usize;
360        entry.name[..copy_len].copy_from_slice(&name_bytes[..copy_len]);
361        entry
362    }
363
364    /// Get the name as a string slice
365    pub fn name_str(&self) -> &str {
366        let slice = &self.name[..self.name_len as usize];
367        core::str::from_utf8(slice).unwrap_or("")
368    }
369
370    /// Convert file_type to NodeType
371    pub fn node_type(&self) -> NodeType {
372        match self.file_type {
373            Self::FT_DIR => NodeType::Directory,
374            Self::FT_SYMLINK => NodeType::Symlink,
375            _ => NodeType::File,
376        }
377    }
378}
379
/// Align a value up to the next 4-byte boundary
fn align4(val: usize) -> usize {
    // Add the distance to the next multiple of 4, if any.
    match val % 4 {
        0 => val,
        rem => val + (4 - rem),
    }
}
384
/// Block allocation bitmap
///
/// One bit per block; a set bit means the block is allocated. Trailing bits
/// in the final byte (indices >= `total_blocks`) are padding and must stay
/// clear, since `serialize_bitmap` writes these bytes to disk verbatim.
pub struct BlockBitmap {
    bitmap: Vec<u8>,
    total_blocks: usize,
}

impl BlockBitmap {
    /// Create a bitmap covering `total_blocks` blocks, all initially free.
    pub fn new(total_blocks: usize) -> Self {
        let bitmap_size = (total_blocks + 7) / 8;
        let mut bitmap = Vec::new();
        bitmap.resize(bitmap_size, 0);

        Self {
            bitmap,
            total_blocks,
        }
    }

    /// Find, mark, and return the lowest-numbered free block, or `None`
    /// if the bitmap is full.
    ///
    /// Fix: the range check now happens *before* the bit is set. The old
    /// code set the bit first and only then tested
    /// `block_num < total_blocks`, permanently marking padding bits past
    /// the end of the device; that garbage was then persisted by
    /// `serialize_bitmap` and visible through `is_allocated`.
    pub fn allocate_block(&mut self) -> Option<u32> {
        for (byte_idx, byte) in self.bitmap.iter_mut().enumerate() {
            if *byte == 0xFF {
                continue; // All eight blocks in this byte are taken
            }
            for bit in 0..8 {
                let block_num = byte_idx * 8 + bit;
                if block_num >= self.total_blocks {
                    // Bits are scanned in ascending order, so everything
                    // from here on is padding -- nothing left to hand out.
                    return None;
                }
                if (*byte & (1 << bit)) == 0 {
                    *byte |= 1 << bit;
                    return Some(block_num as u32);
                }
            }
        }
        None
    }

    /// Mark `block` as free. Out-of-range block numbers are ignored.
    pub fn free_block(&mut self, block: u32) {
        let byte_idx = (block / 8) as usize;
        let bit = (block % 8) as usize;
        if byte_idx < self.bitmap.len() {
            self.bitmap[byte_idx] &= !(1 << bit);
        }
    }

    /// Whether `block` is currently allocated.
    /// Out-of-range block numbers read as free.
    pub fn is_allocated(&self, block: u32) -> bool {
        let byte_idx = (block / 8) as usize;
        let bit = (block % 8) as usize;
        if byte_idx < self.bitmap.len() {
            (self.bitmap[byte_idx] & (1 << bit)) != 0
        } else {
            false
        }
    }
}
438
/// BlockFS node implementation
///
/// A thin handle pairing an inode number with a shared reference to the
/// filesystem state. All VFS operations delegate to `BlockFsInner` under
/// the appropriate read/write lock.
pub struct BlockFsNode {
    // Inode number this node refers to (inode 0 is the root directory).
    inode_num: u32,
    // Shared filesystem state; read lock for queries, write lock for mutation.
    fs: Arc<RwLock<BlockFsInner>>,
}

impl BlockFsNode {
    /// Create a handle for `inode_num` backed by the shared state `fs`.
    pub fn new(inode_num: u32, fs: Arc<RwLock<BlockFsInner>>) -> Self {
        Self { inode_num, fs }
    }
}
450
451impl VfsNode for BlockFsNode {
452    fn node_type(&self) -> NodeType {
453        self.metadata()
454            .map(|m| m.node_type)
455            .unwrap_or(NodeType::File)
456    }
457
458    fn read(&self, offset: usize, buffer: &mut [u8]) -> Result<usize, KernelError> {
459        let fs = self.fs.read();
460        fs.read_inode(self.inode_num, offset, buffer)
461    }
462
463    fn write(&self, offset: usize, data: &[u8]) -> Result<usize, KernelError> {
464        let mut fs = self.fs.write();
465        fs.write_inode(self.inode_num, offset, data)
466    }
467
468    fn metadata(&self) -> Result<Metadata, KernelError> {
469        let fs = self.fs.read();
470        fs.get_metadata(self.inode_num)
471    }
472
473    fn readdir(&self) -> Result<Vec<DirEntry>, KernelError> {
474        let fs = self.fs.read();
475        fs.readdir(self.inode_num)
476    }
477
478    fn lookup(&self, name: &str) -> Result<Arc<dyn VfsNode>, KernelError> {
479        let fs = self.fs.read();
480        let child_inode = fs.lookup_in_dir(self.inode_num, name)?;
481        Ok(Arc::new(BlockFsNode::new(child_inode, self.fs.clone())))
482    }
483
484    fn create(
485        &self,
486        name: &str,
487        permissions: Permissions,
488    ) -> Result<Arc<dyn VfsNode>, KernelError> {
489        let mut fs = self.fs.write();
490        let new_inode = fs.create_file(self.inode_num, name, permissions)?;
491        Ok(Arc::new(BlockFsNode::new(new_inode, self.fs.clone())))
492    }
493
494    fn mkdir(&self, name: &str, permissions: Permissions) -> Result<Arc<dyn VfsNode>, KernelError> {
495        let mut fs = self.fs.write();
496        let new_inode = fs.create_directory(self.inode_num, name, permissions)?;
497        Ok(Arc::new(BlockFsNode::new(new_inode, self.fs.clone())))
498    }
499
500    fn unlink(&self, name: &str) -> Result<(), KernelError> {
501        let mut fs = self.fs.write();
502        fs.unlink_from_dir(self.inode_num, name)
503    }
504
505    fn truncate(&self, size: usize) -> Result<(), KernelError> {
506        let mut fs = self.fs.write();
507        fs.truncate_inode(self.inode_num, size)
508    }
509
510    fn symlink(&self, name: &str, target: &str) -> Result<Arc<dyn VfsNode>, KernelError> {
511        let mut fs = self.fs.write();
512        let new_inode = fs.create_symlink(self.inode_num, name, target)?;
513        Ok(Arc::new(BlockFsNode::new(new_inode, self.fs.clone())))
514    }
515
516    /// Read the target of a symbolic link in BlockFS.
517    ///
518    /// Delegates to `BlockFsInner::read_symlink` which checks whether
519    /// this inode has the symlink mode bit set (0xA000) and, if so, reads
520    /// the target path from the inode's data blocks.
521    ///
522    /// Returns `FsError::NotASymlink` if this node is not a symlink.
523    fn readlink(&self) -> Result<String, KernelError> {
524        let fs = self.fs.read();
525        fs.read_symlink(self.inode_num)
526    }
527
528    fn chmod(&self, permissions: Permissions) -> Result<(), KernelError> {
529        let mut fs = self.fs.write();
530        fs.chmod_inode(self.inode_num, permissions)
531    }
532
533    fn link(&self, name: &str, target: Arc<dyn VfsNode>) -> Result<(), KernelError> {
534        let mut fs = self.fs.write();
535        fs.link_in_dir(self.inode_num, name, target)
536    }
537}
538
/// Internal BlockFS state
///
/// Shared behind `Arc<RwLock<...>>` by every `BlockFsNode` handle.
pub struct BlockFsInner {
    // Filesystem header (counts, layout, mount bookkeeping).
    superblock: Superblock,
    // One bit per block; tracks which blocks are in use.
    block_bitmap: BlockBitmap,
    // All inodes, indexed by inode number (inode 0 is the root directory).
    inode_table: Vec<DiskInode>,
    block_data: Vec<Vec<u8>>, // In-memory block storage (RAM cache)
    /// Set of block indices that have been modified since the last sync.
    /// Used to track which blocks need to be written to the disk backend.
    dirty_blocks: BTreeSet<usize>,
    /// Optional disk backend for persistence. When `Some`, `sync()` writes
    /// dirty blocks to this device. When `None`, BlockFS operates as a pure
    /// RAM filesystem (all data lost on reboot).
    disk: Option<Arc<Mutex<dyn DiskBackend>>>,
}
553
554impl BlockFsInner {
    /// Build a fresh, empty filesystem entirely in RAM.
    ///
    /// Marks the metadata region (superblock, bitmap, inode table) as
    /// allocated, installs the root directory at inode 0, and seeds the
    /// root with "." and ".." entries. Assumes `inode_count >= 1`
    /// (`inode_table[0]` is indexed unconditionally).
    pub fn new(block_count: u32, inode_count: u32) -> Self {
        let first_data = computed_first_data_block(block_count, inode_count);
        let mut superblock = Superblock::new(block_count, inode_count);
        // NOTE(review): Superblock::new already computes these same two
        // values; the reassignment is redundant but harmless.
        superblock.first_data_block = first_data;
        superblock.free_blocks = block_count.saturating_sub(first_data);

        let mut block_bitmap = BlockBitmap::new(block_count as usize);

        // Mark metadata blocks (0..first_data_block) as allocated
        for _b in 0..first_data {
            block_bitmap.allocate_block();
        }

        let mut inode_table = Vec::new();
        inode_table.resize(inode_count as usize, DiskInode::new(0, 0, 0));

        // Initialize root directory (inode 0)
        // links_count = 2: one for itself (".") and one from the parent (root is its
        // own parent)
        let mut root_inode = DiskInode::new(0x41ED, 0, 0); // Directory, rwxr-xr-x
        root_inode.links_count = 2;
        inode_table[0] = root_inode;

        // Initialize block storage (sparse -- blocks materialized on first write)
        let mut block_data = Vec::with_capacity(block_count as usize);
        for _ in 0..block_count {
            block_data.push(Vec::new());
        }

        let mut fs = Self {
            superblock,
            block_bitmap,
            inode_table,
            block_data,
            dirty_blocks: BTreeSet::new(),
            disk: None,
        };

        // Create "." and ".." entries in the root directory (both point to inode 0)
        if let Err(_e) = fs.write_dir_entry(0, 0, ".", DiskDirEntry::FT_DIR) {
            crate::println!(
                "[BLOCKFS] Warning: failed to create '.' root dir entry: {:?}",
                _e
            );
        }
        if let Err(_e) = fs.write_dir_entry(0, 0, "..", DiskDirEntry::FT_DIR) {
            crate::println!(
                "[BLOCKFS] Warning: failed to create '..' root dir entry: {:?}",
                _e
            );
        }

        fs
    }
609
610    fn allocate_inode(&mut self) -> Option<u32> {
611        for (idx, inode) in self.inode_table.iter().enumerate() {
612            if inode.links_count == 0 && idx > 0 {
613                // Don't allocate root
614                self.superblock.free_inodes -= 1;
615                return Some(idx as u32);
616            }
617        }
618        None
619    }
620
621    fn allocate_block(&mut self) -> Option<u32> {
622        let block = self.block_bitmap.allocate_block()?;
623        self.superblock.free_blocks -= 1;
624        self.materialize_block(block as usize);
625        Some(block)
626    }
627
628    fn free_block(&mut self, block: u32) {
629        self.block_bitmap.free_block(block);
630        self.superblock.free_blocks += 1;
631        // Freed blocks no longer need syncing (data is logically gone)
632        self.dirty_blocks.remove(&(block as usize));
633    }
634
635    /// Mark a block as dirty so it will be written to the disk backend on sync.
636    fn mark_dirty(&mut self, block_num: u32) {
637        self.dirty_blocks.insert(block_num as usize);
638    }
639
640    /// Ensure the block at `idx` is materialized (has 4KB allocated).
641    /// Called before any write to a block. No-op if already materialized.
642    fn materialize_block(&mut self, idx: usize) {
643        if idx < self.block_data.len() && self.block_data[idx].is_empty() {
644            self.block_data[idx] = vec![0u8; BLOCK_SIZE];
645        }
646    }
647
648    /// Get a read-only reference to a block's data.
649    /// Returns a reference to the shared zero block for unmaterialized entries,
650    /// avoiding the need to allocate memory for blocks that have never been
651    /// written.
652    fn block_ref(&self, idx: usize) -> &[u8] {
653        if idx < self.block_data.len() && !self.block_data[idx].is_empty() {
654            &self.block_data[idx]
655        } else {
656            &ZERO_BLOCK
657        }
658    }
659
660    /// Sync all dirty blocks and metadata to the disk backend.
661    ///
662    /// If no disk backend is configured, this is a no-op. Returns the number
663    /// of blocks synced on success.
664    fn sync_to_disk(&mut self) -> Result<usize, KernelError> {
665        let disk = match self.disk {
666            Some(ref d) => d.clone(),
667            None => return Ok(0), // No backend -- pure RAM mode
668        };
669
670        let backend = disk.lock();
671        if backend.is_read_only() {
672            return Err(KernelError::FsError(FsError::ReadOnly));
673        }
674
675        let device_blocks = backend.block_count();
676        let mut synced = 0usize;
677
678        // Write all dirty data blocks
679        let dirty: Vec<usize> = self.dirty_blocks.iter().copied().collect();
680        for block_idx in &dirty {
681            if *block_idx >= self.block_data.len() {
682                continue; // Skip invalid indices
683            }
684            if (*block_idx as u64) >= device_blocks {
685                // Block beyond device capacity -- skip but warn
686                crate::println!(
687                    "[BLOCKFS] Warning: dirty block {} exceeds device capacity {}",
688                    block_idx,
689                    device_blocks
690                );
691                continue;
692            }
693            // Skip unmaterialized (sparse) blocks -- they contain only zeros
694            if self.block_data[*block_idx].is_empty() {
695                continue;
696            }
697            backend.write_block(*block_idx as u64, &self.block_data[*block_idx])?;
698            synced += 1;
699        }
700
701        // Clear dirty set after successful write
702        self.dirty_blocks.clear();
703
704        // Update superblock write time and mount count
705        self.superblock.write_time = crate::arch::timer::read_hw_timestamp();
706
707        // Write metadata (superblock, bitmap, inode table)
708        self.serialize_superblock(&*backend)?;
709        self.serialize_bitmap(&*backend)?;
710        self.serialize_inode_table(&*backend)?;
711        synced += 1; // Count metadata as one sync unit
712
713        Ok(synced)
714    }
715
716    /// Load filesystem data from the disk backend into memory.
717    ///
718    /// Reads all data blocks from disk into the in-memory `block_data` array.
719    /// This should be called after attaching a disk backend to populate the
720    /// in-memory cache with persisted data.
721    fn load_from_disk(&mut self) -> Result<usize, KernelError> {
722        let disk = match self.disk {
723            Some(ref d) => d.clone(),
724            None => return Ok(0),
725        };
726
727        let backend = disk.lock();
728        let device_blocks = backend.block_count();
729        let fs_blocks = self.block_data.len() as u64;
730        let blocks_to_read = fs_blocks.min(device_blocks) as usize;
731        let mut loaded = 0usize;
732
733        for block_idx in 0..blocks_to_read {
734            if self.block_bitmap.is_allocated(block_idx as u32) {
735                self.materialize_block(block_idx);
736                backend.read_block(block_idx as u64, &mut self.block_data[block_idx])?;
737                loaded += 1;
738            }
739        }
740
741        Ok(loaded)
742    }
743
744    // --- On-disk metadata serialization ---
745
    /// Serialize the superblock to block 0 on disk (62 bytes, LE).
    ///
    /// Field offsets must mirror `deserialize_superblock` exactly (62 bytes
    /// total, see `SUPERBLOCK_SERIALIZED_SIZE`); the remainder of the 4KB
    /// block is zero padding.
    fn serialize_superblock(&self, backend: &dyn DiskBackend) -> Result<(), KernelError> {
        let mut buf = [0u8; BLOCK_SIZE];
        let sb = &self.superblock;

        // Fixed little-endian layout: u32 fields first, then the mixed
        // u16/u32/u64 tail. Offsets are absolute within the block.
        buf[0..4].copy_from_slice(&sb.magic.to_le_bytes());
        buf[4..8].copy_from_slice(&sb.block_count.to_le_bytes());
        buf[8..12].copy_from_slice(&sb.inode_count.to_le_bytes());
        buf[12..16].copy_from_slice(&sb.free_blocks.to_le_bytes());
        buf[16..20].copy_from_slice(&sb.free_inodes.to_le_bytes());
        buf[20..24].copy_from_slice(&sb.first_data_block.to_le_bytes());
        buf[24..28].copy_from_slice(&sb.block_size.to_le_bytes());
        buf[28..30].copy_from_slice(&sb.inode_size.to_le_bytes());
        buf[30..34].copy_from_slice(&sb.blocks_per_group.to_le_bytes());
        buf[34..38].copy_from_slice(&sb.inodes_per_group.to_le_bytes());
        buf[38..46].copy_from_slice(&sb.mount_time.to_le_bytes());
        buf[46..54].copy_from_slice(&sb.write_time.to_le_bytes());
        buf[54..56].copy_from_slice(&sb.mount_count.to_le_bytes());
        buf[56..58].copy_from_slice(&sb.max_mount_count.to_le_bytes());
        buf[58..60].copy_from_slice(&sb.state.to_le_bytes());
        buf[60..62].copy_from_slice(&sb.errors.to_le_bytes());

        backend.write_block(SUPERBLOCK_BLOCK as u64, &buf)
    }
770
    /// Deserialize the superblock from block 0. Returns the parsed superblock.
    ///
    /// The magic number is validated before any other field is trusted; a
    /// mismatch is reported as `FsError::CorruptedData`. Offsets mirror
    /// `serialize_superblock`.
    fn deserialize_superblock(backend: &dyn DiskBackend) -> Result<Superblock, KernelError> {
        let mut buf = [0u8; BLOCK_SIZE];
        backend.read_block(SUPERBLOCK_BLOCK as u64, &mut buf)?;

        let magic = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
        if magic != BLOCKFS_MAGIC {
            return Err(KernelError::FsError(FsError::CorruptedData));
        }

        Ok(Superblock {
            magic,
            block_count: u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]),
            inode_count: u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]),
            free_blocks: u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]),
            free_inodes: u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]),
            first_data_block: u32::from_le_bytes([buf[20], buf[21], buf[22], buf[23]]),
            block_size: u32::from_le_bytes([buf[24], buf[25], buf[26], buf[27]]),
            inode_size: u16::from_le_bytes([buf[28], buf[29]]),
            blocks_per_group: u32::from_le_bytes([buf[30], buf[31], buf[32], buf[33]]),
            inodes_per_group: u32::from_le_bytes([buf[34], buf[35], buf[36], buf[37]]),
            mount_time: u64::from_le_bytes([
                buf[38], buf[39], buf[40], buf[41], buf[42], buf[43], buf[44], buf[45],
            ]),
            write_time: u64::from_le_bytes([
                buf[46], buf[47], buf[48], buf[49], buf[50], buf[51], buf[52], buf[53],
            ]),
            mount_count: u16::from_le_bytes([buf[54], buf[55]]),
            max_mount_count: u16::from_le_bytes([buf[56], buf[57]]),
            state: u16::from_le_bytes([buf[58], buf[59]]),
            errors: u16::from_le_bytes([buf[60], buf[61]]),
        })
    }
804
    /// Serialize the block bitmap to disk (blocks 1..1+bitmap_blocks).
    ///
    /// The in-memory bitmap is split into 4KB chunks; the tail of the final
    /// chunk is zero-padded.
    fn serialize_bitmap(&self, backend: &dyn DiskBackend) -> Result<(), KernelError> {
        let bm_blocks = bitmap_blocks(self.superblock.block_count);
        let bitmap_start = SUPERBLOCK_BLOCK + 1;

        for i in 0..bm_blocks {
            let mut buf = [0u8; BLOCK_SIZE];
            // Byte range of the in-memory bitmap covered by this disk block.
            let byte_offset = i as usize * BLOCK_SIZE;
            let bytes_remaining = self.block_bitmap.bitmap.len().saturating_sub(byte_offset);
            let copy_len = bytes_remaining.min(BLOCK_SIZE);
            if copy_len > 0 {
                buf[..copy_len].copy_from_slice(
                    &self.block_bitmap.bitmap[byte_offset..byte_offset + copy_len],
                );
            }
            backend.write_block((bitmap_start + i) as u64, &buf)?;
        }

        Ok(())
    }
825
    /// Deserialize the block bitmap from disk.
    ///
    /// Reads the bitmap region (blocks 1..1+bitmap_blocks) and reassembles
    /// the in-memory byte array; the trailing padding of the last disk
    /// block is discarded. Inverse of `serialize_bitmap`.
    fn deserialize_bitmap(
        backend: &dyn DiskBackend,
        total_blocks: u32,
    ) -> Result<BlockBitmap, KernelError> {
        let bm_blocks = bitmap_blocks(total_blocks);
        let bitmap_start = SUPERBLOCK_BLOCK + 1;
        let bitmap_size = (total_blocks as usize + 7) / 8;
        let mut bitmap_data = vec![0u8; bitmap_size];

        for i in 0..bm_blocks {
            let mut buf = [0u8; BLOCK_SIZE];
            backend.read_block((bitmap_start + i) as u64, &mut buf)?;

            // Byte range of the in-memory bitmap filled by this disk block.
            let byte_offset = i as usize * BLOCK_SIZE;
            let bytes_remaining = bitmap_size.saturating_sub(byte_offset);
            let copy_len = bytes_remaining.min(BLOCK_SIZE);
            if copy_len > 0 {
                bitmap_data[byte_offset..byte_offset + copy_len].copy_from_slice(&buf[..copy_len]);
            }
        }

        Ok(BlockBitmap {
            bitmap: bitmap_data,
            total_blocks: total_blocks as usize,
        })
    }
853
    /// Serialize a single DiskInode to 96 bytes (LE).
    ///
    /// Offsets: 0 mode, 2 uid, 4 size, 8 atime, 12 ctime, 16 mtime,
    /// 20 dtime, 24 gid, 26 links_count, 28 blocks, 32 flags,
    /// 36..84 direct_blocks[12], 84 indirect, 88 double, 92 triple.
    /// Must mirror `deserialize_disk_inode` and `DISK_INODE_SIZE`.
    fn serialize_disk_inode(inode: &DiskInode, buf: &mut [u8]) {
        buf[0..2].copy_from_slice(&inode.mode.to_le_bytes());
        buf[2..4].copy_from_slice(&inode.uid.to_le_bytes());
        buf[4..8].copy_from_slice(&inode.size.to_le_bytes());
        buf[8..12].copy_from_slice(&inode.atime.to_le_bytes());
        buf[12..16].copy_from_slice(&inode.ctime.to_le_bytes());
        buf[16..20].copy_from_slice(&inode.mtime.to_le_bytes());
        buf[20..24].copy_from_slice(&inode.dtime.to_le_bytes());
        buf[24..26].copy_from_slice(&inode.gid.to_le_bytes());
        buf[26..28].copy_from_slice(&inode.links_count.to_le_bytes());
        buf[28..32].copy_from_slice(&inode.blocks.to_le_bytes());
        buf[32..36].copy_from_slice(&inode.flags.to_le_bytes());
        // 12 direct pointers occupy bytes 36..84, 4 bytes apiece.
        for (j, &blk) in inode.direct_blocks.iter().enumerate() {
            let off = 36 + j * 4;
            buf[off..off + 4].copy_from_slice(&blk.to_le_bytes());
        }
        buf[84..88].copy_from_slice(&inode.indirect_block.to_le_bytes());
        buf[88..92].copy_from_slice(&inode.double_indirect_block.to_le_bytes());
        buf[92..96].copy_from_slice(&inode.triple_indirect_block.to_le_bytes());
    }
875
876    /// Deserialize a single DiskInode from 96 bytes (LE).
877    fn deserialize_disk_inode(buf: &[u8]) -> DiskInode {
878        let mut direct_blocks = [0u32; 12];
879        for (j, block) in direct_blocks.iter_mut().enumerate() {
880            let off = 36 + j * 4;
881            *block = u32::from_le_bytes([buf[off], buf[off + 1], buf[off + 2], buf[off + 3]]);
882        }
883        DiskInode {
884            mode: u16::from_le_bytes([buf[0], buf[1]]),
885            uid: u16::from_le_bytes([buf[2], buf[3]]),
886            size: u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]),
887            atime: u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]),
888            ctime: u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]),
889            mtime: u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]),
890            dtime: u32::from_le_bytes([buf[20], buf[21], buf[22], buf[23]]),
891            gid: u16::from_le_bytes([buf[24], buf[25]]),
892            links_count: u16::from_le_bytes([buf[26], buf[27]]),
893            blocks: u32::from_le_bytes([buf[28], buf[29], buf[30], buf[31]]),
894            flags: u32::from_le_bytes([buf[32], buf[33], buf[34], buf[35]]),
895            direct_blocks,
896            indirect_block: u32::from_le_bytes([buf[84], buf[85], buf[86], buf[87]]),
897            double_indirect_block: u32::from_le_bytes([buf[88], buf[89], buf[90], buf[91]]),
898            triple_indirect_block: u32::from_le_bytes([buf[92], buf[93], buf[94], buf[95]]),
899        }
900    }
901
902    /// Serialize the entire inode table to disk.
903    fn serialize_inode_table(&self, backend: &dyn DiskBackend) -> Result<(), KernelError> {
904        let bm_blocks = bitmap_blocks(self.superblock.block_count);
905        let inode_start = SUPERBLOCK_BLOCK + 1 + bm_blocks;
906        let it_blocks = inode_table_blocks(self.superblock.inode_count);
907
908        for blk_idx in 0..it_blocks {
909            let mut buf = [0u8; BLOCK_SIZE];
910            let base_inode = blk_idx as usize * INODES_PER_BLOCK;
911
912            for slot in 0..INODES_PER_BLOCK {
913                let inode_idx = base_inode + slot;
914                if inode_idx >= self.inode_table.len() {
915                    break;
916                }
917                let off = slot * DISK_INODE_SIZE;
918                Self::serialize_disk_inode(
919                    &self.inode_table[inode_idx],
920                    &mut buf[off..off + DISK_INODE_SIZE],
921                );
922            }
923
924            backend.write_block((inode_start + blk_idx) as u64, &buf)?;
925        }
926
927        Ok(())
928    }
929
930    /// Deserialize the entire inode table from disk.
931    fn deserialize_inode_table(
932        backend: &dyn DiskBackend,
933        inode_count: u32,
934        total_blocks: u32,
935    ) -> Result<Vec<DiskInode>, KernelError> {
936        let bm_blocks = bitmap_blocks(total_blocks);
937        let inode_start = SUPERBLOCK_BLOCK + 1 + bm_blocks;
938        let it_blocks = inode_table_blocks(inode_count);
939        let mut inode_table = Vec::with_capacity(inode_count as usize);
940
941        for blk_idx in 0..it_blocks {
942            let mut buf = [0u8; BLOCK_SIZE];
943            backend.read_block((inode_start + blk_idx) as u64, &mut buf)?;
944
945            for slot in 0..INODES_PER_BLOCK {
946                let inode_idx = blk_idx as usize * INODES_PER_BLOCK + slot;
947                if inode_idx >= inode_count as usize {
948                    break;
949                }
950                let off = slot * DISK_INODE_SIZE;
951                inode_table.push(Self::deserialize_disk_inode(
952                    &buf[off..off + DISK_INODE_SIZE],
953                ));
954            }
955        }
956
957        Ok(inode_table)
958    }
959
    /// Load an existing BlockFS from a disk backend.
    ///
    /// Reads the superblock, block bitmap, and inode table, then loads only
    /// the data blocks the bitmap marks as allocated (unallocated entries
    /// stay as empty `Vec`s — the in-memory store is sparse). Finally bumps
    /// the superblock's mount count and mount time.
    ///
    /// # Errors
    /// Propagates any backend read error or superblock validation failure
    /// from the deserialization helpers.
    fn load_existing(backend: Arc<Mutex<dyn DiskBackend>>) -> Result<Self, KernelError> {
        // Hold the backend lock for the whole on-disk scan; it is dropped
        // explicitly below before `backend` is moved into the new `Self`.
        let bk = backend.lock();

        // Read and validate superblock
        let superblock = Self::deserialize_superblock(&*bk)?;
        crate::println!(
            "[BLOCKFS] Found existing filesystem: {} blocks, {} inodes, first_data={}",
            superblock.block_count,
            superblock.inode_count,
            superblock.first_data_block
        );

        // Read bitmap
        let block_bitmap = Self::deserialize_bitmap(&*bk, superblock.block_count)?;

        // Read inode table
        let inode_table =
            Self::deserialize_inode_table(&*bk, superblock.inode_count, superblock.block_count)?;

        // Allocate sparse in-memory block storage and read only allocated blocks
        let block_count = superblock.block_count as usize;
        let mut block_data = Vec::with_capacity(block_count);
        for _ in 0..block_count {
            block_data.push(Vec::new()); // Empty -- sparse
        }

        // Only load blocks that are marked as allocated in the bitmap.
        // Metadata blocks (superblock, bitmap, inode table) before
        // `first_data_block` are skipped — they were decoded above.
        let device_blocks = bk.block_count();
        let first_data = superblock.first_data_block as usize;
        let mut loaded = 0usize;
        for (i, block) in block_data
            .iter_mut()
            .enumerate()
            .take(block_count)
            .skip(first_data)
        {
            // Guard against a filesystem larger than the backing device.
            if (i as u64) >= device_blocks {
                break;
            }
            if block_bitmap.is_allocated(i as u32) {
                *block = vec![0u8; BLOCK_SIZE];
                bk.read_block(i as u64, block)?;
                loaded += 1;
            }
        }

        crate::println!(
            "[BLOCKFS] Loaded {} allocated data blocks from disk (sparse, {} total)",
            loaded,
            block_count.saturating_sub(first_data)
        );

        // Release the backend lock before moving `backend` into the struct.
        drop(bk);

        let mut fs = Self {
            superblock,
            block_bitmap,
            inode_table,
            block_data,
            dirty_blocks: BTreeSet::new(),
            disk: Some(backend),
        };

        // Update mount count and time (in-memory only here; presumably
        // flushed with the superblock on the next sync — confirm with the
        // sync path).
        fs.superblock.mount_count += 1;
        fs.superblock.mount_time = crate::arch::timer::read_hw_timestamp();

        Ok(fs)
    }
1032
1033    // --- Indirect block helpers ---
1034
1035    /// Read a u32 block pointer from position `index` within an indirect block.
1036    fn read_block_ptr(&self, indirect_block: u32, index: usize) -> u32 {
1037        let block = self.block_ref(indirect_block as usize);
1038        let off = index * size_of::<u32>();
1039        u32::from_le_bytes([block[off], block[off + 1], block[off + 2], block[off + 3]])
1040    }
1041
1042    /// Write a u32 block pointer at position `index` within an indirect block.
1043    fn write_block_ptr(&mut self, indirect_block: u32, index: usize, value: u32) {
1044        let off = index * size_of::<u32>();
1045        let bytes = value.to_le_bytes();
1046        self.materialize_block(indirect_block as usize);
1047        self.block_data[indirect_block as usize][off..off + 4].copy_from_slice(&bytes);
1048        self.mark_dirty(indirect_block);
1049    }
1050
1051    /// Resolve a logical block index to a physical block number for reading.
1052    ///
1053    /// Returns `Some(physical_block)` if the block is allocated, `None` if it
1054    /// falls in a sparse hole or exceeds the addressing range.
1055    fn resolve_block(&self, inode: &DiskInode, logical_block: usize) -> Option<u32> {
1056        if logical_block < DIRECT_BLOCKS {
1057            // Direct block
1058            let blk = inode.direct_blocks[logical_block];
1059            if blk == 0 {
1060                None
1061            } else {
1062                Some(blk)
1063            }
1064        } else if logical_block < SINGLE_INDIRECT_MAX_BLOCKS {
1065            // Single indirect
1066            let indirect = inode.indirect_block;
1067            if indirect == 0 {
1068                return None;
1069            }
1070            let idx = logical_block - DIRECT_BLOCKS;
1071            let blk = self.read_block_ptr(indirect, idx);
1072            if blk == 0 {
1073                None
1074            } else {
1075                Some(blk)
1076            }
1077        } else if logical_block < DOUBLE_INDIRECT_MAX_BLOCKS {
1078            // Double indirect
1079            let dbl_indirect = inode.double_indirect_block;
1080            if dbl_indirect == 0 {
1081                return None;
1082            }
1083            let rel = logical_block - SINGLE_INDIRECT_MAX_BLOCKS;
1084            let l1_idx = rel / PTRS_PER_BLOCK;
1085            let l2_idx = rel % PTRS_PER_BLOCK;
1086            let l1_block = self.read_block_ptr(dbl_indirect, l1_idx);
1087            if l1_block == 0 {
1088                return None;
1089            }
1090            let blk = self.read_block_ptr(l1_block, l2_idx);
1091            if blk == 0 {
1092                None
1093            } else {
1094                Some(blk)
1095            }
1096        } else {
1097            // Beyond double indirect range (triple indirect not implemented)
1098            None
1099        }
1100    }
1101
1102    /// Ensure a logical block index has a physical block allocated, creating
1103    /// indirect blocks as needed. Returns the physical block number.
1104    fn ensure_block(&mut self, inode_num: u32, logical_block: usize) -> Result<u32, KernelError> {
1105        if logical_block < DIRECT_BLOCKS {
1106            // Direct block
1107            let blk = self.inode_table[inode_num as usize].direct_blocks[logical_block];
1108            if blk != 0 {
1109                return Ok(blk);
1110            }
1111            let new_blk = self
1112                .allocate_block()
1113                .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
1114            self.inode_table[inode_num as usize].direct_blocks[logical_block] = new_blk;
1115            self.inode_table[inode_num as usize].blocks += 1;
1116            self.mark_dirty(new_blk);
1117            Ok(new_blk)
1118        } else if logical_block < SINGLE_INDIRECT_MAX_BLOCKS {
1119            // Single indirect
1120            let mut indirect = self.inode_table[inode_num as usize].indirect_block;
1121            if indirect == 0 {
1122                indirect = self
1123                    .allocate_block()
1124                    .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
1125                // Zero the new indirect block (already zeroed by block_data init,
1126                // but be explicit for safety after reuse)
1127                for byte in &mut self.block_data[indirect as usize] {
1128                    *byte = 0;
1129                }
1130                self.mark_dirty(indirect);
1131                self.inode_table[inode_num as usize].indirect_block = indirect;
1132                self.inode_table[inode_num as usize].blocks += 1;
1133            }
1134            let idx = logical_block - DIRECT_BLOCKS;
1135            let blk = self.read_block_ptr(indirect, idx);
1136            if blk != 0 {
1137                return Ok(blk);
1138            }
1139            let new_blk = self
1140                .allocate_block()
1141                .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
1142            self.write_block_ptr(indirect, idx, new_blk);
1143            self.mark_dirty(new_blk);
1144            self.inode_table[inode_num as usize].blocks += 1;
1145            Ok(new_blk)
1146        } else if logical_block < DOUBLE_INDIRECT_MAX_BLOCKS {
1147            // Double indirect
1148            let mut dbl_indirect = self.inode_table[inode_num as usize].double_indirect_block;
1149            if dbl_indirect == 0 {
1150                dbl_indirect = self
1151                    .allocate_block()
1152                    .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
1153                for byte in &mut self.block_data[dbl_indirect as usize] {
1154                    *byte = 0;
1155                }
1156                self.mark_dirty(dbl_indirect);
1157                self.inode_table[inode_num as usize].double_indirect_block = dbl_indirect;
1158                self.inode_table[inode_num as usize].blocks += 1;
1159            }
1160            let rel = logical_block - SINGLE_INDIRECT_MAX_BLOCKS;
1161            let l1_idx = rel / PTRS_PER_BLOCK;
1162            let l2_idx = rel % PTRS_PER_BLOCK;
1163            let mut l1_block = self.read_block_ptr(dbl_indirect, l1_idx);
1164            if l1_block == 0 {
1165                l1_block = self
1166                    .allocate_block()
1167                    .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
1168                for byte in &mut self.block_data[l1_block as usize] {
1169                    *byte = 0;
1170                }
1171                self.mark_dirty(l1_block);
1172                self.write_block_ptr(dbl_indirect, l1_idx, l1_block);
1173                self.inode_table[inode_num as usize].blocks += 1;
1174            }
1175            let blk = self.read_block_ptr(l1_block, l2_idx);
1176            if blk != 0 {
1177                return Ok(blk);
1178            }
1179            let new_blk = self
1180                .allocate_block()
1181                .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
1182            self.write_block_ptr(l1_block, l2_idx, new_blk);
1183            self.mark_dirty(new_blk);
1184            self.inode_table[inode_num as usize].blocks += 1;
1185            Ok(new_blk)
1186        } else {
1187            Err(KernelError::FsError(FsError::FileTooLarge))
1188        }
1189    }
1190
1191    // --- Inode I/O ---
1192
1193    fn read_inode(
1194        &self,
1195        inode_num: u32,
1196        offset: usize,
1197        buffer: &mut [u8],
1198    ) -> Result<usize, KernelError> {
1199        let inode = self
1200            .inode_table
1201            .get(inode_num as usize)
1202            .ok_or(KernelError::FsError(FsError::NotFound))?;
1203
1204        if offset >= inode.size as usize {
1205            return Ok(0);
1206        }
1207
1208        let to_read = buffer.len().min(inode.size as usize - offset);
1209        let mut bytes_read = 0;
1210        let mut current_offset = offset;
1211
1212        while bytes_read < to_read {
1213            let logical_block = current_offset / BLOCK_SIZE;
1214            let block_offset = current_offset % BLOCK_SIZE;
1215
1216            match self.resolve_block(inode, logical_block) {
1217                Some(block_num) => {
1218                    let block = self.block_ref(block_num as usize);
1219                    let copy_len = (BLOCK_SIZE - block_offset).min(to_read - bytes_read);
1220                    buffer[bytes_read..bytes_read + copy_len]
1221                        .copy_from_slice(&block[block_offset..block_offset + copy_len]);
1222                    bytes_read += copy_len;
1223                    current_offset += copy_len;
1224                }
1225                None => {
1226                    // Sparse hole or beyond addressing range -- fill with zeros
1227                    let copy_len = (BLOCK_SIZE - block_offset).min(to_read - bytes_read);
1228                    for byte in &mut buffer[bytes_read..bytes_read + copy_len] {
1229                        *byte = 0;
1230                    }
1231                    bytes_read += copy_len;
1232                    current_offset += copy_len;
1233                }
1234            }
1235        }
1236
1237        Ok(bytes_read)
1238    }
1239
1240    fn write_inode(
1241        &mut self,
1242        inode_num: u32,
1243        offset: usize,
1244        data: &[u8],
1245    ) -> Result<usize, KernelError> {
1246        // Collect block information in multiple passes to avoid borrow conflicts
1247        let mut blocks_needed: Vec<(usize, usize, usize)> = Vec::new();
1248        let mut current_offset = offset;
1249        let mut bytes_remaining = data.len();
1250
1251        // Determine which blocks we need (up to double-indirect limit)
1252        while bytes_remaining > 0 {
1253            let logical_block = current_offset / BLOCK_SIZE;
1254            if logical_block >= DOUBLE_INDIRECT_MAX_BLOCKS {
1255                break; // Beyond addressable range
1256            }
1257
1258            let block_offset = current_offset % BLOCK_SIZE;
1259            let copy_len = (BLOCK_SIZE - block_offset).min(bytes_remaining);
1260
1261            blocks_needed.push((logical_block, block_offset, copy_len));
1262
1263            bytes_remaining -= copy_len;
1264            current_offset += copy_len;
1265        }
1266
1267        // Ensure all required blocks are allocated and collect physical block numbers
1268        let mut block_numbers: Vec<u32> = Vec::new();
1269        for (logical_block, _, _) in &blocks_needed {
1270            let phys_block = self.ensure_block(inode_num, *logical_block)?;
1271            block_numbers.push(phys_block);
1272        }
1273
1274        // Write data to blocks
1275        let mut bytes_written = 0;
1276        for (i, (_, block_offset, copy_len)) in blocks_needed.iter().enumerate() {
1277            let block_num = block_numbers[i];
1278            self.block_data[block_num as usize][*block_offset..*block_offset + *copy_len]
1279                .copy_from_slice(&data[bytes_written..bytes_written + *copy_len]);
1280            self.mark_dirty(block_num);
1281            bytes_written += *copy_len;
1282        }
1283
1284        // Update inode size
1285        if (offset + bytes_written) > self.inode_table[inode_num as usize].size as usize {
1286            self.inode_table[inode_num as usize].size = (offset + bytes_written) as u32;
1287        }
1288
1289        Ok(bytes_written)
1290    }
1291
1292    fn get_metadata(&self, inode_num: u32) -> Result<Metadata, KernelError> {
1293        let inode = self
1294            .inode_table
1295            .get(inode_num as usize)
1296            .ok_or(KernelError::FsError(FsError::NotFound))?;
1297
1298        Ok(Metadata {
1299            node_type: inode.node_type(),
1300            size: inode.size as usize,
1301            permissions: Permissions::from_mode(inode.mode as u32),
1302            uid: inode.uid as u32,
1303            gid: inode.gid as u32,
1304            created: inode.ctime as u64,
1305            modified: inode.mtime as u64,
1306            accessed: inode.atime as u64,
1307            inode: inode_num as u64,
1308        })
1309    }
1310
1311    fn readdir(&self, inode_num: u32) -> Result<Vec<DirEntry>, KernelError> {
1312        let inode = self
1313            .inode_table
1314            .get(inode_num as usize)
1315            .ok_or(KernelError::FsError(FsError::NotFound))?;
1316
1317        if !inode.is_dir() {
1318            return Err(KernelError::FsError(FsError::NotADirectory));
1319        }
1320
1321        let mut entries = Vec::new();
1322        let dir_size = inode.size as usize;
1323
1324        // Iterate through direct blocks that contain directory entries
1325        for i in 0..12 {
1326            let block_num = inode.direct_blocks[i];
1327            if block_num == 0 {
1328                break;
1329            }
1330
1331            let block_start = i * BLOCK_SIZE;
1332            if block_start >= dir_size {
1333                break;
1334            }
1335
1336            let block = self.block_ref(block_num as usize);
1337            let block_end = BLOCK_SIZE.min(dir_size - block_start);
1338            let mut offset = 0;
1339
1340            while offset + DIR_ENTRY_HEADER_SIZE <= block_end {
1341                let entry = self.read_dir_entry(block, offset);
1342                let rec_len = entry.rec_len as usize;
1343
1344                // rec_len must be at least the header size and 4-byte aligned
1345                if rec_len < DIR_ENTRY_HEADER_SIZE || !rec_len.is_multiple_of(4) {
1346                    break;
1347                }
1348
1349                // Skip deleted entries (inode == 0) but still advance
1350                if entry.inode != 0 && entry.name_len > 0 {
1351                    entries.push(DirEntry {
1352                        name: String::from(entry.name_str()),
1353                        node_type: entry.node_type(),
1354                        inode: entry.inode as u64,
1355                    });
1356                }
1357
1358                offset += rec_len;
1359            }
1360        }
1361
1362        Ok(entries)
1363    }
1364
1365    fn lookup_in_dir(&self, dir_inode: u32, name: &str) -> Result<u32, KernelError> {
1366        // Validate inode exists and is a directory (scoped borrow)
1367        {
1368            let inode = self
1369                .inode_table
1370                .get(dir_inode as usize)
1371                .ok_or(KernelError::FsError(FsError::NotFound))?;
1372
1373            if !inode.is_dir() {
1374                return Err(KernelError::FsError(FsError::NotADirectory));
1375            }
1376        }
1377
1378        match self.find_dir_entry(dir_inode, name) {
1379            Some((entry, _, _)) => Ok(entry.inode),
1380            None => Err(KernelError::FsError(FsError::NotFound)),
1381        }
1382    }
1383
1384    fn create_file(
1385        &mut self,
1386        parent: u32,
1387        name: &str,
1388        permissions: Permissions,
1389    ) -> Result<u32, KernelError> {
1390        // Check name length
1391        if name.is_empty() || name.len() > MAX_FILENAME_LEN {
1392            return Err(KernelError::InvalidArgument {
1393                name: "filename",
1394                value: "empty or exceeds maximum length",
1395            });
1396        }
1397
1398        // Check if the name already exists in the parent directory
1399        if self.find_dir_entry(parent, name).is_some() {
1400            return Err(KernelError::FsError(FsError::AlreadyExists));
1401        }
1402
1403        let inode_num = self
1404            .allocate_inode()
1405            .ok_or(KernelError::ResourceExhausted { resource: "inodes" })?;
1406
1407        let mode = permissions_to_mode(permissions, false);
1408        self.inode_table[inode_num as usize] = DiskInode::new(mode, 0, 0);
1409
1410        // Add directory entry to parent
1411        if let Err(e) = self.write_dir_entry(parent, inode_num, name, DiskDirEntry::FT_REG_FILE) {
1412            // Roll back inode allocation on failure
1413            self.inode_table[inode_num as usize].links_count = 0;
1414            self.superblock.free_inodes += 1;
1415            return Err(e);
1416        }
1417
1418        Ok(inode_num)
1419    }
1420
1421    fn create_directory(
1422        &mut self,
1423        parent: u32,
1424        name: &str,
1425        permissions: Permissions,
1426    ) -> Result<u32, KernelError> {
1427        // Check name length
1428        if name.is_empty() || name.len() > MAX_FILENAME_LEN {
1429            return Err(KernelError::InvalidArgument {
1430                name: "dirname",
1431                value: "empty or exceeds maximum length",
1432            });
1433        }
1434
1435        // Check if the name already exists in the parent directory
1436        if self.find_dir_entry(parent, name).is_some() {
1437            return Err(KernelError::FsError(FsError::AlreadyExists));
1438        }
1439
1440        let inode_num = self
1441            .allocate_inode()
1442            .ok_or(KernelError::ResourceExhausted { resource: "inodes" })?;
1443
1444        let mode = permissions_to_mode(permissions, true);
1445        let mut new_inode = DiskInode::new(mode, 0, 0);
1446        // Directories start with link count 2 (parent's entry + self ".")
1447        new_inode.links_count = 2;
1448        self.inode_table[inode_num as usize] = new_inode;
1449
1450        // Create "." entry (self-reference) in the new directory
1451        if let Err(e) = self.write_dir_entry(inode_num, inode_num, ".", DiskDirEntry::FT_DIR) {
1452            self.inode_table[inode_num as usize].links_count = 0;
1453            self.superblock.free_inodes += 1;
1454            return Err(e);
1455        }
1456
1457        // Create ".." entry (parent reference) in the new directory
1458        if let Err(e) = self.write_dir_entry(inode_num, parent, "..", DiskDirEntry::FT_DIR) {
1459            self.inode_table[inode_num as usize].links_count = 0;
1460            self.superblock.free_inodes += 1;
1461            return Err(e);
1462        }
1463
1464        // Add entry for the new directory in the parent directory
1465        if let Err(e) = self.write_dir_entry(parent, inode_num, name, DiskDirEntry::FT_DIR) {
1466            self.inode_table[inode_num as usize].links_count = 0;
1467            self.superblock.free_inodes += 1;
1468            return Err(e);
1469        }
1470
1471        // Increment parent's link count (for the ".." entry pointing back)
1472        self.inode_table[parent as usize].links_count += 1;
1473
1474        Ok(inode_num)
1475    }
1476
1477    /// Create a symbolic link inode in the given parent directory.
1478    ///
1479    /// Allocates a new inode with symlink mode (0o120777), stores `target`
1480    /// as the inode's file data (the symlink target path), and adds a
1481    /// directory entry of type `FT_SYMLINK` in the parent.
1482    ///
1483    /// # Arguments
1484    /// - `parent`: Inode number of the parent directory.
1485    /// - `name`: Name of the symlink entry in the parent directory.
1486    /// - `target`: The target path that the symlink points to.
1487    ///
1488    /// # Returns
1489    /// The inode number of the newly created symlink.
1490    fn create_symlink(
1491        &mut self,
1492        parent: u32,
1493        name: &str,
1494        target: &str,
1495    ) -> Result<u32, KernelError> {
1496        if name.is_empty() || name.len() > MAX_FILENAME_LEN {
1497            return Err(KernelError::InvalidArgument {
1498                name: "symlink",
1499                value: "empty or too long",
1500            });
1501        }
1502        if self.find_dir_entry(parent, name).is_some() {
1503            return Err(KernelError::FsError(FsError::AlreadyExists));
1504        }
1505
1506        let inode_num = self
1507            .allocate_inode()
1508            .ok_or(KernelError::ResourceExhausted { resource: "inodes" })?;
1509
1510        // Symlink inode: mode 0o120000 | 0o777 (rwx for all)
1511        let mut inode = DiskInode::new(0o120000 | 0o777, 0, 0);
1512        inode.links_count = 1;
1513        inode.size = target.len() as u32;
1514        self.inode_table[inode_num as usize] = inode;
1515
1516        // Store target contents as file data
1517        self.write_inode(inode_num, 0, target.as_bytes())?;
1518
1519        // Add dir entry in parent
1520        if let Err(e) = self.write_dir_entry(parent, inode_num, name, DiskDirEntry::FT_SYMLINK) {
1521            self.inode_table[inode_num as usize].links_count = 0;
1522            self.superblock.free_inodes += 1;
1523            return Err(e);
1524        }
1525
1526        Ok(inode_num)
1527    }
1528
    /// Remove the directory entry `name` from directory `parent`.
    ///
    /// Marks the on-disk entry deleted (inode field zeroed), decrements the
    /// target's link count, and — when the count reaches zero — frees all of
    /// the target's data blocks. Unlinking a directory is only allowed when
    /// it contains nothing beyond "." and "..", and additionally drops the
    /// parent's link count (the removed ".." back-reference).
    ///
    /// # Errors
    /// - `InvalidArgument` when `name` is "." or "..".
    /// - `NotFound` when the entry (or parent inode) does not exist.
    /// - `DirectoryNotEmpty` when removing a non-empty directory.
    /// - `IoError` when the parent's direct-block pointer is unexpectedly 0.
    fn unlink_from_dir(&mut self, parent: u32, name: &str) -> Result<(), KernelError> {
        // Cannot unlink "." or ".."
        if name == "." || name == ".." {
            return Err(KernelError::InvalidArgument {
                name: "filename",
                value: "cannot unlink . or ..",
            });
        }

        // Find the entry in the parent directory
        let (entry, block_idx, offset) = self
            .find_dir_entry(parent, name)
            .ok_or(KernelError::FsError(FsError::NotFound))?;

        let target_inode = entry.inode;
        let is_dir = entry.file_type == DiskDirEntry::FT_DIR;

        // If unlinking a directory, check that it is empty (only "." and ".." entries)
        if is_dir {
            let child_entries = self.readdir(target_inode)?;
            let non_dot_count = child_entries
                .iter()
                .filter(|e| e.name != "." && e.name != "..")
                .count();
            if non_dot_count > 0 {
                return Err(KernelError::FsError(FsError::DirectoryNotEmpty));
            }
        }

        // Get the block number from the parent inode (scoped borrow)
        let block_num = {
            let parent_inode = self
                .inode_table
                .get(parent as usize)
                .ok_or(KernelError::FsError(FsError::NotFound))?;
            let bn = parent_inode.direct_blocks[block_idx];
            if bn == 0 {
                return Err(KernelError::FsError(FsError::IoError));
            }
            bn
        };

        // Zero out the inode field in the on-disk entry to mark it deleted.
        // (`offset` from find_dir_entry points at the entry's 4-byte inode
        // field; a zero inode means "deleted" to the readdir scan.)
        self.materialize_block(block_num as usize);
        let block = &mut self.block_data[block_num as usize];
        block[offset] = 0;
        block[offset + 1] = 0;
        block[offset + 2] = 0;
        block[offset + 3] = 0;
        self.mark_dirty(block_num);

        // Decrement link count on the target inode
        if let Some(target) = self.inode_table.get_mut(target_inode as usize) {
            if target.links_count > 0 {
                target.links_count -= 1;
            }

            // If unlinking a directory, also decrement parent link count (for "..")
            if is_dir {
                if let Some(p) = self.inode_table.get_mut(parent as usize) {
                    if p.links_count > 0 {
                        p.links_count -= 1;
                    }
                }
            }

            // If links reach 0, free all data blocks
            // (re-index instead of reusing `target` -- its borrow ended at
            // the parent get_mut above)
            if self.inode_table[target_inode as usize].links_count == 0 {
                self.free_inode_blocks(target_inode);
            }
        }

        Ok(())
    }
1603
    /// Truncate a file inode to `size` bytes.
    ///
    /// Sets the new size, frees all data blocks that lie entirely beyond it
    /// (direct, single-indirect, and double-indirect ranges), and zeroes the
    /// unused tail of the final block so stale bytes cannot reappear if the
    /// file is later extended. Growing a file (`size > old_size`) only
    /// updates the size field; no blocks are allocated here.
    ///
    /// # Errors
    /// - `FsError::NotFound` if `inode_num` is not in the inode table.
    fn truncate_inode(&mut self, inode_num: u32, size: usize) -> Result<(), KernelError> {
        // Read the old size in a scoped borrow so we can mutate below.
        let old_size = {
            let inode = self
                .inode_table
                .get(inode_num as usize)
                .ok_or(KernelError::FsError(FsError::NotFound))?;
            inode.size as usize
        };

        // Set the new size
        self.inode_table[inode_num as usize].size = size as u32;

        // Free data blocks that are fully beyond the new size
        if size < old_size {
            // First logical block index that is no longer needed
            // (ceiling division: a partially-used last block is kept).
            let first_free_block = if size == 0 {
                0
            } else {
                (size + BLOCK_SIZE - 1) / BLOCK_SIZE
            };

            // Free direct blocks beyond the new size
            let direct_start = first_free_block.min(DIRECT_BLOCKS);
            for i in direct_start..DIRECT_BLOCKS {
                let block_num = self.inode_table[inode_num as usize].direct_blocks[i];
                if block_num != 0 {
                    self.free_block(block_num);
                    self.inode_table[inode_num as usize].direct_blocks[i] = 0;
                    // `blocks` is a counter; guard against underflow.
                    if self.inode_table[inode_num as usize].blocks > 0 {
                        self.inode_table[inode_num as usize].blocks -= 1;
                    }
                }
            }

            // Free single-indirect blocks beyond the new size
            self.truncate_single_indirect(inode_num, first_free_block);

            // Free double-indirect blocks beyond the new size
            self.truncate_double_indirect(inode_num, first_free_block);

            // If truncating to non-zero size within a block, zero the tail
            if size > 0 {
                let tail_logical_block = (size - 1) / BLOCK_SIZE;
                // Resolve using the inode (borrow scoped to avoid conflicts)
                let phys_block = {
                    let inode = &self.inode_table[inode_num as usize];
                    self.resolve_block(inode, tail_logical_block)
                };
                if let Some(block_num) = phys_block {
                    // zero_from == 0 means the last block is fully used;
                    // nothing to scrub in that case.
                    let zero_from = size % BLOCK_SIZE;
                    if zero_from > 0 {
                        self.materialize_block(block_num as usize);
                        let block = &mut self.block_data[block_num as usize];
                        for byte in &mut block[zero_from..BLOCK_SIZE] {
                            *byte = 0;
                        }
                        self.mark_dirty(block_num);
                    }
                }
            }
        }

        Ok(())
    }
1668
1669    /// Read the target of a symlink inode.
1670    ///
1671    /// Reads the inode's data content (which contains the symlink target
1672    /// path stored at creation time) and returns it as a `String`.
1673    ///
1674    /// # Returns
1675    /// - `Ok(String)`: The symlink target path.
1676    /// - `Err(FsError::NotASymlink)`: The inode is not a symlink.
1677    /// - `Err(FsError::NotFound)`: The inode does not exist.
1678    /// - `Err(FsError::InvalidPath)`: The stored target is not valid UTF-8.
1679    fn read_symlink(&self, inode_num: u32) -> Result<String, KernelError> {
1680        let inode = self
1681            .inode_table
1682            .get(inode_num as usize)
1683            .ok_or(KernelError::FsError(FsError::NotFound))?;
1684        if !inode.is_symlink() {
1685            return Err(KernelError::FsError(FsError::NotASymlink));
1686        }
1687
1688        let mut buf = vec![0u8; inode.size as usize];
1689        let read = self.read_inode(inode_num, 0, &mut buf)?;
1690        buf.truncate(read);
1691        let s =
1692            core::str::from_utf8(&buf).map_err(|_| KernelError::FsError(FsError::InvalidPath))?;
1693        Ok(s.to_string())
1694    }
1695
1696    /// Change the permission bits on an inode, preserving the type bits.
1697    fn chmod_inode(&mut self, inode_num: u32, permissions: Permissions) -> Result<(), KernelError> {
1698        let inode = self
1699            .inode_table
1700            .get_mut(inode_num as usize)
1701            .ok_or(KernelError::FsError(FsError::NotFound))?;
1702
1703        // Preserve the file type bits (top 4 bits of mode), replace
1704        // the permission bits (bottom 12 bits).
1705        let type_bits = inode.mode & 0xF000;
1706        let perm_bits = permissions_to_mode(permissions, false) & 0x0FFF;
1707        inode.mode = type_bits | perm_bits;
1708        inode.ctime = crate::arch::timer::read_hw_timestamp() as u32;
1709
1710        Ok(())
1711    }
1712
1713    /// Create a hard link in a directory.
1714    ///
1715    /// Adds a new directory entry `name` in `dir_inode` pointing to the
1716    /// same inode as `target`. The target's link count is incremented.
1717    fn link_in_dir(
1718        &mut self,
1719        dir_inode: u32,
1720        name: &str,
1721        target: Arc<dyn VfsNode>,
1722    ) -> Result<(), KernelError> {
1723        if name.is_empty() || name.len() > MAX_FILENAME_LEN {
1724            return Err(KernelError::InvalidArgument {
1725                name: "filename",
1726                value: "empty or exceeds maximum length",
1727            });
1728        }
1729
1730        // Hard links to directories are not allowed (POSIX)
1731        if target.node_type() == NodeType::Directory {
1732            return Err(KernelError::FsError(FsError::IsADirectory));
1733        }
1734
1735        // Check that the name doesn't already exist
1736        if self.find_dir_entry(dir_inode, name).is_some() {
1737            return Err(KernelError::FsError(FsError::AlreadyExists));
1738        }
1739
1740        // Get the target's inode number from its metadata
1741        let target_meta = target.metadata()?;
1742        let target_inode = target_meta.inode as u32;
1743
1744        // Verify the target inode exists in our inode table (same filesystem)
1745        if target_inode as usize >= self.inode_table.len()
1746            || self.inode_table[target_inode as usize].links_count == 0
1747        {
1748            return Err(KernelError::FsError(FsError::NotSupported));
1749        }
1750
1751        // Determine file type for directory entry
1752        let file_type = if self.inode_table[target_inode as usize].is_symlink() {
1753            DiskDirEntry::FT_SYMLINK
1754        } else {
1755            DiskDirEntry::FT_REG_FILE
1756        };
1757
1758        // Add directory entry
1759        self.write_dir_entry(dir_inode, target_inode, name, file_type)?;
1760
1761        // Increment link count on the target inode
1762        self.inode_table[target_inode as usize].links_count += 1;
1763
1764        Ok(())
1765    }
1766
    /// Free single-indirect data blocks at or beyond `first_free_block`.
    /// Also frees the indirect block itself if it becomes fully empty.
    ///
    /// `first_free_block` is a logical (file-relative) block index; the
    /// single-indirect range covers logical blocks
    /// [DIRECT_BLOCKS .. SINGLE_INDIRECT_MAX_BLOCKS). The inode's `blocks`
    /// counter is decremented once per freed block, whether it is a data
    /// block or the indirect pointer block itself (guarded against
    /// underflow throughout).
    fn truncate_single_indirect(&mut self, inode_num: u32, first_free_block: usize) {
        let indirect = self.inode_table[inode_num as usize].indirect_block;
        if indirect == 0 {
            return;
        }

        // If all indirect entries are being freed
        // (first_free_block falls at or before the start of this range)
        if first_free_block <= DIRECT_BLOCKS {
            // Free every data block referenced by the indirect block
            for idx in 0..PTRS_PER_BLOCK {
                let blk = self.read_block_ptr(indirect, idx);
                if blk != 0 {
                    self.free_block(blk);
                    if self.inode_table[inode_num as usize].blocks > 0 {
                        self.inode_table[inode_num as usize].blocks -= 1;
                    }
                }
            }
            // Free the indirect block itself
            self.free_block(indirect);
            self.inode_table[inode_num as usize].indirect_block = 0;
            if self.inode_table[inode_num as usize].blocks > 0 {
                self.inode_table[inode_num as usize].blocks -= 1;
            }
        } else if first_free_block < SINGLE_INDIRECT_MAX_BLOCKS {
            // Partial free within the single-indirect range
            let start_idx = first_free_block - DIRECT_BLOCKS;
            let mut any_remain = false;
            for idx in 0..PTRS_PER_BLOCK {
                if idx >= start_idx {
                    // At or past the cut: free and clear the pointer slot.
                    let blk = self.read_block_ptr(indirect, idx);
                    if blk != 0 {
                        self.free_block(blk);
                        self.write_block_ptr(indirect, idx, 0);
                        if self.inode_table[inode_num as usize].blocks > 0 {
                            self.inode_table[inode_num as usize].blocks -= 1;
                        }
                    }
                } else {
                    // Before the cut: the slot survives; remember that the
                    // indirect block is still in use.
                    let blk = self.read_block_ptr(indirect, idx);
                    if blk != 0 {
                        any_remain = true;
                    }
                }
            }
            // If no entries remain, free the indirect block itself
            if !any_remain {
                self.free_block(indirect);
                self.inode_table[inode_num as usize].indirect_block = 0;
                if self.inode_table[inode_num as usize].blocks > 0 {
                    self.inode_table[inode_num as usize].blocks -= 1;
                }
            }
        }
        // If first_free_block >= SINGLE_INDIRECT_MAX_BLOCKS, nothing in the
        // single-indirect range needs freeing.
    }
1826
    /// Free double-indirect data blocks at or beyond `first_free_block`.
    /// Also frees level-1 indirect blocks and the double-indirect block itself
    /// if they become fully empty.
    ///
    /// `first_free_block` is a logical (file-relative) block index. The
    /// double-indirect tree has two levels: the inode points to one L0
    /// (double-indirect) block whose entries point to L1 indirect blocks,
    /// which in turn point to data blocks. The inode's `blocks` counter is
    /// decremented once per freed block at every level (underflow-guarded).
    fn truncate_double_indirect(&mut self, inode_num: u32, first_free_block: usize) {
        let dbl_indirect = self.inode_table[inode_num as usize].double_indirect_block;
        if dbl_indirect == 0 {
            return;
        }

        // Logical block range covered by double indirect:
        //   [SINGLE_INDIRECT_MAX_BLOCKS .. DOUBLE_INDIRECT_MAX_BLOCKS)

        if first_free_block <= SINGLE_INDIRECT_MAX_BLOCKS {
            // Free everything in double-indirect range
            for l1_idx in 0..PTRS_PER_BLOCK {
                let l1_block = self.read_block_ptr(dbl_indirect, l1_idx);
                if l1_block != 0 {
                    // Free all data blocks in this L1 indirect block
                    for l2_idx in 0..PTRS_PER_BLOCK {
                        let data_blk = self.read_block_ptr(l1_block, l2_idx);
                        if data_blk != 0 {
                            self.free_block(data_blk);
                            if self.inode_table[inode_num as usize].blocks > 0 {
                                self.inode_table[inode_num as usize].blocks -= 1;
                            }
                        }
                    }
                    // Free the L1 indirect block itself
                    self.free_block(l1_block);
                    if self.inode_table[inode_num as usize].blocks > 0 {
                        self.inode_table[inode_num as usize].blocks -= 1;
                    }
                }
            }
            // Free the double-indirect block itself
            self.free_block(dbl_indirect);
            self.inode_table[inode_num as usize].double_indirect_block = 0;
            if self.inode_table[inode_num as usize].blocks > 0 {
                self.inode_table[inode_num as usize].blocks -= 1;
            }
        } else if first_free_block < DOUBLE_INDIRECT_MAX_BLOCKS {
            // Partial free within the double-indirect range.
            // Split the cut point into (L1 slot, L2 slot) coordinates.
            let rel = first_free_block - SINGLE_INDIRECT_MAX_BLOCKS;
            let first_l1 = rel / PTRS_PER_BLOCK;
            let first_l2 = rel % PTRS_PER_BLOCK;
            let mut any_l1_remain = false;

            for l1_idx in 0..PTRS_PER_BLOCK {
                let l1_block = self.read_block_ptr(dbl_indirect, l1_idx);
                if l1_block == 0 {
                    continue;
                }

                if l1_idx < first_l1 {
                    // Entirely before the truncation point -- keep
                    any_l1_remain = true;
                    continue;
                }

                // Only the boundary L1 block is partially freed; later L1
                // blocks are freed from slot 0.
                let l2_start = if l1_idx == first_l1 { first_l2 } else { 0 };

                let mut any_l2_remain = false;
                for l2_idx in 0..PTRS_PER_BLOCK {
                    if l2_idx >= l2_start {
                        // At or past the cut: free and clear the pointer.
                        let data_blk = self.read_block_ptr(l1_block, l2_idx);
                        if data_blk != 0 {
                            self.free_block(data_blk);
                            self.write_block_ptr(l1_block, l2_idx, 0);
                            if self.inode_table[inode_num as usize].blocks > 0 {
                                self.inode_table[inode_num as usize].blocks -= 1;
                            }
                        }
                    } else {
                        // Before the cut: survives; the L1 block stays live.
                        let data_blk = self.read_block_ptr(l1_block, l2_idx);
                        if data_blk != 0 {
                            any_l2_remain = true;
                        }
                    }
                }

                if !any_l2_remain {
                    // Free the now-empty L1 indirect block
                    self.free_block(l1_block);
                    self.write_block_ptr(dbl_indirect, l1_idx, 0);
                    if self.inode_table[inode_num as usize].blocks > 0 {
                        self.inode_table[inode_num as usize].blocks -= 1;
                    }
                } else {
                    any_l1_remain = true;
                }
            }

            // All L1 slots emptied: release the double-indirect block too.
            if !any_l1_remain {
                self.free_block(dbl_indirect);
                self.inode_table[inode_num as usize].double_indirect_block = 0;
                if self.inode_table[inode_num as usize].blocks > 0 {
                    self.inode_table[inode_num as usize].blocks -= 1;
                }
            }
        }
        // If first_free_block >= DOUBLE_INDIRECT_MAX_BLOCKS, nothing in the
        // double-indirect range needs freeing.
    }
1930
1931    // --- Helper methods for directory entry operations ---
1932
1933    /// Read a DiskDirEntry from a block at the given byte offset.
1934    ///
1935    /// Parses the fixed header fields and name bytes from raw block data.
1936    fn read_dir_entry(&self, block: &[u8], offset: usize) -> DiskDirEntry {
1937        let inode = u32::from_le_bytes([
1938            block[offset],
1939            block[offset + 1],
1940            block[offset + 2],
1941            block[offset + 3],
1942        ]);
1943        let rec_len = u16::from_le_bytes([block[offset + 4], block[offset + 5]]);
1944        let name_len = block[offset + 6];
1945        let file_type = block[offset + 7];
1946
1947        let mut name = [0u8; 255];
1948        let actual_name_len = (name_len as usize).min(MAX_FILENAME_LEN);
1949        let available = block.len() - (offset + DIR_ENTRY_HEADER_SIZE);
1950        let copy_len = actual_name_len.min(available);
1951        name[..copy_len].copy_from_slice(
1952            &block[offset + DIR_ENTRY_HEADER_SIZE..offset + DIR_ENTRY_HEADER_SIZE + copy_len],
1953        );
1954
1955        DiskDirEntry {
1956            inode,
1957            rec_len,
1958            name_len,
1959            file_type,
1960            name,
1961        }
1962    }
1963
1964    /// Find a directory entry by name within a directory inode.
1965    ///
1966    /// Returns the entry, the direct block index, and the byte offset within
1967    /// that block where the entry starts. Returns None if not found.
1968    fn find_dir_entry(&self, dir_inode: u32, name: &str) -> Option<(DiskDirEntry, usize, usize)> {
1969        let inode = self.inode_table.get(dir_inode as usize)?;
1970
1971        if !inode.is_dir() {
1972            return None;
1973        }
1974
1975        let dir_size = inode.size as usize;
1976
1977        for i in 0..12 {
1978            let block_num = inode.direct_blocks[i];
1979            if block_num == 0 {
1980                break;
1981            }
1982
1983            let block_start = i * BLOCK_SIZE;
1984            if block_start >= dir_size {
1985                break;
1986            }
1987
1988            let block = self.block_ref(block_num as usize);
1989            let block_end = BLOCK_SIZE.min(dir_size - block_start);
1990            let mut offset = 0;
1991
1992            while offset + DIR_ENTRY_HEADER_SIZE <= block_end {
1993                let entry = self.read_dir_entry(block, offset);
1994                let rec_len = entry.rec_len as usize;
1995
1996                if rec_len < DIR_ENTRY_HEADER_SIZE || !rec_len.is_multiple_of(4) {
1997                    break;
1998                }
1999
2000                if entry.inode != 0 && entry.name_len > 0 && entry.name_str() == name {
2001                    return Some((entry, i, offset));
2002                }
2003
2004                offset += rec_len;
2005            }
2006        }
2007
2008        None
2009    }
2010
2011    /// Write a new directory entry into a directory inode's data blocks.
2012    ///
2013    /// Appends the entry at the end of the directory's current content.
2014    /// Allocates a new data block if needed.
2015    fn write_dir_entry(
2016        &mut self,
2017        dir_inode: u32,
2018        target_inode: u32,
2019        name: &str,
2020        file_type: u8,
2021    ) -> Result<(), KernelError> {
2022        let entry = DiskDirEntry::new(target_inode, name, file_type);
2023        let entry_size = align4(DIR_ENTRY_HEADER_SIZE + entry.name_len as usize);
2024
2025        let dir_size = self.inode_table[dir_inode as usize].size as usize;
2026
2027        // Determine which block to write into and at what offset
2028        let block_idx = dir_size / BLOCK_SIZE;
2029        let offset_in_block = dir_size % BLOCK_SIZE;
2030
2031        if block_idx >= 12 {
2032            return Err(KernelError::ResourceExhausted {
2033                resource: "directory direct blocks",
2034            });
2035        }
2036
2037        // Check if the entry fits in the current block
2038        if offset_in_block + entry_size > BLOCK_SIZE {
2039            // Need a new block; current block cannot fit this entry
2040            let next_block_idx = block_idx + 1;
2041            if next_block_idx >= 12 {
2042                return Err(KernelError::ResourceExhausted {
2043                    resource: "directory direct blocks",
2044                });
2045            }
2046
2047            // Allocate a new block if not already present
2048            if self.inode_table[dir_inode as usize].direct_blocks[next_block_idx] == 0 {
2049                let new_block = self
2050                    .allocate_block()
2051                    .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
2052                self.inode_table[dir_inode as usize].direct_blocks[next_block_idx] = new_block;
2053                self.inode_table[dir_inode as usize].blocks += 1;
2054            }
2055
2056            // Write at the start of the new block
2057            self.serialize_dir_entry(dir_inode, next_block_idx, 0, &entry, entry_size)?;
2058
2059            // Update directory size to include any padding in the old block plus the new
2060            // entry
2061            let new_size = (next_block_idx * BLOCK_SIZE) + entry_size;
2062            self.inode_table[dir_inode as usize].size = new_size as u32;
2063        } else {
2064            // Allocate the first block if needed (empty directory)
2065            if self.inode_table[dir_inode as usize].direct_blocks[block_idx] == 0 {
2066                let new_block = self
2067                    .allocate_block()
2068                    .ok_or(KernelError::ResourceExhausted { resource: "blocks" })?;
2069                self.inode_table[dir_inode as usize].direct_blocks[block_idx] = new_block;
2070                self.inode_table[dir_inode as usize].blocks += 1;
2071            }
2072
2073            self.serialize_dir_entry(dir_inode, block_idx, offset_in_block, &entry, entry_size)?;
2074
2075            // Update directory size
2076            let new_size = dir_size + entry_size;
2077            self.inode_table[dir_inode as usize].size = new_size as u32;
2078        }
2079
2080        Ok(())
2081    }
2082
2083    /// Serialize a DiskDirEntry into a specific block at a given offset.
2084    fn serialize_dir_entry(
2085        &mut self,
2086        dir_inode: u32,
2087        block_idx: usize,
2088        offset: usize,
2089        entry: &DiskDirEntry,
2090        entry_size: usize,
2091    ) -> Result<(), KernelError> {
2092        let block_num = self.inode_table[dir_inode as usize].direct_blocks[block_idx];
2093        if block_num == 0 {
2094            return Err(KernelError::FsError(FsError::IoError));
2095        }
2096
2097        self.materialize_block(block_num as usize);
2098        let block = &mut self.block_data[block_num as usize];
2099
2100        // Write inode (4 bytes, little-endian)
2101        let inode_bytes = entry.inode.to_le_bytes();
2102        block[offset..offset + 4].copy_from_slice(&inode_bytes);
2103
2104        // Write rec_len (2 bytes, little-endian) - use the padded entry_size
2105        let rec_len_bytes = (entry_size as u16).to_le_bytes();
2106        block[offset + 4..offset + 6].copy_from_slice(&rec_len_bytes);
2107
2108        // Write name_len (1 byte)
2109        block[offset + 6] = entry.name_len;
2110
2111        // Write file_type (1 byte)
2112        block[offset + 7] = entry.file_type;
2113
2114        // Write name bytes
2115        let name_len = entry.name_len as usize;
2116        block[offset + DIR_ENTRY_HEADER_SIZE..offset + DIR_ENTRY_HEADER_SIZE + name_len]
2117            .copy_from_slice(&entry.name[..name_len]);
2118
2119        // Zero-fill any padding bytes between name end and rec_len boundary
2120        let name_end = offset + DIR_ENTRY_HEADER_SIZE + name_len;
2121        let rec_end = offset + entry_size;
2122        for byte in &mut block[name_end..rec_end] {
2123            *byte = 0;
2124        }
2125
2126        self.mark_dirty(block_num);
2127
2128        Ok(())
2129    }
2130
2131    /// Free all data blocks belonging to an inode (direct + indirect).
2132    fn free_inode_blocks(&mut self, inode_num: u32) {
2133        // Free direct blocks
2134        for i in 0..DIRECT_BLOCKS {
2135            let block_num = self.inode_table[inode_num as usize].direct_blocks[i];
2136            if block_num != 0 {
2137                self.free_block(block_num);
2138                self.inode_table[inode_num as usize].direct_blocks[i] = 0;
2139            }
2140        }
2141
2142        // Free single-indirect and double-indirect blocks (first_free_block=0 frees
2143        // all)
2144        self.truncate_single_indirect(inode_num, 0);
2145        self.truncate_double_indirect(inode_num, 0);
2146
2147        self.inode_table[inode_num as usize].blocks = 0;
2148        self.inode_table[inode_num as usize].size = 0;
2149    }
2150}
2151
2152fn permissions_to_mode(perms: Permissions, is_dir: bool) -> u16 {
2153    let mut mode = 0u16;
2154
2155    if is_dir {
2156        mode |= 0x4000;
2157    } else {
2158        mode |= 0x8000;
2159    }
2160
2161    if perms.owner_read {
2162        mode |= 0o400;
2163    }
2164    if perms.owner_write {
2165        mode |= 0o200;
2166    }
2167    if perms.owner_exec {
2168        mode |= 0o100;
2169    }
2170    if perms.group_read {
2171        mode |= 0o040;
2172    }
2173    if perms.group_write {
2174        mode |= 0o020;
2175    }
2176    if perms.group_exec {
2177        mode |= 0o010;
2178    }
2179    if perms.other_read {
2180        mode |= 0o004;
2181    }
2182    if perms.other_write {
2183        mode |= 0o002;
2184    }
2185    if perms.other_exec {
2186        mode |= 0o001;
2187    }
2188
2189    mode
2190}
2191
/// A shared zero block for reads of unmaterialized (sparse) blocks.
/// Avoids allocating 4KB for every unoccupied block index.
static ZERO_BLOCK: [u8; BLOCK_SIZE] = [0u8; BLOCK_SIZE];

/// BlockFS filesystem
///
/// Thin handle around the shared filesystem state; cloning the inner `Arc`
/// lets VFS nodes and the filesystem object reference the same
/// `BlockFsInner`.
pub struct BlockFs {
    // All mutable filesystem state lives behind a single RwLock (the
    // bare-metal variant on aarch64, spin::RwLock elsewhere — see imports).
    inner: Arc<RwLock<BlockFsInner>>,
}
2200
2201impl BlockFs {
2202    pub fn new(block_count: u32, inode_count: u32) -> Self {
2203        Self {
2204            inner: Arc::new(RwLock::new(BlockFsInner::new(block_count, inode_count))),
2205        }
2206    }
2207
2208    pub fn format(block_count: u32, inode_count: u32) -> Result<Self, KernelError> {
2209        if block_count < 100 {
2210            return Err(KernelError::InvalidArgument {
2211                name: "block_count",
2212                value: "too small (minimum 100)",
2213            });
2214        }
2215
2216        if inode_count < 10 {
2217            return Err(KernelError::InvalidArgument {
2218                name: "inode_count",
2219                value: "too small (minimum 10)",
2220            });
2221        }
2222
2223        Ok(Self::new(block_count, inode_count))
2224    }
2225
2226    /// Open an existing BlockFS from a disk backend.
2227    ///
2228    /// Reads the superblock, validates the magic number, and loads the bitmap,
2229    /// inode table, and all data blocks into memory. The disk backend remains
2230    /// attached for subsequent sync operations.
2231    pub fn open_existing(backend: Arc<Mutex<dyn DiskBackend>>) -> Result<Self, KernelError> {
2232        let inner = BlockFsInner::load_existing(backend)?;
2233        Ok(Self {
2234            inner: Arc::new(RwLock::new(inner)),
2235        })
2236    }
2237
2238    /// Attach a disk backend for persistent storage.
2239    ///
2240    /// When a disk backend is attached, `sync()` will write all dirty blocks
2241    /// to the device. Without a backend, BlockFS operates as a pure RAM
2242    /// filesystem.
2243    ///
2244    /// If `load` is true, existing data is read from the disk into memory.
2245    pub fn set_disk_backend(
2246        &self,
2247        backend: Arc<Mutex<dyn DiskBackend>>,
2248        load: bool,
2249    ) -> Result<(), KernelError> {
2250        let mut inner = self.inner.write();
2251        inner.disk = Some(backend);
2252
2253        if load {
2254            let loaded = inner.load_from_disk()?;
2255            crate::println!("[BLOCKFS] Loaded {} blocks from disk backend", loaded);
2256        }
2257
2258        Ok(())
2259    }
2260
2261    /// Detach the disk backend. Outstanding dirty blocks will NOT be flushed;
2262    /// call `sync()` first if persistence is needed.
2263    pub fn detach_disk_backend(&self) {
2264        let mut inner = self.inner.write();
2265        inner.disk = None;
2266    }
2267
2268    /// Get the number of dirty blocks pending sync.
2269    pub fn dirty_block_count(&self) -> usize {
2270        let inner = self.inner.read();
2271        inner.dirty_blocks.len()
2272    }
2273}
2274
2275impl Filesystem for BlockFs {
2276    fn root(&self) -> Arc<dyn VfsNode> {
2277        Arc::new(BlockFsNode::new(0, self.inner.clone()))
2278    }
2279
2280    fn name(&self) -> &str {
2281        "blockfs"
2282    }
2283
2284    fn is_readonly(&self) -> bool {
2285        false
2286    }
2287
2288    fn sync(&self) -> Result<(), KernelError> {
2289        let mut inner = self.inner.write();
2290        let synced = inner.sync_to_disk()?;
2291        if synced > 0 {
2292            crate::println!("[BLOCKFS] Synced {} dirty blocks to disk", synced);
2293        }
2294        Ok(())
2295    }
2296}
2297
/// Initialize BlockFS
///
/// Logs the compile-time geometry (block size and on-disk inode size).
/// No global state is created here; filesystem instances are constructed
/// separately via `BlockFs::new` / `BlockFs::format`.
pub fn init() -> Result<(), KernelError> {
    crate::println!("[BLOCKFS] Initializing block-based filesystem...");
    crate::println!("[BLOCKFS] Block size: {} bytes", BLOCK_SIZE);
    crate::println!("[BLOCKFS] Inode size: {} bytes", size_of::<DiskInode>());
    crate::println!("[BLOCKFS] BlockFS initialized");
    Ok(())
}
2306
2307/// Try to attach the virtio-blk device as a disk backend for the given
2308/// BlockFS instance. Returns `true` if a device was found and attached.
2309///
2310/// This should be called after both the BlockFS and virtio-blk driver have
2311/// been initialized.
2312pub fn attach_virtio_backend(fs: &BlockFs, load_from_disk: bool) -> bool {
2313    if !crate::drivers::virtio::blk::is_initialized() {
2314        crate::println!("[BLOCKFS] No virtio-blk device available; operating in RAM-only mode");
2315        return false;
2316    }
2317
2318    let backend = Arc::new(Mutex::new(VirtioBlockBackend));
2319    match fs.set_disk_backend(backend, load_from_disk) {
2320        Ok(()) => {
2321            crate::println!("[BLOCKFS] Attached virtio-blk disk backend for persistence");
2322            true
2323        }
2324        Err(e) => {
2325            crate::println!("[BLOCKFS] Failed to attach virtio-blk backend: {:?}", e);
2326            false
2327        }
2328    }
2329}
2330
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly-created superblock carries the magic number and the
    /// requested geometry.
    #[test]
    fn test_superblock_creation() {
        let superblock = Superblock::new(10000, 1000);
        assert!(superblock.is_valid());
        assert_eq!(superblock.magic, BLOCKFS_MAGIC);
        assert_eq!(superblock.block_count, 10000);
        assert_eq!(superblock.inode_count, 1000);
    }

    /// Allocation marks a block used; freeing it clears the bit again.
    #[test]
    fn test_block_bitmap() {
        let mut bitmap = BlockBitmap::new(100);

        let allocated = bitmap.allocate_block().unwrap();
        assert!(bitmap.is_allocated(allocated));

        bitmap.free_block(allocated);
        assert!(!bitmap.is_allocated(allocated));
    }

    /// Formatting with valid geometry yields a writable "blockfs".
    #[test]
    fn test_blockfs_format() {
        let fs = BlockFs::format(1000, 100).unwrap();
        assert!(!fs.is_readonly());
        assert_eq!(fs.name(), "blockfs");
    }
}