⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/fs/
tar.rs

1//! TAR archive parser and VFS loader
2//!
3//! Parses ustar-format TAR archives from an in-memory byte buffer and
4//! creates corresponding files and directories in the VFS (RamFS).
5//! Supports regular files (typeflag '0' or '\0'), directories
6//! (typeflag '5'), and symlinks (typeflag '2', resolved as file copies).
7
8use alloc::{string::String, vec::Vec};
9
10use crate::{
11    error::KernelError,
12    fs::{get_vfs, Permissions},
13};
14
15/// TAR block size (every header and data region is a multiple of this).
16const BLOCK_SIZE: usize = 512;
17
/// Byte offsets and lengths of the ustar header fields this parser reads.
///
/// Entries are listed in the order they appear inside the 512-byte header.
mod field {
    /// Offset of the file name (null-terminated ASCII).
    pub const NAME_OFF: usize = 0;
    /// Length of the file name field.
    pub const NAME_LEN: usize = 100;

    /// Offset of the file mode (octal ASCII).
    pub const MODE_OFF: usize = 100;
    /// Length of the file mode field.
    pub const MODE_LEN: usize = 8;

    /// Offset of the file size (octal ASCII).
    pub const SIZE_OFF: usize = 124;
    /// Length of the file size field.
    pub const SIZE_LEN: usize = 12;

    /// Offset of the one-byte type flag: '0' or '\0' = regular file,
    /// '2' = symlink, '5' = directory.
    pub const TYPE_OFF: usize = 156;

    /// Offset of the link name used by symlinks/hard links
    /// (null-terminated ASCII).
    pub const LINK_OFF: usize = 157;
    /// Length of the link name field.
    pub const LINK_LEN: usize = 100;

    /// Offset of the magic field ("ustar\0" for POSIX TAR).
    pub const MAGIC_OFF: usize = 257;
    /// Length of the magic field.
    pub const MAGIC_LEN: usize = 6;

    /// Offset of the name prefix used for paths longer than 100 characters
    /// (null-terminated).
    pub const PREFIX_OFF: usize = 345;
    /// Length of the name prefix field.
    pub const PREFIX_LEN: usize = 155;
}
48
/// Read the text stored in a fixed-size TAR field.
///
/// The text ends at the first NUL byte, or at the end of `buf` when the
/// field contains no NUL. A field that is not valid UTF-8 yields `""`.
fn parse_str(buf: &[u8]) -> &str {
    // `split` always yields at least one (possibly empty) piece, so the
    // fallback is never taken in practice.
    let terminated = buf.split(|&byte| byte == 0).next().unwrap_or(buf);
    core::str::from_utf8(terminated).unwrap_or_default()
}
54
55/// Parse an octal ASCII number from a TAR field.
56///
57/// Handles both null/space-terminated octal strings and the GNU
58/// base-256 extension (high bit set in the first byte).
59fn parse_octal(buf: &[u8]) -> usize {
60    // GNU base-256 extension: if the high bit of the first byte is set
61    // the remaining bytes are a big-endian binary value.
62    if !buf.is_empty() && (buf[0] & 0x80) != 0 {
63        let mut val: usize = 0;
64        for &b in &buf[1..] {
65            val = val.wrapping_shl(8) | (b as usize);
66        }
67        return val;
68    }
69
70    let s = parse_str(buf).trim();
71    if s.is_empty() {
72        return 0;
73    }
74    usize::from_str_radix(s, 8).unwrap_or(0)
75}
76
/// Report whether `block` consists solely of zero bytes.
///
/// Zero-filled 512-byte blocks are what TAR uses as its end-of-archive
/// marker. An empty slice counts as all-zero.
fn is_zero_block(block: &[u8]) -> bool {
    !block.iter().any(|&byte| byte != 0)
}
81
82/// Ensure that every component of `path` exists as a directory in the VFS.
83///
84/// For example, given `/usr/local/bin`, this will create `/usr`, `/usr/local`,
85/// and `/usr/local/bin` as directories if they don't already exist.
86fn ensure_parent_dirs(path: &str) -> Result<(), KernelError> {
87    let vfs = get_vfs().read();
88
89    let mut accumulated = String::new();
90    for component in path.split('/').filter(|c| !c.is_empty()) {
91        accumulated.push('/');
92        accumulated.push_str(component);
93
94        // If this path component already exists we can skip it.
95        if vfs.resolve_path(&accumulated).is_ok() {
96            continue;
97        }
98
99        // Split into parent + name and create the directory.
100        let (parent_path, dir_name) = if let Some(pos) = accumulated.rfind('/') {
101            if pos == 0 {
102                ("/", &accumulated[1..])
103            } else {
104                (&accumulated[..pos], &accumulated[pos + 1..])
105            }
106        } else {
107            continue;
108        };
109
110        let parent_node = vfs.resolve_path(parent_path)?;
111        // Ignore AlreadyExists -- another entry may have created it.
112        let _ = parent_node.mkdir(dir_name, Permissions::default());
113    }
114
115    Ok(())
116}
117
118/// Load a TAR archive from a byte buffer into the VFS.
119///
120/// Iterates over the 512-byte headers in `data`, creating directories and
121/// files in the RamFS-backed VFS. Returns the number of entries (files +
122/// directories) successfully loaded.
123///
124/// # Arguments
125/// * `data` - The raw bytes of a TAR archive (ustar format).
126///
127/// # Returns
128/// * `Ok(count)` - Number of files/directories loaded.
129/// * `Err(KernelError)` - On parse or VFS errors.
130pub fn load_tar_to_vfs(data: &[u8]) -> Result<usize, KernelError> {
131    #[allow(unused_imports)]
132    use crate::println;
133
134    if data.len() < BLOCK_SIZE {
135        return Ok(0);
136    }
137
138    let mut offset: usize = 0;
139    let mut count: usize = 0;
140    // Deferred symlinks: (symlink_path, target_path, mode) for second-pass
141    // resolution.
142    let mut deferred_symlinks: Vec<(String, String, u32)> = Vec::new();
143
144    while offset + BLOCK_SIZE <= data.len() {
145        let header = &data[offset..offset + BLOCK_SIZE];
146
147        // Two consecutive zero blocks mark end of archive.
148        if is_zero_block(header) {
149            if offset + 2 * BLOCK_SIZE <= data.len()
150                && is_zero_block(&data[offset + BLOCK_SIZE..offset + 2 * BLOCK_SIZE])
151            {
152                break;
153            }
154            // Single zero block -- skip it.
155            offset += BLOCK_SIZE;
156            continue;
157        }
158
159        // Validate magic (optional -- some archives lack it).
160        let magic = parse_str(&header[field::MAGIC_OFF..field::MAGIC_OFF + field::MAGIC_LEN]);
161        if !magic.is_empty() && !magic.starts_with("ustar") {
162            // Not a ustar header; skip this block.
163            offset += BLOCK_SIZE;
164            continue;
165        }
166
167        // Parse header fields.
168        let prefix = parse_str(&header[field::PREFIX_OFF..field::PREFIX_OFF + field::PREFIX_LEN]);
169        let name_raw = parse_str(&header[field::NAME_OFF..field::NAME_OFF + field::NAME_LEN]);
170        let mode = parse_octal(&header[field::MODE_OFF..field::MODE_OFF + field::MODE_LEN]);
171        let size = parse_octal(&header[field::SIZE_OFF..field::SIZE_OFF + field::SIZE_LEN]);
172        let typeflag = header[field::TYPE_OFF];
173
174        // Assemble full path (prefix + name).
175        let full_name = if prefix.is_empty() {
176            String::from(name_raw)
177        } else {
178            let mut s = String::from(prefix);
179            s.push('/');
180            s.push_str(name_raw);
181            s
182        };
183
184        // Normalise: ensure the path starts with '/'.
185        let path = if full_name.starts_with('/') {
186            full_name.clone()
187        } else {
188            let mut s = String::from("/");
189            s.push_str(&full_name);
190            s
191        };
192
193        // Strip trailing '/' for directory paths.
194        let path = if path.len() > 1 && path.ends_with('/') {
195            String::from(&path[..path.len() - 1])
196        } else {
197            path
198        };
199
200        // Advance past the header block.
201        offset += BLOCK_SIZE;
202
203        match typeflag {
204            b'5' => {
205                // Directory entry.
206                ensure_parent_dirs(&path)?;
207                // The directory itself may already exist from ensure_parent_dirs.
208                let vfs = get_vfs().read();
209                if vfs.resolve_path(&path).is_err() {
210                    let (parent_path, dir_name) = split_path(&path)?;
211                    let parent = vfs.resolve_path(parent_path)?;
212                    let _ = parent.mkdir(dir_name, Permissions::from_mode(mode as u32));
213                }
214                count += 1;
215            }
216            b'0' | b'\0' => {
217                // Regular file entry.
218                // Ensure parent directories exist.
219                if let Some(pos) = path.rfind('/') {
220                    if pos > 0 {
221                        ensure_parent_dirs(&path[..pos])?;
222                    }
223                }
224
225                // Extract file data.
226                let file_data = if size > 0 && offset + size <= data.len() {
227                    &data[offset..offset + size]
228                } else {
229                    &[] as &[u8]
230                };
231
232                // Create the file in the VFS.
233                let vfs = get_vfs().read();
234                let (parent_path, file_name) = split_path(&path)?;
235                let parent = vfs.resolve_path(parent_path)?;
236
237                // Remove existing file if present (overwrite semantics).
238                let _ = parent.unlink(file_name);
239
240                let node = parent.create(file_name, Permissions::from_mode(mode as u32))?;
241                if !file_data.is_empty() {
242                    node.write(0, file_data)?;
243                }
244
245                count += 1;
246
247                // Advance past data blocks (rounded up to BLOCK_SIZE).
248                let data_blocks = size.div_ceil(BLOCK_SIZE);
249                offset += data_blocks * BLOCK_SIZE;
250            }
251            b'2' => {
252                // Symbolic link entry -- resolve as a file copy of the target.
253                // BusyBox uses symlinks (e.g. /bin/ash -> busybox) for its
254                // multi-call binary. Since VeridianOS VFS has no native symlink
255                // nodes, we copy the target file's contents to the new path.
256                let link_target_raw =
257                    parse_str(&header[field::LINK_OFF..field::LINK_OFF + field::LINK_LEN]);
258
259                // Resolve the link target to an absolute path.
260                let link_target = if link_target_raw.starts_with('/') {
261                    String::from(link_target_raw)
262                } else {
263                    // Relative symlink: resolve relative to the symlink's parent dir.
264                    if let Some(pos) = path.rfind('/') {
265                        let parent_dir = if pos == 0 { "/" } else { &path[..pos] };
266                        let mut abs = String::from(parent_dir);
267                        abs.push('/');
268                        abs.push_str(link_target_raw);
269                        abs
270                    } else {
271                        let mut abs = String::from("/");
272                        abs.push_str(link_target_raw);
273                        abs
274                    }
275                };
276
277                // Ensure parent directories for the symlink path exist.
278                if let Some(pos) = path.rfind('/') {
279                    if pos > 0 {
280                        ensure_parent_dirs(&path[..pos])?;
281                    }
282                }
283
284                // Read the target file's contents and copy them.
285                let vfs = get_vfs().read();
286                match vfs.resolve_path(&link_target) {
287                    Ok(target_node) => {
288                        // Read target file data (up to 4MB limit for safety).
289                        let target_size = target_node.metadata().map(|m| m.size).unwrap_or(0);
290                        if target_size > 0 && target_size <= 4 * 1024 * 1024 {
291                            let mut buf = alloc::vec![0u8; target_size];
292                            if let Ok(bytes_read) = target_node.read(0, &mut buf) {
293                                let (parent_path, file_name) = split_path(&path)?;
294                                let parent = vfs.resolve_path(parent_path)?;
295                                let _ = parent.unlink(file_name);
296                                let node = parent
297                                    .create(file_name, Permissions::from_mode(mode as u32))?;
298                                node.write(0, &buf[..bytes_read])?;
299                                count += 1;
300                            }
301                        } else if target_size == 0 {
302                            // Empty target -- create empty file.
303                            let (parent_path, file_name) = split_path(&path)?;
304                            let parent = vfs.resolve_path(parent_path)?;
305                            let _ = parent.unlink(file_name);
306                            let _ =
307                                parent.create(file_name, Permissions::from_mode(mode as u32))?;
308                            count += 1;
309                        }
310                    }
311                    Err(_) => {
312                        // Target doesn't exist yet -- defer to a second pass.
313                        deferred_symlinks.push((path.clone(), link_target, mode as u32));
314                    }
315                }
316
317                // Symlinks have no data blocks in the archive.
318                let data_blocks = size.div_ceil(BLOCK_SIZE);
319                offset += data_blocks * BLOCK_SIZE;
320            }
321            _ => {
322                // Unsupported type (hard link, etc.) -- skip data.
323                let data_blocks = size.div_ceil(BLOCK_SIZE);
324                offset += data_blocks * BLOCK_SIZE;
325            }
326        }
327    }
328
329    // Second pass: resolve deferred symlinks (targets that appeared after the
330    // link).
331    for (sym_path, target_path, sym_mode) in &deferred_symlinks {
332        if let Some(pos) = sym_path.rfind('/') {
333            if pos > 0 {
334                let _ = ensure_parent_dirs(&sym_path[..pos]);
335            }
336        }
337
338        let vfs = get_vfs().read();
339        if let Ok(target_node) = vfs.resolve_path(target_path) {
340            let target_size = target_node.metadata().map(|m| m.size).unwrap_or(0);
341            if target_size > 0 && target_size <= 4 * 1024 * 1024 {
342                let mut buf = alloc::vec![0u8; target_size];
343                if let Ok(bytes_read) = target_node.read(0, &mut buf) {
344                    if let Ok((parent_path, file_name)) = split_path(sym_path) {
345                        if let Ok(parent) = vfs.resolve_path(parent_path) {
346                            let _ = parent.unlink(file_name);
347                            if let Ok(node) =
348                                parent.create(file_name, Permissions::from_mode(*sym_mode))
349                            {
350                                let _ = node.write(0, &buf[..bytes_read]);
351                                count += 1;
352                            }
353                        }
354                    }
355                }
356            }
357        }
358    }
359
360    if !deferred_symlinks.is_empty() {
361        let resolved = deferred_symlinks.len();
362        println!("[TAR] Resolved {} deferred symlinks", resolved);
363    }
364
365    println!("[TAR] Loaded {} entries into VFS", count);
366    Ok(count)
367}
368
369/// Split a path into (parent, name).
370///
371/// Returns `("/", "foo")` for `/foo`, or `("/a/b", "c")` for `/a/b/c`.
372fn split_path(path: &str) -> Result<(&str, &str), KernelError> {
373    if let Some(pos) = path.rfind('/') {
374        let parent = if pos == 0 { "/" } else { &path[..pos] };
375        let name = &path[pos + 1..];
376        if name.is_empty() {
377            return Err(KernelError::FsError(crate::error::FsError::InvalidPath));
378        }
379        Ok((parent, name))
380    } else {
381        Err(KernelError::FsError(crate::error::FsError::InvalidPath))
382    }
383}
384
#[cfg(test)]
mod tests {
    //! Unit tests for the pure parsing helpers. The VFS-backed loader is
    //! not exercised here; the archive builders below only produce input
    //! for the helper tests.

    use super::*;

    /// Build a minimal ustar header block for `name` with the given size,
    /// type flag and mode, including a valid checksum field.
    fn make_tar_header(name: &str, size: usize, typeflag: u8, mode: u32) -> [u8; 512] {
        let mut header = [0u8; 512];

        // Name field (offset 0, 100 bytes); longer names are truncated.
        let name_bytes = name.as_bytes();
        let len = name_bytes.len().min(100);
        header[..len].copy_from_slice(&name_bytes[..len]);

        // Mode field (offset 100, 8 bytes) -- octal ASCII
        let mode_str = alloc::format!("{:07o}\0", mode);
        header[100..108].copy_from_slice(mode_str.as_bytes());

        // Size field (offset 124, 12 bytes) -- octal ASCII
        let size_str = alloc::format!("{:011o}\0", size);
        header[124..136].copy_from_slice(size_str.as_bytes());

        // Typeflag (offset 156, 1 byte)
        header[156] = typeflag;

        // Magic (offset 257, 6 bytes)
        header[257..263].copy_from_slice(b"ustar\0");

        // Version (offset 263, 2 bytes)
        header[263..265].copy_from_slice(b"00");

        // Compute checksum (offset 148, 8 bytes).
        // Per spec: treat checksum field as spaces during calculation.
        header[148..156].copy_from_slice(b"        ");
        let cksum: u32 = header.iter().map(|&b| b as u32).sum();
        let cksum_str = alloc::format!("{:06o}\0 ", cksum);
        header[148..156].copy_from_slice(&cksum_str.as_bytes()[..8]);

        header
    }

    /// Build a complete TAR archive in memory from a list of
    /// `(name, size, typeflag, mode, data)` entries, terminated by two
    /// zero blocks.
    fn build_tar(entries: &[(&str, usize, u8, u32, &[u8])]) -> alloc::vec::Vec<u8> {
        let mut archive = alloc::vec::Vec::new();
        for &(name, size, typeflag, mode, data) in entries {
            let header = make_tar_header(name, size, typeflag, mode);
            archive.extend_from_slice(&header);
            if !data.is_empty() {
                archive.extend_from_slice(data);
                // Pad to block boundary
                let remainder = data.len() % 512;
                if remainder != 0 {
                    let padding = 512 - remainder;
                    archive.extend(core::iter::repeat(0u8).take(padding));
                }
            }
        }
        // Two zero blocks to terminate
        archive.extend(core::iter::repeat(0u8).take(1024));
        archive
    }

    #[test]
    fn test_parse_octal_basic() {
        assert_eq!(parse_octal(b"0000755\0"), 0o755);
        assert_eq!(parse_octal(b"0000644\0"), 0o644);
        assert_eq!(parse_octal(b"00000000013\0"), 11); // 13 octal = 11 decimal
    }

    #[test]
    fn test_parse_octal_empty() {
        assert_eq!(parse_octal(b"\0\0\0\0"), 0);
        assert_eq!(parse_octal(b""), 0);
    }

    #[test]
    fn test_parse_octal_space_terminated() {
        // Older writers pad numeric fields with a space before the NUL.
        assert_eq!(parse_octal(b"0000644 \0"), 0o644);
        assert_eq!(parse_octal(b" 644 \0"), 0o644);
    }

    #[test]
    fn test_parse_octal_invalid_is_zero() {
        assert_eq!(parse_octal(b"notoctal"), 0);
        assert_eq!(parse_octal(b"0000789\0"), 0);
    }

    #[test]
    fn test_parse_octal_base256() {
        // GNU base-256: marker byte followed by a big-endian binary value.
        let field = [0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, 0x00];
        assert_eq!(parse_octal(&field), 256);
    }

    #[test]
    fn test_parse_str() {
        assert_eq!(parse_str(b"hello\0world"), "hello");
        assert_eq!(parse_str(b"hello"), "hello");
        assert_eq!(parse_str(b"\0"), "");
    }

    #[test]
    fn test_parse_str_invalid_utf8_is_empty() {
        assert_eq!(parse_str(&[0xff, 0xfe, 0x00]), "");
    }

    #[test]
    fn test_is_zero_block() {
        let zero = [0u8; 512];
        assert!(is_zero_block(&zero));

        let mut nonzero = [0u8; 512];
        nonzero[100] = 1;
        assert!(!is_zero_block(&nonzero));
    }

    #[test]
    fn test_split_path() {
        let (parent, name) = split_path("/bin/ls").unwrap();
        assert_eq!(parent, "/bin");
        assert_eq!(name, "ls");

        let (parent, name) = split_path("/hello").unwrap();
        assert_eq!(parent, "/");
        assert_eq!(name, "hello");
    }

    #[test]
    fn test_split_path_trailing_slash_fails() {
        assert!(split_path("/foo/").is_err());
        assert!(split_path("/").is_err());
    }

    #[test]
    fn test_split_path_without_slash_fails() {
        assert!(split_path("noslash").is_err());
    }

    #[test]
    fn test_make_tar_header_magic() {
        let header = make_tar_header("test.txt", 5, b'0', 0o644);
        let magic = parse_str(&header[257..263]);
        assert!(magic.starts_with("ustar"));
    }

    #[test]
    fn test_make_tar_header_fields_round_trip() {
        let header = make_tar_header("test.txt", 5, b'0', 0o644);
        assert_eq!(
            parse_str(&header[field::NAME_OFF..field::NAME_OFF + field::NAME_LEN]),
            "test.txt"
        );
        assert_eq!(
            parse_octal(&header[field::MODE_OFF..field::MODE_OFF + field::MODE_LEN]),
            0o644
        );
        assert_eq!(
            parse_octal(&header[field::SIZE_OFF..field::SIZE_OFF + field::SIZE_LEN]),
            5
        );
        assert_eq!(header[field::TYPE_OFF], b'0');
    }

    #[test]
    fn test_make_tar_header_checksum() {
        let header = make_tar_header("test.txt", 5, b'0', 0o644);
        // The checksum is computed with its own field read as spaces.
        let expected: usize = header
            .iter()
            .enumerate()
            .map(|(i, &b)| {
                if (148..156).contains(&i) {
                    usize::from(b' ')
                } else {
                    usize::from(b)
                }
            })
            .sum();
        assert_eq!(parse_octal(&header[148..156]), expected);
    }

    #[test]
    fn test_build_tar_layout() {
        let tar = build_tar(&[("file.txt", 5, b'0', 0o644, b"hello")]);
        // Header (512) + data padded to one block (512) + terminator (1024).
        assert_eq!(tar.len(), 512 + 512 + 1024);
        assert_eq!(&tar[512..517], b"hello");
        assert!(is_zero_block(&tar[517..1024]));
        assert!(is_zero_block(&tar[1024..1536]));
        assert!(is_zero_block(&tar[1536..2048]));
    }
}