veridian_kernel/pkg/format/
compression.rs1use alloc::vec::Vec;
8
9use super::Compression;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
17pub enum CompressionError {
18 InputTooShort,
20 InvalidMagic,
22 TruncatedBlock,
24 TruncatedLiterals,
26 InvalidOffset,
28 TruncatedRle,
30 UnsupportedContentSize,
32 ReservedBlockType,
34 EmptyInput,
36 InvalidDistance,
38 TruncatedEscape,
40 TruncatedMatch,
42}
43
44impl core::fmt::Display for CompressionError {
45 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
46 match self {
47 Self::InputTooShort => write!(f, "input too short"),
48 Self::InvalidMagic => write!(f, "invalid magic number"),
49 Self::TruncatedBlock => write!(f, "truncated block"),
50 Self::TruncatedLiterals => write!(f, "truncated literals"),
51 Self::InvalidOffset => write!(f, "invalid offset"),
52 Self::TruncatedRle => write!(f, "truncated RLE"),
53 Self::UnsupportedContentSize => write!(f, "unsupported content size"),
54 Self::ReservedBlockType => write!(f, "reserved block type"),
55 Self::EmptyInput => write!(f, "empty input"),
56 Self::InvalidDistance => write!(f, "invalid distance"),
57 Self::TruncatedEscape => write!(f, "truncated escape"),
58 Self::TruncatedMatch => write!(f, "truncated match"),
59 }
60 }
61}
62
63const LZ4_MAGIC: u32 = 0x184D2204;
69
70const LZ4_BLOCK_SIZE: usize = 65536;
72
73const LZ4_MIN_MATCH: usize = 4;
75
76mod lz4 {
78 use super::*;
79
80 pub fn compress(input: &[u8]) -> Result<Vec<u8>, CompressionError> {
82 if input.is_empty() {
83 return Ok(Vec::new());
84 }
85
86 let mut output = Vec::with_capacity(input.len() + 16);
87
88 output.extend_from_slice(&LZ4_MAGIC.to_le_bytes());
90 output.push(0x64); output.push(0x40); output.extend_from_slice(&(input.len() as u64).to_le_bytes());
95
96 let header_checksum = (output[4..12].iter().fold(0u8, |a, &b| a.wrapping_add(b))) >> 1;
98 output.push(header_checksum);
99
100 let mut pos = 0;
102 while pos < input.len() {
103 let block_end = core::cmp::min(pos + LZ4_BLOCK_SIZE, input.len());
104 let block = &input[pos..block_end];
105
106 let compressed_block = compress_block(block);
107
108 if compressed_block.len() >= block.len() {
110 let block_size = (block.len() as u32) | 0x80000000;
112 output.extend_from_slice(&block_size.to_le_bytes());
113 output.extend_from_slice(block);
114 } else {
115 output.extend_from_slice(&(compressed_block.len() as u32).to_le_bytes());
117 output.extend_from_slice(&compressed_block);
118 }
119
120 pos = block_end;
121 }
122
123 output.extend_from_slice(&0u32.to_le_bytes());
125
126 Ok(output)
127 }
128
129 fn compress_block(input: &[u8]) -> Vec<u8> {
131 let mut output = Vec::with_capacity(input.len());
132 let mut pos = 0;
133 let mut literal_start = 0;
134
135 let mut hash_table = [0usize; 4096];
137
138 while pos + LZ4_MIN_MATCH <= input.len() {
139 let hash = hash4(&input[pos..]) & 0xFFF;
140 let match_pos = hash_table[hash];
141 hash_table[hash] = pos;
142
143 if match_pos < pos && pos - match_pos < 65535 {
145 let match_len = find_match_length(&input[match_pos..], &input[pos..]);
146
147 if match_len >= LZ4_MIN_MATCH {
148 let offset = pos - match_pos;
149 let _literal_len = pos - literal_start;
150
151 write_sequence(&mut output, &input[literal_start..pos], offset, match_len);
153
154 pos += match_len;
155 literal_start = pos;
156 continue;
157 }
158 }
159
160 pos += 1;
161 }
162
163 if literal_start < input.len() {
165 let literal_len = input.len() - literal_start;
166 let token = core::cmp::min(literal_len, 15) as u8;
167 output.push(token << 4);
168
169 if literal_len >= 15 {
170 let mut remaining = literal_len - 15;
171 while remaining >= 255 {
172 output.push(255);
173 remaining -= 255;
174 }
175 output.push(remaining as u8);
176 }
177
178 output.extend_from_slice(&input[literal_start..]);
179 }
180
181 output
182 }
183
184 fn hash4(data: &[u8]) -> usize {
186 if data.len() < 4 {
187 return 0;
188 }
189 let val = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
190 (val.wrapping_mul(2654435761) >> 20) as usize
191 }
192
193 fn find_match_length(match_data: &[u8], current: &[u8]) -> usize {
195 let max_len = core::cmp::min(match_data.len(), current.len());
196 let mut len = 0;
197
198 while len < max_len && match_data[len] == current[len] {
199 len += 1;
200 }
201
202 len
203 }
204
205 fn write_sequence(output: &mut Vec<u8>, literals: &[u8], offset: usize, match_len: usize) {
207 let literal_len = literals.len();
208 let ml = match_len - LZ4_MIN_MATCH;
209
210 let lit_token = core::cmp::min(literal_len, 15) as u8;
212 let match_token = core::cmp::min(ml, 15) as u8;
213 output.push((lit_token << 4) | match_token);
214
215 if literal_len >= 15 {
217 let mut remaining = literal_len - 15;
218 while remaining >= 255 {
219 output.push(255);
220 remaining -= 255;
221 }
222 output.push(remaining as u8);
223 }
224
225 output.extend_from_slice(literals);
227
228 output.push((offset & 0xFF) as u8);
230 output.push(((offset >> 8) & 0xFF) as u8);
231
232 if ml >= 15 {
234 let mut remaining = ml - 15;
235 while remaining >= 255 {
236 output.push(255);
237 remaining -= 255;
238 }
239 output.push(remaining as u8);
240 }
241 }
242
243 pub fn decompress(input: &[u8]) -> Result<Vec<u8>, CompressionError> {
245 if input.len() < 15 {
246 return Err(CompressionError::InputTooShort);
247 }
248
249 let magic = u32::from_le_bytes([input[0], input[1], input[2], input[3]]);
251 if magic != LZ4_MAGIC {
252 return Err(CompressionError::InvalidMagic);
253 }
254
255 let _flg = input[4];
257 let _bd = input[5];
258
259 let original_size = u64::from_le_bytes([
261 input[6], input[7], input[8], input[9], input[10], input[11], input[12], input[13],
262 ]) as usize;
263
264 let mut output = Vec::with_capacity(original_size);
265 let mut pos = 15; while pos + 4 <= input.len() {
269 let block_size_raw =
270 u32::from_le_bytes([input[pos], input[pos + 1], input[pos + 2], input[pos + 3]]);
271 pos += 4;
272
273 if block_size_raw == 0 {
275 break;
276 }
277
278 let uncompressed = (block_size_raw & 0x80000000) != 0;
279 let block_size = (block_size_raw & 0x7FFFFFFF) as usize;
280
281 if pos + block_size > input.len() {
282 return Err(CompressionError::TruncatedBlock);
283 }
284
285 if uncompressed {
286 output.extend_from_slice(&input[pos..pos + block_size]);
287 } else {
288 decompress_block(&input[pos..pos + block_size], &mut output)?;
289 }
290
291 pos += block_size;
292 }
293
294 Ok(output)
295 }
296
297 fn decompress_block(input: &[u8], output: &mut Vec<u8>) -> Result<(), CompressionError> {
299 let mut pos = 0;
300
301 while pos < input.len() {
302 let token = input[pos];
303 pos += 1;
304
305 let mut literal_len = ((token >> 4) & 0x0F) as usize;
307 if literal_len == 15 {
308 while pos < input.len() {
309 let byte = input[pos];
310 pos += 1;
311 literal_len += byte as usize;
312 if byte != 255 {
313 break;
314 }
315 }
316 }
317
318 if pos + literal_len > input.len() {
320 return Err(CompressionError::TruncatedLiterals);
321 }
322 output.extend_from_slice(&input[pos..pos + literal_len]);
323 pos += literal_len;
324
325 if pos >= input.len() {
327 break;
328 }
329
330 if pos + 2 > input.len() {
332 return Err(CompressionError::InvalidOffset);
333 }
334 let offset = u16::from_le_bytes([input[pos], input[pos + 1]]) as usize;
335 pos += 2;
336
337 if offset == 0 || offset > output.len() {
338 return Err(CompressionError::InvalidOffset);
339 }
340
341 let mut match_len = (token & 0x0F) as usize + LZ4_MIN_MATCH;
343 if (token & 0x0F) == 15 {
344 while pos < input.len() {
345 let byte = input[pos];
346 pos += 1;
347 match_len += byte as usize;
348 if byte != 255 {
349 break;
350 }
351 }
352 }
353
354 let match_start = output.len() - offset;
356 for i in 0..match_len {
357 let byte = output[match_start + (i % offset)];
358 output.push(byte);
359 }
360 }
361
362 Ok(())
363 }
364}
365
366const ZSTD_MAGIC: u32 = 0xFD2FB528;
372
373mod zstd {
375 use super::*;
376
377 pub fn compress(input: &[u8]) -> Result<Vec<u8>, CompressionError> {
379 if input.is_empty() {
380 return Ok(Vec::new());
381 }
382
383 let mut output = Vec::with_capacity(input.len() + 16);
384
385 output.extend_from_slice(&ZSTD_MAGIC.to_le_bytes());
387
388 output.push(0x20); output.push(input.len() as u8); if input.len() > 255 {
397 output[4] = 0xA0; output.pop();
399 output.extend_from_slice(&(input.len() as u32).to_le_bytes());
400 }
401
402 let compressed = compress_block_rle(input);
404
405 let (block_header, actual_data_len) = if compressed.len() >= input.len() {
410 (((input.len() as u32) << 3) | 0x00, input.len())
412 } else {
413 ((((compressed.len()) as u32) << 3) | 0x04, compressed.len())
415 };
416
417 output.push((block_header & 0xFF) as u8);
418 output.push(((block_header >> 8) & 0xFF) as u8);
419 output.push(((block_header >> 16) & 0xFF) as u8);
420
421 if compressed.len() >= input.len() {
422 output.extend_from_slice(input);
423 } else {
424 output.extend_from_slice(&compressed);
425 }
426
427 let flag_idx = output.len() - (actual_data_len + 3);
430 output[flag_idx] |= 0x01;
431
432 Ok(output)
433 }
434
435 fn compress_block_rle(input: &[u8]) -> Vec<u8> {
437 let mut output = Vec::with_capacity(input.len());
438 let mut pos = 0;
439
440 while pos < input.len() {
441 let byte = input[pos];
442 let mut run_len = 1;
443
444 while pos + run_len < input.len() && input[pos + run_len] == byte && run_len < 127 {
446 run_len += 1;
447 }
448
449 if run_len >= 4 {
450 output.push(0x80 | (run_len as u8));
452 output.push(byte);
453 pos += run_len;
454 } else {
455 let literal_start = pos;
457 let mut literal_len = 0;
458
459 while pos + literal_len < input.len() && literal_len < 127 {
460 let b = input[pos + literal_len];
461 let mut next_run = 1;
462 while pos + literal_len + next_run < input.len()
463 && input[pos + literal_len + next_run] == b
464 && next_run < 127
465 {
466 next_run += 1;
467 }
468
469 if next_run >= 4 {
470 break;
471 }
472 literal_len += 1;
473 }
474
475 if literal_len > 0 {
476 output.push(literal_len as u8);
477 output.extend_from_slice(&input[literal_start..literal_start + literal_len]);
478 pos += literal_len;
479 }
480 }
481 }
482
483 output
484 }
485
486 pub fn decompress(input: &[u8]) -> Result<Vec<u8>, CompressionError> {
488 if input.len() < 8 {
489 return Err(CompressionError::InputTooShort);
490 }
491
492 let magic = u32::from_le_bytes([input[0], input[1], input[2], input[3]]);
494 if magic != ZSTD_MAGIC {
495 return Err(CompressionError::InvalidMagic);
496 }
497
498 let fhd = input[4];
500 let content_size_flag = (fhd >> 6) & 0x03;
501
502 let (content_size, mut pos) = match content_size_flag {
503 0 => (input[5] as usize, 6),
504 1 => (u16::from_le_bytes([input[5], input[6]]) as usize + 256, 7),
505 2 => (
506 u32::from_le_bytes([input[5], input[6], input[7], input[8]]) as usize,
507 9,
508 ),
509 _ => return Err(CompressionError::UnsupportedContentSize),
510 };
511
512 let mut output = Vec::with_capacity(content_size);
513
514 while pos + 3 <= input.len() {
516 let block_header = u32::from_le_bytes([input[pos], input[pos + 1], input[pos + 2], 0]);
517 pos += 3;
518
519 let last_block = (block_header & 0x01) != 0;
520 let block_type = (block_header >> 1) & 0x03;
521 let block_size = (block_header >> 3) as usize;
522
523 if pos + block_size > input.len() {
524 return Err(CompressionError::TruncatedBlock);
525 }
526
527 match block_type {
528 0 => {
529 output.extend_from_slice(&input[pos..pos + block_size]);
531 }
532 1 => {
533 let byte = input[pos];
535 for _ in 0..block_size {
536 output.push(byte);
537 }
538 }
539 2 => {
540 decompress_block_rle(&input[pos..pos + block_size], &mut output)?;
542 }
543 _ => return Err(CompressionError::ReservedBlockType),
544 }
545
546 pos += block_size;
547
548 if last_block {
549 break;
550 }
551 }
552
553 Ok(output)
554 }
555
556 fn decompress_block_rle(input: &[u8], output: &mut Vec<u8>) -> Result<(), CompressionError> {
558 let mut pos = 0;
559
560 while pos < input.len() {
561 let control = input[pos];
562 pos += 1;
563
564 if (control & 0x80) != 0 {
565 let run_len = (control & 0x7F) as usize;
567 if pos >= input.len() {
568 return Err(CompressionError::TruncatedRle);
569 }
570 let byte = input[pos];
571 pos += 1;
572 for _ in 0..run_len {
573 output.push(byte);
574 }
575 } else {
576 let literal_len = control as usize;
578 if pos + literal_len > input.len() {
579 return Err(CompressionError::TruncatedLiterals);
580 }
581 output.extend_from_slice(&input[pos..pos + literal_len]);
582 pos += literal_len;
583 }
584 }
585
586 Ok(())
587 }
588}
589
590const BROTLI_WINDOW_BITS: u8 = 22;
596
597mod brotli {
599 use alloc::vec;
600
601 use super::*;
602
603 pub fn compress(input: &[u8]) -> Result<Vec<u8>, CompressionError> {
605 if input.is_empty() {
606 return Ok(vec![0x06]); }
608
609 let mut output = Vec::with_capacity(input.len() + 16);
610
611 output.push(BROTLI_WINDOW_BITS);
614
615 let compressed = compress_meta_block(input);
617
618 let is_last = true;
620 let _mnibbles = ((compressed.len() + 1) / 16) + 1;
621
622 let header_byte = if is_last { 0x01 } else { 0x00 };
624 output.push(header_byte);
625
626 output.extend_from_slice(&(input.len() as u32).to_le_bytes());
628
629 if compressed.len() >= input.len() {
631 output.push(0x80); output.extend_from_slice(input);
634 } else {
635 output.push(0x00); output.extend_from_slice(&compressed);
637 }
638
639 Ok(output)
640 }
641
642 fn compress_meta_block(input: &[u8]) -> Vec<u8> {
644 let mut output = Vec::with_capacity(input.len());
645 let mut pos = 0;
646
647 let mut hash_table = [0usize; 8192];
649
650 while pos < input.len() {
651 if pos + 4 <= input.len() {
653 let hash = hash4(&input[pos..]) & 0x1FFF;
654 let match_pos = hash_table[hash];
655 hash_table[hash] = pos;
656
657 if match_pos < pos && pos - match_pos < 65535 {
658 let match_len = find_match(&input[match_pos..], &input[pos..]);
659
660 if match_len >= 4 {
661 let distance = pos - match_pos;
662
663 if match_len < 16 && distance < 256 {
665 output.push(0xF0 | (match_len as u8 - 4));
666 output.push(distance as u8);
667 } else {
668 output.push(0xFF);
669 output.extend_from_slice(&(match_len as u16).to_le_bytes());
670 output.extend_from_slice(&(distance as u16).to_le_bytes());
671 }
672
673 pos += match_len;
674 continue;
675 }
676 }
677 }
678
679 let byte = input[pos];
681 if byte >= 0xF0 {
682 output.push(0xFE); }
684 output.push(byte);
685 pos += 1;
686 }
687
688 output
689 }
690
691 fn hash4(data: &[u8]) -> usize {
693 if data.len() < 4 {
694 return 0;
695 }
696 let val = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
697 (val.wrapping_mul(0x1E35A7BD) >> 19) as usize
698 }
699
700 fn find_match(match_data: &[u8], current: &[u8]) -> usize {
702 let max_len = core::cmp::min(core::cmp::min(match_data.len(), current.len()), 258);
703 let mut len = 0;
704
705 while len < max_len && match_data[len] == current[len] {
706 len += 1;
707 }
708
709 len
710 }
711
712 pub fn decompress(input: &[u8]) -> Result<Vec<u8>, CompressionError> {
714 if input.is_empty() {
715 return Err(CompressionError::EmptyInput);
716 }
717
718 if input.len() == 1 && input[0] == 0x06 {
720 return Ok(Vec::new());
721 }
722
723 if input.len() < 7 {
724 return Err(CompressionError::InputTooShort);
725 }
726
727 let _window_bits = input[0];
729 let _header_byte = input[1];
730
731 let original_len = u32::from_le_bytes([input[2], input[3], input[4], input[5]]) as usize;
733
734 let compressed_flag = input[6];
735 let data_start = 7;
736
737 let mut output = Vec::with_capacity(original_len);
738
739 if compressed_flag == 0x80 {
740 output.extend_from_slice(&input[data_start..]);
742 } else {
743 decompress_meta_block(&input[data_start..], &mut output)?;
745 }
746
747 Ok(output)
748 }
749
750 fn decompress_meta_block(input: &[u8], output: &mut Vec<u8>) -> Result<(), CompressionError> {
752 let mut pos = 0;
753
754 while pos < input.len() {
755 let byte = input[pos];
756 pos += 1;
757
758 if byte == 0xFE {
759 if pos >= input.len() {
761 return Err(CompressionError::TruncatedEscape);
762 }
763 output.push(input[pos]);
764 pos += 1;
765 } else if byte == 0xFF {
766 if pos + 4 > input.len() {
768 return Err(CompressionError::TruncatedMatch);
769 }
770 let match_len = u16::from_le_bytes([input[pos], input[pos + 1]]) as usize;
771 let distance = u16::from_le_bytes([input[pos + 2], input[pos + 3]]) as usize;
772 pos += 4;
773
774 if distance > output.len() {
775 return Err(CompressionError::InvalidDistance);
776 }
777
778 let match_start = output.len() - distance;
779 for i in 0..match_len {
780 let b = output[match_start + (i % distance)];
781 output.push(b);
782 }
783 } else if byte >= 0xF0 {
784 let match_len = (byte & 0x0F) as usize + 4;
786 if pos >= input.len() {
787 return Err(CompressionError::TruncatedMatch);
788 }
789 let distance = input[pos] as usize;
790 pos += 1;
791
792 if distance > output.len() {
793 return Err(CompressionError::InvalidDistance);
794 }
795
796 let match_start = output.len() - distance;
797 for i in 0..match_len {
798 let b = output[match_start + (i % distance)];
799 output.push(b);
800 }
801 } else {
802 output.push(byte);
804 }
805 }
806
807 Ok(())
808 }
809}
810
811pub fn decompress(data: &[u8], compression: Compression) -> Result<Vec<u8>, CompressionError> {
817 match compression {
818 Compression::None => Ok(data.to_vec()),
819 Compression::Zstd => zstd::decompress(data),
820 Compression::Lz4 => lz4::decompress(data),
821 Compression::Brotli => brotli::decompress(data),
822 }
823}
824
825pub fn compress(data: &[u8], compression: Compression) -> Result<Vec<u8>, CompressionError> {
827 match compression {
828 Compression::None => Ok(data.to_vec()),
829 Compression::Zstd => zstd::compress(data),
830 Compression::Lz4 => lz4::compress(data),
831 Compression::Brotli => brotli::compress(data),
832 }
833}