⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/desktop/
pdf.rs

1//! PDF Renderer
2//!
3//! Minimal PDF 1.4 parser and renderer. Parses the cross-reference table,
4//! trailer, and page tree to extract page content streams. Renders text
5//! (using the kernel's 8x16 bitmap font) and filled rectangles to a pixel
6//! buffer.
7//!
8//! All coordinate math is integer-only.
9
10#![allow(dead_code)]
11
12use alloc::{collections::BTreeMap, string::String, vec::Vec};
13
14// ---------------------------------------------------------------------------
15// PDF objects
16// ---------------------------------------------------------------------------
17
/// A parsed PDF object value.
#[derive(Debug, Clone)]
pub enum PdfObject {
    /// The `null` object.
    Null,
    /// A boolean (`true` / `false`).
    Bool(bool),
    /// An integer number.
    Integer(i64),
    /// A name object such as `/Type`.
    Name(String),
    /// A string object, held as raw bytes.
    StringLiteral(Vec<u8>),
    /// An ordered array of objects.
    Array(Vec<PdfObject>),
    /// A dictionary mapping names to objects.
    Dictionary(BTreeMap<String, PdfObject>),
    /// A stream: its dictionary followed by the raw stream bytes.
    Stream(BTreeMap<String, PdfObject>, Vec<u8>),
    /// An indirect reference: object number, then generation number.
    Reference(u32, u16),
}

impl PdfObject {
    /// Returns the integer payload, or `None` for every other variant.
    pub fn as_integer(&self) -> Option<i64> {
        if let PdfObject::Integer(value) = self {
            Some(*value)
        } else {
            None
        }
    }

    /// Returns the name text, or `None` for every other variant.
    pub fn as_name(&self) -> Option<&str> {
        if let PdfObject::Name(name) = self {
            Some(name.as_str())
        } else {
            None
        }
    }

    /// Returns the dictionary of a `Dictionary`, or the dictionary half of a
    /// `Stream`; `None` for every other variant.
    pub fn as_dict(&self) -> Option<&BTreeMap<String, PdfObject>> {
        match self {
            PdfObject::Dictionary(entries) | PdfObject::Stream(entries, _) => Some(entries),
            _ => None,
        }
    }

    /// Returns the elements of an `Array`, or `None` for every other variant.
    pub fn as_array(&self) -> Option<&Vec<PdfObject>> {
        if let PdfObject::Array(items) = self {
            Some(items)
        } else {
            None
        }
    }

    /// Returns the raw bytes of a `Stream`, or `None` for every other variant.
    pub fn as_stream_data(&self) -> Option<&[u8]> {
        if let PdfObject::Stream(_, bytes) = self {
            Some(bytes.as_slice())
        } else {
            None
        }
    }
}
83
84// ---------------------------------------------------------------------------
85// Cross-reference table
86// ---------------------------------------------------------------------------
87
/// One entry of the cross-reference table.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct XrefEntry {
    /// Byte offset of the object, counted from the start of the file.
    pub offset: u64,
    /// Generation number of the object.
    pub generation: u16,
    /// `true` for an in-use entry, `false` for a free one.
    pub in_use: bool,
}
98
99// ---------------------------------------------------------------------------
100// PDF parser
101// ---------------------------------------------------------------------------
102
103/// PDF file parser.
104#[derive(Debug)]
105pub struct PdfParser {
106    /// Raw file data.
107    data: Vec<u8>,
108    /// Parsed xref table.
109    xref: Vec<XrefEntry>,
110    /// Trailer dictionary.
111    trailer: BTreeMap<String, PdfObject>,
112    /// Cached parsed objects.
113    objects: BTreeMap<u32, PdfObject>,
114}
115
116impl PdfParser {
117    /// Create a parser from raw PDF file bytes.
118    pub fn new(data: Vec<u8>) -> Self {
119        Self {
120            data,
121            xref: Vec::new(),
122            trailer: BTreeMap::new(),
123            objects: BTreeMap::new(),
124        }
125    }
126
127    /// Parse the PDF header and verify the signature.
128    pub fn parse_header(&self) -> bool {
129        self.data.len() >= 5 && &self.data[0..5] == b"%PDF-"
130    }
131
132    /// Parse the cross-reference table.
133    ///
134    /// Looks for `startxref` near the end of the file, then parses the xref
135    /// section.
136    pub fn parse_xref_table(&mut self) -> bool {
137        // Find "startxref" near end of file
138        let search_start = if self.data.len() > 1024 {
139            self.data.len() - 1024
140        } else {
141            0
142        };
143
144        let startxref_pos = self.find_bytes(b"startxref", search_start);
145        if startxref_pos.is_none() {
146            return false;
147        }
148
149        let pos = startxref_pos.unwrap();
150        // Parse the offset after "startxref\n"
151        let offset_str = self.read_line(pos + 9);
152        let xref_offset = self.parse_u64(&offset_str);
153
154        if xref_offset == 0 || xref_offset as usize >= self.data.len() {
155            return false;
156        }
157
158        // Parse xref section at offset
159        let mut cursor = xref_offset as usize;
160
161        // Skip "xref\n"
162        if cursor + 4 <= self.data.len() && &self.data[cursor..cursor + 4] == b"xref" {
163            cursor += 4;
164            cursor = self.skip_whitespace(cursor);
165        } else {
166            return false;
167        }
168
169        // Parse subsections: "start_obj count\n"
170        while cursor < self.data.len() {
171            let line = self.read_line(cursor);
172            if line.starts_with("trailer") {
173                break;
174            }
175
176            let parts: Vec<&str> = line.split_whitespace().collect();
177            if parts.len() < 2 {
178                cursor += line.len() + 1;
179                continue;
180            }
181
182            let start_obj = self.parse_u64(parts[0]) as u32;
183            let count = self.parse_u64(parts[1]) as u32;
184            cursor += line.len() + 1;
185
186            // Pre-size xref table
187            let needed = (start_obj + count) as usize;
188            while self.xref.len() < needed {
189                self.xref.push(XrefEntry::default());
190            }
191
192            for i in 0..count {
193                if cursor + 20 > self.data.len() {
194                    break;
195                }
196                let entry_line = self.read_line(cursor);
197                let entry_parts: Vec<&str> = entry_line.split_whitespace().collect();
198                if entry_parts.len() >= 3 {
199                    let offset = self.parse_u64(entry_parts[0]);
200                    let gen = self.parse_u64(entry_parts[1]) as u16;
201                    let in_use = entry_parts[2] == "n";
202                    let idx = (start_obj + i) as usize;
203                    if idx < self.xref.len() {
204                        self.xref[idx] = XrefEntry {
205                            offset,
206                            generation: gen,
207                            in_use,
208                        };
209                    }
210                }
211                cursor += entry_line.len() + 1;
212            }
213        }
214
215        true
216    }
217
218    /// Parse a single object from the data at the given offset.
219    pub fn parse_object(&mut self, obj_num: u32) -> Option<PdfObject> {
220        if let Some(cached) = self.objects.get(&obj_num) {
221            return Some(cached.clone());
222        }
223
224        let idx = obj_num as usize;
225        if idx >= self.xref.len() || !self.xref[idx].in_use {
226            return None;
227        }
228
229        let offset = self.xref[idx].offset as usize;
230        if offset >= self.data.len() {
231            return None;
232        }
233
234        // Skip "N G obj\n"
235        let line = self.read_line(offset);
236        let cursor = offset + line.len() + 1;
237
238        let obj = self.parse_value(cursor).map(|(v, _)| v);
239        if let Some(ref o) = obj {
240            self.objects.insert(obj_num, o.clone());
241        }
242        obj
243    }
244
245    /// Parse a PDF value at the given cursor position.
246    /// Returns (value, new_cursor).
247    fn parse_value(&self, mut pos: usize) -> Option<(PdfObject, usize)> {
248        pos = self.skip_whitespace(pos);
249        if pos >= self.data.len() {
250            return None;
251        }
252
253        let b = self.data[pos];
254
255        match b {
256            // Dictionary or Name
257            b'/' => {
258                let (name, end) = self.parse_name(pos);
259                Some((PdfObject::Name(name), end))
260            }
261            b'<' => {
262                if pos + 1 < self.data.len() && self.data[pos + 1] == b'<' {
263                    // Dictionary
264                    let (dict, end) = self.parse_dictionary(pos);
265                    Some((PdfObject::Dictionary(dict), end))
266                } else {
267                    // Hex string
268                    let (bytes, end) = self.parse_hex_string(pos);
269                    Some((PdfObject::StringLiteral(bytes), end))
270                }
271            }
272            b'(' => {
273                let (bytes, end) = self.parse_literal_string(pos);
274                Some((PdfObject::StringLiteral(bytes), end))
275            }
276            b'[' => {
277                let (arr, end) = self.parse_array(pos);
278                Some((PdfObject::Array(arr), end))
279            }
280            b't' => {
281                // true
282                Some((PdfObject::Bool(true), pos + 4))
283            }
284            b'f' => {
285                // false
286                Some((PdfObject::Bool(false), pos + 5))
287            }
288            b'n' => {
289                // null
290                Some((PdfObject::Null, pos + 4))
291            }
292            b'0'..=b'9' | b'-' | b'+' => {
293                let (num, end) = self.parse_number(pos);
294                // Check if this is an indirect reference (N G R)
295                let after = self.skip_whitespace(end);
296                if after < self.data.len() && self.data[after].is_ascii_digit() {
297                    let (gen, end2) = self.parse_number(after);
298                    let after2 = self.skip_whitespace(end2);
299                    if after2 < self.data.len() && self.data[after2] == b'R' {
300                        return Some((PdfObject::Reference(num as u32, gen as u16), after2 + 1));
301                    }
302                }
303                Some((PdfObject::Integer(num), end))
304            }
305            _ => None,
306        }
307    }
308
309    // -- helper parsers --
310
311    fn parse_name(&self, pos: usize) -> (String, usize) {
312        // pos is at '/'
313        let mut end = pos + 1;
314        while end < self.data.len() {
315            let c = self.data[end];
316            if c.is_ascii_whitespace()
317                || c == b'/'
318                || c == b'<'
319                || c == b'>'
320                || c == b'['
321                || c == b']'
322                || c == b'('
323                || c == b')'
324            {
325                break;
326            }
327            end += 1;
328        }
329        let name = String::from_utf8_lossy(&self.data[pos + 1..end]).into_owned();
330        (name, end)
331    }
332
333    fn parse_dictionary(&self, pos: usize) -> (BTreeMap<String, PdfObject>, usize) {
334        let mut dict = BTreeMap::new();
335        let mut cursor = pos + 2; // skip "<<"
336
337        loop {
338            cursor = self.skip_whitespace(cursor);
339            if cursor + 1 >= self.data.len() {
340                break;
341            }
342            if self.data[cursor] == b'>' && self.data[cursor + 1] == b'>' {
343                cursor += 2;
344                break;
345            }
346            if self.data[cursor] != b'/' {
347                cursor += 1;
348                continue;
349            }
350            let (key, end) = self.parse_name(cursor);
351            if let Some((val, end2)) = self.parse_value(end) {
352                dict.insert(key, val);
353                cursor = end2;
354            } else {
355                cursor = end;
356            }
357        }
358
359        (dict, cursor)
360    }
361
362    fn parse_array(&self, pos: usize) -> (Vec<PdfObject>, usize) {
363        let mut arr = Vec::new();
364        let mut cursor = pos + 1; // skip '['
365
366        loop {
367            cursor = self.skip_whitespace(cursor);
368            if cursor >= self.data.len() || self.data[cursor] == b']' {
369                cursor += 1;
370                break;
371            }
372            if let Some((val, end)) = self.parse_value(cursor) {
373                arr.push(val);
374                cursor = end;
375            } else {
376                cursor += 1;
377            }
378        }
379
380        (arr, cursor)
381    }
382
383    fn parse_literal_string(&self, pos: usize) -> (Vec<u8>, usize) {
384        let mut result = Vec::new();
385        let mut cursor = pos + 1; // skip '('
386        let mut depth = 1u32;
387
388        while cursor < self.data.len() && depth > 0 {
389            match self.data[cursor] {
390                b'(' => {
391                    depth += 1;
392                    result.push(b'(');
393                }
394                b')' => {
395                    depth -= 1;
396                    if depth > 0 {
397                        result.push(b')');
398                    }
399                }
400                b'\\' => {
401                    cursor += 1;
402                    if cursor < self.data.len() {
403                        match self.data[cursor] {
404                            b'n' => result.push(b'\n'),
405                            b'r' => result.push(b'\r'),
406                            b't' => result.push(b'\t'),
407                            other => result.push(other),
408                        }
409                    }
410                }
411                other => result.push(other),
412            }
413            cursor += 1;
414        }
415
416        (result, cursor)
417    }
418
419    fn parse_hex_string(&self, pos: usize) -> (Vec<u8>, usize) {
420        let mut result = Vec::new();
421        let mut cursor = pos + 1; // skip '<'
422
423        let mut high: Option<u8> = None;
424        while cursor < self.data.len() && self.data[cursor] != b'>' {
425            let c = self.data[cursor];
426            if let Some(nibble) = hex_nibble(c) {
427                if let Some(h) = high {
428                    result.push((h << 4) | nibble);
429                    high = None;
430                } else {
431                    high = Some(nibble);
432                }
433            }
434            cursor += 1;
435        }
436        if let Some(h) = high {
437            result.push(h << 4);
438        }
439        if cursor < self.data.len() {
440            cursor += 1; // skip '>'
441        }
442
443        (result, cursor)
444    }
445
446    fn parse_number(&self, pos: usize) -> (i64, usize) {
447        let mut end = pos;
448        if end < self.data.len() && (self.data[end] == b'-' || self.data[end] == b'+') {
449            end += 1;
450        }
451        while end < self.data.len() && self.data[end].is_ascii_digit() {
452            end += 1;
453        }
454        // Skip decimal point and fractional digits (we truncate to integer)
455        if end < self.data.len() && self.data[end] == b'.' {
456            end += 1;
457            while end < self.data.len() && self.data[end].is_ascii_digit() {
458                end += 1;
459            }
460        }
461        let s = String::from_utf8_lossy(&self.data[pos..end]);
462        // Parse integer part only
463        let int_str: String = s.chars().take_while(|c| *c != '.').collect();
464        let val = self.parse_i64(&int_str);
465        (val, end)
466    }
467
468    fn skip_whitespace(&self, mut pos: usize) -> usize {
469        while pos < self.data.len() {
470            match self.data[pos] {
471                b' ' | b'\t' | b'\r' | b'\n' | 0 => pos += 1,
472                b'%' => {
473                    // Skip comment to end of line
474                    while pos < self.data.len() && self.data[pos] != b'\n' {
475                        pos += 1;
476                    }
477                }
478                _ => break,
479            }
480        }
481        pos
482    }
483
484    fn read_line(&self, pos: usize) -> String {
485        let mut end = pos;
486        while end < self.data.len() && self.data[end] != b'\n' && self.data[end] != b'\r' {
487            end += 1;
488        }
489        String::from_utf8_lossy(&self.data[pos..end]).into_owned()
490    }
491
492    fn find_bytes(&self, needle: &[u8], start: usize) -> Option<usize> {
493        if needle.is_empty() || self.data.len() < needle.len() {
494            return None;
495        }
496        let end = self.data.len() - needle.len();
497        for i in start..=end {
498            if &self.data[i..i + needle.len()] == needle {
499                return Some(i);
500            }
501        }
502        None
503    }
504
505    fn parse_u64(&self, s: &str) -> u64 {
506        let s = s.trim();
507        let mut val: u64 = 0;
508        for c in s.bytes() {
509            if c.is_ascii_digit() {
510                val = val.saturating_mul(10).saturating_add((c - b'0') as u64);
511            }
512        }
513        val
514    }
515
516    fn parse_i64(&self, s: &str) -> i64 {
517        let s = s.trim();
518        let (neg, digits) = if let Some(rest) = s.strip_prefix('-') {
519            (true, rest)
520        } else if let Some(rest) = s.strip_prefix('+') {
521            (false, rest)
522        } else {
523            (false, s)
524        };
525        let mut val: i64 = 0;
526        for c in digits.bytes() {
527            if c.is_ascii_digit() {
528                val = val.saturating_mul(10).saturating_add((c - b'0') as i64);
529            }
530        }
531        if neg {
532            -val
533        } else {
534            val
535        }
536    }
537
538    /// Get the number of xref entries.
539    pub fn xref_count(&self) -> usize {
540        self.xref.len()
541    }
542}
543
/// Map an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its 4-bit value;
/// any other byte gives `None`.
fn hex_nibble(c: u8) -> Option<u8> {
    (c as char).to_digit(16).map(|digit| digit as u8)
}
553
554// ---------------------------------------------------------------------------
555// PDF page
556// ---------------------------------------------------------------------------
557
558/// A single PDF page with its content and media box.
559#[derive(Debug, Clone)]
560pub struct PdfPage {
561    /// Media box: x, y, width, height (integer coordinates).
562    pub media_box_x: i32,
563    pub media_box_y: i32,
564    pub media_box_width: i32,
565    pub media_box_height: i32,
566    /// Raw content stream bytes.
567    pub content_stream: Vec<u8>,
568    /// Resource dictionary.
569    pub resources: BTreeMap<String, PdfObject>,
570}
571
572impl Default for PdfPage {
573    fn default() -> Self {
574        Self {
575            media_box_x: 0,
576            media_box_y: 0,
577            media_box_width: 612, // US Letter width in points
578            media_box_height: 792,
579            content_stream: Vec::new(),
580            resources: BTreeMap::new(),
581        }
582    }
583}
584
585// ---------------------------------------------------------------------------
586// Content stream operations
587// ---------------------------------------------------------------------------
588
/// One operation decoded from a page content stream.
#[derive(Debug, Clone)]
pub enum ContentStreamOp {
    /// `BT`: begin a text object.
    BT,
    /// `ET`: end a text object.
    ET,
    /// `Tm`: set the text matrix; the six operands a, b, c, d, e, f as
    /// integers.
    Tm(i32, i32, i32, i32, i32, i32),
    /// `Tj`: show a text string (raw bytes).
    Tj(Vec<u8>),
    /// `re`: rectangle x, y, width, height (integer points).
    Re(i32, i32, i32, i32),
    /// `f` / `F`: fill the current path.
    F,
    /// `rg`: set the non-stroking colour (RGB, 0-1000 each — milli-units).
    Rg(i32, i32, i32),
    /// `cm`: concatenate a matrix; the six operands as integers.
    Cm(i32, i32, i32, i32, i32, i32),
    /// `Td` / `TD`: move the text position by (tx, ty).
    Td(i32, i32),
    /// `Tf`: select a font (name without the leading `/`) and its size.
    Tf(String, i32),
}
613
614// ---------------------------------------------------------------------------
615// Content stream parser (minimal)
616// ---------------------------------------------------------------------------
617
618/// Parse a content stream into operations.
619pub fn parse_content_stream(data: &[u8]) -> Vec<ContentStreamOp> {
620    let mut ops = Vec::new();
621    let text = String::from_utf8_lossy(data);
622    let mut operands: Vec<String> = Vec::new();
623
624    for token in text.split_whitespace() {
625        match token {
626            "BT" => ops.push(ContentStreamOp::BT),
627            "ET" => ops.push(ContentStreamOp::ET),
628            "f" | "F" => ops.push(ContentStreamOp::F),
629            "Tm" => {
630                if operands.len() >= 6 {
631                    let vals: Vec<i32> = operands
632                        .iter()
633                        .rev()
634                        .take(6)
635                        .rev()
636                        .map(|s| parse_content_int(s))
637                        .collect();
638                    ops.push(ContentStreamOp::Tm(
639                        vals[0], vals[1], vals[2], vals[3], vals[4], vals[5],
640                    ));
641                }
642                operands.clear();
643            }
644            "Td" | "TD" => {
645                if operands.len() >= 2 {
646                    let n = operands.len();
647                    let tx = parse_content_int(&operands[n - 2]);
648                    let ty = parse_content_int(&operands[n - 1]);
649                    ops.push(ContentStreamOp::Td(tx, ty));
650                }
651                operands.clear();
652            }
653            "Tj" => {
654                // Text from last parenthesised string
655                let joined: String = operands.join(" ");
656                if let Some(start) = joined.find('(') {
657                    if let Some(end) = joined.rfind(')') {
658                        let text_bytes = joined.as_bytes()[start + 1..end].to_vec();
659                        ops.push(ContentStreamOp::Tj(text_bytes));
660                    }
661                }
662                operands.clear();
663            }
664            "re" => {
665                if operands.len() >= 4 {
666                    let n = operands.len();
667                    ops.push(ContentStreamOp::Re(
668                        parse_content_int(&operands[n - 4]),
669                        parse_content_int(&operands[n - 3]),
670                        parse_content_int(&operands[n - 2]),
671                        parse_content_int(&operands[n - 1]),
672                    ));
673                }
674                operands.clear();
675            }
676            "rg" => {
677                if operands.len() >= 3 {
678                    let n = operands.len();
679                    ops.push(ContentStreamOp::Rg(
680                        parse_content_milli(&operands[n - 3]),
681                        parse_content_milli(&operands[n - 2]),
682                        parse_content_milli(&operands[n - 1]),
683                    ));
684                }
685                operands.clear();
686            }
687            "cm" => {
688                if operands.len() >= 6 {
689                    let vals: Vec<i32> = operands
690                        .iter()
691                        .rev()
692                        .take(6)
693                        .rev()
694                        .map(|s| parse_content_int(s))
695                        .collect();
696                    ops.push(ContentStreamOp::Cm(
697                        vals[0], vals[1], vals[2], vals[3], vals[4], vals[5],
698                    ));
699                }
700                operands.clear();
701            }
702            "Tf" => {
703                if operands.len() >= 2 {
704                    let n = operands.len();
705                    let font = String::from(operands[n - 2].trim_start_matches('/'));
706                    let size = parse_content_int(&operands[n - 1]);
707                    ops.push(ContentStreamOp::Tf(font, size));
708                }
709                operands.clear();
710            }
711            other => {
712                operands.push(String::from(other));
713            }
714        }
715    }
716
717    ops
718}
719
/// Convert a content-stream number to an integer, dropping everything from
/// the first `.` onwards.
///
/// A single leading `-` negates the result. Bytes that are not ASCII digits
/// are ignored, and the magnitude saturates at `i32::MAX`.
fn parse_content_int(s: &str) -> i32 {
    let trimmed = s.trim();
    let (negative, body) = match trimmed.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, trimmed),
    };

    let magnitude = body
        .bytes()
        .take_while(|&byte| byte != b'.')
        .filter(|byte| byte.is_ascii_digit())
        .fold(0i32, |acc, byte| {
            acc.saturating_mul(10).saturating_add((byte - b'0') as i32)
        });

    if negative {
        -magnitude
    } else {
        magnitude
    }
}
743
/// Convert a content-stream number to milli-units (`0.0..=1.0` maps to
/// `0..=1000`).
///
/// Only the first three fractional digits contribute; further digits are
/// truncated, so arbitrarily long fractions such as `0.33333333` cannot
/// overflow. A single leading `-` negates the result, bytes that are neither
/// digits nor `.` are ignored, and the magnitude saturates at `i32::MAX`.
fn parse_content_milli(s: &str) -> i32 {
    let s = s.trim();
    let (negative, body) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s),
    };

    let mut whole: i32 = 0;
    let mut frac_milli: i32 = 0;
    let mut frac_digits = 0u32;
    let mut in_fraction = false;

    for byte in body.bytes() {
        match byte {
            b'.' => in_fraction = true,
            b'0'..=b'9' => {
                let digit = i32::from(byte - b'0');
                if !in_fraction {
                    whole = whole.saturating_mul(10).saturating_add(digit);
                } else if frac_digits < 3 {
                    // At most three digits are kept, so this stays below 1000.
                    frac_milli = frac_milli * 10 + digit;
                    frac_digits += 1;
                }
            }
            _ => {}
        }
    }

    // Pad short fractions: ".5" is 500 thousandths, not 5.
    for _ in frac_digits..3 {
        frac_milli *= 10;
    }

    let milli = whole.saturating_mul(1000).saturating_add(frac_milli);
    if negative {
        -milli
    } else {
        milli
    }
}
784
785// ---------------------------------------------------------------------------
786// PDF renderer
787// ---------------------------------------------------------------------------
788
789/// Renders PDF content stream operations to a pixel buffer.
790pub struct PdfRenderer {
791    /// Output buffer width.
792    width: u32,
793    /// Output buffer height.
794    height: u32,
795    /// Current text position X (points).
796    text_x: i32,
797    /// Current text position Y (points).
798    text_y: i32,
799    /// Current fill colour (ARGB8888).
800    fill_color: u32,
801    /// Current font size (points).
802    font_size: i32,
803    /// Scale factor from PDF points to pixels (256 = 1.0).
804    scale: i32,
805}
806
807impl PdfRenderer {
808    /// Create a renderer targeting a buffer of `width x height` pixels.
809    pub fn new(width: u32, height: u32) -> Self {
810        // Default scale: assume 72 DPI PDF, target ~96 DPI screen
811        // scale = width * 256 / 612 (US Letter width in points)
812        let scale = if width > 0 {
813            (width as i32 * 256) / 612
814        } else {
815            256
816        };
817
818        Self {
819            width,
820            height,
821            text_x: 0,
822            text_y: 0,
823            fill_color: 0xFF000000,
824            font_size: 12,
825            scale,
826        }
827    }
828
829    /// Scale a PDF-point coordinate to pixel coordinate.
830    fn to_px(&self, pt: i32) -> i32 {
831        (pt * self.scale) / 256
832    }
833
834    /// Render a page's content stream to a pixel buffer.
835    ///
836    /// `buf` must be `width * height` u32 values (ARGB8888).
837    pub fn render_page(&mut self, page: &PdfPage, buf: &mut [u32]) {
838        // Clear to white
839        for px in buf.iter_mut() {
840            *px = 0xFFFFFFFF;
841        }
842
843        let ops = parse_content_stream(&page.content_stream);
844        self.text_x = 0;
845        self.text_y = 0;
846        self.fill_color = 0xFF000000;
847
848        for op in &ops {
849            match op {
850                ContentStreamOp::BT => {
851                    self.text_x = 0;
852                    self.text_y = 0;
853                }
854                ContentStreamOp::ET => {}
855                ContentStreamOp::Tm(_, _, _, _, e, f) => {
856                    self.text_x = *e;
857                    // PDF Y is bottom-up; convert to top-down
858                    self.text_y = page.media_box_height - *f;
859                }
860                ContentStreamOp::Td(tx, ty) => {
861                    self.text_x += *tx;
862                    self.text_y -= *ty; // PDF Y is bottom-up
863                }
864                ContentStreamOp::Tj(text) => {
865                    self.render_text(text, buf);
866                }
867                ContentStreamOp::Re(x, y, w, h) => {
868                    let px = self.to_px(*x);
869                    let py = self.to_px(page.media_box_height - *y - *h);
870                    let pw = self.to_px(*w);
871                    let ph = self.to_px(*h);
872                    self.fill_rect(buf, px, py, pw, ph);
873                }
874                ContentStreamOp::F => {
875                    // Fill is applied by Re already in this simple renderer
876                }
877                ContentStreamOp::Rg(r, g, b) => {
878                    let rc = ((*r * 255) / 1000).clamp(0, 255) as u32;
879                    let gc = ((*g * 255) / 1000).clamp(0, 255) as u32;
880                    let bc = ((*b * 255) / 1000).clamp(0, 255) as u32;
881                    self.fill_color = 0xFF000000 | (rc << 16) | (gc << 8) | bc;
882                }
883                ContentStreamOp::Tf(_, size) => {
884                    self.font_size = *size;
885                }
886                ContentStreamOp::Cm(_, _, _, _, _, _) => {
887                    // Matrix concatenation — not implemented in simple renderer
888                }
889            }
890        }
891    }
892
893    /// Render text at the current text position using the 8x16 bitmap font.
894    fn render_text(&mut self, text: &[u8], buf: &mut [u32]) {
895        let px = self.to_px(self.text_x);
896        let py = self.to_px(self.text_y);
897        let color = self.fill_color;
898        let bw = self.width as i32;
899        let bh = self.height as i32;
900        let char_w = 8i32;
901        let char_h = 16i32;
902
903        for (i, &ch) in text.iter().enumerate() {
904            let cx = px + (i as i32) * char_w;
905            if cx + char_w <= 0 || cx >= bw {
906                continue;
907            }
908            if py + char_h <= 0 || py >= bh {
909                continue;
910            }
911
912            // Simple placeholder glyph rendering: filled rect for printable chars
913            if (0x20..0x7F).contains(&ch) {
914                for row in 0..char_h {
915                    let dy = py + row;
916                    if dy < 0 || dy >= bh {
917                        continue;
918                    }
919                    for col in 0..char_w {
920                        let dx = cx + col;
921                        if dx < 0 || dx >= bw {
922                            continue;
923                        }
924                        // Simple bitmap: draw character outline
925                        if row == 0 || row == char_h - 1 || col == 0 || col == char_w - 1 {
926                            buf[(dy * bw + dx) as usize] = color;
927                        }
928                    }
929                }
930            }
931        }
932
933        // Advance text position
934        self.text_x += (text.len() as i32) * 8;
935    }
936
937    /// Fill a rectangle in the buffer.
938    fn fill_rect(&self, buf: &mut [u32], x: i32, y: i32, w: i32, h: i32) {
939        let bw = self.width as i32;
940        let bh = self.height as i32;
941
942        for row in 0..h {
943            let dy = y + row;
944            if dy < 0 || dy >= bh {
945                continue;
946            }
947            for col in 0..w {
948                let dx = x + col;
949                if dx < 0 || dx >= bw {
950                    continue;
951                }
952                buf[(dy * bw + dx) as usize] = self.fill_color;
953            }
954        }
955    }
956}
957
958// ---------------------------------------------------------------------------
959// PDF document
960// ---------------------------------------------------------------------------
961
962/// A parsed PDF document with pages.
963#[derive(Debug)]
964pub struct PdfDocument {
965    /// Parser instance.
966    parser: PdfParser,
967    /// Extracted pages.
968    pub pages: Vec<PdfPage>,
969}
970
971impl PdfDocument {
972    /// Open a PDF document from raw bytes.
973    pub fn open(data: Vec<u8>) -> Option<Self> {
974        let mut parser = PdfParser::new(data);
975
976        if !parser.parse_header() {
977            return None;
978        }
979
980        parser.parse_xref_table();
981
982        Some(Self {
983            parser,
984            pages: Vec::new(),
985        })
986    }
987
988    /// Add a page manually (useful for constructing test documents).
989    pub fn add_page(&mut self, page: PdfPage) {
990        self.pages.push(page);
991    }
992
993    /// Get a page by index.
994    pub fn get_page(&self, index: usize) -> Option<&PdfPage> {
995        self.pages.get(index)
996    }
997
998    /// Number of pages.
999    pub fn page_count(&self) -> usize {
1000        self.pages.len()
1001    }
1002
1003    /// Number of xref entries in the file.
1004    pub fn xref_count(&self) -> usize {
1005        self.parser.xref_count()
1006    }
1007}
1008
1009// ---------------------------------------------------------------------------
1010// Tests
1011// ---------------------------------------------------------------------------
1012
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use alloc::vec;

    use super::*;

    /// Build a parser over an owned copy of `bytes`.
    fn parser_over(bytes: &[u8]) -> PdfParser {
        PdfParser::new(bytes.to_vec())
    }

    #[test]
    fn test_pdf_header_valid() {
        assert!(parser_over(b"%PDF-1.4\n").parse_header());
    }

    #[test]
    fn test_pdf_header_invalid() {
        assert!(!parser_over(b"not a pdf").parse_header());
    }

    #[test]
    fn test_pdf_object_accessors() {
        let number = PdfObject::Integer(42);
        assert_eq!(number.as_integer(), Some(42));
        assert!(number.as_name().is_none());

        let name = PdfObject::Name(String::from("Type"));
        assert_eq!(name.as_name(), Some("Type"));
    }

    #[test]
    fn test_xref_entry_default() {
        let XrefEntry { offset, in_use, .. } = XrefEntry::default();
        assert_eq!(offset, 0);
        assert!(!in_use);
    }

    #[test]
    fn test_content_stream_parse() {
        let ops = parse_content_stream(b"BT /F1 12 Tf 100 700 Td (Hello World) Tj ET");
        assert!(!ops.is_empty());
        // The text object delimiters must both be recognised.
        assert!(ops.iter().any(|op| matches!(op, ContentStreamOp::BT)));
        assert!(ops.iter().any(|op| matches!(op, ContentStreamOp::ET)));
    }

    #[test]
    fn test_content_stream_rect() {
        let ops = parse_content_stream(b"100 200 50 30 re f");
        let rects = ops.iter().filter_map(|op| match op {
            ContentStreamOp::Re(x, y, w, h) => Some((*x, *y, *w, *h)),
            _ => None,
        });

        let mut seen = 0usize;
        for (x, y, w, h) in rects {
            assert_eq!(x, 100);
            assert_eq!(y, 200);
            assert_eq!(w, 50);
            assert_eq!(h, 30);
            seen += 1;
        }
        assert!(seen > 0);
    }

    #[test]
    fn test_content_stream_color() {
        let ops = parse_content_stream(b"1 0 0 rg");
        let colors = ops.iter().filter_map(|op| match op {
            ContentStreamOp::Rg(r, g, b) => Some((*r, *g, *b)),
            _ => None,
        });

        let mut seen = 0usize;
        for (r, g, b) in colors {
            // Components are expressed in thousandths.
            assert_eq!(r, 1000);
            assert_eq!(g, 0);
            assert_eq!(b, 0);
            seen += 1;
        }
        assert!(seen > 0);
    }

    #[test]
    fn test_parse_content_milli() {
        let cases = [("0.5", 500), ("1", 1000), ("0", 0), ("0.25", 250)];
        for &(text, expected) in cases.iter() {
            assert_eq!(parse_content_milli(text), expected);
        }
    }

    #[test]
    fn test_pdf_renderer_render_page() {
        let page = PdfPage {
            content_stream: b"BT 10 780 Td (Test) Tj ET".to_vec(),
            ..PdfPage::default()
        };
        let mut renderer = PdfRenderer::new(100, 100);
        let mut buf = vec![0u32; 100 * 100];

        renderer.render_page(&page, &mut buf);

        // Rendering must leave at least one white (background) pixel behind.
        assert!(buf.contains(&0xFFFFFFFF));
    }

    #[test]
    fn test_pdf_document_open() {
        let doc = PdfDocument::open(b"%PDF-1.4\nsome content".to_vec());
        assert!(doc.is_some());
    }
}