⚠️ VeridianOS Kernel Documentation — this is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. Environment: `no_std`.

veridian_kernel/desktop/desktop_ext/
cjk.rs

1//! CJK Unicode / Wide Character Support
2//!
3//! Wide character detection, double-width cell rendering, and IME framework.
4
5#[cfg(feature = "alloc")]
6use alloc::{collections::BTreeMap, string::String, vec::Vec};
7
/// Report whether `ch` is a CJK wide character, i.e. one that takes up two
/// terminal cells.
///
/// The decision is a fixed code-point table derived from the Unicode East
/// Asian Width property and the common CJK blocks:
/// - CJK Symbols and Punctuation (U+3000-U+303F)
/// - Hiragana (U+3040-U+309F) and Katakana (U+30A0-U+30FF)
/// - Bopomofo (U+3100-U+312F)
/// - Enclosed CJK (U+3200-U+32FF) and CJK Compatibility (U+3300-U+33FF)
/// - CJK Unified Ideographs Extension A (U+3400-U+4DBF)
/// - CJK Unified Ideographs (U+4E00-U+9FFF)
/// - Hangul Syllables (U+AC00-U+D7AF)
/// - CJK Compatibility Ideographs (U+F900-U+FAFF)
/// - Fullwidth Forms (U+FF01-U+FF60, U+FFE0-U+FFE6)
/// - CJK Unified Ideographs Extension B (U+20000-U+2A6DF)
///
/// Any code point outside these ranges yields `false`.
pub fn is_cjk_wide(ch: char) -> bool {
    // Ranges are listed in ascending code-point order; adjacent blocks that
    // the table treats identically are merged into a single range.
    matches!(
        u32::from(ch),
        0x3000..=0x303F
            | 0x3040..=0x30FF
            | 0x3100..=0x312F
            | 0x3200..=0x33FF
            | 0x3400..=0x4DBF
            | 0x4E00..=0x9FFF
            | 0xAC00..=0xD7AF
            | 0xF900..=0xFAFF
            | 0xFF01..=0xFF60
            | 0xFFE0..=0xFFE6
            | 0x20000..=0x2A6DF
    )
}
63
64/// Get the display width of a character in terminal cells.
65///
66/// Returns 2 for wide (CJK) characters, 0 for zero-width characters
67/// (combining marks, control chars), and 1 for everything else.
68pub fn char_width(ch: char) -> u8 {
69    let cp = ch as u32;
70
71    // Control characters and zero-width.
72    if cp == 0 || (0x01..=0x1F).contains(&cp) || cp == 0x7F {
73        return 0;
74    }
75
76    // Combining marks (general category Mn/Mc/Me).
77    if (0x0300..=0x036F).contains(&cp) {
78        return 0; // Combining Diacritical Marks
79    }
80    if (0x1AB0..=0x1AFF).contains(&cp) {
81        return 0; // Combining Diacritical Marks Extended
82    }
83    if (0x1DC0..=0x1DFF).contains(&cp) {
84        return 0; // Combining Diacritical Marks Supplement
85    }
86    if (0x20D0..=0x20FF).contains(&cp) {
87        return 0; // Combining Diacritical Marks for Symbols
88    }
89    if (0xFE20..=0xFE2F).contains(&cp) {
90        return 0; // Combining Half Marks
91    }
92
93    // Soft hyphen.
94    if cp == 0x00AD {
95        return 1;
96    }
97
98    // Zero-width joiner / non-joiner / space.
99    if cp == 0x200B || cp == 0x200C || cp == 0x200D || cp == 0xFEFF {
100        return 0;
101    }
102
103    if is_cjk_wide(ch) {
104        return 2;
105    }
106
107    1
108}
109
/// Total number of terminal cells needed to display `s`.
///
/// This is the sum of [`char_width`] over every `char` of the string, so
/// zero-width characters add nothing and wide characters add two.
#[cfg(feature = "alloc")]
pub fn string_width(s: &str) -> usize {
    let mut cells = 0usize;
    for ch in s.chars() {
        cells += usize::from(char_width(ch));
    }
    cells
}
115
/// Truncate a string to fit within `max_width` terminal cells.
///
/// * If `s` already fits in `max_width` cells it is returned unchanged
///   (no ellipsis is added).
/// * Otherwise the longest prefix that fits in `max_width - 3` cells is kept
///   and `"..."` is appended, so the result never exceeds `max_width` cells.
/// * If `s` does not fit and `max_width < 3` there is no room even for the
///   ellipsis, and an empty string is returned.
///
/// Widths are measured with [`char_width`], so a wide character is never
/// split: if it does not fit entirely it is dropped.
#[cfg(feature = "alloc")]
pub fn truncate_to_width(s: &str, max_width: usize) -> String {
    const ELLIPSIS: &str = "...";
    const ELLIPSIS_WIDTH: usize = 3;

    // Nothing to do when the whole string fits; only a string that is
    // actually too wide gets shortened and marked with an ellipsis.
    let total: usize = s.chars().map(|c| char_width(c) as usize).sum();
    if total <= max_width {
        return String::from(s);
    }

    // Too narrow to hold the ellipsis itself.
    if max_width < ELLIPSIS_WIDTH {
        return String::new();
    }

    // Cells available for real content once the ellipsis is reserved.
    let budget = max_width - ELLIPSIS_WIDTH;
    let mut width = 0usize;
    let mut result = String::new();

    for ch in s.chars() {
        let cw = char_width(ch) as usize;
        if width + cw > budget {
            break;
        }
        result.push(ch);
        width += cw;
    }

    result.push_str(ELLIPSIS);
    result
}
139
/// What a single cell of a character grid holds, with support for
/// double-width glyphs.
///
/// A wide character drawn at cell (col, row) covers both (col, row) and
/// (col+1, row). The first of those cells is stored as
/// [`CellContent::WideStart`]; the second should be stored as
/// [`CellContent::WideContinuation`] so that it is recognisable as the
/// trailing half of the glyph.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CellContent {
    /// An ordinary character that is one cell wide.
    Narrow(char),
    /// Leading (left) cell of a two-cell-wide character.
    WideStart(char),
    /// Trailing (right) cell of a two-cell-wide character.
    WideContinuation,
    /// Nothing is stored in the cell. This is the default value.
    #[default]
    Empty,
}
157
/// Phase of an Input Method Editor (IME) composition.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ImeState {
    /// No composition is in progress; input is taken directly.
    /// This is the default value.
    #[default]
    Inactive,
    /// The user is typing a sequence that is still to be converted.
    Composing,
    /// Composed text has been finalized.
    Committed,
}
169
/// One entry of the IME candidate list.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ImeCandidate {
    /// Short label shown next to the candidate (e.g., "1", "2").
    pub label: String,
    /// Text this candidate stands for.
    pub text: String,
}
179
/// Input Method Editor framework.
///
/// Holds the state machine and the data needed for input composition:
/// the text being composed, the candidates offered for it, and the text that
/// has been finalized. Actual input method dictionaries would be loaded from
/// user space; the lookup table stored here is only a stub.
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct InputMethodEditor {
    /// Phase the composition is currently in.
    state: ImeState,
    /// Preedit string, i.e. the text still being composed.
    preedit: String,
    /// Cursor position inside `preedit`.
    preedit_cursor: usize,
    /// Candidates currently on offer.
    candidates: Vec<ImeCandidate>,
    /// Index of the highlighted entry in `candidates`.
    selected_candidate: usize,
    /// Finalized text that is ready for insertion.
    committed: String,
    /// `true` while the IME is switched on.
    enabled: bool,
    /// Pinyin lookup table (stub).
    pinyin_table: BTreeMap<String, Vec<String>>,
}
204
#[cfg(feature = "alloc")]
impl Default for InputMethodEditor {
    /// The default editor is exactly what [`InputMethodEditor::new`] builds.
    fn default() -> Self {
        InputMethodEditor::new()
    }
}
211
#[cfg(feature = "alloc")]
impl InputMethodEditor {
    /// Create a new IME with basic Pinyin stub entries.
    ///
    /// The editor starts disabled, in [`ImeState::Inactive`], with empty
    /// preedit, candidate list and committed text.
    pub fn new() -> Self {
        // Basic Pinyin stub entries for common characters:
        // (syllable, candidate texts in the order they are offered).
        const STUB_ENTRIES: &[(&str, &[&str])] = &[
            ("ni", &["\u{4F60}", "\u{5C3C}"]),
            ("hao", &["\u{597D}", "\u{53F7}"]),
            ("shi", &["\u{662F}", "\u{4E16}", "\u{4E8B}"]),
            ("de", &["\u{7684}", "\u{5F97}"]),
            ("wo", &["\u{6211}"]),
            ("ren", &["\u{4EBA}", "\u{8BA4}"]),
            ("da", &["\u{5927}", "\u{6253}"]),
            ("zhong", &["\u{4E2D}", "\u{91CD}"]),
            ("guo", &["\u{56FD}", "\u{8FC7}"]),
            ("yi", &["\u{4E00}", "\u{4E49}", "\u{5DF2}"]),
        ];

        let pinyin_table: BTreeMap<String, Vec<String>> = STUB_ENTRIES
            .iter()
            .map(|&(syllable, texts)| {
                let texts: Vec<String> = texts.iter().map(|&t| String::from(t)).collect();
                (String::from(syllable), texts)
            })
            .collect();

        Self {
            state: ImeState::Inactive,
            preedit: String::new(),
            preedit_cursor: 0,
            candidates: Vec::new(),
            selected_candidate: 0,
            committed: String::new(),
            enabled: false,
            pinyin_table,
        }
    }

    /// Enable or disable the IME.
    ///
    /// Disabling also calls [`reset`](Self::reset), which discards any
    /// composition in progress and any committed text not yet taken.
    pub fn set_enabled(&mut self, enabled: bool) {
        self.enabled = enabled;
        if !enabled {
            self.reset();
        }
    }

    /// Check if IME is enabled.
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Get current IME state.
    pub fn state(&self) -> ImeState {
        self.state
    }

    /// Get the preedit string (what the user is typing).
    pub fn preedit(&self) -> &str {
        &self.preedit
    }

    /// Get the preedit cursor position.
    ///
    /// The editor keeps the cursor at the end of the preedit, expressed as
    /// the preedit's byte length.
    pub fn preedit_cursor(&self) -> usize {
        self.preedit_cursor
    }

    /// Get the candidate list.
    pub fn candidates(&self) -> &[ImeCandidate] {
        &self.candidates
    }

    /// Get the selected candidate index.
    pub fn selected_candidate(&self) -> usize {
        self.selected_candidate
    }

    /// Get and clear the committed text.
    ///
    /// If the editor was in [`ImeState::Committed`] it returns to
    /// [`ImeState::Inactive`]; any other state is left untouched.
    pub fn take_committed(&mut self) -> String {
        let result = core::mem::take(&mut self.committed);
        if self.state == ImeState::Committed {
            self.state = ImeState::Inactive;
        }
        result
    }

    /// Feed a character into the IME.
    ///
    /// * IME disabled: `ch` is appended to the committed text directly.
    /// * ASCII letter: appended (lower-cased) to the preedit; the candidate
    ///   list is refreshed and the state becomes [`ImeState::Composing`].
    /// * ASCII digit while composing: `'1'..='9'` commit the candidate with
    ///   that label; `'0'`, or a digit with no matching candidate, commits
    ///   the raw preedit instead.
    /// * Space while composing: commits the first candidate (or the raw
    ///   preedit when there are no candidates).
    /// * Anything else: a composition in progress is committed as-is, then
    ///   `ch` is appended to the committed text.
    pub fn feed_char(&mut self, ch: char) {
        if !self.enabled {
            self.committed.push(ch);
            self.state = ImeState::Committed;
            return;
        }

        let composing = self.state == ImeState::Composing;

        if ch.is_ascii_alphabetic() {
            self.preedit.push(ch.to_ascii_lowercase());
            self.preedit_cursor = self.preedit.len();
            self.state = ImeState::Composing;
            self.update_candidates();
        } else if composing && ch.is_ascii_digit() {
            // Labels are 1-based, indices 0-based. '0' has no slot: computing
            // `b'0' - b'1'` on `u8` would underflow (a panic when overflow
            // checks are on), so it is mapped to an index that can never
            // match and `select_candidate` falls back to the raw preedit.
            let idx = ch
                .to_digit(10)
                .and_then(|digit| digit.checked_sub(1))
                .map_or(usize::MAX, |digit| digit as usize);
            self.select_candidate(idx);
        } else if composing && ch == ' ' {
            // Commit first candidate.
            // NOTE(review): this ignores `selected_candidate`, so moving the
            // highlight with `candidate_prev`/`candidate_next` does not affect
            // what Space commits — confirm this is intended.
            self.select_candidate(0);
        } else {
            // Pass-through input. A composition in progress is finalized
            // first so the character lands after the composed text.
            if composing {
                self.commit_preedit();
            }
            self.committed.push(ch);
            self.state = ImeState::Committed;
        }
    }

    /// Feed a backspace into the IME.
    ///
    /// Removes the last preedit character while composing. When the preedit
    /// becomes empty the editor returns to [`ImeState::Inactive`]. Outside of
    /// a composition this is a no-op.
    pub fn feed_backspace(&mut self) {
        if self.state == ImeState::Composing && !self.preedit.is_empty() {
            self.preedit.pop();
            self.preedit_cursor = self.preedit.len();
            if self.preedit.is_empty() {
                self.state = ImeState::Inactive;
                self.candidates.clear();
            } else {
                self.update_candidates();
            }
        }
    }

    /// Feed an Enter key: commit preedit as-is.
    pub fn feed_enter(&mut self) {
        if self.state == ImeState::Composing {
            self.commit_preedit();
        }
    }

    /// Feed Escape: cancel composition.
    ///
    /// Implemented as a full [`reset`](Self::reset), so committed text that
    /// has not been taken yet is discarded as well.
    pub fn feed_escape(&mut self) {
        self.reset();
    }

    /// Move candidate selection up.
    ///
    /// Does nothing when the first candidate is already selected.
    pub fn candidate_prev(&mut self) {
        if !self.candidates.is_empty() && self.selected_candidate > 0 {
            self.selected_candidate -= 1;
        }
    }

    /// Move candidate selection down.
    ///
    /// Does nothing when the last candidate is already selected.
    pub fn candidate_next(&mut self) {
        if !self.candidates.is_empty() && self.selected_candidate + 1 < self.candidates.len() {
            self.selected_candidate += 1;
        }
    }

    /// Update the candidate list based on current preedit.
    ///
    /// The preedit is looked up verbatim in the Pinyin table. Candidates are
    /// labelled "1" to "9" in table order; any further ones get "?". The
    /// selection is moved back to the first entry.
    fn update_candidates(&mut self) {
        const LABELS: [&str; 9] = ["1", "2", "3", "4", "5", "6", "7", "8", "9"];

        self.candidates.clear();
        self.selected_candidate = 0;

        if let Some(texts) = self.pinyin_table.get(self.preedit.as_str()) {
            self.candidates
                .extend(texts.iter().enumerate().map(|(i, text)| ImeCandidate {
                    label: String::from(LABELS.get(i).copied().unwrap_or("?")),
                    text: text.clone(),
                }));
        }
    }

    /// Select and commit a candidate by index.
    ///
    /// When `idx` does not name a candidate, the raw preedit is committed
    /// instead. The text is appended to whatever is already pending in the
    /// committed buffer, so output that has not been drained through
    /// [`take_committed`](Self::take_committed) is never lost.
    fn select_candidate(&mut self, idx: usize) {
        if let Some(candidate) = self.candidates.get(idx) {
            self.committed.push_str(&candidate.text);
        } else {
            // No matching candidate: commit preedit as-is.
            self.committed.push_str(&self.preedit);
        }
        self.clear_composition();
        self.state = ImeState::Committed;
    }

    /// Commit the raw preedit string.
    ///
    /// Like [`select_candidate`](Self::select_candidate), this appends to the
    /// committed buffer rather than replacing it.
    fn commit_preedit(&mut self) {
        self.committed.push_str(&self.preedit);
        self.clear_composition();
        self.state = ImeState::Committed;
    }

    /// Drop the preedit, its cursor, and the candidate list/selection.
    fn clear_composition(&mut self) {
        self.preedit.clear();
        self.preedit_cursor = 0;
        self.candidates.clear();
        self.selected_candidate = 0;
    }

    /// Reset the IME to inactive state.
    ///
    /// Clears the composition and the committed text. The enabled flag and
    /// the lookup table are left as they are.
    pub fn reset(&mut self) {
        self.clear_composition();
        self.committed.clear();
        self.state = ImeState::Inactive;
    }
}
453}