// veridian_kernel/arch/x86_64/pat.rs
//! Page Attribute Table (PAT) initialization for write-combining memory.
//!
//! Reprograms PAT entry 1 from WT (write-through) to WC (write-combining),
//! then provides `apply_write_combining()` to set framebuffer page table
//! entries to use the WC memory type. This yields 5-150x faster MMIO
//! writes for the fbcon flush path.
//!
//! # PAT index mapping after `init()`
//!
//! | Index | PWT | PCD | PAT | Type |
//! |-------|-----|-----|-----|------|
//! | 0     | 0   | 0   | 0   | WB   |
//! | 1     | 1   | 0   | 0   | **WC** (was WT) |
//! | 2     | 0   | 1   | 0   | UC-  |
//! | 3     | 1   | 1   | 0   | UC   |
//! | 4-7   |     |     | 1   | (mirrors 0-3 by default) |
//!
//! Framebuffer PTEs use index 1 (PWT=1, PCD=0, PAT=0) for write-combining.

use super::msr::{phys_to_virt, rdmsr, wrmsr};

/// IA32_PAT MSR address.
const IA32_PAT: u32 = 0x277;

/// PAT memory type encoding: Write-Combining (0x01).
const PAT_WC: u64 = 0x01;

/// PTE flag: Page Write-Through (bit 3). Acts as bit 0 of the PAT index.
const PTE_PWT: u64 = 1 << 3;
/// PTE flag: Page Cache Disable (bit 4). Acts as bit 1 of the PAT index.
const PTE_PCD: u64 = 1 << 4;
/// PTE flag: PAT bit in a leaf PTE (bit 7). Acts as bit 2 of the PAT index.
const PTE_PAT: u64 = 1 << 7;
/// PTE flag: Present (bit 0).
const PTE_PRESENT: u64 = 1 << 0;

/// Check CPUID for PAT support (leaf 1, EDX bit 16).
///
/// Returns `false` on CPUs that do not report the PAT feature flag.
fn cpu_has_pat() -> bool {
    let edx: u32;
    // SAFETY: CPUID with EAX=1 is a read-only, side-effect-free instruction.
    // LLVM reserves RBX, so it cannot be listed as a clobber; instead it is
    // saved to a scratch register and restored with XCHG. Unlike a
    // PUSH/POP pair, this touches no stack memory, so `nomem` is clearly
    // sound and `nostack` can be asserted (no red-zone concerns in
    // kernel/interrupt contexts).
    unsafe {
        core::arch::asm!(
            "mov {saved_rbx}, rbx",
            "cpuid",
            "xchg {saved_rbx}, rbx",
            saved_rbx = out(reg) _,
            inout("eax") 1u32 => _,
            out("edx") edx,
            out("ecx") _,
            options(nomem, nostack, preserves_flags),
        );
    }
    (edx & (1 << 16)) != 0
}

57/// Reprogram PAT entry 1 from WT to WC.
58///
59/// Must be called early in boot, before any memory is mapped with PAT
60/// index 1. No-op if the CPU does not support PAT.
61pub fn init() {
62 if !cpu_has_pat() {
63 return;
64 }
65 let mut pat = rdmsr(IA32_PAT);
66 // Clear PAT entry 1 (bits [15:8]) and set to WC (0x01)
67 pat = (pat & !0xFF00) | (PAT_WC << 8);
68 wrmsr(IA32_PAT, pat);
69}
70
71/// Apply write-combining attributes to a virtual address range.
72///
73/// Walks the active page table (CR3), finds PTEs for each 4KB page in
74/// the range, sets PWT=1 PCD=0 PAT=0 (PAT index 1 = WC after `init()`),
75/// and flushes the TLB entry.
76///
77/// # Safety
78///
79/// - `vaddr` must be page-aligned and mapped with 4KB pages.
80/// - `size` must be a multiple of 4096.
81/// - PAT entry 1 must have been reprogrammed to WC via `init()`.
82pub unsafe fn apply_write_combining(vaddr: usize, size: usize) {
83 if !cpu_has_pat() {
84 return;
85 }
86
87 // Read CR3 for PML4 physical address
88 let cr3: u64;
89 core::arch::asm!("mov {}, cr3", out(reg) cr3);
90 let pml4_phys = (cr3 & 0x000F_FFFF_FFFF_F000) as usize;
91
92 let num_pages = size / 4096;
93 for i in 0..num_pages {
94 let addr = vaddr + i * 4096;
95 set_page_wc(pml4_phys, addr);
96 }
97}
98
99/// Set a single 4KB page's PTE to use PAT index 1 (WC).
100///
101/// Walks PML4 -> PDPT -> PD -> PT, reads the leaf PTE, sets PWT=1,
102/// clears PCD and PAT, writes back, and flushes the TLB for that address.
103unsafe fn set_page_wc(pml4_phys: usize, vaddr: usize) {
104 // Extract page table indices from the virtual address
105 let pml4_idx = (vaddr >> 39) & 0x1FF;
106 let pdpt_idx = (vaddr >> 30) & 0x1FF;
107 let pd_idx = (vaddr >> 21) & 0x1FF;
108 let pt_idx = (vaddr >> 12) & 0x1FF;
109
110 // Walk PML4 -> PDPT
111 let pml4_virt = match phys_to_virt(pml4_phys) {
112 Some(v) => v as *const u64,
113 None => return,
114 };
115 let pml4_entry = pml4_virt.add(pml4_idx).read_volatile();
116 if (pml4_entry & PTE_PRESENT) == 0 {
117 return;
118 }
119 let pdpt_phys = (pml4_entry & 0x000F_FFFF_FFFF_F000) as usize;
120
121 // Walk PDPT -> PD
122 let pdpt_virt = match phys_to_virt(pdpt_phys) {
123 Some(v) => v as *const u64,
124 None => return,
125 };
126 let pdpt_entry = pdpt_virt.add(pdpt_idx).read_volatile();
127 if (pdpt_entry & PTE_PRESENT) == 0 {
128 return;
129 }
130 // Check for 1GiB huge page (bit 7 = PS)
131 if (pdpt_entry & (1 << 7)) != 0 {
132 return; // Cannot set WC on huge pages via this path
133 }
134 let pd_phys = (pdpt_entry & 0x000F_FFFF_FFFF_F000) as usize;
135
136 // Walk PD -> PT
137 let pd_virt = match phys_to_virt(pd_phys) {
138 Some(v) => v as *const u64,
139 None => return,
140 };
141 let pd_entry = pd_virt.add(pd_idx).read_volatile();
142 if (pd_entry & PTE_PRESENT) == 0 {
143 return;
144 }
145 // Check for 2MiB huge page (bit 7 = PS)
146 if (pd_entry & (1 << 7)) != 0 {
147 return; // Cannot set WC on huge pages via this path
148 }
149 let pt_phys = (pd_entry & 0x000F_FFFF_FFFF_F000) as usize;
150
151 // Read and modify leaf PTE
152 let pt_virt = match phys_to_virt(pt_phys) {
153 Some(v) => v as *mut u64,
154 None => return,
155 };
156 let pt_entry_ptr = pt_virt.add(pt_idx);
157 let mut pte = pt_entry_ptr.read_volatile();
158 if (pte & PTE_PRESENT) == 0 {
159 return;
160 }
161
162 // Set PAT index 1: PWT=1, PCD=0, PAT(bit7)=0
163 pte |= PTE_PWT;
164 pte &= !PTE_PCD;
165 pte &= !PTE_PAT;
166 pt_entry_ptr.write_volatile(pte);
167
168 // Flush TLB for this address
169 super::tlb_flush_address(vaddr as u64);
170}