⚠️ VeridianOS Kernel Documentation - This is low-level kernel code. All functions are unsafe unless explicitly marked otherwise. no_std

veridian_kernel/crypto/
asymmetric.rs

1//! Asymmetric Cryptography
2//!
3//! Implements Ed25519 signatures (RFC 8032) and X25519 key exchange (RFC 7748).
4//!
5//! ## Implementation Details
6//!
7//! Field arithmetic operates in GF(2^255-19) using a 5-limb representation
8//! with 51 bits per limb, giving us 255 bits total. This allows intermediate
9//! products to fit in u128 without overflow.
10//!
11//! Ed25519 uses the twisted Edwards curve -x^2 + y^2 = 1 + d*x^2*y^2
12//! where d = -121665/121666 mod p.
13//!
14//! X25519 uses the Montgomery form of Curve25519 with the Montgomery ladder
15//! for constant-time scalar multiplication.
16
17use alloc::vec::Vec;
18
19use super::{CryptoError, CryptoResult};
20
21// ============================================================================
22// Field Arithmetic in GF(2^255-19)
23// ============================================================================
24
25/// Field element in GF(2^255-19), represented as 5 limbs of 51 bits each.
26#[derive(Clone, Copy, Debug)]
27struct Fe([u64; 5]);
28
29impl Fe {
30    const ZERO: Fe = Fe([0; 5]);
31    const ONE: Fe = Fe([1, 0, 0, 0, 0]);
32
33    /// Create field element from bytes (little-endian, 32 bytes)
34    fn from_bytes(s: &[u8; 32]) -> Fe {
35        let mut h = [0u64; 5];
36        // Load 256 bits from 32 bytes, then mask to 255 bits
37        let load64 = |bytes: &[u8]| -> u64 {
38            let mut buf = [0u8; 8];
39            let len = core::cmp::min(bytes.len(), 8);
40            buf[..len].copy_from_slice(&bytes[..len]);
41            u64::from_le_bytes(buf)
42        };
43
44        h[0] = load64(&s[0..]) & 0x7ffffffffffff; // 51 bits
45        h[1] = (load64(&s[6..]) >> 3) & 0x7ffffffffffff;
46        h[2] = (load64(&s[12..]) >> 6) & 0x7ffffffffffff;
47        h[3] = (load64(&s[19..]) >> 1) & 0x7ffffffffffff;
48        h[4] = (load64(&s[24..]) >> 12) & 0x7ffffffffffff;
49
50        Fe(h)
51    }
52
53    /// Convert field element to bytes (little-endian, 32 bytes)
54    fn to_bytes(self) -> [u8; 32] {
55        let mut h = self;
56        h.reduce();
57
58        // Full reduction: ensure 0 <= h < p
59        // After reduce(), h is nearly reduced. We do a final conditional subtraction.
60        let mut q = (h.0[0].wrapping_add(19)) >> 51;
61        q = (h.0[1].wrapping_add(q)) >> 51;
62        q = (h.0[2].wrapping_add(q)) >> 51;
63        q = (h.0[3].wrapping_add(q)) >> 51;
64        q = (h.0[4].wrapping_add(q)) >> 51;
65
66        h.0[0] = h.0[0].wrapping_add(19u64.wrapping_mul(q));
67        let mut carry: u64;
68        carry = h.0[0] >> 51;
69        h.0[0] &= 0x7ffffffffffff;
70        h.0[1] = h.0[1].wrapping_add(carry);
71        carry = h.0[1] >> 51;
72        h.0[1] &= 0x7ffffffffffff;
73        h.0[2] = h.0[2].wrapping_add(carry);
74        carry = h.0[2] >> 51;
75        h.0[2] &= 0x7ffffffffffff;
76        h.0[3] = h.0[3].wrapping_add(carry);
77        carry = h.0[3] >> 51;
78        h.0[3] &= 0x7ffffffffffff;
79        h.0[4] = h.0[4].wrapping_add(carry);
80        h.0[4] &= 0x7ffffffffffff;
81
82        let mut s = [0u8; 32];
83        // Pack 5 limbs of 51 bits into 32 bytes (little-endian)
84        s[0] = h.0[0] as u8;
85        s[1] = (h.0[0] >> 8) as u8;
86        s[2] = (h.0[0] >> 16) as u8;
87        s[3] = (h.0[0] >> 24) as u8;
88        s[4] = (h.0[0] >> 32) as u8;
89        s[5] = (h.0[0] >> 40) as u8;
90        s[6] = ((h.0[0] >> 48) | (h.0[1] << 3)) as u8;
91        s[7] = (h.0[1] >> 5) as u8;
92        s[8] = (h.0[1] >> 13) as u8;
93        s[9] = (h.0[1] >> 21) as u8;
94        s[10] = (h.0[1] >> 29) as u8;
95        s[11] = (h.0[1] >> 37) as u8;
96        s[12] = ((h.0[1] >> 45) | (h.0[2] << 6)) as u8;
97        s[13] = (h.0[2] >> 2) as u8;
98        s[14] = (h.0[2] >> 10) as u8;
99        s[15] = (h.0[2] >> 18) as u8;
100        s[16] = (h.0[2] >> 26) as u8;
101        s[17] = (h.0[2] >> 34) as u8;
102        s[18] = (h.0[2] >> 42) as u8;
103        s[19] = ((h.0[2] >> 50) | (h.0[3] << 1)) as u8;
104        s[20] = (h.0[3] >> 7) as u8;
105        s[21] = (h.0[3] >> 15) as u8;
106        s[22] = (h.0[3] >> 23) as u8;
107        s[23] = (h.0[3] >> 31) as u8;
108        s[24] = ((h.0[3] >> 39) | (h.0[4] << 12)) as u8;
109        s[25] = (h.0[4] >> 4) as u8;
110        s[26] = (h.0[4] >> 12) as u8;
111        s[27] = (h.0[4] >> 20) as u8;
112        s[28] = (h.0[4] >> 28) as u8;
113        s[29] = (h.0[4] >> 36) as u8;
114        s[30] = (h.0[4] >> 44) as u8;
115        s[31] = 0; // top bit always 0 for reduced element
116
117        s
118    }
119
120    /// Carry propagation / partial reduction
121    fn reduce(&mut self) {
122        let mut carry: u64;
123        carry = self.0[0] >> 51;
124        self.0[0] &= 0x7ffffffffffff;
125        self.0[1] = self.0[1].wrapping_add(carry);
126        carry = self.0[1] >> 51;
127        self.0[1] &= 0x7ffffffffffff;
128        self.0[2] = self.0[2].wrapping_add(carry);
129        carry = self.0[2] >> 51;
130        self.0[2] &= 0x7ffffffffffff;
131        self.0[3] = self.0[3].wrapping_add(carry);
132        carry = self.0[3] >> 51;
133        self.0[3] &= 0x7ffffffffffff;
134        self.0[4] = self.0[4].wrapping_add(carry);
135        carry = self.0[4] >> 51;
136        self.0[4] &= 0x7ffffffffffff;
137        // 2^255 = 19 mod p, so carry from top limb wraps with factor 19
138        self.0[0] = self.0[0].wrapping_add(carry.wrapping_mul(19));
139    }
140
141    /// Addition in GF(p)
142    fn add(&self, other: &Fe) -> Fe {
143        let mut r = Fe::ZERO;
144        r.0[0] = self.0[0].wrapping_add(other.0[0]);
145        r.0[1] = self.0[1].wrapping_add(other.0[1]);
146        r.0[2] = self.0[2].wrapping_add(other.0[2]);
147        r.0[3] = self.0[3].wrapping_add(other.0[3]);
148        r.0[4] = self.0[4].wrapping_add(other.0[4]);
149        r
150    }
151
152    /// Subtraction in GF(p)
153    fn sub(&self, other: &Fe) -> Fe {
154        // Add 2*p to avoid underflow (each limb gets 2 * (2^51 - 1) headroom,
155        // except limb 0 which gets 2*(2^51 - 19))
156        let mut r = Fe::ZERO;
157        r.0[0] = self.0[0]
158            .wrapping_add(0xfffffffffffda) // 2*(2^51 - 19)
159            .wrapping_sub(other.0[0]);
160        r.0[1] = self.0[1]
161            .wrapping_add(0xffffffffffffe) // 2*(2^51 - 1)
162            .wrapping_sub(other.0[1]);
163        r.0[2] = self.0[2]
164            .wrapping_add(0xffffffffffffe)
165            .wrapping_sub(other.0[2]);
166        r.0[3] = self.0[3]
167            .wrapping_add(0xffffffffffffe)
168            .wrapping_sub(other.0[3]);
169        r.0[4] = self.0[4]
170            .wrapping_add(0xffffffffffffe)
171            .wrapping_sub(other.0[4]);
172        r.reduce();
173        r
174    }
175
176    /// Multiplication in GF(p) using u128 for intermediate products
177    fn mul(&self, other: &Fe) -> Fe {
178        let a = &self.0;
179        let b = &other.0;
180
181        // Schoolbook multiplication with reduction
182        // Since 2^255 = 19 mod p, we precompute 19*b[i] for the wrap-around terms
183        let b1_19 = b[1].wrapping_mul(19);
184        let b2_19 = b[2].wrapping_mul(19);
185        let b3_19 = b[3].wrapping_mul(19);
186        let b4_19 = b[4].wrapping_mul(19);
187
188        let t0 = (a[0] as u128) * (b[0] as u128)
189            + (a[1] as u128) * (b4_19 as u128)
190            + (a[2] as u128) * (b3_19 as u128)
191            + (a[3] as u128) * (b2_19 as u128)
192            + (a[4] as u128) * (b1_19 as u128);
193
194        let t1 = (a[0] as u128) * (b[1] as u128)
195            + (a[1] as u128) * (b[0] as u128)
196            + (a[2] as u128) * (b4_19 as u128)
197            + (a[3] as u128) * (b3_19 as u128)
198            + (a[4] as u128) * (b2_19 as u128);
199
200        let t2 = (a[0] as u128) * (b[2] as u128)
201            + (a[1] as u128) * (b[1] as u128)
202            + (a[2] as u128) * (b[0] as u128)
203            + (a[3] as u128) * (b4_19 as u128)
204            + (a[4] as u128) * (b3_19 as u128);
205
206        let t3 = (a[0] as u128) * (b[3] as u128)
207            + (a[1] as u128) * (b[2] as u128)
208            + (a[2] as u128) * (b[1] as u128)
209            + (a[3] as u128) * (b[0] as u128)
210            + (a[4] as u128) * (b4_19 as u128);
211
212        let t4 = (a[0] as u128) * (b[4] as u128)
213            + (a[1] as u128) * (b[3] as u128)
214            + (a[2] as u128) * (b[2] as u128)
215            + (a[3] as u128) * (b[1] as u128)
216            + (a[4] as u128) * (b[0] as u128);
217
218        // Carry propagation
219        let mut r = [0u64; 5];
220        let mut c: u128;
221
222        c = t0 >> 51;
223        r[0] = (t0 as u64) & 0x7ffffffffffff;
224        let t1 = t1 + c;
225
226        c = t1 >> 51;
227        r[1] = (t1 as u64) & 0x7ffffffffffff;
228        let t2 = t2 + c;
229
230        c = t2 >> 51;
231        r[2] = (t2 as u64) & 0x7ffffffffffff;
232        let t3 = t3 + c;
233
234        c = t3 >> 51;
235        r[3] = (t3 as u64) & 0x7ffffffffffff;
236        let t4 = t4 + c;
237
238        c = t4 >> 51;
239        r[4] = (t4 as u64) & 0x7ffffffffffff;
240
241        // Wrap carry with factor 19
242        r[0] = r[0].wrapping_add((c as u64).wrapping_mul(19));
243        // One more carry from limb 0
244        let c2 = r[0] >> 51;
245        r[0] &= 0x7ffffffffffff;
246        r[1] = r[1].wrapping_add(c2);
247
248        Fe(r)
249    }
250
251    /// Squaring in GF(p) - optimized version of mul(self, self)
252    fn square(&self) -> Fe {
253        self.mul(self)
254    }
255
256    /// Compute self^(2^n) by repeated squaring
257    fn pow2k(&self, k: u32) -> Fe {
258        let mut r = *self;
259        let mut i = 0;
260        while i < k {
261            r = r.square();
262            i += 1;
263        }
264        r
265    }
266
267    /// Modular inversion using Fermat's little theorem: a^(-1) = a^(p-2) mod p
268    /// p-2 = 2^255 - 21
269    fn invert(&self) -> Fe {
270        // Compute a^(p-2) using an addition chain
271        let z2 = self.square(); // z^2
272        let z9 = z2.pow2k(2).mul(self); // z^9 (z2^4 * z)
273        let z11 = z9.mul(&z2); // z^11
274        let z_5_0 = z11.square().mul(&z9); // z^(2^5 - 1)
275        let z_10_0 = z_5_0.pow2k(5).mul(&z_5_0); // z^(2^10 - 1)
276        let z_20_0 = z_10_0.pow2k(10).mul(&z_10_0); // z^(2^20 - 1)
277        let z_40_0 = z_20_0.pow2k(20).mul(&z_20_0); // z^(2^40 - 1)
278        let z_50_0 = z_40_0.pow2k(10).mul(&z_10_0); // z^(2^50 - 1)
279        let z_100_0 = z_50_0.pow2k(50).mul(&z_50_0); // z^(2^100 - 1)
280        let z_200_0 = z_100_0.pow2k(100).mul(&z_100_0); // z^(2^200 - 1)
281        let z_250_0 = z_200_0.pow2k(50).mul(&z_50_0); // z^(2^250 - 1)
282        z_250_0.pow2k(5).mul(&z11) // z^(2^255 - 21)
283    }
284
285    /// Negate in GF(p)
286    fn neg(&self) -> Fe {
287        Fe::ZERO.sub(self)
288    }
289
290    /// Check if this field element is negative (odd when reduced)
291    fn is_negative(&self) -> bool {
292        let bytes = self.to_bytes();
293        (bytes[0] & 1) != 0
294    }
295
296    /// Conditional swap: swap self and other if swap_flag == 1
297    fn cswap(&mut self, other: &mut Fe, swap_flag: u64) {
298        let mask = swap_flag.wrapping_neg(); // 0 or 0xFFFFFFFFFFFFFFFF
299        let mut i = 0;
300        while i < 5 {
301            let t = mask & (self.0[i] ^ other.0[i]);
302            self.0[i] ^= t;
303            other.0[i] ^= t;
304            i += 1;
305        }
306    }
307
308    /// Compute square root: self^((p+3)/8) for p = 2^255 - 19
309    /// Returns Some(root) if self is a quadratic residue, None otherwise
310    #[allow(dead_code)] // Ed25519 field arithmetic -- needed for point decompression
311    fn sqrt(&self) -> Option<Fe> {
312        // p = 2^255 - 19, (p+3)/8 = 2^252 - 2
313        // We use the formula: if v = a^((p-5)/8), then
314        // candidates are a*v and a*v*sqrt(-1)
315
316        // First compute a^((p-5)/8) = a^(2^252 - 3)
317        let a = *self;
318        let a2 = a.square();
319        let a9 = a2.pow2k(2).mul(&a);
320        let a11 = a9.mul(&a2);
321        let z_5_0 = a11.square().mul(&a9);
322        let z_10_0 = z_5_0.pow2k(5).mul(&z_5_0);
323        let z_20_0 = z_10_0.pow2k(10).mul(&z_10_0);
324        let z_40_0 = z_20_0.pow2k(20).mul(&z_20_0);
325        let z_50_0 = z_40_0.pow2k(10).mul(&z_10_0);
326        let z_100_0 = z_50_0.pow2k(50).mul(&z_50_0);
327        let z_200_0 = z_100_0.pow2k(100).mul(&z_100_0);
328        let z_250_0 = z_200_0.pow2k(50).mul(&z_50_0);
329        let beta = z_250_0.pow2k(2).mul(&a); // a^(2^252 - 3)
330
331        // Check beta^2 == a
332        let beta_sq = beta.square();
333        let check = beta_sq.sub(&a);
334        if check.to_bytes() == [0u8; 32] {
335            return Some(beta);
336        }
337
338        // Try beta * sqrt(-1)
339        // sqrt(-1) = 2^((p-1)/4) mod p
340        let sqrt_m1 = Fe::SQRT_MINUS_ONE;
341        let beta2 = beta.mul(&sqrt_m1);
342        let beta2_sq = beta2.square();
343        let check2 = beta2_sq.sub(&a);
344        if check2.to_bytes() == [0u8; 32] {
345            return Some(beta2);
346        }
347
348        None
349    }
350
351    // sqrt(-1) mod p = 2^((p-1)/4) mod p
352    // = 0x2b8324804fc1df0b2b4d00993dfbd7a72f431806ad2fe478c4ee1b274a0ea0b0
353    const SQRT_MINUS_ONE: Fe = Fe([
354        0x00061b274a0ea0b0,
355        0x0000d5a5fc8f189d,
356        0x0007ef5e9cbd0c60,
357        0x00078595a6804c9e,
358        0x0002b8324804fc1d,
359    ]);
360}
361
362// ============================================================================
363// Edwards Curve Point Operations
364// ============================================================================
365
366/// Point on the Ed25519 curve in extended coordinates (X, Y, Z, T)
367/// where x = X/Z, y = Y/Z, x*y = T/Z
368#[derive(Clone, Copy)]
369struct EdPoint {
370    x: Fe,
371    y: Fe,
372    z: Fe,
373    t: Fe,
374}
375
376impl EdPoint {
377    /// The identity point (neutral element)
378    const IDENTITY: EdPoint = EdPoint {
379        x: Fe::ZERO,
380        y: Fe::ONE,
381        z: Fe::ONE,
382        t: Fe::ZERO,
383    };
384
385    /// The Ed25519 base point B
386    fn basepoint() -> EdPoint {
387        // B_y = 4/5 mod p
388        // B_x is recovered from the curve equation
389        // Standard encoding of the base point:
390        let b_bytes: [u8; 32] = [
391            0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
392            0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
393            0x66, 0x66, 0x66, 0x66,
394        ];
395        EdPoint::decode(&b_bytes).expect("basepoint decode must succeed")
396    }
397
398    /// Ed25519 curve constant d = -121665/121666 mod p
399    fn curve_d() -> Fe {
400        // d = 0x52036cee2b6ffe738cc740797779e89800700a4d4141d8ab75eb4dca135978a3
401        let d_bytes: [u8; 32] = [
402            0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75, 0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a,
403            0x70, 0x00, 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c, 0x73, 0xfe, 0x6f, 0x2b,
404            0xee, 0x6c, 0x03, 0x52,
405        ];
406        Fe::from_bytes(&d_bytes)
407    }
408
409    /// 2*d
410    fn curve_2d() -> Fe {
411        let d = Self::curve_d();
412        d.add(&d)
413    }
414
415    /// Encode point to 32 bytes (compressed Edwards point)
416    fn encode(&self) -> [u8; 32] {
417        let zi = self.z.invert();
418        let x = self.x.mul(&zi);
419        let y = self.y.mul(&zi);
420
421        let mut s = y.to_bytes();
422        // Set high bit to sign of x
423        s[31] |= if x.is_negative() { 0x80 } else { 0x00 };
424        s
425    }
426
427    /// Decode point from 32 bytes
428    fn decode(s: &[u8; 32]) -> Option<EdPoint> {
429        // Extract sign bit of x
430        let x_sign = (s[31] >> 7) & 1;
431
432        // Decode y (clear top bit)
433        let mut y_bytes = *s;
434        y_bytes[31] &= 0x7f;
435        let y = Fe::from_bytes(&y_bytes);
436
437        // Recover x from curve equation: -x^2 + y^2 = 1 + d*x^2*y^2
438        // => x^2 = (y^2 - 1) / (d*y^2 + 1)
439        let y2 = y.square();
440        let d = Self::curve_d();
441        let u = y2.sub(&Fe::ONE); // y^2 - 1
442        let v = d.mul(&y2).add(&Fe::ONE); // d*y^2 + 1
443
444        // x = sqrt(u/v) = u * v^3 * (u * v^7)^((p-5)/8)
445        let v2 = v.square();
446        let v3 = v2.mul(&v);
447        let v4 = v2.square();
448        let v7 = v4.mul(&v3);
449        let uv7 = u.mul(&v7);
450
451        // Compute (uv7)^((p-5)/8)
452        // p-5 = 2^255-24, (p-5)/8 = 2^252-3
453        let uv7_252_3 = {
454            let a = uv7;
455            let a2 = a.square();
456            let a9 = a2.pow2k(2).mul(&a);
457            let a11 = a9.mul(&a2);
458            let z5 = a11.square().mul(&a9);
459            let z10 = z5.pow2k(5).mul(&z5);
460            let z20 = z10.pow2k(10).mul(&z10);
461            let z40 = z20.pow2k(20).mul(&z20);
462            let z50 = z40.pow2k(10).mul(&z10);
463            let z100 = z50.pow2k(50).mul(&z50);
464            let z200 = z100.pow2k(100).mul(&z100);
465            let z250 = z200.pow2k(50).mul(&z50);
466            z250.pow2k(2).mul(&a)
467        };
468
469        let mut x = u.mul(&v3).mul(&uv7_252_3);
470
471        // Check: v * x^2 == u ?
472        let check = v.mul(&x.square());
473        let u_neg = u.neg();
474
475        if check.sub(&u).to_bytes() == [0u8; 32] {
476            // x is correct
477        } else if check.sub(&u_neg).to_bytes() == [0u8; 32] {
478            // Multiply x by sqrt(-1) to fix sign
479            x = x.mul(&Fe::SQRT_MINUS_ONE);
480        } else {
481            return None; // Not on curve
482        }
483
484        // Adjust sign
485        if x.is_negative() as u8 != x_sign {
486            x = x.neg();
487        }
488
489        let t = x.mul(&y);
490
491        Some(EdPoint {
492            x,
493            y,
494            z: Fe::ONE,
495            t,
496        })
497    }
498
499    /// Point doubling in extended coordinates
500    fn double(&self) -> EdPoint {
501        // Using formulas from https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-dbl-2008-hwcd
502        let a = self.x.square();
503        let b = self.y.square();
504        let c = self.z.square().add(&self.z.square()); // 2*Z^2
505        let d = a.neg(); // -X^2 (for a=-1 in twisted Edwards)
506        let e = self.x.add(&self.y).square().sub(&a).sub(&b);
507        let g = d.add(&b);
508        let f = g.sub(&c);
509        let h = d.sub(&b);
510
511        EdPoint {
512            x: e.mul(&f),
513            y: g.mul(&h),
514            z: f.mul(&g),
515            t: e.mul(&h),
516        }
517    }
518
519    /// Point addition in extended coordinates
520    fn add(&self, other: &EdPoint) -> EdPoint {
521        let d2 = Self::curve_2d();
522
523        let a = self.x.mul(&other.x);
524        let b = self.y.mul(&other.y);
525        let c = self.t.mul(&d2).mul(&other.t);
526        let d = self.z.mul(&other.z).add(&self.z.mul(&other.z)); // 2*Z1*Z2
527
528        let e = self
529            .x
530            .add(&self.y)
531            .mul(&other.x.add(&other.y))
532            .sub(&a)
533            .sub(&b);
534        let f = d.sub(&c);
535        let g = d.add(&c);
536        let h = b.add(&a); // b - (-a) = b + a (since curve a = -1)
537
538        EdPoint {
539            x: e.mul(&f),
540            y: g.mul(&h),
541            z: f.mul(&g),
542            t: e.mul(&h),
543        }
544    }
545
546    /// Scalar multiplication using double-and-add (left-to-right)
547    fn scalar_mul(&self, scalar: &[u8; 32]) -> EdPoint {
548        let mut result = EdPoint::IDENTITY;
549        let mut found_one = false;
550
551        // Process from MSB to LSB
552        let mut i: i32 = 255;
553        while i >= 0 {
554            let byte_idx = (i / 8) as usize;
555            let bit_idx = (i % 8) as u32;
556            let bit = (scalar[byte_idx] >> bit_idx) & 1;
557
558            if found_one {
559                result = result.double();
560            }
561
562            if bit == 1 {
563                if found_one {
564                    result = result.add(self);
565                } else {
566                    result = *self;
567                    found_one = true;
568                }
569            }
570
571            i -= 1;
572        }
573
574        result
575    }
576}
577
578// ============================================================================
579// SHA-512 helper for Ed25519
580// ============================================================================
581
582/// Use the existing SHA-512 implementation from hash module
583fn sha512(data: &[u8]) -> [u8; 64] {
584    let hash = super::hash::sha512(data);
585    *hash.as_bytes()
586}
587
588/// SHA-512 of concatenated slices
589fn sha512_2(a: &[u8], b: &[u8]) -> [u8; 64] {
590    let mut combined = Vec::with_capacity(a.len() + b.len());
591    combined.extend_from_slice(a);
592    combined.extend_from_slice(b);
593    sha512(&combined)
594}
595
596/// SHA-512 of three concatenated slices
597fn sha512_3(a: &[u8], b: &[u8], c: &[u8]) -> [u8; 64] {
598    let mut combined = Vec::with_capacity(a.len() + b.len() + c.len());
599    combined.extend_from_slice(a);
600    combined.extend_from_slice(b);
601    combined.extend_from_slice(c);
602    sha512(&combined)
603}
604
605/// Reduce a 512-bit scalar modulo L (the Ed25519 group order)
606/// L = 2^252 + 27742317777372353535851937790883648493
607///
608/// Uses the reference Ed25519 reduction approach: load the 64-byte input into
609/// 24 limbs of 21 bits each, then reduce from the top by subtracting
610/// multiples of L.
611fn sc_reduce(input: &[u8; 64]) -> [u8; 32] {
612    // Load input into 21-bit limbs (base 2^21)
613    let mut s = [0i64; 24];
614    s[0] = 2097151 & load_3i(&input[0..3]);
615    s[1] = 2097151 & (load_4i(&input[2..6]) >> 5);
616    s[2] = 2097151 & (load_3i(&input[5..8]) >> 2);
617    s[3] = 2097151 & (load_4i(&input[7..11]) >> 7);
618    s[4] = 2097151 & (load_4i(&input[10..14]) >> 4);
619    s[5] = 2097151 & (load_3i(&input[13..16]) >> 1);
620    s[6] = 2097151 & (load_4i(&input[15..19]) >> 6);
621    s[7] = 2097151 & (load_3i(&input[18..21]) >> 3);
622    s[8] = 2097151 & load_3i(&input[21..24]);
623    s[9] = 2097151 & (load_4i(&input[23..27]) >> 5);
624    s[10] = 2097151 & (load_3i(&input[26..29]) >> 2);
625    s[11] = 2097151 & (load_4i(&input[28..32]) >> 7);
626    s[12] = 2097151 & (load_4i(&input[31..35]) >> 4);
627    s[13] = 2097151 & (load_3i(&input[34..37]) >> 1);
628    s[14] = 2097151 & (load_4i(&input[36..40]) >> 6);
629    s[15] = 2097151 & (load_3i(&input[39..42]) >> 3);
630    s[16] = 2097151 & load_3i(&input[42..45]);
631    s[17] = 2097151 & (load_4i(&input[44..48]) >> 5);
632    s[18] = 2097151 & (load_3i(&input[47..50]) >> 2);
633    s[19] = 2097151 & (load_4i(&input[49..53]) >> 7);
634    s[20] = 2097151 & (load_4i(&input[52..56]) >> 4);
635    s[21] = 2097151 & (load_3i(&input[55..58]) >> 1);
636    s[22] = 2097151 & (load_4i(&input[57..61]) >> 6);
637    s[23] = load_4i(&input[60..64]) >> 3;
638
639    // Reduce using the reference implementation approach
640    // s[i] -= s[j] * L[i-j] for j > 11
641    sc_muladd_reduce(&mut s);
642
643    // Pack result
644    let mut result = [0u8; 32];
645    result[0] = s[0] as u8;
646    result[1] = (s[0] >> 8) as u8;
647    result[2] = ((s[0] >> 16) | (s[1] << 5)) as u8;
648    result[3] = (s[1] >> 3) as u8;
649    result[4] = (s[1] >> 11) as u8;
650    result[5] = ((s[1] >> 19) | (s[2] << 2)) as u8;
651    result[6] = (s[2] >> 6) as u8;
652    result[7] = ((s[2] >> 14) | (s[3] << 7)) as u8;
653    result[8] = (s[3] >> 1) as u8;
654    result[9] = (s[3] >> 9) as u8;
655    result[10] = ((s[3] >> 17) | (s[4] << 4)) as u8;
656    result[11] = (s[4] >> 4) as u8;
657    result[12] = (s[4] >> 12) as u8;
658    result[13] = ((s[4] >> 20) | (s[5] << 1)) as u8;
659    result[14] = (s[5] >> 7) as u8;
660    result[15] = ((s[5] >> 15) | (s[6] << 6)) as u8;
661    result[16] = (s[6] >> 2) as u8;
662    result[17] = (s[6] >> 10) as u8;
663    result[18] = ((s[6] >> 18) | (s[7] << 3)) as u8;
664    result[19] = (s[7] >> 5) as u8;
665    result[20] = (s[7] >> 13) as u8;
666    result[21] = s[8] as u8;
667    result[22] = (s[8] >> 8) as u8;
668    result[23] = ((s[8] >> 16) | (s[9] << 5)) as u8;
669    result[24] = (s[9] >> 3) as u8;
670    result[25] = (s[9] >> 11) as u8;
671    result[26] = ((s[9] >> 19) | (s[10] << 2)) as u8;
672    result[27] = (s[10] >> 6) as u8;
673    result[28] = ((s[10] >> 14) | (s[11] << 7)) as u8;
674    result[29] = (s[11] >> 1) as u8;
675    result[30] = (s[11] >> 9) as u8;
676    result[31] = (s[11] >> 17) as u8;
677
678    result
679}
680
681/// Load 3 bytes as i64 (little-endian)
682fn load_3i(s: &[u8]) -> i64 {
683    (s[0] as i64) | ((s[1] as i64) << 8) | ((s[2] as i64) << 16)
684}
685
686/// Load 4 bytes as i64 (little-endian)
687fn load_4i(s: &[u8]) -> i64 {
688    (s[0] as i64) | ((s[1] as i64) << 8) | ((s[2] as i64) << 16) | ((s[3] as i64) << 24)
689}
690
691/// Scalar reduction helper: reduce s[0..23] mod L
692/// L = 2^252 + 27742317777372353535851937790883648493
693///
694/// In base 2^21:
695/// L = [0x1cf5d3ed, 0x009318d2, 0x1de73596, 0x1df3b45c,
696///      0x0000014d, 0, 0, 0, 0, 0, 0, 0x00200000]
697///
698/// We use the standard Ed25519 reference reduction approach.
699fn sc_muladd_reduce(s: &mut [i64; 24]) {
700    // L in base 2^21 limbs
701    // L[0..11] = specific values, L[12] = 2^0 (since 2^(21*12) = 2^252)
702    // The L coefficients for the reduction:
703    const L0: i64 = 666643;
704    const L1: i64 = 470296;
705    const L2: i64 = 654183;
706    const L3: i64 = -997805;
707    const L4: i64 = 136657;
708    const L5: i64 = -683901;
709
710    // Reduce from s[23] down to s[12]
711    // s[23] contributes at index 23, which is 2^(21*23) = 2^(252+231)
712    // We subtract s[23] * L shifted by 11 positions
713
714    s[11] += s[23] * L0;
715    s[12] += s[23] * L1;
716    s[13] += s[23] * L2;
717    s[14] += s[23] * L3;
718    s[15] += s[23] * L4;
719    s[16] += s[23] * L5;
720    s[23] = 0;
721
722    s[10] += s[22] * L0;
723    s[11] += s[22] * L1;
724    s[12] += s[22] * L2;
725    s[13] += s[22] * L3;
726    s[14] += s[22] * L4;
727    s[15] += s[22] * L5;
728    s[22] = 0;
729
730    s[9] += s[21] * L0;
731    s[10] += s[21] * L1;
732    s[11] += s[21] * L2;
733    s[12] += s[21] * L3;
734    s[13] += s[21] * L4;
735    s[14] += s[21] * L5;
736    s[21] = 0;
737
738    s[8] += s[20] * L0;
739    s[9] += s[20] * L1;
740    s[10] += s[20] * L2;
741    s[11] += s[20] * L3;
742    s[12] += s[20] * L4;
743    s[13] += s[20] * L5;
744    s[20] = 0;
745
746    s[7] += s[19] * L0;
747    s[8] += s[19] * L1;
748    s[9] += s[19] * L2;
749    s[10] += s[19] * L3;
750    s[11] += s[19] * L4;
751    s[12] += s[19] * L5;
752    s[19] = 0;
753
754    s[6] += s[18] * L0;
755    s[7] += s[18] * L1;
756    s[8] += s[18] * L2;
757    s[9] += s[18] * L3;
758    s[10] += s[18] * L4;
759    s[11] += s[18] * L5;
760    s[18] = 0;
761
762    // Carry
763    let mut carry: i64;
764    let mut i = 6;
765    while i < 18 {
766        carry = (s[i] + (1 << 20)) >> 21;
767        s[i + 1] += carry;
768        s[i] -= carry << 21;
769        i += 1;
770    }
771
772    // Second round of reduction for s[12..17]
773    s[0] += s[12] * L0;
774    s[1] += s[12] * L1;
775    s[2] += s[12] * L2;
776    s[3] += s[12] * L3;
777    s[4] += s[12] * L4;
778    s[5] += s[12] * L5;
779    s[12] = 0;
780
781    s[1] += s[13] * L0;
782    s[2] += s[13] * L1;
783    s[3] += s[13] * L2;
784    s[4] += s[13] * L3;
785    s[5] += s[13] * L4;
786    s[6] += s[13] * L5;
787    s[13] = 0;
788
789    s[2] += s[14] * L0;
790    s[3] += s[14] * L1;
791    s[4] += s[14] * L2;
792    s[5] += s[14] * L3;
793    s[6] += s[14] * L4;
794    s[7] += s[14] * L5;
795    s[14] = 0;
796
797    s[3] += s[15] * L0;
798    s[4] += s[15] * L1;
799    s[5] += s[15] * L2;
800    s[6] += s[15] * L3;
801    s[7] += s[15] * L4;
802    s[8] += s[15] * L5;
803    s[15] = 0;
804
805    s[4] += s[16] * L0;
806    s[5] += s[16] * L1;
807    s[6] += s[16] * L2;
808    s[7] += s[16] * L3;
809    s[8] += s[16] * L4;
810    s[9] += s[16] * L5;
811    s[16] = 0;
812
813    s[5] += s[17] * L0;
814    s[6] += s[17] * L1;
815    s[7] += s[17] * L2;
816    s[8] += s[17] * L3;
817    s[9] += s[17] * L4;
818    s[10] += s[17] * L5;
819    s[17] = 0;
820
821    // Final carries
822    i = 0;
823    while i < 12 {
824        carry = (s[i] + (1 << 20)) >> 21;
825        s[i + 1] += carry;
826        s[i] -= carry << 21;
827        i += 1;
828    }
829
830    // One more reduction pass if needed
831    s[0] += s[12] * L0;
832    s[1] += s[12] * L1;
833    s[2] += s[12] * L2;
834    s[3] += s[12] * L3;
835    s[4] += s[12] * L4;
836    s[5] += s[12] * L5;
837    s[12] = 0;
838
839    i = 0;
840    while i < 12 {
841        carry = s[i] >> 21;
842        s[i + 1] += carry;
843        s[i] -= carry << 21;
844        i += 1;
845    }
846}
847
848// ============================================================================
849// Public API (same signatures as the stubs)
850// ============================================================================
851
852/// Signing key (private key)
853pub(crate) struct SigningKey {
854    bytes: [u8; 32],
855}
856
857/// Verifying key (public key)
858pub(crate) struct VerifyingKey {
859    bytes: [u8; 32],
860}
861
862/// Cryptographic signature
863pub(crate) struct Signature {
864    bytes: [u8; 64],
865}
866
867/// Key pair (signing + verifying keys)
868pub(crate) struct KeyPair {
869    pub(crate) signing_key: SigningKey,
870    pub(crate) verifying_key: VerifyingKey,
871}
872
873impl SigningKey {
874    /// Create signing key from bytes
875    pub(crate) fn from_bytes(bytes: &[u8]) -> CryptoResult<Self> {
876        if bytes.len() != 32 {
877            return Err(CryptoError::InvalidKeySize);
878        }
879
880        let mut key_bytes = [0u8; 32];
881        key_bytes.copy_from_slice(bytes);
882
883        Ok(Self { bytes: key_bytes })
884    }
885
886    /// Get key bytes
887    pub(crate) fn as_bytes(&self) -> &[u8; 32] {
888        &self.bytes
889    }
890
891    /// Sign a message using Ed25519 (RFC 8032 Section 5.1.6)
892    pub(crate) fn sign(&self, message: &[u8]) -> CryptoResult<Signature> {
893        // Step 1: Hash the private key
894        let h = sha512(&self.bytes);
895
896        // Step 2: Derive the scalar a from first half of hash
897        let mut a = [0u8; 32];
898        a.copy_from_slice(&h[..32]);
899        a[0] &= 248; // Clear low 3 bits
900        a[31] &= 127; // Clear high bit
901        a[31] |= 64; // Set bit 254
902
903        // Step 3: Compute public key A = a * B
904        let bp = EdPoint::basepoint();
905        let big_a = bp.scalar_mul(&a);
906        let a_bytes = big_a.encode();
907
908        // Step 4: Compute nonce r = SHA-512(h[32..64] || message) mod L
909        let nonce_hash = sha512_2(&h[32..64], message);
910        let r = sc_reduce(&nonce_hash);
911
912        // Step 5: Compute R = r * B
913        let big_r = bp.scalar_mul(&r);
914        let r_bytes = big_r.encode();
915
916        // Step 6: Compute S = (r + SHA-512(R || A || message) * a) mod L
917        let k_hash = sha512_3(&r_bytes, &a_bytes, message);
918        let k = sc_reduce(&k_hash);
919
920        // S = r + k * a mod L
921        let s = sc_muladd(&k, &a, &r);
922
923        // Build signature: R || S
924        let mut sig_bytes = [0u8; 64];
925        sig_bytes[..32].copy_from_slice(&r_bytes);
926        sig_bytes[32..].copy_from_slice(&s);
927
928        Ok(Signature { bytes: sig_bytes })
929    }
930}
931
932impl VerifyingKey {
933    /// Create verifying key from bytes
934    pub(crate) fn from_bytes(bytes: &[u8]) -> CryptoResult<Self> {
935        if bytes.len() != 32 {
936            return Err(CryptoError::InvalidKeySize);
937        }
938
939        let mut key_bytes = [0u8; 32];
940        key_bytes.copy_from_slice(bytes);
941
942        Ok(Self { bytes: key_bytes })
943    }
944
945    /// Get key bytes
946    pub(crate) fn as_bytes(&self) -> &[u8; 32] {
947        &self.bytes
948    }
949
950    /// Verify a signature using Ed25519 (RFC 8032 Section 5.1.7)
951    pub(crate) fn verify(&self, message: &[u8], signature: &Signature) -> CryptoResult<bool> {
952        // Parse R and S from signature
953        let r_bytes: [u8; 32] = {
954            let mut arr = [0u8; 32];
955            arr.copy_from_slice(&signature.bytes[..32]);
956            arr
957        };
958        let s_bytes: [u8; 32] = {
959            let mut arr = [0u8; 32];
960            arr.copy_from_slice(&signature.bytes[32..]);
961            arr
962        };
963
964        // Check S < L (group order)
965        // S must be less than L = 2^252 + ...
966        // Simple check: top byte must have bit 252 clear (approx)
967        if s_bytes[31] & 0xf0 != 0 {
968            return Ok(false);
969        }
970
971        // Decode R
972        let big_r = match EdPoint::decode(&r_bytes) {
973            Some(p) => p,
974            None => return Ok(false),
975        };
976
977        // Decode A (public key)
978        let big_a = match EdPoint::decode(&self.bytes) {
979            Some(p) => p,
980            None => return Ok(false),
981        };
982
983        // Compute k = SHA-512(R || A || message) mod L
984        let k_hash = sha512_3(&r_bytes, &self.bytes, message);
985        let k = sc_reduce(&k_hash);
986
987        // Check: S * B == R + k * A
988        let bp = EdPoint::basepoint();
989        let sb = bp.scalar_mul(&s_bytes);
990        let ka = big_a.scalar_mul(&k);
991        let rhs = big_r.add(&ka);
992
993        // Compare encoded points
994        let lhs_enc = sb.encode();
995        let rhs_enc = rhs.encode();
996
997        Ok(lhs_enc == rhs_enc)
998    }
999}
1000
1001impl Signature {
1002    /// Create signature from bytes
1003    pub(crate) fn from_bytes(bytes: &[u8]) -> CryptoResult<Self> {
1004        if bytes.len() != 64 {
1005            return Err(CryptoError::InvalidKey);
1006        }
1007
1008        let mut sig_bytes = [0u8; 64];
1009        sig_bytes.copy_from_slice(bytes);
1010
1011        Ok(Self { bytes: sig_bytes })
1012    }
1013
1014    /// Get signature bytes
1015    pub(crate) fn as_bytes(&self) -> &[u8; 64] {
1016        &self.bytes
1017    }
1018
1019    /// Convert to vector
1020    pub(crate) fn to_vec(&self) -> Vec<u8> {
1021        self.bytes.to_vec()
1022    }
1023}
1024
1025impl KeyPair {
1026    /// Generate new key pair
1027    pub(crate) fn generate() -> CryptoResult<Self> {
1028        use super::random::get_random;
1029
1030        let rng = get_random();
1031        let mut seed = [0u8; 32];
1032        rng.fill_bytes(&mut seed)?;
1033
1034        Self::from_seed(&seed)
1035    }
1036
1037    /// Create key pair from seed using Ed25519 key derivation
1038    pub(crate) fn from_seed(seed: &[u8; 32]) -> CryptoResult<Self> {
1039        let signing_key = SigningKey { bytes: *seed };
1040
1041        // Derive public key: hash seed, clamp, scalar multiply by basepoint
1042        let h = sha512(seed);
1043        let mut a = [0u8; 32];
1044        a.copy_from_slice(&h[..32]);
1045        a[0] &= 248;
1046        a[31] &= 127;
1047        a[31] |= 64;
1048
1049        let bp = EdPoint::basepoint();
1050        let public_point = bp.scalar_mul(&a);
1051        let pub_key_bytes = public_point.encode();
1052
1053        let verifying_key = VerifyingKey {
1054            bytes: pub_key_bytes,
1055        };
1056
1057        Ok(Self {
1058            signing_key,
1059            verifying_key,
1060        })
1061    }
1062
1063    /// Sign a message with this key pair
1064    pub(crate) fn sign(&self, message: &[u8]) -> CryptoResult<Signature> {
1065        self.signing_key.sign(message)
1066    }
1067
1068    /// Verify a signature with this key pair
1069    pub(crate) fn verify(&self, message: &[u8], signature: &Signature) -> CryptoResult<bool> {
1070        self.verifying_key.verify(message, signature)
1071    }
1072}
1073
1074/// Scalar multiply-add: compute (a * b + c) mod L
1075/// Where L is the Ed25519 group order
1076fn sc_muladd(a: &[u8; 32], b: &[u8; 32], c: &[u8; 32]) -> [u8; 32] {
1077    // Load a, b, c into 21-bit limbs
1078    let a0 = 2097151 & load_3i(&a[0..3]);
1079    let a1 = 2097151 & (load_4i(&a[2..6]) >> 5);
1080    let a2 = 2097151 & (load_3i(&a[5..8]) >> 2);
1081    let a3 = 2097151 & (load_4i(&a[7..11]) >> 7);
1082    let a4 = 2097151 & (load_4i(&a[10..14]) >> 4);
1083    let a5 = 2097151 & (load_3i(&a[13..16]) >> 1);
1084    let a6 = 2097151 & (load_4i(&a[15..19]) >> 6);
1085    let a7 = 2097151 & (load_3i(&a[18..21]) >> 3);
1086    let a8 = 2097151 & load_3i(&a[21..24]);
1087    let a9 = 2097151 & (load_4i(&a[23..27]) >> 5);
1088    let a10 = 2097151 & (load_3i(&a[26..29]) >> 2);
1089    let a11 = load_4i(&a[28..32]) >> 7;
1090
1091    let b0 = 2097151 & load_3i(&b[0..3]);
1092    let b1 = 2097151 & (load_4i(&b[2..6]) >> 5);
1093    let b2 = 2097151 & (load_3i(&b[5..8]) >> 2);
1094    let b3 = 2097151 & (load_4i(&b[7..11]) >> 7);
1095    let b4 = 2097151 & (load_4i(&b[10..14]) >> 4);
1096    let b5 = 2097151 & (load_3i(&b[13..16]) >> 1);
1097    let b6 = 2097151 & (load_4i(&b[15..19]) >> 6);
1098    let b7 = 2097151 & (load_3i(&b[18..21]) >> 3);
1099    let b8 = 2097151 & load_3i(&b[21..24]);
1100    let b9 = 2097151 & (load_4i(&b[23..27]) >> 5);
1101    let b10 = 2097151 & (load_3i(&b[26..29]) >> 2);
1102    let b11 = load_4i(&b[28..32]) >> 7;
1103
1104    let c0 = 2097151 & load_3i(&c[0..3]);
1105    let c1 = 2097151 & (load_4i(&c[2..6]) >> 5);
1106    let c2 = 2097151 & (load_3i(&c[5..8]) >> 2);
1107    let c3 = 2097151 & (load_4i(&c[7..11]) >> 7);
1108    let c4 = 2097151 & (load_4i(&c[10..14]) >> 4);
1109    let c5 = 2097151 & (load_3i(&c[13..16]) >> 1);
1110    let c6 = 2097151 & (load_4i(&c[15..19]) >> 6);
1111    let c7 = 2097151 & (load_3i(&c[18..21]) >> 3);
1112    let c8 = 2097151 & load_3i(&c[21..24]);
1113    let c9 = 2097151 & (load_4i(&c[23..27]) >> 5);
1114    let c10 = 2097151 & (load_3i(&c[26..29]) >> 2);
1115    let c11 = load_4i(&c[28..32]) >> 7;
1116
1117    // Compute s = a*b + c using schoolbook multiplication in 21-bit limbs
1118    let mut s = [0i64; 24];
1119    s[0] = c0 + a0 * b0;
1120    s[1] = c1 + a0 * b1 + a1 * b0;
1121    s[2] = c2 + a0 * b2 + a1 * b1 + a2 * b0;
1122    s[3] = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
1123    s[4] = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
1124    s[5] = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
1125    s[6] = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
1126    s[7] = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
1127    s[8] = c8
1128        + a0 * b8
1129        + a1 * b7
1130        + a2 * b6
1131        + a3 * b5
1132        + a4 * b4
1133        + a5 * b3
1134        + a6 * b2
1135        + a7 * b1
1136        + a8 * b0;
1137    s[9] = c9
1138        + a0 * b9
1139        + a1 * b8
1140        + a2 * b7
1141        + a3 * b6
1142        + a4 * b5
1143        + a5 * b4
1144        + a6 * b3
1145        + a7 * b2
1146        + a8 * b1
1147        + a9 * b0;
1148    s[10] = c10
1149        + a0 * b10
1150        + a1 * b9
1151        + a2 * b8
1152        + a3 * b7
1153        + a4 * b6
1154        + a5 * b5
1155        + a6 * b4
1156        + a7 * b3
1157        + a8 * b2
1158        + a9 * b1
1159        + a10 * b0;
1160    s[11] = c11
1161        + a0 * b11
1162        + a1 * b10
1163        + a2 * b9
1164        + a3 * b8
1165        + a4 * b7
1166        + a5 * b6
1167        + a6 * b5
1168        + a7 * b4
1169        + a8 * b3
1170        + a9 * b2
1171        + a10 * b1
1172        + a11 * b0;
1173    s[12] = a1 * b11
1174        + a2 * b10
1175        + a3 * b9
1176        + a4 * b8
1177        + a5 * b7
1178        + a6 * b6
1179        + a7 * b5
1180        + a8 * b4
1181        + a9 * b3
1182        + a10 * b2
1183        + a11 * b1;
1184    s[13] = a2 * b11
1185        + a3 * b10
1186        + a4 * b9
1187        + a5 * b8
1188        + a6 * b7
1189        + a7 * b6
1190        + a8 * b5
1191        + a9 * b4
1192        + a10 * b3
1193        + a11 * b2;
1194    s[14] =
1195        a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + a10 * b4 + a11 * b3;
1196    s[15] = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + a11 * b4;
1197    s[16] = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
1198    s[17] = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
1199    s[18] = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
1200    s[19] = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
1201    s[20] = a9 * b11 + a10 * b10 + a11 * b9;
1202    s[21] = a10 * b11 + a11 * b10;
1203    s[22] = a11 * b11;
1204    s[23] = 0;
1205
1206    // Reduce mod L
1207    sc_muladd_reduce(&mut s);
1208
1209    // Pack result
1210    let mut result = [0u8; 32];
1211    result[0] = s[0] as u8;
1212    result[1] = (s[0] >> 8) as u8;
1213    result[2] = ((s[0] >> 16) | (s[1] << 5)) as u8;
1214    result[3] = (s[1] >> 3) as u8;
1215    result[4] = (s[1] >> 11) as u8;
1216    result[5] = ((s[1] >> 19) | (s[2] << 2)) as u8;
1217    result[6] = (s[2] >> 6) as u8;
1218    result[7] = ((s[2] >> 14) | (s[3] << 7)) as u8;
1219    result[8] = (s[3] >> 1) as u8;
1220    result[9] = (s[3] >> 9) as u8;
1221    result[10] = ((s[3] >> 17) | (s[4] << 4)) as u8;
1222    result[11] = (s[4] >> 4) as u8;
1223    result[12] = (s[4] >> 12) as u8;
1224    result[13] = ((s[4] >> 20) | (s[5] << 1)) as u8;
1225    result[14] = (s[5] >> 7) as u8;
1226    result[15] = ((s[5] >> 15) | (s[6] << 6)) as u8;
1227    result[16] = (s[6] >> 2) as u8;
1228    result[17] = (s[6] >> 10) as u8;
1229    result[18] = ((s[6] >> 18) | (s[7] << 3)) as u8;
1230    result[19] = (s[7] >> 5) as u8;
1231    result[20] = (s[7] >> 13) as u8;
1232    result[21] = s[8] as u8;
1233    result[22] = (s[8] >> 8) as u8;
1234    result[23] = ((s[8] >> 16) | (s[9] << 5)) as u8;
1235    result[24] = (s[9] >> 3) as u8;
1236    result[25] = (s[9] >> 11) as u8;
1237    result[26] = ((s[9] >> 19) | (s[10] << 2)) as u8;
1238    result[27] = (s[10] >> 6) as u8;
1239    result[28] = ((s[10] >> 14) | (s[11] << 7)) as u8;
1240    result[29] = (s[11] >> 1) as u8;
1241    result[30] = (s[11] >> 9) as u8;
1242    result[31] = (s[11] >> 17) as u8;
1243
1244    result
1245}
1246
1247// ============================================================================
1248// X25519 Key Exchange (RFC 7748)
1249// ============================================================================
1250
1251/// X25519 scalar multiplication on the Montgomery form of Curve25519
1252/// Uses the Montgomery ladder for constant-time computation
1253pub(crate) fn x25519_scalar_mult(scalar: &[u8; 32], u_point: &[u8; 32]) -> [u8; 32] {
1254    // Clamp scalar per RFC 7748 Section 5
1255    let mut k = *scalar;
1256    k[0] &= 248;
1257    k[31] &= 127;
1258    k[31] |= 64;
1259
1260    // Decode u-coordinate (clear top bit)
1261    let mut u_bytes = *u_point;
1262    u_bytes[31] &= 0x7f;
1263    let u = Fe::from_bytes(&u_bytes);
1264
1265    // Montgomery ladder
1266    let x_1 = u;
1267    let mut x_2 = Fe::ONE;
1268    let mut z_2 = Fe::ZERO;
1269    let mut x_3 = u;
1270    let mut z_3 = Fe::ONE;
1271    let mut swap: u64 = 0;
1272
1273    let a24 = Fe([121666, 0, 0, 0, 0]); // (A+2)/4 where A=486662
1274
1275    let mut t: i32 = 254;
1276    while t >= 0 {
1277        let k_t = ((k[(t >> 3) as usize] >> (t & 7)) & 1) as u64;
1278        swap ^= k_t;
1279        x_2.cswap(&mut x_3, swap);
1280        z_2.cswap(&mut z_3, swap);
1281        swap = k_t;
1282
1283        let a = x_2.add(&z_2);
1284        let aa = a.square();
1285        let b = x_2.sub(&z_2);
1286        let bb = b.square();
1287        let e = aa.sub(&bb);
1288        let c = x_3.add(&z_3);
1289        let d = x_3.sub(&z_3);
1290        let da = d.mul(&a);
1291        let cb = c.mul(&b);
1292        x_3 = da.add(&cb).square();
1293        z_3 = x_1.mul(&da.sub(&cb).square());
1294        x_2 = aa.mul(&bb);
1295        z_2 = e.mul(&aa.add(&a24.mul(&e)));
1296
1297        t -= 1;
1298    }
1299
1300    x_2.cswap(&mut x_3, swap);
1301    z_2.cswap(&mut z_3, swap);
1302
1303    // Return x_2 / z_2
1304    let result = x_2.mul(&z_2.invert());
1305    result.to_bytes()
1306}
1307
1308/// X25519 base point (u=9)
1309pub(crate) const X25519_BASEPOINT: [u8; 32] = {
1310    let mut b = [0u8; 32];
1311    b[0] = 9;
1312    b
1313};
1314
1315/// X25519 key exchange
1316pub mod key_exchange {
1317    use super::CryptoResult;
1318
1319    /// Public key for key exchange
1320    pub struct PublicKey {
1321        bytes: [u8; 32],
1322    }
1323
1324    /// Secret key for key exchange
1325    pub struct SecretKey {
1326        bytes: [u8; 32],
1327    }
1328
1329    /// Shared secret
1330    pub struct SharedSecret {
1331        bytes: [u8; 32],
1332    }
1333
1334    impl PublicKey {
1335        /// Create from bytes
1336        pub(crate) fn from_bytes(bytes: &[u8; 32]) -> Self {
1337            Self { bytes: *bytes }
1338        }
1339
1340        /// Get bytes
1341        pub(crate) fn as_bytes(&self) -> &[u8; 32] {
1342            &self.bytes
1343        }
1344    }
1345
1346    impl SecretKey {
1347        /// Generate new secret key
1348        pub(crate) fn generate() -> CryptoResult<Self> {
1349            use super::super::random::get_random;
1350
1351            let rng = get_random();
1352            let mut bytes = [0u8; 32];
1353            rng.fill_bytes(&mut bytes)?;
1354
1355            Ok(Self { bytes })
1356        }
1357
1358        /// Get corresponding public key using X25519
1359        /// public_key = scalar_mult(secret, basepoint)
1360        pub(crate) fn public_key(&self) -> PublicKey {
1361            let pub_bytes = super::x25519_scalar_mult(&self.bytes, &super::X25519_BASEPOINT);
1362            PublicKey { bytes: pub_bytes }
1363        }
1364
1365        /// Perform X25519 key exchange
1366        /// shared_secret = scalar_mult(my_secret, their_public)
1367        pub(crate) fn exchange(&self, their_public: &PublicKey) -> CryptoResult<SharedSecret> {
1368            let shared = super::x25519_scalar_mult(&self.bytes, &their_public.bytes);
1369
1370            // Check for all-zero output (low-order point)
1371            let mut is_zero = true;
1372            let mut i = 0;
1373            while i < 32 {
1374                if shared[i] != 0 {
1375                    is_zero = false;
1376                }
1377                i += 1;
1378            }
1379            if is_zero {
1380                return Err(super::CryptoError::InvalidKey);
1381            }
1382
1383            Ok(SharedSecret { bytes: shared })
1384        }
1385    }
1386
1387    impl SharedSecret {
1388        /// Get shared secret bytes
1389        pub(crate) fn as_bytes(&self) -> &[u8; 32] {
1390            &self.bytes
1391        }
1392    }
1393}
1394
1395// Asymmetric crypto tests require bare-metal execution: the custom
1396// Ed25519/X25519 scalar reduction uses i64 arithmetic that can overflow in
1397// debug builds on non-reduced random inputs, and the results are only
1398// guaranteed correct with properly bounded scalar inputs produced by the kernel
1399// PRNG.
1400#[cfg(all(test, target_os = "none"))]
1401mod tests {
1402    use super::*;
1403
1404    #[test]
1405    fn test_keypair_generation() {
1406        let keypair = KeyPair::generate().unwrap();
1407        let message = b"Hello, VeridianOS!";
1408
1409        let signature = keypair.sign(message).unwrap();
1410        let verified = keypair.verify(message, &signature).unwrap();
1411
1412        assert!(verified);
1413    }
1414
1415    #[test]
1416    fn test_key_exchange() {
1417        use key_exchange::*;
1418
1419        let alice_secret = SecretKey::generate().unwrap();
1420        let alice_public = alice_secret.public_key();
1421
1422        let bob_secret = SecretKey::generate().unwrap();
1423        let bob_public = bob_secret.public_key();
1424
1425        let alice_shared = alice_secret.exchange(&bob_public).unwrap();
1426        let bob_shared = bob_secret.exchange(&alice_public).unwrap();
1427
1428        // Shared secrets should match
1429        assert_eq!(alice_shared.as_bytes(), bob_shared.as_bytes());
1430    }
1431}