tola_caps/primitives/const_utils.rs
1//! Const evaluation utilities
2
3use core::marker::PhantomData;
4use super::bool::{Bool, Present, Absent};
5
6/// Get string length in a const context
7pub const fn str_len(s: &str) -> usize {
8 s.len()
9}
10
11/// Compare two strings for equality in a const context
12pub const fn str_eq(a: &str, b: &str) -> bool {
13 let a = a.as_bytes();
14 let b = b.as_bytes();
15 if a.len() != b.len() {
16 return false;
17 }
18 let mut i = 0;
19 while i < a.len() {
20 if a[i] != b[i] {
21 return false;
22 }
23 i += 1;
24 }
25 true
26}
27
28/// FNV-1a 64-bit Hash for strings (const fn)
29pub const fn fnv1a_64_str(s: &str) -> u64 {
30 let bytes = s.as_bytes();
31 let mut hash: u64 = 0xcbf29ce484222325;
32 let mut i = 0;
33 while i < bytes.len() {
34 hash ^= bytes[i] as u64;
35 hash = hash.wrapping_mul(0x100000001b3);
36 i += 1;
37 }
38 hash
39}
40
41// =============================================================================
42// 512-bit Hash (4 × 128-bit)
43// =============================================================================
44//
45// Cascaded hashing with different seeds for collision resistance.
46
47/// FNV-1a 128-bit with custom seed
48pub const fn fnv1a_128_seeded(s: &str, seed: u128) -> u128 {
49 let bytes = s.as_bytes();
50 let prime: u128 = 309485009821345068724781371;
51 let mut hash: u128 = seed;
52 let mut i = 0;
53 while i < bytes.len() {
54 hash = hash ^ (bytes[i] as u128);
55 hash = hash.wrapping_mul(prime);
56 i += 1;
57 }
58 hash
59}
60
61/// Generate 512-bit hash as 4 × u128
62/// Each component uses different seed to ensure independence
63pub const fn hash_512_h0(s: &str) -> u128 {
64 fnv1a_128_seeded(s, 0xcbf29ce484222325cbf29ce484222325)
65}
66
67pub const fn hash_512_h1(s: &str) -> u128 {
68 let h0 = hash_512_h0(s);
69 fnv1a_128_seeded(s, 0x100000001b3100000001b3 ^ h0.rotate_left(64))
70}
71
72pub const fn hash_512_h2(s: &str) -> u128 {
73 let h1 = hash_512_h1(s);
74 fnv1a_128_seeded(s, 0x84222325cbf29ce484222325cbf29ce4 ^ h1.rotate_left(32))
75}
76
77pub const fn hash_512_h3(s: &str) -> u128 {
78 let h2 = hash_512_h2(s);
79 fnv1a_128_seeded(s, 0x1b3100000001b310000000100000001 ^ h2.rotate_left(96))
80}
81
82/// Extract nibble N (0-127) from 512-bit hash
83pub const fn hash_512_nibble(s: &str, n: usize) -> u8 {
84 let (hash, shift) = if n < 32 {
85 (hash_512_h0(s), n * 4)
86 } else if n < 64 {
87 (hash_512_h1(s), (n - 32) * 4)
88 } else if n < 96 {
89 (hash_512_h2(s), (n - 64) * 4)
90 } else {
91 (hash_512_h3(s), (n - 96) * 4)
92 };
93 ((hash >> shift) & 0xF) as u8
94}
95
96/// Extract nibble N (0-15) from 64-bit FNV-1a hash
97/// Used for HashStream16 generation from module paths
98pub const fn hash_nibble(s: &str, n: u8) -> u8 {
99 let hash = fnv1a_64_str(s);
100 ((hash >> (n * 4)) & 0xF) as u8
101}
102
103// =============================================================================
104// Raw Byte → Nibble extraction
105// =============================================================================
106//
107// Extract nibbles directly from string bytes without hashing.
108
109/// Get nibble at position N from string
110/// N=0 → high nibble of byte 0, N=1 → low nibble of byte 0, etc.
111pub const fn get_nibble(s: &str, n: u8) -> u8 {
112 let bytes = s.as_bytes();
113 let byte_idx = (n / 2) as usize;
114 let is_high = n.is_multiple_of(2);
115
116 if byte_idx < bytes.len() {
117 if is_high {
118 (bytes[byte_idx] >> 4) & 0xF
119 } else {
120 bytes[byte_idx] & 0xF
121 }
122 } else {
123 0
124 }
125}
126
127// =============================================================================
128// Raw Byte Packing
129// =============================================================================
130//
131// Pack string bytes directly into u128 without hashing.
132
133/// Pack bytes [offset, offset+16) of string into u128
134/// Pads with 0 if string is shorter
135pub const fn pack_bytes_u128(s: &str, offset: usize) -> u128 {
136 let bytes = s.as_bytes();
137 let mut result: u128 = 0;
138 let mut i = 0;
139 while i < 16 {
140 let idx = offset + i;
141 let byte = if idx < bytes.len() { bytes[idx] } else { 0 };
142 result |= (byte as u128) << (i * 8);
143 i += 1;
144 }
145 result
146}
147
148/// Pack first 64 bytes of string into 4 × u128
149/// For ByteStream<C0, C1, C2, C3>
150pub const fn pack_c0(s: &str) -> u128 { pack_bytes_u128(s, 0) }
151pub const fn pack_c1(s: &str) -> u128 { pack_bytes_u128(s, 16) }
152pub const fn pack_c2(s: &str) -> u128 { pack_bytes_u128(s, 32) }
153pub const fn pack_c3(s: &str) -> u128 { pack_bytes_u128(s, 48) }
154
155/// FNV-1a 128-bit Hash
156///
157/// Parameters from FNV spec:
158/// offset_basis: 144066263297769815596495629667062367629
159/// prime: 309485009821345068724781371
160pub const fn fnv1a_128(input: &[u8]) -> u128 {
161 let prime: u128 = 309485009821345068724781371;
162 let mut hash: u128 = 144066263297769815596495629667062367629;
163
164 let mut i = 0;
165 while i < input.len() {
166 hash = hash ^ (input[i] as u128);
167 hash = hash.wrapping_mul(prime);
168 i += 1;
169 }
170 hash
171}
172
173/// FNV-1a 128-bit Hash for strings (const fn)
174pub const fn fnv1a_128_str(s: &str) -> u128 {
175 fnv1a_128(s.as_bytes())
176}
177
178/// Concatenate path and name and hash them
179pub const fn hash_path_name(path: &str, name: &str) -> u128 {
180 // We can't allocate a string, so we must feed bytes sequentially to hash.
181 // Re-impl hashing to handle two slices.
182 let prime: u128 = 309485009821345068724781371;
183 let mut hash: u128 = 144066263297769815596495629667062367629;
184
185 // Hash path
186 let p = path.as_bytes();
187 let mut i = 0;
188 while i < p.len() {
189 hash = hash ^ (p[i] as u128);
190 hash = hash.wrapping_mul(prime);
191 i += 1;
192 }
193
194 // Hash separator "::"
195 hash = hash ^ (b':' as u128); hash = hash.wrapping_mul(prime);
196 hash = hash ^ (b':' as u128); hash = hash.wrapping_mul(prime);
197
198 // Hash name
199 let n = name.as_bytes();
200 i = 0;
201 while i < n.len() {
202 hash = hash ^ (n[i] as u128);
203 hash = hash.wrapping_mul(prime);
204 i += 1;
205 }
206
207 hash
208}
209
210/// Get the byte at index `i` of the concatenated `path + "::" + name` string.
211/// Returns 0 if index is out of bounds.
212pub const fn get_path_name_byte(path: &str, name: &str, idx: usize) -> u8 {
213 let p = path.as_bytes();
214 let n = name.as_bytes();
215 let sep_len = 2; // "::"
216
217 if idx < p.len() {
218 return p[idx];
219 }
220
221 let idx_after_path = idx - p.len();
222 if idx_after_path < sep_len {
223 return b':';
224 }
225
226 let idx_after_sep = idx_after_path - sep_len;
227 if idx_after_sep < n.len() {
228 return n[idx_after_sep];
229 }
230
231 0 // Out of bounds / Padding
232}
233
234// =============================================================================
235// PathIdentity
236// =============================================================================
237
238/// Pack 16 bytes of a string starting at offset into a u128.
239/// Returns 0 for bytes beyond string length.
240pub const fn pack_bytes_16(path: &str, name: &str, offset: usize) -> u128 {
241 let mut result: u128 = 0;
242 let mut i = 0;
243 while i < 16 {
244 let byte = get_path_name_byte(path, name, offset + i);
245 result |= (byte as u128) << (i * 8);
246 i += 1;
247 }
248 result
249}
250
251// =============================================================================
252// UniqueId - Hybrid Identity (Type-Hash + Type-Body)
253// =============================================================================
254
255use crate::primitives::identity::IdentityEq;
256use crate::primitives::pack::TupleEq;
257
258
259/// Hybrid Unique Identity.
260///
261/// - `HASH`: A type-level tuple (32 nibbles) for O(1) matching.
262/// - `BODY`: A type-level tuple of `Segment` (32 nibbles each) for full path verification.
263#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
264pub struct UniqueId<Hash, Body>(PhantomData<(Hash, Body)>);
265
266// -----------------------------------------------------------------------------
267// IdentityEq Logic with Lazy Gating
268// -----------------------------------------------------------------------------
269
270impl<H1, H2, B1, B2> IdentityEq<UniqueId<H2, B2>> for UniqueId<H1, B1>
271where
272 // 1. Compare Hash (Type-Level). Returns Present or Absent.
273 H1: TupleEq<H2>,
274 // 2. Gate the Body check based on Hash result.
275 // If HashResult is Absent, this trait implementation simply returns Absent
276 // WITHOUT resolving `B1: TupleEq<B2>`. True Lazy!
277 <H1 as TupleEq<H2>>::Out: GateBodyEq<B1, B2>,
278{
279 type Out = <<H1 as TupleEq<H2>>::Out as GateBodyEq<B1, B2>>::Out;
280}
281
282/// Gate trait for lazy body evaluation.
283pub trait GateBodyEq<B1, B2> {
284 type Out: Bool;
285}
286
287// Case 1: Hash Mismatch (Absent) -> Short-circuit
288impl<B1, B2> GateBodyEq<B1, B2> for Absent {
289 type Out = Absent;
290}
291
292// Case 2: Hash Match (Present) -> Compare Bodies
293impl<B1, B2> GateBodyEq<B1, B2> for Present
294where
295 B1: crate::primitives::pack::ItemEq<B2>,
296{
297 type Out = <B1 as crate::primitives::pack::ItemEq<B2>>::Out;
298}
299
300// =============================================================================
301// ConstIdentity
302// =============================================================================
303//
304// Stores up to 256 bytes of path data in 16 x u128 const generic parameters.
305// Different const values = different types = exact comparison.
306
307/// Pack 16 bytes of a string starting at offset into a u128.
308/// Returns 0 for bytes beyond string length.
309pub const fn pack_str_chunk(s: &str, offset: usize) -> u128 {
310 let bytes = s.as_bytes();
311 let mut result: u128 = 0;
312 let mut i = 0;
313 while i < 16 {
314 let idx = offset + i;
315 let byte = if idx < bytes.len() { bytes[idx] } else { 0 };
316 result |= (byte as u128) << (i * 8);
317 i += 1;
318 }
319 result
320}
321
322// =============================================================================
323// CharIdentity - Type-Level Character Identity
324// =============================================================================
325//
326// Tiered IList for exact string matching at compile time:
327// - Tier 1-4: Full IList for strings ≤64 chars
328// - Tier 5: Smart sampling (head+mid+tail) for >64 chars
329
330/// Character type with const generic
331pub struct C<const CHAR: char>;
332
333/// HList node: head + tail
334pub struct IList<Head, Tail>(PhantomData<(Head, Tail)>);
335
336/// HList terminator
337pub struct INil;
338
339/// Collision marker - triggers compile error if two >64 char strings collide.
340#[allow(non_camel_case_types)]
341pub struct COLLISION_DETECTED_INCREASE_SAMPLING_SIZE;
342
343// Tiered wrapper types - each tier is a distinct type
344pub struct IList8<T>(PhantomData<T>); // ≤8 chars
345pub struct IList16<T>(PhantomData<T>); // ≤16 chars
346pub struct IList32<T>(PhantomData<T>); // ≤32 chars
347pub struct IList64<T>(PhantomData<T>); // ≤64 chars
348pub struct IListSampled<T>(PhantomData<T>); // >64 chars (sampled)
349
350// =============================================================================
351// String → Identity Helper Functions
352// =============================================================================
353
354/// Smart sampling for strings > 64 chars
355/// Strategy: head(32) + mid(16) + tail(16) = 64 samples
356/// This maximizes differentiation for long strings
357pub const fn sample_indices_64(len: usize) -> [usize; 64] {
358 let mut indices = [0usize; 64];
359 if len <= 64 {
360 // Short string: take all chars
361 let mut i = 0;
362 while i < 64 {
363 indices[i] = if i < len { i } else { len }; // Out of bounds → '\0'
364 i += 1;
365 }
366 } else {
367 // Long string: sample head(32) + mid(16) + tail(16)
368 let mut i = 0;
369 // Head: first 32 chars
370 while i < 32 {
371 indices[i] = i;
372 i += 1;
373 }
374 // Mid: sample from middle 16 chars
375 let mid_start = (len - 16) / 2;
376 let mut j = 0;
377 while j < 16 {
378 indices[32 + j] = mid_start + j;
379 j += 1;
380 }
381 // Tail: last 16 chars
382 let mut k = 0;
383 while k < 16 {
384 indices[48 + k] = len - 16 + k;
385 k += 1;
386 }
387 }
388 indices
389}
390
391/// Extract character at position N from string
392/// Returns '\0' for out-of-bounds positions
393pub const fn str_char_at(s: &str, n: usize) -> char {
394 let bytes = s.as_bytes();
395 if n >= bytes.len() {
396 return '\0';
397 }
398
399 // Simple ASCII for now (module paths are typically ASCII)
400 // For Unicode, we'd need full UTF-8 decoding
401 let byte = bytes[n];
402 if byte < 128 {
403 byte as char
404 } else {
405 // For non-ASCII, use the byte value as char (limited Unicode support)
406 // This is safe for const fn context
407 '?' // Placeholder for non-ASCII
408 }
409}
410
411/// Get effective length for identity (max 64 chars)
412pub const fn identity_len(s: &str) -> usize {
413 let len = s.len();
414 if len > 64 { 64 } else { len }
415}
416
417/// DEPRECATED: Old TypeMarker approach (no longer used)
418#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
419pub struct TypeMarker<T>(PhantomData<T>);
420
421impl<T> IdentityEq<TypeMarker<T>> for TypeMarker<T> {
422 type Out = Present;
423}
424
425// =============================================================================
426// IdentityBytes
427// =============================================================================
428//
429// Stores up to 64 bytes of module_path in 4 × u128 const generic parameters.
430// Different const values = different types = exact identity comparison.
431
432/// Identity type storing up to 64 bytes of path data.
433#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
434pub struct IdentityBytes<
435 const B0: u128, const B1: u128, const B2: u128, const B3: u128
436>(PhantomData<()>);
437
438/// Const fn to pack 16 bytes of a string into a u128.
439pub const fn pack_str_bytes(s: &str, offset: usize) -> u128 {
440 let bytes = s.as_bytes();
441 let mut result: u128 = 0;
442 let mut i = 0;
443 while i < 16 {
444 let idx = offset + i;
445 let byte = if idx < bytes.len() { bytes[idx] } else { 0 };
446 result |= (byte as u128) << (i * 8);
447 i += 1;
448 }
449 result
450}
451
452// IdentityEq implementation: Compare ANY two IdentityBytes types
453//
454// Uses SelectBool to convert const comparison to type-level bool.
455// This allows Bucket traversal to work: different Identity -> Absent -> continue search.
456//
457// Note: This requires `generic_const_exprs` feature on nightly, OR we need a different approach.
458// For now, we keep the simple impl and rely on hash uniqueness to avoid Bucket comparisons.
459impl<
460 const B0: u128, const B1: u128, const B2: u128, const B3: u128,
461> IdentityEq<IdentityBytes<B0, B1, B2, B3>> for IdentityBytes<B0, B1, B2, B3>
462{
463 type Out = Present;
464}
465
466// For different IdentityBytes, we need a blanket impl that returns Absent.
467// But Rust doesn't allow overlapping impls without specialization.
468//
469// WORKAROUND: Since we use full module_path in both Stream hash AND Identity,
470// two capabilities with the same Stream hash MUST have the same Identity
471// (because they're from the same module with the same name).
472// Therefore, Bucket collision (same hash, different Identity) should NEVER happen.
473//
474// If it does happen (extremely rare 64-bit hash collision), compilation fails safely.