// Refuse to build on anything but little-endian targets. The `Pod` structs in
// this file expose their multi-byte integer fields without any byte-order
// conversion, so the in-memory representation follows the host's endianness.
#[cfg(not(target_endian = "little"))]
compile_error!("JAM format requires a little-endian platform");
3
4use bytemuck::{Pod, Zeroable};
5
/// Four-byte file signature: ASCII `JAM` followed by a NUL byte.
///
/// `Header::validate` rejects any header whose `magic` field differs.
pub const MAGIC: [u8; 4] = *b"JAM\0";

/// The single format version that `Header::validate` accepts.
pub const VERSION: u32 = 3;
8
/// Alignment granularity, in bytes, used by [`align_to_page`].
pub const PAGE_SIZE: usize = 4096;

// The mask arithmetic in `align_to_page` is only correct for power-of-two
// sizes; fail the build instead of silently mis-aligning if this ever changes.
const _: () = assert!(PAGE_SIZE.is_power_of_two());

/// Rounds `offset` up to the nearest multiple of [`PAGE_SIZE`].
///
/// An `offset` that is already a multiple of [`PAGE_SIZE`] (including `0`) is
/// returned unchanged.
///
/// # Panics
///
/// `offset + PAGE_SIZE - 1` must fit in `usize`. For offsets within
/// `PAGE_SIZE - 1` of `usize::MAX` the addition overflows: this panics when
/// overflow checks are enabled and wraps around otherwise.
#[inline]
pub const fn align_to_page(offset: usize) -> usize {
    const MASK: usize = PAGE_SIZE - 1;
    (offset + MASK) & !MASK
}
/// Number of low-order hash bits that [`bucket_id`] uses to pick a bucket.
pub const BUCKET_BITS: u8 = 8;

/// Number of buckets, derived from [`BUCKET_BITS`] so the two cannot drift.
///
/// `Header::validate` requires the header's `bucket_count` to equal this.
pub const BUCKET_COUNT: usize = 1usize << BUCKET_BITS;

/// Expected value of the header's `entry_size` field (checked by
/// `Header::validate`); matches the 12-byte size asserted for `Entry`.
pub const ENTRY_SIZE: usize = 12;

/// Size of `Header` in bytes; matches the 160-byte size asserted for it.
pub const HEADER_SIZE: usize = 160;

/// Size of one `BucketMeta` record in bytes; matches the 32-byte size asserted
/// for it.
pub const BUCKET_META_SIZE: usize = 32;

/// Total bytes occupied by [`BUCKET_COUNT`] `BucketMeta` records.
pub const BUCKET_TABLE_SIZE: usize = BUCKET_COUNT * BUCKET_META_SIZE;

/// `HEADER_SIZE + BUCKET_TABLE_SIZE`.
///
/// NOTE(review): presumably the first offset available for payload when the
/// header and bucket table are stored back to back at the start of the file —
/// confirm against the writer.
pub const DATA_START: usize = HEADER_SIZE + BUCKET_TABLE_SIZE;

// `Header::validate` compares against `BUCKET_COUNT as u16` and
// `ENTRY_SIZE as u8`; make sure those narrowing casts can never truncate.
const _: () = assert!(BUCKET_COUNT <= u16::MAX as usize && ENTRY_SIZE <= u8::MAX as usize);

/// Maps a hash to its bucket index in `0..BUCKET_COUNT`.
///
/// The index is the low [`BUCKET_BITS`] bits of `hash`; all higher bits are
/// ignored.
#[inline(always)]
pub fn bucket_id(hash: u64) -> usize {
    // BUCKET_COUNT is a power of two, so `count - 1` is the low-bit mask.
    const MASK: u64 = BUCKET_COUNT as u64 - 1;
    (hash & MASK) as usize
}
27
28#[repr(C)]
29#[derive(Debug, Clone, Copy, Pod, Zeroable)]
30pub struct Header {
31 pub magic: [u8; 4],
32 pub version: u32,
33 pub flags: u64,
34
35 pub entry_count: u64,
36 pub unique_hash_count: u64,
37 pub sample_count: u32,
38 pub bucket_count: u16,
39 pub bucket_bits: u8,
40 pub entry_size: u8,
41
42 pub hash_threshold: u64,
43 pub kmer_size: u8,
44 pub _param_reserved: [u8; 7],
45
46 pub bucket_table_offset: u64,
47 pub entries_offset: u64,
48 pub filters_offset: u64,
49 pub bias_table_offset: u64,
50
51 pub entries_size: u64,
52 pub filters_size: u64,
53 pub bias_table_size: u64,
54
55 pub sample_names_offset: u64,
56 pub sample_names_size: u64,
57 pub sample_sizes_offset: u64,
58 pub sample_sizes_size: u64,
59
60 pub _padding: [u8; 16],
61}
62
63pub const FLAG_HAS_BIAS_TABLE: u64 = 1 << 0;
64
65const _: () = assert!(std::mem::size_of::<Header>() == 160);
66
67impl Header {
68 pub fn validate(&self) -> Result<(), FormatError> {
69 if self.magic != MAGIC {
70 return Err(FormatError::InvalidMagic(self.magic));
71 }
72 if self.version != VERSION {
73 return Err(FormatError::UnsupportedVersion(self.version));
74 }
75 if self.bucket_count != BUCKET_COUNT as u16 {
76 return Err(FormatError::InvalidBucketCount(self.bucket_count));
77 }
78 if self.entry_size != ENTRY_SIZE as u8 {
79 return Err(FormatError::InvalidEntrySize(self.entry_size));
80 }
81 if self.hash_threshold == 0 {
82 return Err(FormatError::InvalidHashThreshold);
83 }
84 Ok(())
85 }
86}
87
88#[repr(C)]
89#[derive(Debug, Clone, Copy, Pod, Zeroable, Default)]
90pub struct BucketMeta {
91 pub entry_offset: u64,
92 pub entry_count: u64,
93 pub filter_offset: u64,
94 pub filter_size: u64,
95}
96
97const _: () = assert!(std::mem::size_of::<BucketMeta>() == 32);
98
99#[repr(C, packed)]
100#[derive(Debug, Clone, Copy, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord)]
101pub struct Entry {
102 pub hash: u64,
103 pub sample_id: u32,
104}
105
106const _: () = assert!(std::mem::size_of::<Entry>() == 12);
107
108impl Entry {
109 #[inline]
110 pub fn new(hash: u64, sample_id: u32) -> Self {
111 Self { hash, sample_id }
112 }
113
114 #[inline]
115 pub fn bucket_id(&self) -> usize {
116 bucket_id(self.hash)
117 }
118}
119
120#[derive(Debug, thiserror::Error)]
121pub enum FormatError {
122 #[error("Invalid magic bytes: {0:?}")]
123 InvalidMagic([u8; 4]),
124
125 #[error("Unsupported version: {0}")]
126 UnsupportedVersion(u32),
127
128 #[error("Invalid bucket count: {0}")]
129 InvalidBucketCount(u16),
130
131 #[error("Invalid entry size: {0}")]
132 InvalidEntrySize(u8),
133
134 #[error("Invalid hash threshold: must be > 0")]
135 InvalidHashThreshold,
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a header that passes every check in `Header::validate`; all
    /// fields that `validate` does not inspect are zero. Individual tests
    /// overwrite exactly one field to trigger a specific error.
    fn valid_header() -> Header {
        Header {
            magic: MAGIC,
            version: VERSION,
            bucket_count: BUCKET_COUNT as u16,
            entry_size: ENTRY_SIZE as u8,
            hash_threshold: u64::MAX,
            ..Header::zeroed()
        }
    }

    #[test]
    fn test_struct_sizes() {
        assert_eq!(std::mem::size_of::<Header>(), 160);
        assert_eq!(std::mem::size_of::<BucketMeta>(), 32);
        assert_eq!(std::mem::size_of::<Entry>(), 12);
    }

    #[test]
    fn test_align_to_page() {
        assert_eq!(align_to_page(0), 0);
        assert_eq!(align_to_page(1), PAGE_SIZE);
        assert_eq!(align_to_page(PAGE_SIZE), PAGE_SIZE);
        assert_eq!(align_to_page(PAGE_SIZE + 1), 2 * PAGE_SIZE);
    }

    #[test]
    fn test_bucket_id() {
        assert_eq!(bucket_id(0x0000_0000_0000_0000), 0);
        assert_eq!(bucket_id(0x0000_0000_0000_00FF), 255);
        assert_eq!(bucket_id(0xFFFF_FFFF_FFFF_FF00), 0);
        assert_eq!(bucket_id(0xABCD_EF12_3456_7842), 0x42);
    }

    #[test]
    fn test_entry_ordering() {
        let e1 = Entry::new(100, 1);
        let e2 = Entry::new(100, 2);
        let e3 = Entry::new(200, 1);

        // Ordering is by hash first, then by sample id.
        assert!(e1 < e2);
        assert!(e2 < e3);
        assert!(e1 < e3);
    }

    #[test]
    fn test_bucket_id_distribution() {
        // Hashes are reduced below a small threshold to check that the low
        // byte still spreads evenly across buckets.
        let threshold: u64 = (u64::MAX as f64 * 0.001) as u64;
        let mut bucket_counts = [0usize; 256];

        for i in 0..100_000u64 {
            let hash = i.wrapping_mul(0x517cc1b727220a95) % threshold;
            bucket_counts[bucket_id(hash)] += 1;
        }

        // Integer division on purpose: the expected count is rounded down.
        let avg = 100_000 / 256;
        for (i, &count) in bucket_counts.iter().enumerate() {
            let deviation = (count as f64 - avg as f64).abs() / avg as f64;
            assert!(deviation < 0.3, "Bucket {} has skewed count: {}", i, count);
        }
    }

    #[test]
    fn test_header_validate_valid() {
        let header = valid_header();
        assert!(header.validate().is_ok());
    }

    #[test]
    fn test_header_validate_zero_threshold() {
        let mut header = valid_header();
        header.hash_threshold = 0;
        assert!(matches!(
            header.validate(),
            Err(FormatError::InvalidHashThreshold)
        ));
    }

    #[test]
    fn test_header_validate_bad_magic() {
        let mut header = valid_header();
        header.magic = *b"BAD\0";
        assert!(matches!(
            header.validate(),
            Err(FormatError::InvalidMagic(_))
        ));
    }

    #[test]
    fn test_header_validate_bad_version() {
        let mut header = valid_header();
        header.version = 99;
        assert!(matches!(
            header.validate(),
            Err(FormatError::UnsupportedVersion(99))
        ));
    }

    #[test]
    fn test_header_validate_bad_bucket_count() {
        let mut header = valid_header();
        header.bucket_count = 0;
        assert!(matches!(
            header.validate(),
            Err(FormatError::InvalidBucketCount(0))
        ));
    }

    #[test]
    fn test_header_validate_bad_entry_size() {
        let mut header = valid_header();
        header.entry_size = 16;
        assert!(matches!(
            header.validate(),
            Err(FormatError::InvalidEntrySize(16))
        ));
    }

    #[test]
    fn test_entry_bucket_id() {
        let entry = Entry::new(0xABCD_EF12_3456_7842, 5);
        assert_eq!(entry.bucket_id(), 0x42);
    }
}