1use crate::bias::HashBiasTable;
2use crate::format::{
3 BUCKET_COUNT, BUCKET_TABLE_SIZE, BucketMeta, ENTRY_SIZE, Entry, FLAG_HAS_BIAS_TABLE,
4 FormatError, HEADER_SIZE, Header, PAGE_SIZE, bucket_id,
5};
6use crate::writer::FILTER_DESCRIPTOR_SIZE;
7use memmap2::{Mmap, MmapOptions};
8use std::fs::File;
9use std::io;
10use std::path::Path;
11use std::sync::Arc;
12use xorf::{BinaryFuse8Ref, Filter, FilterRef};
13
14#[cfg(unix)]
15use memmap2::{Advice, UncheckedAdvice};
16
/// Errors produced while opening or querying a `.jam` index file.
#[derive(Debug, thiserror::Error)]
pub enum ReaderError {
    /// Underlying file open or mmap operation failed.
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),

    /// The header failed validation against the on-disk format rules.
    #[error("Format error: {0}")]
    Format(#[from] FormatError),

    /// A bucket's binary-fuse filter section was malformed or truncated.
    #[error("Invalid filter data for bucket {bucket}: {message}")]
    InvalidFilter { bucket: usize, message: String },

    /// The file ended before a section the header declares.
    #[error("File too small: expected at least {expected} bytes, got {actual}")]
    FileTooSmall { expected: usize, actual: usize },

    /// The sample-name or sample-size section was inconsistent with the header.
    #[error("Invalid sample data: {message}")]
    InvalidSampleData { message: String },
}
34
/// Summary statistics for an opened `.jam` file, as reported by `JamReader::stats`.
#[derive(Debug, Clone)]
pub struct ReaderStats {
    /// Total number of (hash, sample) entries across all buckets.
    pub entry_count: u64,
    /// Number of distinct hashes in the file.
    pub unique_hash_count: u64,
    /// Number of samples indexed.
    pub sample_count: u32,
    /// Size of the mapped file in bytes.
    pub file_size: u64,
    /// K-mer length recorded in the header at build time.
    pub kmer_size: u8,
    /// Hash threshold from the header; the reader uses it as the hash-space
    /// upper bound when interpolating into sorted entries.
    pub hash_threshold: u64,
    /// Per-bucket entry counts, indexed by bucket id.
    pub bucket_entry_counts: [u64; BUCKET_COUNT],
    /// Whether an embedded hash-bias table is present.
    pub has_bias_table: bool,
}
46
/// Absolute byte offsets (within the full-file mmap) of one bucket's
/// binary-fuse filter, parsed once when the reader is opened.
struct FilterMeta {
    /// Offset of the fixed-size filter descriptor.
    descriptor_offset: usize,
    /// Offset of the fingerprint array that follows the descriptor.
    fingerprints_offset: usize,
    /// Length of the fingerprint array in bytes.
    fingerprints_size: usize,
}
52
/// Borrowed, zero-copy view of a single bucket's binary-fuse filter.
pub struct BucketFilter<'a> {
    /// The reader's full-file memory map, viewed as bytes.
    mmap: &'a [u8],
    /// Offsets of this bucket's filter inside `mmap`.
    meta: &'a FilterMeta,
}
57
58impl BucketFilter<'_> {
59 #[inline]
60 pub fn contains(&self, hash: &u64) -> bool {
61 let descriptor = &self.mmap
62 [self.meta.descriptor_offset..self.meta.descriptor_offset + FILTER_DESCRIPTOR_SIZE];
63 let fingerprints = &self.mmap[self.meta.fingerprints_offset
64 ..self.meta.fingerprints_offset + self.meta.fingerprints_size];
65 BinaryFuse8Ref::from_dma(descriptor, fingerprints).contains(hash)
66 }
67}
68
/// Filter offsets relative to a `BucketRegion`'s private mapping (not the
/// whole file), cached when the region is opened.
struct CachedFilterMeta {
    /// Start of the descriptor within the region's mmap.
    descriptor_start: usize,
    /// Descriptor length in bytes.
    descriptor_size: usize,
    /// Start of the fingerprint array within the region's mmap.
    fingerprints_start: usize,
    /// Fingerprint array length in bytes.
    fingerprints_size: usize,
}
75
/// A standalone, page-aligned memory mapping covering one bucket's filter
/// and entry data. Unlike queries through `JamReader`, a region owns its
/// own mmap and so can be used and dropped independently of the reader's
/// full-file mapping.
pub struct BucketRegion {
    /// Private mapping beginning at the page containing the bucket's filter.
    mmap: Mmap,
    /// Offset from the start of `mmap` to the bucket's actual data
    /// (compensates for rounding the mapping down to a page boundary).
    data_offset: usize,
    /// Size in bytes of the filter section (0 when the bucket has none).
    filter_size: usize,
    /// Number of `Entry` records following the filter section.
    entry_count: usize,
    /// Parsed filter offsets, or `None` when the bucket has no filter.
    filter_meta: Option<CachedFilterMeta>,
}
83
84impl BucketRegion {
85 #[inline]
86 pub fn filter_contains(&self, hash: &u64) -> bool {
87 let meta = match &self.filter_meta {
88 Some(m) => m,
89 None => return false,
90 };
91
92 let descriptor =
93 &self.mmap[meta.descriptor_start..meta.descriptor_start + meta.descriptor_size];
94 let fingerprints =
95 &self.mmap[meta.fingerprints_start..meta.fingerprints_start + meta.fingerprints_size];
96 BinaryFuse8Ref::from_dma(descriptor, fingerprints).contains(hash)
97 }
98
99 #[inline]
100 pub fn entries(&self) -> &[Entry] {
101 if self.entry_count == 0 {
102 return &[];
103 }
104 let start = self.data_offset + self.filter_size;
105 let end = start + self.entry_count * ENTRY_SIZE;
106 bytemuck::cast_slice(&self.mmap[start..end])
107 }
108
109 #[inline]
110 pub fn entry_count(&self) -> usize {
111 self.entry_count
112 }
113
114 #[inline]
115 pub fn is_empty(&self) -> bool {
116 self.filter_size == 0 && self.entry_count == 0
117 }
118}
119
/// Memory-mapped reader for a `.jam` index file.
///
/// All metadata (header, bucket table, per-bucket filter offsets, optional
/// bias table and sample sections) is parsed once in `open`; subsequent
/// queries are zero-copy reads against the mmap.
pub struct JamReader {
    /// Kept open and shared so `open_bucket_region` can create further mappings.
    file: Arc<File>,
    /// Read-only mapping of the entire file.
    mmap: Mmap,
    /// Validated copy of the file header.
    header: Header,
    /// One `BucketMeta` per bucket (`BUCKET_COUNT` total).
    bucket_table: Vec<BucketMeta>,
    /// Pre-parsed filter offsets; `None` for buckets without a filter.
    filters: Vec<Option<FilterMeta>>,
    /// Embedded hash-bias table, when the header flags one.
    bias_table: Option<Arc<HashBiasTable>>,
    /// Sample names (synthesized as `sample_N` when the section is absent).
    sample_names: Vec<String>,
    /// Per-sample sizes (all zeros when the section is absent).
    sample_sizes: Vec<u64>,
}
130
impl JamReader {
    /// Open a `.jam` file and eagerly parse and validate its metadata:
    /// header, bucket table, every bucket's filter header, the optional
    /// embedded bias table, and the optional sample name/size sections.
    ///
    /// # Errors
    /// Returns a `ReaderError` on I/O failure, header validation failure,
    /// truncated sections, or inconsistent sample metadata.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, ReaderError> {
        let file = Arc::new(File::open(path.as_ref())?);
        // memmap2 marks mapping as unsafe because the file could be mutated
        // externally; the explicit size checks below guard every read.
        let mmap = unsafe { Mmap::map(file.as_ref())? };

        if mmap.len() < HEADER_SIZE {
            return Err(ReaderError::FileTooSmall {
                expected: HEADER_SIZE,
                actual: mmap.len(),
            });
        }

        let header: Header = *bytemuck::from_bytes(&mmap[..HEADER_SIZE]);
        header.validate()?;

        // The fixed-size bucket table immediately follows the header.
        let table_end = HEADER_SIZE + BUCKET_TABLE_SIZE;
        if mmap.len() < table_end {
            return Err(ReaderError::FileTooSmall {
                expected: table_end,
                actual: mmap.len(),
            });
        }

        let bucket_table: Vec<BucketMeta> =
            bytemuck::cast_slice(&mmap[HEADER_SIZE..table_end]).to_vec();

        // Parse every bucket's filter header up front so per-query lookups
        // are just slice reads.
        let mut filters = Vec::with_capacity(BUCKET_COUNT);
        for (i, meta) in bucket_table.iter().enumerate() {
            if meta.filter_size == 0 {
                filters.push(None);
                continue;
            }

            let filter_meta = parse_filter_meta(&mmap, meta, i)?;
            filters.push(Some(filter_meta));
        }

        // Embedded bias table is optional: requires the flag plus a
        // non-zero offset and size.
        let bias_table = if header.flags & FLAG_HAS_BIAS_TABLE != 0
            && header.bias_table_offset > 0
            && header.bias_table_size > 0
        {
            let offset = header.bias_table_offset as usize;
            let size = header.bias_table_size as usize;
            if offset + size > mmap.len() {
                return Err(ReaderError::FileTooSmall {
                    expected: offset + size,
                    actual: mmap.len(),
                });
            }
            let bias_data = &mmap[offset..offset + size];
            let table =
                HashBiasTable::from_bytes(bias_data).map_err(|e| ReaderError::InvalidFilter {
                    // NOTE(review): `bucket: 0` is a placeholder — the bias
                    // table is file-global, not tied to a bucket. A dedicated
                    // error variant would be clearer; confirm before changing.
                    bucket: 0,
                    message: format!("Failed to parse embedded bias table: {}", e),
                })?;
            Some(Arc::new(table))
        } else {
            None
        };

        // Sample names: read from the file when the section exists,
        // otherwise synthesize "sample_N" placeholders.
        let sample_names = if header.sample_names_offset > 0 && header.sample_names_size > 0 {
            let offset = header.sample_names_offset as usize;
            let size = header.sample_names_size as usize;
            if offset + size > mmap.len() {
                return Err(ReaderError::FileTooSmall {
                    expected: offset + size,
                    actual: mmap.len(),
                });
            }
            let names = parse_sample_names(&mmap[offset..offset + size], header.sample_count)?;
            if names.len() != header.sample_count as usize {
                return Err(ReaderError::InvalidSampleData {
                    message: format!(
                        "sample names count mismatch: got {}, expected {}",
                        names.len(),
                        header.sample_count
                    ),
                });
            }
            names
        } else {
            (0..header.sample_count)
                .map(|i| format!("sample_{}", i))
                .collect()
        };

        // Sample sizes: fixed 8 bytes per sample; absent section means
        // all-zero sizes.
        let sample_sizes = if header.sample_sizes_offset > 0 && header.sample_sizes_size > 0 {
            let offset = header.sample_sizes_offset as usize;
            let size = header.sample_sizes_size as usize;
            let expected_size = header.sample_count as usize * 8;
            if size != expected_size {
                return Err(ReaderError::InvalidSampleData {
                    message: format!(
                        "sample sizes section size mismatch: got {}, expected {}",
                        size, expected_size
                    ),
                });
            }
            if offset + size > mmap.len() {
                return Err(ReaderError::FileTooSmall {
                    expected: offset + size,
                    actual: mmap.len(),
                });
            }
            parse_sample_sizes(&mmap[offset..offset + size])
        } else {
            vec![0u64; header.sample_count as usize]
        };

        Ok(Self {
            file,
            mmap,
            header,
            bucket_table,
            filters,
            bias_table,
            sample_names,
            sample_sizes,
        })
    }

    /// Create an independent, page-aligned mapping covering one bucket's
    /// filter bytes plus its entry array, and pre-parse the filter header.
    ///
    /// Empty buckets get a 1-byte anonymous mapping so the returned region
    /// is always valid to query.
    ///
    /// # Errors
    /// Returns `ReaderError::InvalidFilter` when the filter header inside
    /// the region is malformed or truncated.
    pub fn open_bucket_region(&self, bucket_idx: usize) -> Result<BucketRegion, ReaderError> {
        let meta = &self.bucket_table[bucket_idx];

        if meta.filter_size == 0 && meta.entry_count == 0 {
            // Nothing to map; hand back a trivially-empty region.
            let empty_mmap = MmapOptions::new().len(1).map_anon()?.make_read_only()?;
            return Ok(BucketRegion {
                mmap: empty_mmap,
                data_offset: 0,
                filter_size: 0,
                entry_count: 0,
                filter_meta: None,
            });
        }

        let region_start = meta.filter_offset as usize;
        let data_size = meta.filter_size as usize + (meta.entry_count as usize) * ENTRY_SIZE;

        // mmap offsets must be page-aligned: round down to the containing
        // page and remember how far into the mapping the real data starts.
        // (Assumes PAGE_SIZE is a power of two.)
        let page_start = region_start & !(PAGE_SIZE - 1);
        let data_offset = region_start - page_start;
        let mmap_len = data_offset + data_size;

        let mmap = unsafe {
            MmapOptions::new()
                .offset(page_start as u64)
                .len(mmap_len)
                .map(self.file.as_ref())?
        };

        #[cfg(unix)]
        {
            // Region consumers typically scan linearly; best-effort hint.
            let _ = mmap.advise(Advice::Sequential);
        }

        // Parse the filter header (two LE u32 sizes) relative to this
        // region's own mapping, mirroring `parse_filter_meta`.
        let filter_meta = if meta.filter_size > 0 {
            let filter_data_start = data_offset;
            let filter_data =
                &mmap[filter_data_start..filter_data_start + meta.filter_size as usize];

            if filter_data.len() >= 8 {
                let descriptor_size =
                    u32::from_le_bytes(filter_data[0..4].try_into().unwrap()) as usize;
                let fingerprints_size =
                    u32::from_le_bytes(filter_data[4..8].try_into().unwrap()) as usize;

                if descriptor_size != FILTER_DESCRIPTOR_SIZE {
                    return Err(ReaderError::InvalidFilter {
                        bucket: bucket_idx,
                        message: format!(
                            "unexpected descriptor size in bucket region: {} (expected {})",
                            descriptor_size, FILTER_DESCRIPTOR_SIZE
                        ),
                    });
                }

                if filter_data.len() >= 8 + descriptor_size + fingerprints_size {
                    Some(CachedFilterMeta {
                        descriptor_start: filter_data_start + 8,
                        descriptor_size,
                        fingerprints_start: filter_data_start + 8 + descriptor_size,
                        fingerprints_size,
                    })
                } else {
                    return Err(ReaderError::InvalidFilter {
                        bucket: bucket_idx,
                        message: format!(
                            "filter data truncated: need {} bytes, have {}",
                            8 + descriptor_size + fingerprints_size,
                            filter_data.len()
                        ),
                    });
                }
            } else {
                return Err(ReaderError::InvalidFilter {
                    bucket: bucket_idx,
                    message: format!(
                        "filter header too small: need 8 bytes, have {}",
                        filter_data.len()
                    ),
                });
            }
        } else {
            None
        };

        Ok(BucketRegion {
            mmap,
            data_offset,
            filter_size: meta.filter_size as usize,
            entry_count: meta.entry_count as usize,
            filter_meta,
        })
    }

    /// Bucket-table entry for `bucket_idx`. Panics on an out-of-range index.
    #[inline]
    pub fn bucket_meta(&self, bucket_idx: usize) -> &BucketMeta {
        &self.bucket_table[bucket_idx]
    }

    /// Hash threshold recorded in the header (used for interpolation).
    #[inline]
    pub fn threshold(&self) -> u64 {
        self.header.hash_threshold
    }

    /// K-mer length recorded in the header.
    #[inline]
    pub fn kmer_size(&self) -> u8 {
        self.header.kmer_size
    }

    /// Shared handle to the embedded bias table, if present.
    #[inline]
    pub fn bias_table(&self) -> Option<Arc<HashBiasTable>> {
        self.bias_table.clone()
    }

    /// Whether the file embeds a bias table.
    #[inline]
    pub fn has_bias_table(&self) -> bool {
        self.bias_table.is_some()
    }

    /// All sample names, indexed by sample id.
    pub fn sample_names(&self) -> &[String] {
        &self.sample_names
    }

    /// Name for sample `id`, or `None` when out of range.
    pub fn sample_name(&self, id: u32) -> Option<&str> {
        self.sample_names.get(id as usize).map(|s| s.as_str())
    }

    /// All sample sizes, indexed by sample id.
    pub fn sample_sizes(&self) -> &[u64] {
        &self.sample_sizes
    }

    /// Size for sample `id`, or `None` when out of range.
    pub fn sample_size(&self, id: u32) -> Option<u64> {
        self.sample_sizes.get(id as usize).copied()
    }

    /// Snapshot of file-level statistics (copied from the header and
    /// bucket table; cheap, no I/O beyond the mmap).
    pub fn stats(&self) -> ReaderStats {
        let mut bucket_entry_counts = [0u64; BUCKET_COUNT];
        for (i, meta) in self.bucket_table.iter().enumerate() {
            bucket_entry_counts[i] = meta.entry_count;
        }

        ReaderStats {
            entry_count: self.header.entry_count,
            unique_hash_count: self.header.unique_hash_count,
            sample_count: self.header.sample_count,
            file_size: self.mmap.len() as u64,
            kmer_size: self.header.kmer_size,
            hash_threshold: self.header.hash_threshold,
            bucket_entry_counts,
            has_bias_table: self.bias_table.is_some(),
        }
    }

    /// Zero-copy view of a bucket's sorted entry array in the full-file mmap.
    #[inline]
    pub fn bucket_entries(&self, bucket_idx: usize) -> &[Entry] {
        let meta = &self.bucket_table[bucket_idx];
        if meta.entry_count == 0 {
            return &[];
        }

        let start = meta.entry_offset as usize;
        let end = start + (meta.entry_count as usize) * ENTRY_SIZE;
        bytemuck::cast_slice(&self.mmap[start..end])
    }

    /// Absolute byte range `[start, end)` of a bucket's entry array in the file.
    #[inline]
    pub fn bucket_entry_byte_range(&self, bucket_idx: usize) -> (usize, usize) {
        let meta = &self.bucket_table[bucket_idx];
        let start = meta.entry_offset as usize;
        let end = start + (meta.entry_count as usize) * ENTRY_SIZE;
        (start, end)
    }

    /// Absolute byte range `[start, end)` of a bucket's filter section in the file.
    #[inline]
    pub fn bucket_filter_byte_range(&self, bucket_idx: usize) -> (usize, usize) {
        let meta = &self.bucket_table[bucket_idx];
        let start = meta.filter_offset as usize;
        let end = start + meta.filter_size as usize;
        (start, end)
    }

    /// Advise the kernel (best-effort, errors ignored) that the pages
    /// covering `[start, end)` are no longer needed, letting it reclaim
    /// page-cache memory.
    #[cfg(unix)]
    pub fn release_pages(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        // Round outward to whole pages; madvise operates on page granularity.
        let page_start = start & !(PAGE_SIZE - 1);
        let page_end = (end + PAGE_SIZE - 1) & !(PAGE_SIZE - 1);
        let len = page_end.saturating_sub(page_start);
        if len > 0 && page_end <= self.mmap.len() {
            let _ = unsafe {
                self.mmap
                    .unchecked_advise_range(UncheckedAdvice::DontNeed, page_start, len)
            };
        }
    }

    /// No-op on non-Unix targets (no madvise equivalent is used).
    #[cfg(not(unix))]
    pub fn release_pages(&self, _start: usize, _end: usize) {
    }

    /// Release both the filter and entry pages of one bucket.
    pub fn release_bucket(&self, bucket_idx: usize) {
        let (filter_start, filter_end) = self.bucket_filter_byte_range(bucket_idx);
        let (entry_start, entry_end) = self.bucket_entry_byte_range(bucket_idx);
        self.release_pages(filter_start, filter_end);
        self.release_pages(entry_start, entry_end);
    }

    /// Hint the kernel that access will be random (best-effort).
    #[cfg(unix)]
    pub fn advise_random(&self) {
        let _ = self.mmap.advise(Advice::Random);
    }

    /// No-op on non-Unix targets.
    #[cfg(not(unix))]
    pub fn advise_random(&self) {
    }

    /// Borrowed filter view for `bucket_idx`, or `None` when the bucket
    /// has no filter.
    #[inline]
    pub fn bucket_filter(&self, bucket_idx: usize) -> Option<BucketFilter<'_>> {
        self.filters[bucket_idx].as_ref().map(|meta| BucketFilter {
            mmap: &self.mmap,
            meta,
        })
    }

    /// Exact membership test for `hash`: probe the bucket's filter for a
    /// fast negative, then confirm against the sorted entries.
    ///
    /// NOTE(review): a bucket with entries but no filter answers `false`
    /// here — this assumes the writer always emits a filter alongside
    /// entries; confirm against the writer before relying on it.
    #[inline]
    pub fn contains(&self, hash: u64) -> bool {
        let bucket_idx = bucket_id(hash);

        if let Some(filter) = self.bucket_filter(bucket_idx) {
            if !filter.contains(&hash) {
                return false;
            }
        } else {
            return false;
        }

        let entries = self.bucket_entries(bucket_idx);
        self.interpolation_search(entries, hash).is_some()
    }

    /// Iterate the sample ids of every entry whose hash equals `hash`.
    ///
    /// Entries for the same hash are adjacent in the sorted array, so the
    /// iterator skips to the first candidate and then takes the equal run.
    #[inline]
    pub fn search(&self, hash: u64) -> impl Iterator<Item = u32> + '_ {
        let bucket_idx = bucket_id(hash);

        // True when the bucket has a filter and it (possibly falsely)
        // claims the hash is present.
        let dominated = self
            .bucket_filter(bucket_idx)
            .is_some_and(|f| f.contains(&hash));

        let entries = if dominated {
            self.bucket_entries(bucket_idx)
        } else {
            &[]
        };

        let start = if entries.is_empty() {
            0
        } else {
            self.interpolation_find_start(entries, hash)
        };

        entries[start..]
            .iter()
            .skip_while(move |e| e.hash < hash)
            .take_while(move |e| e.hash == hash)
            .map(|e| e.sample_id)
    }

    /// Find the index of an entry with hash `key`, scanning forward from
    /// the interpolated start position. Returns `None` when absent.
    fn interpolation_search(&self, entries: &[Entry], key: u64) -> Option<usize> {
        if entries.is_empty() {
            return None;
        }

        let start = self.interpolation_find_start(entries, key);

        for (i, entry) in entries[start..].iter().enumerate() {
            if entry.hash == key {
                return Some(start + i);
            }
            // Entries are sorted; once past `key` it cannot appear.
            if entry.hash > key {
                break;
            }
        }

        None
    }

    /// Interpolate a starting index for `key` in the sorted `entries`.
    ///
    /// Estimates position as key/threshold scaled by the entry count
    /// (hashes are assumed roughly uniform below the threshold), backs off
    /// a small safety margin, then walks backward past any equal-or-greater
    /// hashes so callers see the first entry of an equal run.
    ///
    /// Assumes `entries` is non-empty (callers check) and the header
    /// threshold is non-zero; `count - 1` / the division would misbehave
    /// otherwise.
    #[inline]
    fn interpolation_find_start(&self, entries: &[Entry], key: u64) -> usize {
        let count = entries.len();
        let threshold = self.threshold();

        // u128 math avoids overflow of key * count.
        let est = ((key as u128 * count as u128) / threshold as u128) as usize;

        let est = est.saturating_sub(16).min(count - 1);

        if entries[est].hash > key {
            // Overshot: step back to the first index whose predecessor is < key.
            let mut i = est;
            while i > 0 && entries[i - 1].hash >= key {
                i -= 1;
            }
            i
        } else {
            est
        }
    }
}
558
559fn parse_sample_names(data: &[u8], count: u32) -> Result<Vec<String>, ReaderError> {
560 let mut names = Vec::with_capacity(count as usize);
561 let mut offset = 0;
562
563 for i in 0..count {
564 if offset + 2 > data.len() {
565 return Err(ReaderError::InvalidSampleData {
566 message: format!(
567 "truncated sample names section: cannot read length for sample {}",
568 i
569 ),
570 });
571 }
572 let len = u16::from_le_bytes(data[offset..offset + 2].try_into().unwrap()) as usize;
573 offset += 2;
574 if offset + len > data.len() {
575 return Err(ReaderError::InvalidSampleData {
576 message: format!(
577 "truncated sample names section: cannot read name for sample {} (need {} bytes, have {})",
578 i,
579 len,
580 data.len() - offset
581 ),
582 });
583 }
584 names.push(String::from_utf8_lossy(&data[offset..offset + len]).to_string());
585 offset += len;
586 }
587
588 Ok(names)
589}
590
/// Decode the sample-size section: consecutive little-endian `u64` values,
/// one per sample. Trailing bytes that do not fill a full 8-byte chunk are
/// ignored (the caller validates the section length).
fn parse_sample_sizes(data: &[u8]) -> Vec<u64> {
    let mut sizes = Vec::with_capacity(data.len() / 8);
    for chunk in data.chunks_exact(8) {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(chunk);
        sizes.push(u64::from_le_bytes(raw));
    }
    sizes
}
596
597fn parse_filter_meta(
598 mmap: &Mmap,
599 meta: &BucketMeta,
600 bucket_idx: usize,
601) -> Result<FilterMeta, ReaderError> {
602 let start = meta.filter_offset as usize;
603 let end = start + meta.filter_size as usize;
604
605 if end > mmap.len() {
606 return Err(ReaderError::InvalidFilter {
607 bucket: bucket_idx,
608 message: format!(
609 "filter extends beyond file: {}..{} > {}",
610 start,
611 end,
612 mmap.len()
613 ),
614 });
615 }
616
617 let data = &mmap[start..end];
618
619 if data.len() < 8 {
620 return Err(ReaderError::InvalidFilter {
621 bucket: bucket_idx,
622 message: "filter data too small for header".to_string(),
623 });
624 }
625
626 let descriptor_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
627 let fingerprints_size = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize;
628
629 if descriptor_size != FILTER_DESCRIPTOR_SIZE {
630 return Err(ReaderError::InvalidFilter {
631 bucket: bucket_idx,
632 message: format!(
633 "unexpected descriptor size: {} (expected {})",
634 descriptor_size, FILTER_DESCRIPTOR_SIZE
635 ),
636 });
637 }
638
639 let expected_size = 8 + descriptor_size + fingerprints_size;
640 if data.len() < expected_size {
641 return Err(ReaderError::InvalidFilter {
642 bucket: bucket_idx,
643 message: format!("filter data too small: {} < {}", data.len(), expected_size),
644 });
645 }
646
647 Ok(FilterMeta {
648 descriptor_offset: start + 8,
649 fingerprints_offset: start + 8 + descriptor_size,
650 fingerprints_size,
651 })
652}
653
#[cfg(test)]
mod tests {
    use super::*;
    use crate::writer::{BuildConfig, build};
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Write the given (name, sequence) pairs as a temporary FASTA file.
    fn make_fasta(seqs: &[(&str, &str)]) -> NamedTempFile {
        let mut f = NamedTempFile::with_suffix(".fa").unwrap();
        for (name, seq) in seqs {
            writeln!(f, ">{name}").unwrap();
            writeln!(f, "{seq}").unwrap();
        }
        f
    }

    // Building a tiny index and reopening it yields sane header stats.
    #[test]
    fn test_reader_open() {
        let input = make_fasta(&[("seq1", "ATCGATCGATCGATCGATCGATCGATCGATCG")]);
        let output_dir = tempfile::tempdir().unwrap();
        let output_path = output_dir.path().join("test.jam");

        let config = BuildConfig {
            kmer_size: 11,
            fscale: 1,
            num_threads: 2,
            memory: 1,
            ..Default::default()
        };

        build(&[input.path().to_path_buf()], &output_path, &config).unwrap();

        let reader = JamReader::open(&output_path).unwrap();
        let stats = reader.stats();

        assert!(stats.entry_count > 0);
        assert_eq!(stats.sample_count, 1);
        assert_eq!(stats.kmer_size, 11);
    }

    // A hash known to be in the index is found by both contains() and search().
    #[test]
    fn test_reader_search() {
        let input = make_fasta(&[("seq1", "ATCGATCGATCGATCGATCGATCGATCGATCG")]);
        let output_dir = tempfile::tempdir().unwrap();
        let output_path = output_dir.path().join("test.jam");

        let config = BuildConfig {
            kmer_size: 11,
            fscale: 1,
            num_threads: 1,
            memory: 1,
            ..Default::default()
        };

        build(&[input.path().to_path_buf()], &output_path, &config).unwrap();

        let reader = JamReader::open(&output_path).unwrap();

        // Bucket 0 may legitimately be empty; only assert when it isn't.
        let entries = reader.bucket_entries(0);
        if !entries.is_empty() {
            let test_hash = entries[0].hash;
            assert!(reader.contains(test_hash));

            let samples: Vec<_> = reader.search(test_hash).collect();
            assert!(!samples.is_empty());
        }
    }

    // A hash that cannot be in the index (above any plausible threshold)
    // is reported absent by both query paths.
    #[test]
    fn test_reader_nonexistent_hash() {
        let input = make_fasta(&[("seq1", "ATCGATCGATCGATCGATCGATCGATCGATCG")]);
        let output_dir = tempfile::tempdir().unwrap();
        let output_path = output_dir.path().join("test.jam");

        let config = BuildConfig {
            kmer_size: 11,
            fscale: 1000,
            num_threads: 1,
            memory: 1,
            ..Default::default()
        };

        build(&[input.path().to_path_buf()], &output_path, &config).unwrap();

        let reader = JamReader::open(&output_path).unwrap();

        let fake_hash = u64::MAX - 1;
        assert!(!reader.contains(fake_hash));

        let samples: Vec<_> = reader.search(fake_hash).collect();
        assert!(samples.is_empty());
    }

    // Two identical sequences indexed as separate samples: a shared hash
    // should resolve to both sample ids.
    #[test]
    fn test_reader_multiple_samples() {
        let input = make_fasta(&[
            ("seq1", "ATCGATCGATCGATCGATCGATCGATCGATCG"),
            ("seq2", "ATCGATCGATCGATCGATCGATCGATCGATCG"),
        ]);
        let output_dir = tempfile::tempdir().unwrap();
        let output_path = output_dir.path().join("test.jam");

        let config = BuildConfig {
            kmer_size: 11,
            fscale: 1,
            singleton: true,
            num_threads: 1,
            memory: 1,
            ..Default::default()
        };

        build(&[input.path().to_path_buf()], &output_path, &config).unwrap();

        let reader = JamReader::open(&output_path).unwrap();
        assert_eq!(reader.stats().sample_count, 2);

        // Scan buckets until a hash shared by both samples is found.
        for bucket_idx in 0..BUCKET_COUNT {
            let entries = reader.bucket_entries(bucket_idx);
            if entries.len() >= 2 {
                let test_hash = entries[0].hash;
                let samples: Vec<_> = reader.search(test_hash).collect();
                if samples.len() == 2 {
                    assert!(samples.contains(&0) || samples.contains(&1));
                    return;
                }
            }
        }
    }

    // Invariants of the on-disk layout: entries in each bucket are sorted,
    // and every entry's hash routes back to its own bucket.
    #[test]
    fn test_reader_bucket_entries() {
        let input = make_fasta(&[("seq1", "ATCGATCGATCGATCGATCGATCGATCGATCG")]);
        let output_dir = tempfile::tempdir().unwrap();
        let output_path = output_dir.path().join("test.jam");

        let config = BuildConfig {
            kmer_size: 11,
            fscale: 1,
            num_threads: 1,
            memory: 1,
            ..Default::default()
        };

        build(&[input.path().to_path_buf()], &output_path, &config).unwrap();

        let reader = JamReader::open(&output_path).unwrap();

        for bucket_idx in 0..BUCKET_COUNT {
            let entries = reader.bucket_entries(bucket_idx);
            for window in entries.windows(2) {
                assert!(
                    window[0] <= window[1],
                    "Entries not sorted in bucket {}",
                    bucket_idx
                );
            }

            for entry in entries {
                assert_eq!(bucket_id(entry.hash), bucket_idx);
            }
        }
    }
}