use seqhash::SplitSeqHash;
/// Maximum Hamming distance used by this demo.
///
/// `map_sequence` passes it to `remaining_hdist` to obtain the mismatch budget
/// for the fallback check, and `main` prints it in the test-case header.
const MAX_HDIST: usize = 3;
/// Maps `sequence` to the index of a parent barcode in `index`, printing a
/// one-line trace of the path that was taken.
///
/// The split query result is examined in this order:
///
/// 1. Both halves agree on one parent: that parent is returned.
/// 2. The halves are conflicted: `None` is returned.
/// 3. Exactly one half matched: the other half is checked with
///    `is_within_hdist`, using the budget from `remaining_hdist(MAX_HDIST)`.
///    The parent it reports is returned, or `None` if the check fails.
/// 4. None of the above: `None` is returned.
///
/// Each outcome prints exactly one final status line.
fn map_sequence(index: &SplitSeqHash, sequence: &[u8]) -> Option<usize> {
    let result = index.query(sequence);

    // Fast path: no further verification is needed.
    if let Some(idx) = result.agreed_idx() {
        println!(" ✓ Fast match: both halves agree on parent {}", idx);
        return Some(idx);
    }

    // The two halves point at different parents; refuse to guess.
    if result.is_conflicted() {
        println!(" ✗ Conflict: halves matched different parents");
        return None;
    }

    // Fallback: one half matched, so verify the other half against that parent.
    if let Some((idx, matched_half)) = result.single_match() {
        // NOTE(review): `remaining_hdist` presumably returns `Some` whenever
        // a single half matched; a zero budget is used if it does not.
        let remaining = result.remaining_hdist(MAX_HDIST).unwrap_or(0);
        println!(
            " → Fallback: {:?} half matched parent {}, checking {:?} half with budget {}",
            matched_half,
            idx,
            matched_half.other(),
            remaining
        );

        // Return from both arms: a failed fallback must not fall through to
        // the "neither half matched" message below, since one half did match.
        return match index.is_within_hdist(sequence, idx, matched_half.other(), remaining) {
            Some(true_idx) => {
                println!(" ✓ Fallback match: parent {}", true_idx);
                Some(true_idx)
            }
            None => {
                println!(" ✗ Fallback failed: other half exceeds budget");
                None
            }
        };
    }

    println!(" ✗ No match: neither half matched");
    None
}
/// Demo driver for `SplitSeqHash`.
///
/// Builds an index over four 16-base barcodes, prints the index geometry,
/// runs a series of labelled queries through `map_sequence`, shows the raw
/// per-half result for a chimeric query, and ends with a short list of
/// performance notes.
fn main() {
    println!("=== SplitSeqHash Demo ===\n");

    let barcodes: Vec<&[u8]> = vec![
        b"ACGTACGTACGTACGT",
        b"GGGGCCCCGGGGCCCC",
        b"TTTTAAAATTTTAAAA",
        b"CCCCGGGGCCCCGGGG",
    ];

    println!("Building index with {} barcodes:", barcodes.len());
    for (position, barcode) in barcodes.iter().enumerate() {
        println!(" {}: {}", position, String::from_utf8_lossy(barcode));
    }

    let index = SplitSeqHash::new(&barcodes).expect("Failed to build index");

    // Report how the index splits each sequence into halves.
    let total_len = index.seq_len();
    let split_at = index.split_pos();
    println!(
        "\nIndex details: {} bases, split at position {}",
        total_len, split_at
    );
    println!(" Left half: {} bases", index.left_len());
    println!(" Right half: {} bases\n", index.right_len());

    // (label, query) pairs; every query is a variant of barcode 0 with `N`
    // substituted at selected positions.
    let test_cases = vec![
        ("Exact match", b"ACGTACGTACGTACGT"),
        ("1 mismatch (left)", b"NCGTACGTACGTACGT"),
        ("1 mismatch (right)", b"ACGTACGTACGTACGN"),
        ("2 mismatches (both halves)", b"NCGTACGTACGTACGN"),
        ("2 mismatches (one half)", b"NNGTACGTACGTACGT"),
        ("3 mismatches (1 left, 2 right)", b"NCGTACGTACGTACNN"),
        ("4 mismatches (exceeds budget)", b"NNGTACGTACGTACNN"),
        ("No match", b"NNNNNNNNNNNNNNNN"),
    ];

    println!(
        "=== Test Cases (max hamming distance = {}) ===\n",
        MAX_HDIST
    );
    for &(label, read) in &test_cases {
        println!("Query: {} ({})", String::from_utf8_lossy(read), label);

        match map_sequence(&index, read) {
            Some(parent) => println!(
                " → Matched to barcode {}: {}\n",
                parent,
                String::from_utf8_lossy(barcodes[parent])
            ),
            None => println!(" → No match found\n"),
        }
    }

    println!("=== Conflict Detection ===\n");

    // Left half copied from barcode 0, right half from barcode 1.
    let chimera = b"ACGTACGTGGGGCCCC";
    println!(
        "Query: {} (left from BC0, right from BC1)",
        String::from_utf8_lossy(chimera)
    );
    let halves = index.query(chimera);
    println!(" Left match: {:?}", halves.left);
    println!(" Right match: {:?}", halves.right);
    if halves.is_conflicted() {
        println!(" → Detected conflict: halves matched different parents");
    }

    // Closing notes, printed one per line.
    let closing_notes = [
        "\n=== Performance Characteristics ===",
        "- Query with 0-2 mismatches: O(1) hash lookups (fast path)",
        "- Query with 3 mismatches: O(1) + O(n/2) hamming scan (fallback)",
        "- Construction: O(n * seq_len) where n = number of barcodes",
        "- Memory: ~2x SeqHash storage (one per half)",
    ];
    for note in closing_notes.iter() {
        println!("{}", note);
    }
}