use anyhow::Result;
use crate::index::{Index, MinimizerIndex, Occ};
use crate::sketch::{MinimizerConfig, minimizers};
use crate::types::{Anchor, Minimizer, ReadRecord, SeedHit, Strand};
#[derive(Clone, Copy, Debug)]
pub struct SeedingConfig {
pub min_anchor_len: u32,
pub max_occ: usize,
pub long_read_threshold: usize,
}
fn add_seed_hits(
read: &ReadRecord,
mins: &[Minimizer],
k: usize,
cfg: SeedingConfig,
mut get_len: impl FnMut(u64) -> Option<usize>,
mut for_each: impl FnMut(u64, &mut dyn FnMut(Occ)),
out: &mut Vec<SeedHit>,
) {
for m in mins {
let Some(occ_len) = get_len(m.hash) else {
continue;
};
if occ_len > cfg.max_occ {
continue;
}
let mut push_occ = |o: Occ| {
let is_rev = o.strand != m.strand;
let read_pos = if is_rev {
(read.seq.len() - m.pos as usize - k) as u32
} else {
m.pos
};
out.push(SeedHit {
hash: m.hash,
read_pos,
ref_id: o.ref_id,
ref_pos: o.pos,
strand: if is_rev {
Strand::Reverse
} else {
Strand::Forward
},
});
};
for_each(m.hash, &mut push_occ);
}
}
pub fn collect_seed_hits(
read: &ReadRecord,
index: &Index,
cfg: SeedingConfig,
) -> Result<Vec<SeedHit>> {
let (table, k, w) = select_index(read.seq.len(), index, cfg.long_read_threshold);
let mins = minimizers(&read.seq, &MinimizerConfig { k, w });
let mut hits = Vec::new();
add_seed_hits(
read,
&mins,
k,
cfg,
|hash| index.bucket_len(table, hash),
|hash, f| index.for_each_occ(table, hash, f),
&mut hits,
);
Ok(hits)
}
pub fn collect_seed_hits_from_minimizers(
read: &ReadRecord,
mins: &[Minimizer],
index: &Index,
table: &MinimizerIndex,
k: usize,
cfg: SeedingConfig,
) -> Vec<SeedHit> {
let mut hits = Vec::new();
collect_seed_hits_from_minimizers_into(read, mins, index, table, k, cfg, &mut hits);
hits
}
pub fn collect_seed_hits_from_minimizers_into(
read: &ReadRecord,
mins: &[Minimizer],
index: &Index,
table: &MinimizerIndex,
k: usize,
cfg: SeedingConfig,
out: &mut Vec<SeedHit>,
) {
out.clear();
add_seed_hits(
read,
mins,
k,
cfg,
|hash| index.bucket_len(table, hash),
|hash, f| index.for_each_occ(table, hash, f),
out,
);
}
pub fn extend_seeds(
read: &ReadRecord,
index: &Index,
hits: &[SeedHit],
cfg: SeedingConfig,
) -> Vec<Anchor> {
let mut anchors = Vec::new();
extend_seeds_into(read, index, hits, cfg, &mut anchors);
anchors
}
pub fn extend_seeds_into(
read: &ReadRecord,
index: &Index,
hits: &[SeedHit],
cfg: SeedingConfig,
out: &mut Vec<Anchor>,
) {
out.clear();
let rc_read = reverse_complement(&read.seq);
let k = select_index(read.seq.len(), index, cfg.long_read_threshold).1 as i32;
for hit in hits {
let ref_seq = index.ref_bases(hit.ref_id as usize);
if hit.ref_pos as usize >= ref_seq.len() {
continue;
}
let (read_seq, strand) = if hit.strand == Strand::Reverse {
(&rc_read, Strand::Reverse)
} else {
(&read.seq, Strand::Forward)
};
let mut q_start = hit.read_pos as i32;
let mut r_start = hit.ref_pos as i32;
let mut q_end = q_start + k;
let mut r_end = r_start + k;
while q_start > 0 && r_start > 0 {
let qb = read_seq[(q_start - 1) as usize];
let rb = ref_seq[(r_start - 1) as usize];
if qb != rb {
break;
}
q_start -= 1;
r_start -= 1;
}
while (q_end as usize) < read_seq.len() && (r_end as usize) < ref_seq.len() {
let qb = read_seq[q_end as usize];
let rb = ref_seq[r_end as usize];
if qb != rb {
break;
}
q_end += 1;
r_end += 1;
}
let len = (q_end - q_start).max(0) as u32;
if len < cfg.min_anchor_len {
continue;
}
out.push(Anchor {
read_start: q_start as u32,
read_end: q_end as u32,
ref_id: hit.ref_id,
ref_start: r_start as u32,
ref_end: r_end as u32,
strand,
score: len as i32,
});
}
}
fn select_index(
read_len: usize,
index: &Index,
long_read_threshold: usize,
) -> (&MinimizerIndex, usize, usize) {
if read_len >= long_read_threshold {
(&index.long, index.long.k, index.long.w)
} else {
(&index.short, index.short.k, index.short.w)
}
}
fn reverse_complement(seq: &[u8]) -> Vec<u8> {
seq.iter()
.rev()
.map(|b| match *b {
b'A' => b'T',
b'C' => b'G',
b'G' => b'C',
b'T' => b'A',
_ => b'N',
})
.collect()
}