use gbz::{GBZ, Pos, FullPathName};
use simple_sds::sparse_vector::{SparseVector, SparseBuilder};
use simple_sds::ops::PredSucc;
use std::collections::HashMap;
#[cfg(test)]
mod tests;
/// An in-memory index of sampled positions on the reference paths of a GBZ graph.
///
/// Each indexed path gets an *index offset* in `0..path_count()`. All per-path
/// vectors below are parallel and are addressed by that offset. The index is
/// built by `PathIndex::new`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PathIndex {
/// Maps a path identifier (as reported by the graph) to its offset in this index.
path_to_offset: HashMap<usize, usize>,
/// Inverse of `path_to_offset`: the path identifier stored at each index offset.
offset_to_path: Vec<usize>,
/// Length of each indexed path, as reported by the graph for that reference path.
path_lengths: Vec<usize>,
/// For each indexed path, a sparse vector with a set bit at every sampled sequence position.
sequence_positions: Vec<SparseVector>,
/// For each indexed path, the GBWT position of every sample, in the same order
/// as the set bits of the corresponding vector in `sequence_positions`.
gbwt_positions: Vec<Vec<Pos>>,
}
impl PathIndex {
    /// Builds a path index from the reference paths of `graph`.
    ///
    /// # Arguments
    ///
    /// * `graph`: The graph whose reference paths are indexed.
    /// * `interval`: Forwarded to `GBZ::reference_positions`; presumably the
    ///   approximate distance between sampled positions (confirm against the
    ///   `gbz` documentation).
    /// * `verbose`: Print a progress message to stderr; also forwarded to
    ///   `GBZ::reference_positions`.
    ///
    /// # Errors
    ///
    /// Returns an error if the graph yields no reference paths, or if the
    /// sparse vector for a path cannot be constructed.
    ///
    /// NOTE(review): `SparseBuilder::set` is called unchecked here; it is
    /// assumed that the sampled positions of each path are strictly increasing
    /// and below the path length. Verify against `simple_sds` what happens
    /// otherwise.
    pub fn new(graph: &GBZ, interval: usize, verbose: bool) -> Result<Self, String> {
        if verbose {
            eprintln!("Building path index");
        }

        let reference_paths = graph.reference_positions(interval, verbose);
        if reference_paths.is_empty() {
            return Err(String::from("No reference paths to index"));
        }

        let path_count = reference_paths.len();
        let mut path_to_offset: HashMap<usize, usize> = HashMap::with_capacity(path_count);
        let mut offset_to_path: Vec<usize> = Vec::with_capacity(path_count);
        let mut path_lengths: Vec<usize> = Vec::with_capacity(path_count);
        let mut sequence_positions = Vec::with_capacity(path_count);
        let mut gbwt_positions = Vec::with_capacity(path_count);

        for (offset, ref_path) in reference_paths.iter().enumerate() {
            // Paths are pushed in iteration order, so `offset` is also the
            // position of this path in every per-path vector.
            path_to_offset.insert(ref_path.id, offset);
            offset_to_path.push(ref_path.id);
            path_lengths.push(ref_path.len);

            // One set bit per sample; the rank of a set bit is the position of
            // the matching entry in `gbwt`.
            let mut sequence = SparseBuilder::new(ref_path.len, ref_path.positions.len())?;
            let mut gbwt = Vec::with_capacity(ref_path.positions.len());
            for (sequence_pos, gbwt_pos) in ref_path.positions.iter() {
                sequence.set(*sequence_pos);
                gbwt.push(*gbwt_pos);
            }
            sequence_positions.push(SparseVector::try_from(sequence)?);
            gbwt_positions.push(gbwt);
        }

        Ok(PathIndex {
            path_to_offset,
            offset_to_path,
            path_lengths,
            sequence_positions,
            gbwt_positions,
        })
    }

    /// Returns the number of indexed paths.
    #[inline]
    pub fn path_count(&self) -> usize {
        self.sequence_positions.len()
    }

    /// Returns the index offset of the path with identifier `path_id`,
    /// or `None` if that path is not in the index.
    #[inline]
    pub fn path_to_offset(&self, path_id: usize) -> Option<usize> {
        self.path_to_offset.get(&path_id).copied()
    }

    /// Returns the path identifier stored at `index_offset`,
    /// or `None` if the offset is out of range.
    #[inline]
    pub fn offset_to_path(&self, index_offset: usize) -> Option<usize> {
        self.offset_to_path.get(index_offset).copied()
    }

    /// Returns the index offset of the path called `path_name` in `graph`.
    ///
    /// Returns `None` if the graph has no metadata, the metadata does not
    /// contain the path name, or the path is not in this index.
    pub fn find_path(&self, graph: &GBZ, path_name: &FullPathName) -> Option<usize> {
        let metadata = graph.metadata()?;
        let path_id = metadata.find_path(path_name)?;
        self.path_to_offset(path_id)
    }

    /// Returns the length of the path at `index_offset`,
    /// or `None` if the offset is out of range.
    #[inline]
    pub fn path_length(&self, index_offset: usize) -> Option<usize> {
        self.path_lengths.get(index_offset).copied()
    }

    /// Returns the sampled position that is the predecessor of `query_offset`
    /// on the path at `index_offset`.
    ///
    /// The result is a pair (sequence offset of the sample, GBWT position of
    /// the sample). The predecessor is presumably the last sample at or before
    /// `query_offset`; see `simple_sds::ops::PredSucc::predecessor`.
    ///
    /// Returns `None` if `index_offset` is out of range or if the predecessor
    /// query finds no sample.
    pub fn indexed_position(&self, index_offset: usize, query_offset: usize) -> Option<(usize, Pos)> {
        // Checked lookups: an unknown index offset yields `None` instead of a
        // panic, matching the other accessors that take an index offset.
        let samples = self.sequence_positions.get(index_offset)?;
        let gbwt = self.gbwt_positions.get(index_offset)?;

        // The iterator yields (rank of the sample, sequence offset of the sample).
        let (sample_offset, sequence_offset) = samples.predecessor(query_offset).next()?;
        let gbwt_pos = *gbwt.get(sample_offset)?;
        Some((sequence_offset, gbwt_pos))
    }
}