use std::collections::HashMap;
use crate::Kmer;
pub struct SimplePosIndex {
k: usize,
empty: Vec<usize>,
index: HashMap<Kmer, Vec<usize>>,
}
impl SimplePosIndex {
pub fn new(k: usize) -> SimplePosIndex {
SimplePosIndex {
k,
empty: Vec::new(),
index: HashMap::new(),
}
}
pub fn add_seq<S>(&mut self, seq: &S)
where
S: AsRef<[u8]>,
{
Kmer::with_many_both_pos(self.k, seq, |pos, x, _y| {
self.index.entry(x.clone()).or_insert(Vec::new()).push(pos);
});
}
pub fn add_seq_both<S>(&mut self, seq: &S)
where
S: AsRef<[u8]>,
{
Kmer::with_many_both_pos(self.k, seq, |pos, x, y| {
self.index.entry(x.clone()).or_insert(Vec::new()).push(pos);
self.index.entry(y.clone()).or_insert(Vec::new()).push(pos);
});
}
pub fn find(&self, x: &Kmer) -> &[usize] {
match self.index.get(x) {
None => &self.empty,
Some(positions) => &positions,
}
}
pub fn find_locations<S>(&self, seq: &S) -> HashMap<i64, usize>
where
S: AsRef<[u8]>,
{
let mut res = HashMap::new();
Kmer::with_many_both_pos(self.k, seq, |pos, x, _y| {
let p = pos as i64;
for q in self.find(x) {
let r = (*q as i64) - p;
*res.entry(r).or_insert(0) += 1;
}
});
res
}
}
pub struct SimpleSeqPosIndex {
k: usize,
name_vector: Vec<String>,
index: HashMap<Kmer,Vec<(usize,usize)>>,
empty: Vec<(usize, usize)>
}
impl SimpleSeqPosIndex {
pub fn new(k: usize) -> SimpleSeqPosIndex {
SimpleSeqPosIndex { k, name_vector: Vec::new(), index: HashMap::new(), empty: Vec::new() }
}
pub fn names(&self) -> &[String] {
&self.name_vector
}
pub fn add_seq<S>(&mut self, name: &str, seq: &S)
where
S: AsRef<[u8]>,
{
let n = self.name_vector.len();
self.name_vector.push(name.to_string());
Kmer::with_many_both_pos(self.k, seq, |pos, x, _y| {
self.index.entry(x.clone()).or_insert(Vec::new()).push((n,pos));
});
}
pub fn add_seq_both<S>(&mut self, name: &str, seq: &S)
where
S: AsRef<[u8]>,
{
let n = self.name_vector.len();
self.name_vector.push(name.to_string());
Kmer::with_many_both_pos(self.k, seq, |pos, x, y| {
self.index.entry(x.clone()).or_insert(Vec::new()).push((n, pos));
self.index.entry(y.clone()).or_insert(Vec::new()).push((n, pos));
});
}
pub fn find(&self, x: &Kmer) -> &[(usize,usize)] {
match self.index.get(x) {
None => &self.empty,
Some(positions) => &positions,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// K-mer length used by every test in this module.
    const K: usize = 11;

    /// Sequence that gets indexed.
    const REFERENCE: &str = "CCACGACACATCATAGTCAAATTTCTAAAAATTAAGGACCACACAAACACCTTAAAAACAGAGAAAGAAAAATGACACATTACCTATACTGCAAAAACAATTCAAATGACAAAGTATTATTCATTGGAAACCACAGAGTTCAGAATGAAG";

    /// A stretch of `REFERENCE` with one extra `A` inserted, so its k-mers
    /// line up with the reference at two neighbouring offsets.
    const QUERY: &str = "CACCTTAAAAACAGAGAAAGAAAAAATGACACATTACCTATACTGCAAAAACAA";

    /// Checks that looking up `QUERY` yields exactly the two expected
    /// offsets with their expected hit counts.
    fn assert_expected_offsets(idx: &SimplePosIndex) {
        let hits = idx.find_locations(&QUERY);
        assert_eq!(hits.len(), 2);
        assert_eq!(hits.get(&46), Some(&23));
        assert_eq!(hits.get(&47), Some(&15));
    }

    /// Index built with `add_seq`.
    #[test]
    fn test_1() {
        let mut idx = SimplePosIndex::new(K);
        idx.add_seq(&REFERENCE);
        assert_expected_offsets(&idx);
    }

    /// Index built with `add_seq_both`; the expected hits are the same.
    #[test]
    fn test_2() {
        let mut idx = SimplePosIndex::new(K);
        idx.add_seq_both(&REFERENCE);
        assert_expected_offsets(&idx);
    }
}