use std::collections::HashMap;
use std::io::BufRead;
/// Sparse, symmetric matrix of pairwise scores.
///
/// A cell and its mirror image share a single stored entry, keyed by
/// `(smaller index, larger index)`. Cells that were never set fall back to
/// `same` on the diagonal and `missing` elsewhere, and to `T::default()` when
/// the relevant fallback is unset.
#[derive(Debug)]
pub struct ScoringMatrix<T> {
    /// Explicit dimension, if one was given; otherwise `size()` infers it from `data`.
    size: Option<usize>,
    /// Fallback for diagonal cells that were never set.
    same: Option<T>,
    /// Fallback for off-diagonal cells that were never set.
    missing: Option<T>,
    /// Explicitly stored cells.
    data: HashMap<(usize, usize), T>,
}

impl<T> ScoringMatrix<T>
where
    T: Default + Copy,
{
    /// Creates an empty matrix with no explicit size and no fallback values.
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
            missing: None,
            same: None,
            size: None,
        }
    }

    /// Creates an empty matrix with the given diagonal (`same`) and
    /// off-diagonal (`missing`) fallback values.
    pub fn with_defaults(same: T, missing: T) -> Self {
        Self {
            same: Some(same),
            missing: Some(missing),
            ..Self::new()
        }
    }

    /// Creates an empty matrix with an explicit size and both fallback values.
    pub fn with_size_and_defaults(size: usize, same: T, missing: T) -> Self {
        Self {
            size: Some(size),
            ..Self::with_defaults(same, missing)
        }
    }

    /// Creates an empty matrix with an explicit size and no fallback values.
    pub fn with_size(size: usize) -> Self {
        Self {
            size: Some(size),
            ..Self::new()
        }
    }

    /// Returns the explicit size when one is set; otherwise one more than the
    /// largest index that appears in any stored cell (0 for an empty matrix).
    pub fn size(&self) -> usize {
        match self.size {
            Some(explicit) => explicit,
            None => self
                .data
                .keys()
                .fold(0, |largest, &(r, c)| largest.max(r.max(c) + 1)),
        }
    }

    /// Fixes the reported size, overriding any inference from stored cells.
    pub fn set_size(&mut self, size: usize) {
        self.size = Some(size);
    }

    /// Stores `value` for the unordered pair `(row, col)`, replacing any
    /// value previously stored for that pair.
    pub fn set(&mut self, row: usize, col: usize, value: T) {
        let key = (row.min(col), row.max(col));
        self.data.insert(key, value);
    }

    /// Returns the value stored for the unordered pair `(row, col)`.
    ///
    /// When nothing is stored, diagonal cells yield `same` and off-diagonal
    /// cells yield `missing`; an unset fallback yields `T::default()`.
    pub fn get(&self, row: usize, col: usize) -> T {
        let fallback = if row == col { self.same } else { self.missing };
        let key = (row.min(col), row.max(col));
        match self.data.get(&key) {
            Some(&stored) => stored,
            None => fallback.unwrap_or_default(),
        }
    }
}
impl ScoringMatrix<f32> {
    /// Builds a scoring matrix from a tab-separated file of pairwise scores.
    ///
    /// Each line is split on tabs. A line with at least three fields is read
    /// as `name1`, `name2`, `score`; further fields are ignored. Lines with
    /// fewer than three fields are skipped. Names receive indices in order of
    /// first appearance, and a later score for the same unordered pair
    /// replaces an earlier one.
    ///
    /// `same` and `missing` become the matrix fallbacks for unset diagonal and
    /// off-diagonal cells.
    ///
    /// Returns the matrix, sized to the number of distinct names, together
    /// with the names in index order.
    ///
    /// Reading stops silently at the first line that cannot be read.
    /// NOTE(review): `crate::reader` is defined elsewhere; how it treats a
    /// missing or unreadable `infile` is not visible here.
    ///
    /// # Panics
    ///
    /// Panics if the score field of a line is not a valid `f32`.
    pub fn from_pair_scores(infile: &str, same: f32, missing: f32) -> (Self, Vec<String>) {
        let mut names = indexmap::IndexSet::new();
        let mut matrix = Self::with_defaults(same, missing);
        let reader = crate::reader(infile);
        for (line_idx, line) in reader.lines().map_while(Result::ok).enumerate() {
            let mut fields = line.split('\t');
            if let (Some(name1), Some(name2), Some(raw_score)) =
                (fields.next(), fields.next(), fields.next())
            {
                // Parse before touching `names` so a bad line registers nothing.
                let score: f32 = raw_score.parse().unwrap_or_else(|err| {
                    panic!(
                        "invalid score {:?} on line {} of {}: {}",
                        raw_score,
                        line_idx + 1,
                        infile,
                        err
                    )
                });
                // `insert_full` yields the index whether or not the name is new,
                // so no clone and no second lookup are needed.
                let (row, _) = names.insert_full(name1.to_string());
                let (col, _) = names.insert_full(name2.to_string());
                matrix.set(row, col, score);
            }
        }
        matrix.set_size(names.len());
        (matrix, names.into_iter().collect())
    }
}
/// Dense square matrix of `f32` values whose rows and columns can be
/// addressed either by index or by name.
#[derive(Debug)]
pub struct NamedMatrix {
/// Number of rows, which is also the number of columns.
size: usize,
/// Maps each name to its row/column index.
names: indexmap::IndexMap<String, usize>,
/// Cell values in row-major order: cell `(row, col)` is at `row * size + col`.
values: Vec<f32>,
}
impl NamedMatrix {
    /// Creates a zero-filled matrix with one row and one column per entry of
    /// `names`; the index of a name is its position in `names`.
    pub fn new(names: Vec<String>) -> Self {
        let size = names.len();
        let values = vec![f32::default(); size * size];
        let names: indexmap::IndexMap<_, _> = names
            .into_iter()
            .enumerate()
            .map(|(i, name)| (name, i))
            .collect();
        NamedMatrix {
            size,
            names,
            values,
        }
    }

    /// Returns the number of rows (equal to the number of columns).
    pub fn size(&self) -> usize {
        self.size
    }

    /// Maps `(row, col)` to its position in `values`.
    ///
    /// Both indices are checked: without the check, an out-of-range `col`
    /// combined with a small `row` would still land inside `values` and
    /// silently address a different cell.
    fn cell_index(&self, row: usize, col: usize) -> usize {
        assert!(
            row < self.size && col < self.size,
            "index ({}, {}) is out of bounds for a matrix of size {}",
            row,
            col,
            self.size
        );
        row * self.size + col
    }

    /// Returns the value at `(row, col)`.
    ///
    /// # Panics
    ///
    /// Panics if `row` or `col` is not smaller than `size()`.
    pub fn get(&self, row: usize, col: usize) -> f32 {
        self.values[self.cell_index(row, col)]
    }

    /// Stores `value` at `(row, col)` and at its mirror cell `(col, row)`.
    ///
    /// # Panics
    ///
    /// Panics if `row` or `col` is not smaller than `size()`.
    pub fn set(&mut self, row: usize, col: usize, value: f32) {
        let cell = self.cell_index(row, col);
        self.values[cell] = value;
        if row != col {
            let mirror = self.cell_index(col, row);
            self.values[mirror] = value;
        }
    }

    /// Returns the names in the iteration order of the underlying name map.
    pub fn get_names(&self) -> Vec<&String> {
        self.names.keys().collect()
    }

    /// Returns the value for the pair of names, or `None` if either name is
    /// unknown.
    pub fn get_by_name(&self, name1: &str, name2: &str) -> Option<f32> {
        let &i = self.names.get(name1)?;
        let &j = self.names.get(name2)?;
        Some(self.get(i, j))
    }

    /// Stores `value` for the pair of names (and for the mirrored pair).
    ///
    /// # Errors
    ///
    /// Returns `Err` naming the first of `name1`, `name2` that is unknown.
    pub fn set_by_name(&mut self, name1: &str, name2: &str, value: f32) -> Result<(), String> {
        match (self.names.get(name1), self.names.get(name2)) {
            (Some(&i), Some(&j)) => {
                self.set(i, j, value);
                Ok(())
            }
            (None, _) => Err(format!("Name not found: {}", name1)),
            (_, None) => Err(format!("Name not found: {}", name2)),
        }
    }

    /// Builds a dense matrix from a file of pairwise scores.
    ///
    /// The file is read by `ScoringMatrix::from_pair_scores`; cells without a
    /// score take `same` on the diagonal and `missing` elsewhere.
    pub fn from_pair_scores(infile: &str, same: f32, missing: f32) -> Self {
        let (scores, index_name) = ScoringMatrix::from_pair_scores(infile, same, missing);
        let mut matrix = Self::new(index_name);
        let size = matrix.size;
        for i in 0..size {
            // `ScoringMatrix::get` is symmetric and `set` mirrors each write,
            // so visiting the upper triangle fills the whole matrix.
            for j in i..size {
                matrix.set(i, j, scores.get(i, j));
            }
        }
        matrix
    }

    /// Reads a distance matrix from a PHYLIP-style text file.
    ///
    /// A first line consisting solely of an unsigned integer is skipped as a
    /// header. Every other non-blank line is a whitespace-separated row: a
    /// name followed by distances. Row `i` (0-based) must supply at least
    /// `i + 1` distances; only those first `i + 1` are used, so both a
    /// lower-triangular layout that includes the diagonal and a full square
    /// layout are accepted. The result is mirrored across the diagonal.
    ///
    /// Reading stops silently at the first line that cannot be read.
    ///
    /// # Panics
    ///
    /// Panics if a row has too few distances or a used distance is not a
    /// valid `f32`.
    pub fn from_relaxed_phylip(infile: &str) -> Self {
        let mut names = Vec::new();
        let mut values = Vec::new();
        let reader = crate::reader(infile);
        let mut lines = reader.lines();
        // The first line is data unless it is a bare count.
        if let Some(Ok(line)) = lines.next() {
            if line.trim().parse::<usize>().is_err() {
                Self::process_phylip_line(&line, &mut names, &mut values);
            }
        }
        for line in lines.map_while(Result::ok) {
            Self::process_phylip_line(&line, &mut names, &mut values);
        }
        let size = names.len();
        let mut matrix = Self::new(names);
        for i in 0..size {
            for j in 0..=i {
                // `values` is the packed lower triangle: rows 0..i contribute
                // 1 + 2 + ... + i = i * (i + 1) / 2 entries before row i.
                let value = values[i * (i + 1) / 2 + j];
                matrix.set(i, j, value);
            }
        }
        matrix
    }

    /// Parses one data row, appending its name to `names` and its
    /// lower-triangle distances (diagonal included) to `values`.
    ///
    /// Blank lines are ignored. After the name is pushed, `names.len()` is the
    /// number of distances taken from the row; later fields are not parsed.
    fn process_phylip_line(line: &str, names: &mut Vec<String>, values: &mut Vec<f32>) {
        // `split_whitespace` already ignores leading and trailing whitespace.
        let mut fields = line.split_whitespace();
        let name = match fields.next() {
            Some(name) => name,
            None => return,
        };
        names.push(name.to_string());
        let expected = names.len();
        let before = values.len();
        values.extend(fields.take(expected).map(|field| {
            field.parse::<f32>().unwrap_or_else(|err| {
                panic!("invalid distance {:?} in row {:?}: {}", field, name, err)
            })
        }));
        let found = values.len() - before;
        assert!(
            found == expected,
            "row {:?} has {} distance value(s), expected at least {}",
            name,
            found,
            expected
        );
    }
}