mod dmatrix;
use std::{
fs::{self, File},
io::{BufRead, BufReader, Write},
ops::{Deref, DerefMut},
path::{Path, PathBuf},
};
mod error;
use dmatrix::DMatrix;
pub use error::Error;
pub type Return<T> = std::result::Result<T, Error>;
use methylome::{MethylationSite, MethylationStatus};
use ndarray::{Array2, ArrayView};
#[derive(Clone, Debug)]
struct Node {
id: usize,
file: PathBuf,
name: String,
generation: u32,
meth: bool,
proportion_unmethylated: Option<f64>,
rc_meth_lvl: Option<f64>,
sites: Option<Vec<MethylationSite>>, }
#[derive(Debug)]
struct Edge<'a> {
from: &'a Node,
to: &'a Node,
}
#[derive(Clone, Debug)]
pub struct Pedigree(Array2<f64>);
type UnmethylatedLevel = f64;
impl Pedigree {
pub fn from_file(filename: &str) -> Self {
let mut pedigree = Array2::<f64>::zeros((0, 4));
let file = std::fs::read_to_string(filename).unwrap();
file.split('\n').skip(1).for_each(|line| {
if line.is_empty() {
return;
}
let mut entries = line.split(' ');
let row = [
entries.next().unwrap().parse::<f64>().unwrap(),
entries.next().unwrap().parse::<f64>().unwrap(),
entries.next().unwrap().parse::<f64>().unwrap(),
entries.next().unwrap().parse::<f64>().unwrap(),
];
pedigree.push_row(ArrayView::from(&row)).unwrap();
});
Pedigree(pedigree)
}
pub fn to_file<P>(&self, path: P) -> std::io::Result<()>
where
P: AsRef<Path> + std::fmt::Debug,
{
println!("Writing pedigree to file: {:?}", path);
let mut file = File::create(path)?;
let mut content = String::new();
content += "time0\ttime1\ttime2\tD.value\n";
for row in self.rows() {
content.push_str(&format!("{}\t{}\t{}\t{}\n", row[0], row[1], row[2], row[3]));
}
file.write_all(content.as_bytes())
}
pub fn build<P>(
nodelist: P,
edgelist: P,
posterior_max_filter: f64,
) -> Return<(Self, UnmethylatedLevel)>
where
P: AsRef<Path>,
{
let nodes = fs::read_to_string(nodelist)?;
let edges = fs::read_to_string(edgelist)?;
let nodes: Vec<Node> = nodes
.split(['\n', '\r'])
.skip(1)
.enumerate()
.filter_map(|(i, line)| {
let mut entries = line.split([',', '\t', ' ']);
Some(Node {
id: i,
file: PathBuf::from(entries.next()?),
name: String::from(entries.next()?),
generation: entries.next()?.parse::<u32>().ok()?,
meth: entries.next()? == "Y",
proportion_unmethylated: None,
rc_meth_lvl: None,
sites: None,
})
})
.collect();
if nodes.is_empty() {
return Err(Error::NodeParsing);
}
let edges: Vec<Edge> = edges
.split(['\n', '\r'])
.skip(1)
.filter_map(|line| {
let mut entries = line.split(['\t', ' ', ',']);
let from = entries.next()?;
let to = entries.next()?;
Some(Edge {
from: nodes.iter().find(|n| n.name == from)?,
to: nodes.iter().find(|n| n.name == to)?,
})
})
.collect();
let mut nodes: Vec<Node> = nodes.iter().filter(|n| n.meth).cloned().collect();
let pb = progress_bars::Progress::new("Loading Methylation Data", nodes.len());
for node in nodes.iter_mut() {
pb.inc(1);
let f = File::open(&node.file).map_err(Error::NodeFile)?;
let reader = BufReader::new(f);
let mut sites = Vec::new();
for line in reader.lines() {
let line = line.expect("Could not read line");
let methylation = MethylationSite::from_methylome_file_line(&line, false);
if methylation.is_none() {
continue;
}
let methylation = methylation.unwrap();
sites.push(methylation);
}
let valid_sites: Vec<&MethylationSite> = sites
.iter()
.filter(|s| s.posteriormax >= posterior_max_filter)
.collect();
let p_uu_count = valid_sites
.iter()
.filter(|s| s.status == MethylationStatus::U)
.count();
let p_uu_share = p_uu_count as f64 / valid_sites.len() as f64;
let avg_meth_lvl =
valid_sites.iter().map(|s| s.meth_lvl).sum::<f64>() / valid_sites.len() as f64;
node.proportion_unmethylated = Some(p_uu_share);
node.rc_meth_lvl = Some(avg_meth_lvl);
node.sites = Some(sites);
}
pb.finish();
let tmp0uu_meth_lvl = nodes
.iter()
.map(|n| 1.0 - n.rc_meth_lvl.unwrap())
.sum::<f64>()
/ nodes.len() as f64;
let divergence = DMatrix::from(&nodes, posterior_max_filter);
let pedigree = divergence.convert(&nodes, &edges);
Ok((pedigree, tmp0uu_meth_lvl))
}
}
impl Deref for Pedigree {
type Target = Array2<f64>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for Pedigree {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn build_pedigree() {
let nodelist = Path::new("./data/nodelist.txt");
let edgelist = Path::new("./data/edgelist.txt");
let pedigree = Pedigree::build(nodelist, edgelist, 0.99).expect("Could not build pedigree");
assert_eq!(pedigree.0.shape(), &[4 * 3 / 2, 4]);
pedigree
.0
.to_file(Path::new("./data/pedigree_generated.txt"))
.unwrap();
}
}