extern crate byteorder;
extern crate csv as ext_csv;
extern crate flate2;
extern crate num;
extern crate num_traits;
extern crate sprs;
pub mod csv;
pub mod eds;
pub mod file_names;
pub mod iter;
pub mod mtx;
pub mod utils;
use sprs::num_matrixmarket::Displayable;
use sprs::CsMat;
use std::error::Error;
use std::fmt;
use std::path::{Path, PathBuf};
use trait_set::trait_set;
trait_set! {
pub trait MatrixValueTrait = sprs::num_matrixmarket::MatrixMarketRead + sprs::num_matrixmarket::MatrixMarketConjugate + sprs::num_kinds::PrimitiveKind + std::ops::Add;
}
#[derive(PartialEq)]
pub struct SingleCellExperiment<T: MatrixValueTrait>
where
for<'n> Displayable<&'n T>: std::fmt::Display,
{
counts: CsMat<T>,
rows: Vec<String>,
cols: Vec<String>,
}
pub enum MatrixKind {
CSV,
EDS,
MTX,
}
impl<T: MatrixValueTrait> fmt::Debug for SingleCellExperiment<T>
where
for<'n> Displayable<&'n T>: std::fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SingleCellExperiment")
.field("matrix (#rows, #columns)", &self.shape())
.finish()
}
}
impl<T: MatrixValueTrait> SingleCellExperiment<T>
where
for<'n> Displayable<&'n T>: std::fmt::Display,
{
pub fn cols(&self) -> usize {
self.counts.cols()
}
pub fn rows(&self) -> usize {
self.counts.rows()
}
pub fn shape(&self) -> (usize, usize) {
(self.counts.rows(), self.counts.cols())
}
pub fn nnz(&self) -> usize {
self.counts.nnz()
}
pub fn row_names(&self) -> &Vec<String> {
&self.rows
}
pub fn get_row_name(&self, index: usize) -> Option<&String> {
if self.rows() <= index {
return None;
}
Some(&self.rows[index])
}
pub fn col_names(&self) -> &Vec<String> {
&self.cols
}
pub fn counts(&self) -> &CsMat<T> {
&self.counts
}
pub fn transpose_into(self) -> SingleCellExperiment<T> {
let rows = self.row_names().to_owned();
let cols = self.col_names().to_owned();
SingleCellExperiment {
counts: self.counts.transpose_into(),
rows,
cols,
}
}
pub fn from_csr(
counts: CsMat<T>,
rows: Vec<String>,
cols: Vec<String>,
) -> Result<SingleCellExperiment<T>, String> {
if rows.len() != counts.rows() {
return Err("Number of rows in the matrix doesn't match the row names".to_owned());
}
if cols.len() != counts.cols() {
return Err(
"Number of columns in the matrix doesn't match the column names".to_owned(),
);
}
Ok(SingleCellExperiment { counts, rows, cols })
}
pub fn from_csv(
file_path: &str,
rows: Vec<String>,
cols: Vec<String>,
) -> Result<SingleCellExperiment<T>, Box<dyn Error>>
where
T: std::str::FromStr + num::Num + Clone,
{
let file = Path::new(file_path);
let counts: CsMat<T> = csv::reader(file, rows.len(), cols.len())?;
Ok(SingleCellExperiment { counts, rows, cols })
}
pub fn to_mtx(&self, file_path: &str) -> Result<(), Box<dyn Error>>
where
T: std::fmt::Display + Copy + sprs::num_kinds::PrimitiveKind,
{
let file = Path::new(file_path);
mtx::writer(file, self.counts())
}
pub fn to_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>>
where
T: Copy + num::traits::Zero + std::fmt::Display,
{
let file = Path::new(file_path);
csv::writer(file, self.counts())
}
}
impl<T: MatrixValueTrait> SingleCellExperiment<T>
where
T: std::ops::Neg<Output = T>,
for<'n> Displayable<&'n T>: std::fmt::Display,
{
pub fn from_mtx(
file_path: &str,
rows: Vec<String>,
cols: Vec<String>,
) -> Result<SingleCellExperiment<T>, Box<dyn Error>>
where
T: Clone + num_traits::Num + num_traits::NumCast + std::ops::Neg<Output = T>,
{
let file = Path::new(file_path);
let counts: CsMat<T> = mtx::reader(file)?;
Ok(SingleCellExperiment { counts, rows, cols })
}
pub fn from_tenx_v2(cr_out: PathBuf) -> Result<SingleCellExperiment<T>, Box<dyn Error>>
where
T: Copy + num::Num + num::NumCast,
{
let file_names = file_names::MatFileNames::tenx_v2(cr_out)?;
let feature_names = utils::read_features(file_names.column_file())?;
let cellbarcode_names = utils::read_features(file_names.row_file())?;
SingleCellExperiment::from_mtx(
file_names.matrix_file().to_str().unwrap(),
cellbarcode_names,
feature_names,
)
}
pub fn from_tenx_v3(cr_out: PathBuf) -> Result<SingleCellExperiment<T>, Box<dyn Error>>
where
T: Copy + num::Num + num::NumCast,
{
let file_names = file_names::MatFileNames::tenx_v2(cr_out)?;
let feature_names = utils::read_compressed_features(file_names.column_file())?;
let cellbarcode_names = utils::read_compressed_features(file_names.row_file())?;
SingleCellExperiment::from_mtx(
file_names.matrix_file().to_str().unwrap(),
cellbarcode_names,
feature_names,
)
}
}
impl SingleCellExperiment<f32> {
pub fn from_eds(
file_path: &str,
rows: Vec<String>,
cols: Vec<String>,
) -> Result<SingleCellExperiment<f32>, Box<dyn Error>> {
let file = Path::new(file_path);
let counts: CsMat<f32> = eds::reader(file, rows.len(), cols.len())?;
Ok(SingleCellExperiment { counts, rows, cols })
}
pub fn to_eds(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
let file = Path::new(file_path);
eds::writer(file, self.counts())
}
pub fn from_alevin(alv_out: PathBuf) -> Result<SingleCellExperiment<f32>, Box<dyn Error>> {
let file_names = file_names::MatFileNames::from_alevin(alv_out)?;
let feature_names = utils::read_features(file_names.column_file())?;
let cellbarcode_names = utils::read_features(file_names.row_file())?;
SingleCellExperiment::from_eds(
file_names.matrix_file().to_str().unwrap(),
cellbarcode_names,
feature_names,
)
}
pub fn from_mat_file_names(
file_names: file_names::MatFileNames,
is_features_compressed: bool,
matrix_kind: MatrixKind,
) -> Result<SingleCellExperiment<f32>, Box<dyn Error>> {
let (feature_names, cellbarcode_names) = match is_features_compressed {
true => (
utils::read_compressed_features(file_names.column_file())?,
utils::read_compressed_features(file_names.row_file())?,
),
false => (
utils::read_features(file_names.column_file())?,
utils::read_features(file_names.row_file())?,
),
};
let matrix = match matrix_kind {
MatrixKind::CSV => SingleCellExperiment::from_csv(
file_names.matrix_file().to_str().unwrap(),
cellbarcode_names,
feature_names,
)?,
MatrixKind::MTX => SingleCellExperiment::from_mtx(
file_names.matrix_file().to_str().unwrap(),
cellbarcode_names,
feature_names,
)?,
MatrixKind::EDS => SingleCellExperiment::from_eds(
file_names.matrix_file().to_str().unwrap(),
cellbarcode_names,
feature_names,
)?,
};
Ok(matrix)
}
}
#[cfg(test)]
mod tests {
use super::SingleCellExperiment;
use sprs::CsMat;
fn get_test_matrix_f32() -> CsMat<f32> {
CsMat::new(
(3, 3),
vec![0, 2, 4, 5],
vec![0, 1, 0, 2, 2],
vec![1.2, 2.3, 3.4, 4.5, 5.6],
)
}
fn get_test_sce_f32_data() -> (CsMat<f32>, Vec<String>, Vec<String>) {
let a = get_test_matrix_f32();
let b: Vec<String> = vec!["1", "2", "3"]
.into_iter()
.map(|x| x.to_string())
.collect();
let c: Vec<String> = vec!["4", "5", "6"]
.into_iter()
.map(|x| x.to_string())
.collect();
(a, b, c)
}
#[test]
fn test_single_cell_experiment() {
let (a, b, c) = get_test_sce_f32_data();
let sce = match SingleCellExperiment::from_csr(a.clone(), b.clone(), c.clone()) {
Ok(x) => x,
Err(y) => panic!("ERROR: {}", y),
};
assert_eq!(sce.cols(), 3);
assert_eq!(sce.rows(), 3);
assert_eq!(sce.row_names().to_owned(), b);
assert_eq!(sce.col_names().to_owned(), c);
assert_eq!(sce.shape(), (3, 3));
assert_eq!(sce.nnz(), 5);
assert_eq!(sce.counts().clone(), get_test_matrix_f32());
let sce_t = match SingleCellExperiment::from_csr(a.transpose_into(), b.clone(), c.clone()) {
Ok(x) => x,
Err(y) => panic!("ERROR: {}", y),
};
assert_eq!(sce.transpose_into(), sce_t);
}
}