1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
use flate2::read::MultiGzDecoder;
use io_utils::{open_for_read, read_maybe_unzipped};
use std::path::Path;
use std::{
fs::File,
io::{BufRead, BufReader},
};
use std::{i32, str, usize};
use string_utils::TextUtils;
/// Loads the raw feature-barcode matrix stored under an `outs` directory.
///
/// Directory selection:
/// * `outs/raw_feature_bc_matrix` is used when it exists; otherwise the sibling name
///   `outs/raw_gene_bc_matrices_mex` is used instead.
/// * If the chosen directory has a `GRCh38` subdirectory, all files are read from that
///   subdirectory and the feature list is `genes.tsv.gz`; otherwise the files are read
///   from the chosen directory itself and the feature list is `features.tsv.gz`.
///
/// Files read:
/// * the feature list, passed to `read_maybe_unzipped` together with `features`;
/// * `barcodes.tsv.gz`, passed to `read_maybe_unzipped` together with `barcodes`
///   (presumably that helper falls back to the uncompressed file -- confirm in `io_utils`);
/// * `matrix.mtx.gz` if it exists, decoded as gzip; otherwise the same path with the
///   `.gz` extension removed (`matrix.mtx`), opened via `open_for_read!`.
///
/// On return `gex_sparse_matrix` has grown by `barcodes.len()` rows. Each matrix entry
/// `(feature, barcode, count)` is stored as `(feature - 1, count)` in row `barcode - 1`.
///
/// NOTE(review): rows are appended, but entries are indexed from row 0, so the result is
/// only aligned with `barcodes` when `gex_sparse_matrix` is empty on entry -- confirm
/// that callers always pass an empty matrix.
///
/// # Panics
///
/// Panics if the matrix file cannot be opened, if a line cannot be read, if a data line
/// has fewer than three space-separated fields, or if a barcode index falls outside the
/// rows of `gex_sparse_matrix`. The helpers `read_maybe_unzipped`, `open_for_read!` and
/// `force_i32` may panic as well (not visible from here).
pub fn load_feature_bc_matrix(
    outs: impl AsRef<Path>,
    features: &mut Vec<String>,
    barcodes: &mut Vec<String>,
    gex_sparse_matrix: &mut Vec<Vec<(i32, i32)>>,
) {
    let outs = outs.as_ref();

    // Prefer `raw_feature_bc_matrix`; fall back to `raw_gene_bc_matrices_mex` only when
    // the preferred directory is absent.
    let mut dir = outs.join("raw_feature_bc_matrix");
    if !dir.exists() {
        dir.set_file_name("raw_gene_bc_matrices_mex");
    }

    // With a `GRCh38` subdirectory the files live inside it and the feature list is named
    // `genes.tsv.gz`; without it they sit directly in `dir` as `features.tsv.gz`.
    dir.push("GRCh38");
    if dir.exists() {
        dir.push("genes.tsv.gz");
    } else {
        dir.pop();
        dir.push("features.tsv.gz");
    }
    read_maybe_unzipped(&dir, features);

    // The barcode list and the matrix are siblings of the feature list.
    dir.set_file_name("barcodes.tsv.gz");
    read_maybe_unzipped(&dir, barcodes);

    dir.set_file_name("matrix.mtx.gz");
    let mut matrix_file = dir;
    if matrix_file.exists() {
        let file = File::open(&matrix_file)
            .unwrap_or_else(|e| panic!("could not open {}: {}", matrix_file.display(), e));
        let gz = MultiGzDecoder::new(file);
        _load_feature_bc_matrix(BufReader::new(gz), barcodes, gex_sparse_matrix);
    } else {
        // Strip the trailing `.gz` and read the uncompressed `matrix.mtx` instead.
        matrix_file.set_extension("");
        _load_feature_bc_matrix(open_for_read![&matrix_file], barcodes, gex_sparse_matrix);
    };

    /// Parses the matrix text from `f` into `gex_sparse_matrix`.
    ///
    /// Appends one empty row per barcode, skips the first three lines (presumably the
    /// Matrix Market banner, a metadata comment and the dimensions line -- confirm
    /// against the producer of the file), then reads every remaining line as
    /// space-separated `feature barcode count` with 1-based indices.
    fn _load_feature_bc_matrix(
        f: impl BufRead,
        barcodes: &[String],
        gex_sparse_matrix: &mut Vec<Vec<(i32, i32)>>,
    ) {
        gex_sparse_matrix.resize_with(
            gex_sparse_matrix.len() + barcodes.len(),
            Vec::<(i32, i32)>::new,
        );
        for (line_num, line) in f.lines().enumerate() {
            let s = line.unwrap();
            // Lines 0..=2 are header lines; data starts on the fourth line.
            if line_num > 2 {
                let fields = s.splitn(4, ' ').collect::<Vec<&str>>();
                // Convert the 1-based indices in the file to 0-based indices.
                let feature = fields[0].force_i32() - 1;
                let bc = fields[1].force_i32() - 1;
                let count = fields[2].force_i32();
                gex_sparse_matrix[bc as usize].push((feature, count));
            }
        }
    }
}