single_utilities/types/
mod.rs

1use crate::utils::validate_net;
2use std::collections::HashMap;
3use std::hash::Hash;
4
/// Axis along which a matrix or array operation is carried out.
///
/// Use this enum to state whether an operation should walk along the rows
/// or along the columns of a data structure.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that directions can be
/// logged and compared directly.
// The upper-case variant names are part of the public interface and are
// therefore kept as they are.
#[derive(Debug, PartialEq, Eq)]
pub enum Direction {
    /// Operations performed along columns (vertical direction)
    COLUMN,
    /// Operations performed along rows (horizontal direction)
    ROW,
}
15
16impl Clone for Direction {
17    fn clone(&self) -> Self {
18        match self {
19            Self::ROW => Self::ROW,
20            Self::COLUMN => Self::COLUMN,
21        }
22    }
23}
24
25impl Direction {
26    /// Checks if the direction is row-wise.
27    ///
28    /// # Returns
29    /// `true` if the direction is `ROW`, `false` if it's `COLUMN`
30    pub fn is_row(&self) -> bool {
31        match self {
32            Self::ROW => true,
33            Self::COLUMN => false,
34        }
35    }
36}
37
/// Marker trait for values that can label a batch.
///
/// Batch identifiers are used to tell batches apart and to group data that
/// belongs to the same batch. The trait declares no methods of its own; it
/// only requires implementors to be cloneable, comparable for equality and
/// hashable, so they can be used as keys in hash-based lookups.
pub trait BatchIdentifier
where
    Self: Clone + Eq + Hash,
{
}
44
45// Implement BatchIdentifier for common types
46impl BatchIdentifier for String {}
47impl BatchIdentifier for &str {}
48impl BatchIdentifier for i32 {}
49impl BatchIdentifier for u32 {}
50impl BatchIdentifier for usize {}
51
/// Enumeration of distance metrics for mathematical computations.
///
/// These are common metrics used in machine learning, clustering, and
/// similarity calculations. Each variant names a different way of measuring
/// how far apart two points or vectors are.
///
/// Besides `Debug`, `Clone` and `Copy`, the enum derives `PartialEq`, `Eq`
/// and `Hash` so a metric can be compared and used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DistanceMetric {
    /// Euclidean distance (L2 norm) - straight-line distance between points
    Euclidean,
    /// Manhattan distance (L1 norm) - sum of absolute differences along each dimension
    Manhattan,
    /// Cosine distance - based on the cosine of the angle between vectors
    Cosine,
}
66
/// A set of named pathways stored in flat, concatenated arrays.
///
/// Pathway `i` owns the entries `starts[i]..starts[i] + offsets[i]` of both
/// `cnct` and `weights`.
#[derive(Debug, Clone, PartialEq)]
pub struct PathwayNetwork {
    /// Name of each pathway.
    names: Vec<String>,
    /// For each pathway, the position in `cnct`/`weights` where its entries begin.
    starts: Vec<usize>,
    /// For each pathway, the number of entries it owns (a length, not an end position).
    offsets: Vec<usize>,
    /// Feature (gene) indices of all pathways, concatenated.
    cnct: Vec<usize>,
    /// Weight of each entry in `cnct`, in the same order.
    weights: Vec<f32>,
}
74
75impl PathwayNetwork {
76    pub fn new(
77        names: Vec<String>,
78        starts: Vec<usize>,
79        offsets: Vec<usize>,
80        cnct: Vec<usize>,
81        weights: Vec<f32>,
82    ) -> Self {
83        Self {
84            names,
85            starts,
86            offsets,
87            cnct,
88            weights,
89        }
90    }
91
92    pub fn new_wo_weights(
93        names: Vec<String>,
94        starts: Vec<usize>,
95        offsets: Vec<usize>,
96        cnct: Vec<usize>,
97    ) -> Self {
98        let weights = vec![1f32; cnct.len()];
99        Self {
100            names,
101            starts,
102            offsets,
103            cnct,
104            weights,
105        }
106    }
107
108    pub fn new_from_vec(
109        sources: Vec<String>,
110        targets: Vec<String>,
111        weights: Option<Vec<f32>>,
112        features: Vec<String>,
113        tmin: u32,
114    ) -> Self {
115        let res = validate_net(sources, targets, weights, false).unwrap();
116        let tmin = tmin as usize;
117        let filtered: HashMap<String, Vec<(String, f32)>> = res
118            .into_iter()
119            .filter_map(|(k, v)| if v.len() >= tmin { Some((k, v)) } else { None })
120            .collect();
121
122        let name_to_id: HashMap<String, usize> = features
123            .iter()
124            .enumerate()
125            .map(|(idx, name)| (name.clone(), idx))
126            .collect();
127
128        let total_lengths = filtered.values().fold(0usize, |v, a| v + a.len());
129        let num_pathways = filtered.len();
130
131        let mut names: Vec<String> = Vec::with_capacity(num_pathways);
132        let mut starts: Vec<usize> = Vec::with_capacity(num_pathways);
133        let mut offsets: Vec<usize> = Vec::with_capacity(num_pathways);
134        let mut cnct: Vec<usize> = Vec::with_capacity(total_lengths);
135        let mut weights_vec: Vec<f32> = Vec::with_capacity(total_lengths);
136
137        let mut i = 0usize;
138
139        for (k, v) in filtered.into_iter() {
140            let len = v.len();
141
142            for (g_name, g_weight) in v {
143                let g_idx = name_to_id.get(&g_name).unwrap();
144                cnct.push(*g_idx);
145                weights_vec.push(g_weight);
146            }
147
148            names.push(k);
149            starts.push(i);
150            offsets.push(len);
151            i += len;
152        }
153
154        Self {
155            names,
156            starts,
157            offsets,
158            cnct,
159            weights: weights_vec,
160        }
161    }
162
163    pub fn get_pathway_name(&self, idx: usize) -> &str {
164        self.names[idx].as_str()
165    }
166
167    pub fn get_pathway_features(&self, idx: usize) -> &[usize] {
168        let srt = self.starts[idx];
169        let off = srt + self.offsets[idx];
170        &self.cnct[srt..off]
171    }
172
173    pub fn get_pathway_features_and_weights(&self, idx: usize) -> (&[usize], &[f32]) {
174        let srt = self.starts[idx];
175        let off = srt + self.offsets[idx];
176        (&self.cnct[srt..off], &self.weights[srt..off])
177    }
178
179    pub fn get_num_pathways(&self) -> usize {
180        self.names.len()
181    }
182}