linfa_reduction/diffusion_map/
algorithms.rs1#[cfg(not(feature = "blas"))]
10use linfa_linalg::{
11 eigh::*,
12 lobpcg::{self, LobpcgResult, Order as TruncatedOrder},
13};
14use ndarray::{Array1, Array2};
15#[cfg(feature = "blas")]
16use ndarray_linalg::{eigh::EighInto, lobpcg, lobpcg::LobpcgResult, Scalar, TruncatedOrder, UPLO};
17use ndarray_rand::{rand_distr::Uniform, RandomExt};
18
19use linfa::dataset::{WithLapack, WithoutLapack};
20use linfa::{traits::Transformer, Float};
21use linfa_kernel::Kernel;
22use rand::{prelude::SmallRng, SeedableRng};
23
24use super::hyperparams::DiffusionMapValidParams;
25
26#[derive(Debug, Clone, PartialEq)]
64pub struct DiffusionMap<F> {
65 embedding: Array2<F>,
66 eigvals: Array1<F>,
67}
68
69impl<'a, F: Float> Transformer<&'a Kernel<F>, DiffusionMap<F>> for DiffusionMapValidParams {
70 fn transform(&self, kernel: &'a Kernel<F>) -> DiffusionMap<F> {
80 let (embedding, eigvals) =
82 compute_diffusion_map(kernel, self.steps(), 0.0, self.embedding_size(), None);
83
84 DiffusionMap { embedding, eigvals }
85 }
86}
87
88impl<F: Float> DiffusionMap<F> {
89 pub fn estimate_clusters(&self) -> usize {
91 let mean = self.eigvals.sum() / F::cast(self.eigvals.len());
92 self.eigvals.iter().filter(|x| *x > &mean).count() + 1
93 }
94
95 pub fn eigvals(&self) -> &Array1<F> {
97 &self.eigvals
98 }
99
100 pub fn embedding(&self) -> &Array2<F> {
102 &self.embedding
103 }
104}
105
106#[allow(unused)]
107fn compute_diffusion_map<F: Float>(
108 kernel: &Kernel<F>,
109 steps: usize,
110 alpha: f32,
111 embedding_size: usize,
112 guess: Option<Array2<F>>,
113) -> (Array2<F>, Array1<F>) {
114 assert!(embedding_size < kernel.size());
115
116 let d = kernel.sum().mapv(|x| x.recip());
117
118 let (vals, vecs) = if kernel.size() < 5 * embedding_size + 1 {
119 let mut matrix = kernel.dot(&Array2::from_diag(&d).view());
121 matrix
122 .columns_mut()
123 .into_iter()
124 .zip(d.iter())
125 .for_each(|(mut a, b)| a *= *b);
126
127 let matrix = matrix.with_lapack();
128 #[cfg(feature = "blas")]
130 let (vals, vecs) = {
131 let (vals, vecs) = matrix.eigh_into(UPLO::Lower).unwrap();
132 (
133 vals.slice_move(s![..; -1]).mapv(Scalar::from_real),
134 vecs.slice_move(s![.., ..; -1]),
135 )
136 };
137 #[cfg(not(feature = "blas"))]
138 let (vals, vecs) = matrix.eigh_into().unwrap().sort_eig_desc();
139 (
140 vals.slice_move(s![1..=embedding_size]),
141 vecs.slice_move(s![.., 1..=embedding_size]),
142 )
143 } else {
144 let d2 = d.mapv(|x| x.powf(F::cast(0.5 + alpha)));
145 let x = guess
147 .unwrap_or_else(|| {
148 Array2::random_using(
149 (kernel.size(), embedding_size + 1),
150 Uniform::new(0.0f64, 1.0),
151 &mut SmallRng::seed_from_u64(31),
152 )
153 .mapv(F::cast)
154 })
155 .with_lapack();
156
157 let result = lobpcg::lobpcg(
158 |y| {
159 let mut y = y.to_owned().without_lapack();
160 y.rows_mut()
161 .into_iter()
162 .zip(d2.iter())
163 .for_each(|(mut a, b)| a *= *b);
164 let mut y = kernel.dot(&y.view());
165
166 y.rows_mut()
167 .into_iter()
168 .zip(d2.iter())
169 .for_each(|(mut a, b)| a *= *b);
170
171 y.with_lapack()
172 },
173 x,
174 |_| {},
175 None,
176 1e-7,
177 200,
178 TruncatedOrder::Largest,
179 );
180
181 let (vals, vecs) = match result {
182 #[cfg(feature = "blas")]
183 LobpcgResult::Ok(vals, vecs, _) | LobpcgResult::Err(vals, vecs, _, _) => (vals, vecs),
184 #[cfg(not(feature = "blas"))]
185 LobpcgResult::Ok(lobpcg) | LobpcgResult::Err((_, Some(lobpcg))) => {
186 (lobpcg.eigvals, lobpcg.eigvecs)
187 }
188 _ => panic!("Eigendecomposition failed!"),
189 };
190
191 (vals.slice_move(s![1..]), vecs.slice_move(s![.., 1..]))
193 };
194
195 let (vals, mut vecs): (Array1<F>, _) = (vals.without_lapack(), vecs.without_lapack());
196 let d = d.mapv(|x| x.sqrt());
197
198 for (mut col, val) in vecs.rows_mut().into_iter().zip(d.iter()) {
199 col *= *val;
200 }
201
202 let steps = F::cast(steps);
203 for (mut vec, val) in vecs.columns_mut().into_iter().zip(vals.iter()) {
204 vec *= val.powf(steps);
205 }
206
207 (vecs, vals)
208}