newton_sos/problem.rs
1//! Defines the optimization problem structure, as well as methods for computing
2//! the features matrix and kernel matrix.
3
4use std::fmt::Debug;
5
6use faer::{Side, linalg::solvers::LltError, prelude::*};
7use rayon::prelude::*;
8
#[derive(Debug, Clone, Copy)]
/// Enum representing the natively supported kernel types.
///
/// Each variant carries its bandwidth parameter $\sigma$ (must be positive;
/// validated in `Problem::initialize_native_kernel`, not here).
///
/// The following kernels are supported:
/// - Laplacian kernel with bandwidth parameter $\sigma$, defined as:
/// $$k(x, y) = \exp\left(-\frac{||x - y||_2}{\sigma}\right)$$
/// - Gaussian kernel with bandwidth parameter $\sigma$, defined as:
/// $$k(x, y) = \exp\left(-\frac{||x - y||_2^2}{2 \sigma^2}\right)$$
pub enum Kernel {
    /// Laplacian kernel with the specified bandwidth parameter $\sigma > 0$.
    Laplacian(f64),
    /// Gaussian kernel with the specified bandwidth parameter $\sigma > 0$.
    Gaussian(f64),
}
23
/// Represents errors that can occur during setup and initialization of the `Problem` struct.
pub enum ProblemError {
    /// Indicates that an invalid parameter was provided; the message describes which one.
    InvalidParameter(String),
    /// Indicates that the kernel matrix has already been initialized
    /// (initialization is one-shot; see `Problem::initialize_native_kernel`).
    KernelAlreadyInitialized,
    /// Wraps a faer LLT decomposition error raised while computing $\Phi$.
    FaerLltError(LltError),
    /// Raised when $\Phi$ is requested before $K$ has been initialized.
    KernelNotInitialized,
}
35
36impl Debug for ProblemError {
37 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38 match self {
39 ProblemError::InvalidParameter(msg) => write!(f, "Invalid parameter: {}", msg),
40 ProblemError::KernelAlreadyInitialized => {
41 write!(f, "Kernel has already been initialized")
42 }
43 ProblemError::FaerLltError(e) => write!(f, "LLT decomposition error: {:?}", e),
44 ProblemError::KernelNotInitialized => write!(
45 f,
46 "Kernel matrix not initialized. Please call Problem::initialize_native_kernel before Problem::compute_phi."
47 ),
48 }
49 }
50}
51
#[allow(non_snake_case)]
#[derive(Debug, Clone)]
/// Represents an instance of the optimization problem.
///
/// The problem is defined as:
/// $$\max_{c\in\mathbb{R}, B \in \mathbb{S}^n_+} c - \lambda \text{Tr}(B) + t \log \det (B) \qquad \text{s.t. }\quad f_i - c = \Phi_i^T B \Phi_i, \\:\\:\forall i\in[\\![1, N]\\!]$$
/// where:
/// - $\lambda$ is the trace penalty,
/// - $t$ is the relative precision, with $t = \varepsilon / n$,
/// - $x_i$ are the sample points,
/// - $f_i$ are the function values at the sample points,
/// - $\Phi$ is the features matrix derived from the kernel matrix $K$,
/// - $K$ is the kernel matrix computed from the sample points using a specified kernel function.
///
/// In practice, $K$ only is computed first (via `initialize_native_kernel`), and
/// $\Phi$ is optionally computed later (via `compute_phi`); both fields start as `None`.
pub struct Problem {
    /// Trace penalty $\lambda \ge 0$
    pub(crate) lambda: f64,
    /// Relative precision ($\varepsilon / n$), strictly positive
    pub(crate) t: f64,
    /// Sample points, shape (n, d): one row per sample
    pub(crate) x_samples: Mat<f64>,
    /// Function values at the samples, shape (n, 1)
    pub(crate) f_samples: Mat<f64>,
    /// Features matrix (upper-triangular Cholesky factor $R = L^T$ of $K$);
    /// `None` until `compute_phi` is called
    pub(crate) phi: Option<Mat<f64>>,
    /// Kernel matrix $K$, shape (n, n); `None` until `initialize_native_kernel` is called
    pub(crate) K: Option<Mat<f64>>,
}
81
82impl Problem {
83 /// Creates a new problem instance from samples and parameters.
84 ///
85 /// The features matrix $\Phi$ and kernel matrix $K$ are not computed at this stage.
86 ///
87 /// # Arguments
88 /// * `lambda` - Trace penalty parameter.
89 /// * `t` - Relative precision parameter.
90 /// * `x_samples` - Sample points matrix of shape (n, d).
91 /// * `f_samples` - Function values at the sample points of shape (n, 1).
92 ///
93 /// **Note**: this function does not compute the kernel matrix $K$ or the features matrix $\Phi$.
94 /// To compute them, please call `initialize_native_kernel` and `compute_phi` respectively.
95 pub fn new(
96 lambda: f64,
97 t: f64,
98 x_samples: Mat<f64>,
99 f_samples: Mat<f64>,
100 ) -> Result<Self, ProblemError> {
101 if x_samples.nrows() != f_samples.nrows() {
102 return Err(ProblemError::InvalidParameter(format!(
103 "Number of x_samples ({}) must match number of f_samples ({}).",
104 x_samples.nrows(),
105 f_samples.nrows()
106 )));
107 }
108 if f_samples.ncols() != 1 {
109 return Err(ProblemError::InvalidParameter(format!(
110 "f_samples must be a column vector (got {} columns).",
111 f_samples.ncols()
112 )));
113 }
114 if x_samples.nrows() == 0 {
115 return Err(ProblemError::InvalidParameter(format!(
116 "Number of samples must be greater than zero (got {}).",
117 x_samples.nrows()
118 )));
119 }
120 if lambda < 0.0 {
121 return Err(ProblemError::InvalidParameter(format!(
122 "Lambda must be non-negative (got {}).",
123 lambda
124 )));
125 }
126 if t <= 0.0 {
127 return Err(ProblemError::InvalidParameter(format!(
128 "t must be positive (got {}).",
129 t
130 )));
131 }
132
133 Ok(Self {
134 lambda,
135 t,
136 x_samples,
137 f_samples,
138 phi: None,
139 K: None,
140 })
141 }
142
143 /// Initializes the kernel matrix $K$ using the specified native kernel.
144 ///
145 /// This method computes the kernel matrix based on the provided kernel type and its parameters,
146 /// and then derives the features matrix from the Cholesky decomposition of the kernel matrix.
147 ///
148 /// # Arguments
149 /// * `kernel` - The kernel type and its associated parameter (see [`Kernel`] enum).
150 ///
151 /// # Errors
152 /// Returns `ProblemError::KernelAlreadyInitialized` if the kernel has already been initialized.
153 /// Returns a faer variant of `ProblemError` if there is an error during the Cholesky decomposition.
154 pub fn initialize_native_kernel(&mut self, kernel: Kernel) -> Result<(), ProblemError> {
155 if self.K.is_some() || self.phi.is_some() {
156 return Err(ProblemError::KernelAlreadyInitialized);
157 }
158
159 // Verify kernel parameters
160 match kernel {
161 Kernel::Laplacian(sigma) | Kernel::Gaussian(sigma) => {
162 if sigma <= 0.0 {
163 return Err(ProblemError::InvalidParameter(format!(
164 "Kernel bandwidth parameter sigma must be positive (got {}).",
165 sigma
166 )));
167 }
168 }
169 }
170
171 let n_samples = self.x_samples.nrows();
172 let x_samples = &self.x_samples;
173
174 // Compute the kernel matrix using the selected kernel type
175 let mut kernel_matrix = Mat::<f64>::zeros(n_samples, n_samples);
176
177 match kernel {
178 Kernel::Laplacian(sigma) => {
179 kernel_matrix
180 .par_col_iter_mut()
181 .enumerate()
182 .for_each(|(j, col)| {
183 col.par_iter_mut().enumerate().for_each(|(i, val)| {
184 let diff = x_samples.row(i) - x_samples.row(j);
185 *val = (-diff.norm_l2() / sigma).exp();
186 });
187 });
188 }
189 Kernel::Gaussian(sigma) => {
190 kernel_matrix
191 .par_col_iter_mut()
192 .enumerate()
193 .for_each(|(j, col)| {
194 col.par_iter_mut().enumerate().for_each(|(i, val)| {
195 let diff = x_samples.row(i) - x_samples.row(j);
196 *val = (-diff.norm_l2().powi(2) / (2.0 * sigma.powi(2))).exp();
197 });
198 });
199 }
200 }
201
202 self.K = Some(kernel_matrix);
203
204 Ok(())
205 }
206
207 /// Computes the features matrix $\Phi$ from the kernel matrix $K$ using Cholesky decomposition.
208 ///
209 /// This function must be called after `initialize_native_kernel`.
210 #[allow(non_snake_case)]
211 pub fn compute_phi(&mut self) -> Result<(), ProblemError> {
212 // If phi is already computed, return early
213 if self.phi.is_some() {
214 return Ok(());
215 }
216
217 let K = match &self.K {
218 Some(K) => K,
219 None => return Err(ProblemError::KernelNotInitialized),
220 };
221 let llt = K.llt(Side::Lower).map_err(ProblemError::FaerLltError)?;
222 let r = llt.L();
223 self.phi = Some(r.transpose().to_owned());
224 // TODO: implement other decompositions (LDLT, ...)
225
226 Ok(())
227 }
228}
229
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Problem` from an `n x 20` sample grid with `f_cols` columns of
    /// function values; valid problems use `f_cols == 1`.
    fn build_problem(n: usize, f_cols: usize) -> Result<Problem, ProblemError> {
        let x_samples = Mat::<f64>::from_fn(n, 20, |i, j| (i + j) as f64);
        let f_samples = Mat::<f64>::from_fn(n, f_cols, |i, _| i as f64);
        Problem::new(1.0, 1.0, x_samples, f_samples)
    }

    #[test]
    fn test_problem_initialization_gaussian() {
        let problem = build_problem(10, 1);
        assert!(problem.is_ok());
        let result = problem
            .unwrap()
            .initialize_native_kernel(Kernel::Gaussian(1.0));
        assert!(result.is_ok());
    }

    #[test]
    fn test_problem_initialization_laplacian() {
        let problem = build_problem(10, 1);
        assert!(problem.is_ok());
        let result = problem
            .unwrap()
            .initialize_native_kernel(Kernel::Laplacian(1.0));
        assert!(result.is_ok());
    }

    #[test]
    fn test_problem_f_non_real() {
        // f_samples with 30 columns is not a column vector, so construction must fail.
        let problem = build_problem(10, 30);
        assert!(problem.is_err());
    }
}
271}