1use nalgebra::{DMatrix, DVector};
4use runmat_builtins::{StructValue, Value};
5use runmat_macros::runtime_builtin;
6
7use crate::builtins::common::spec::{
8 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
9 ReductionNaN, ResidencyPolicy, ShapeRequirements,
10};
11use crate::builtins::math::optim::common::{
12 call_function, initial_guess, optim_error, option_f64, option_string, option_usize,
13 value_to_real_vector, vector_to_value,
14};
15use crate::builtins::math::optim::type_resolvers::nonlinear_solve_type;
16use crate::BuiltinResult;
17
// Builtin name used in error messages and option parsing.
const NAME: &str = "fsolve";
// Default termination tolerance on the step size (TolX option).
const DEFAULT_TOL_X: f64 = 1.0e-6;
// Default termination tolerance on the residual infinity-norm (TolFun option).
const DEFAULT_TOL_FUN: f64 = 1.0e-6;
// Default cap on outer Levenberg-Marquardt iterations (MaxIter option).
const DEFAULT_MAX_ITER: usize = 400;
22
/// GPU acceleration spec for `fsolve`.
///
/// The solver itself runs on the host (no supported precisions, no provider
/// hooks); inputs are gathered immediately so user callbacks see host values.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::math::optim::fsolve")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "fsolve",
    op_kind: GpuOpKind::Custom("nonlinear-solve"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Host finite-difference Levenberg-Marquardt solver. Callback computations may use GPU-aware builtins.",
};
38
/// Fusion-planner spec for `fsolve`.
///
/// Not elementwise and not a reduction: the builtin repeatedly calls back into
/// user code, so it acts as a fusion barrier (see `notes`).
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::math::optim::fsolve")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "fsolve",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Nonlinear solving repeatedly invokes user code and terminates fusion planning.",
};
49
50#[runtime_builtin(
51 name = "fsolve",
52 category = "math/optim",
53 summary = "Solve a scalar or vector nonlinear equation system using finite-difference Levenberg-Marquardt iterations.",
54 keywords = "fsolve,nonlinear solve,root finding,levenberg-marquardt,jacobian",
55 accel = "sink",
56 type_resolver(nonlinear_solve_type),
57 builtin_path = "crate::builtins::math::optim::fsolve"
58)]
59async fn fsolve_builtin(function: Value, x0: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
60 if rest.len() > 1 {
61 return Err(optim_error(NAME, "fsolve: too many input arguments"));
62 }
63 let options = parse_options(rest.first())?;
64 let opts = FsolveOptions::from_struct(options.as_ref())?;
65 let guess = initial_guess(NAME, x0).await?;
66 let solution = solve(&function, guess.values, &guess.shape, guess.scalar, &opts).await?;
67 vector_to_value(NAME, solution, &guess.shape, guess.scalar)
68}
69
70fn parse_options(value: Option<&Value>) -> BuiltinResult<Option<StructValue>> {
71 match value {
72 None => Ok(None),
73 Some(Value::Struct(options)) => Ok(Some(options.clone())),
74 Some(other) => Err(optim_error(
75 NAME,
76 format!("fsolve: options must be a struct, got {other:?}"),
77 )),
78 }
79}
80
/// Validated solver settings derived from the user options struct.
#[derive(Clone, Copy)]
struct FsolveOptions {
    // Step-size tolerance (TolX); positive.
    tol_x: f64,
    // Residual infinity-norm tolerance (TolFun); positive.
    tol_fun: f64,
    // Maximum outer iterations (MaxIter); at least 1.
    max_iter: usize,
    // Maximum residual evaluations including Jacobian columns (MaxFunEvals); at least 1.
    max_fun_evals: usize,
}
88
89impl FsolveOptions {
90 fn from_struct(options: Option<&StructValue>) -> BuiltinResult<Self> {
91 let display = option_string(options, "Display", "off")?;
92 if !matches!(display.as_str(), "off" | "none" | "final" | "iter") {
93 return Err(optim_error(
94 NAME,
95 "fsolve: option Display must be 'off', 'none', 'final', or 'iter'",
96 ));
97 }
98 let tol_x = option_f64(NAME, options, "TolX", DEFAULT_TOL_X)?;
99 let tol_fun = option_f64(NAME, options, "TolFun", DEFAULT_TOL_FUN)?;
100 if tol_x <= 0.0 || tol_fun <= 0.0 {
101 return Err(optim_error(
102 NAME,
103 "fsolve: options TolX and TolFun must be positive",
104 ));
105 }
106 let max_iter = option_usize(NAME, options, "MaxIter", DEFAULT_MAX_ITER)?.max(1);
107 let max_fun_evals = option_usize(NAME, options, "MaxFunEvals", 100 * max_iter)?.max(1);
108 Ok(Self {
109 tol_x,
110 tol_fun,
111 max_iter,
112 max_fun_evals,
113 })
114 }
115}
116
/// Run damped (Levenberg-Marquardt) Gauss-Newton iterations from `x` until the
/// residual infinity-norm drops below `tol_fun`, the accepted step is small
/// relative to `tol_x`, or an iteration/evaluation budget is exhausted.
///
/// `shape`/`scalar` describe how the flat point is presented to the callback.
/// Returns the flat solution vector on success; errors on empty guesses,
/// exhausted budgets, or a stalled damping loop.
async fn solve(
    function: &Value,
    mut x: Vec<f64>,
    shape: &[usize],
    scalar: bool,
    options: &FsolveOptions,
) -> BuiltinResult<Vec<f64>> {
    let n = x.len();
    if n == 0 {
        return Err(optim_error(NAME, "fsolve: initial guess cannot be empty"));
    }

    let mut residual = eval_residual(function, &x, shape, scalar).await?;
    let mut evals = 1usize;
    // Initial LM damping factor; grown on rejected steps, shrunk on accepted ones.
    let mut lambda = 1.0e-3;

    // The starting point may already satisfy the residual tolerance.
    if residual_norm_inf(&residual) <= options.tol_fun {
        return Ok(x);
    }

    for _ in 0..options.max_iter {
        if evals >= options.max_fun_evals {
            return Err(optim_error(
                NAME,
                "fsolve: exceeded maximum function evaluations",
            ));
        }
        // Jacobian columns each cost one residual evaluation, tracked via `evals`.
        let jacobian =
            finite_difference_jacobian(function, &x, shape, scalar, &residual, &mut evals, options)
                .await?;
        // `jacobian` is row-major (see finite_difference_jacobian), matching from_row_slice.
        let j = DMatrix::from_row_slice(residual.len(), n, &jacobian);
        let f = DVector::from_column_slice(&residual);
        let gradient = j.transpose() * &f;
        let mut accepted = false;

        // Inner damping loop: retry with larger lambda up to 8 times per iteration.
        for _ in 0..8 {
            // Damped normal equations: (J^T J + lambda I) delta = -J^T f.
            let normal = j.transpose() * &j + DMatrix::<f64>::identity(n, n) * lambda;
            let rhs = -&gradient;
            let Some(delta) = normal.lu().solve(&rhs) else {
                // Singular system: increase damping and retry (costs no evaluation).
                lambda *= 10.0;
                continue;
            };
            let trial = x
                .iter()
                .zip(delta.iter())
                .map(|(xi, di)| xi + di)
                .collect::<Vec<_>>();
            let trial_residual = eval_residual(function, &trial, shape, scalar).await?;
            evals += 1;

            // Accept the step only if it strictly reduces the 2-norm of the residual.
            if norm2(&trial_residual) < norm2(&residual) {
                // Infinity-norms of the step and of x, taken before moving `trial` into `x`.
                let step_norm = delta
                    .iter()
                    .fold(0.0_f64, |acc, value| acc.max(value.abs()));
                let x_norm = x.iter().fold(0.0_f64, |acc, value| acc.max(value.abs()));
                x = trial;
                residual = trial_residual;
                // Relax damping after success, with a floor to keep the system regularized.
                lambda = (lambda * 0.3).max(1.0e-12);
                accepted = true;
                // Converged on residual, or on a relative step-size criterion.
                if residual_norm_inf(&residual) <= options.tol_fun
                    || step_norm <= options.tol_x * (1.0 + x_norm)
                {
                    return Ok(x);
                }
                break;
            }

            // Rejected step: tighten damping before retrying.
            lambda *= 10.0;
            if evals >= options.max_fun_evals {
                return Err(optim_error(
                    NAME,
                    "fsolve: exceeded maximum function evaluations",
                ));
            }
        }

        // All 8 damping retries failed to improve the residual.
        if !accepted {
            return Err(optim_error(
                NAME,
                "fsolve: iteration stalled before convergence",
            ));
        }
    }

    Err(optim_error(NAME, "fsolve: exceeded maximum iterations"))
}
203
204async fn eval_residual(
205 function: &Value,
206 x: &[f64],
207 shape: &[usize],
208 scalar: bool,
209) -> BuiltinResult<Vec<f64>> {
210 let arg = if scalar {
211 Value::Num(x[0])
212 } else {
213 Value::Tensor(
214 runmat_builtins::Tensor::new(x.to_vec(), shape.to_vec())
215 .map_err(|e| optim_error(NAME, format!("fsolve: {e}")))?,
216 )
217 };
218 let value = call_function(function, vec![arg]).await?;
219 let residual = value_to_real_vector(NAME, value).await?;
220 if residual.is_empty() {
221 Err(optim_error(
222 NAME,
223 "fsolve: function value must not be empty",
224 ))
225 } else {
226 Ok(residual)
227 }
228}
229
230async fn finite_difference_jacobian(
231 function: &Value,
232 x: &[f64],
233 shape: &[usize],
234 scalar: bool,
235 residual: &[f64],
236 evals: &mut usize,
237 options: &FsolveOptions,
238) -> BuiltinResult<Vec<f64>> {
239 let m = residual.len();
240 let n = x.len();
241 let mut jacobian = vec![0.0; m * n];
242
243 for col in 0..n {
244 if *evals >= options.max_fun_evals {
245 return Err(optim_error(
246 NAME,
247 "fsolve: exceeded maximum function evaluations",
248 ));
249 }
250 let mut perturbed = x.to_vec();
251 let step = f64::EPSILON.sqrt() * (x[col].abs() + 1.0);
252 perturbed[col] += step;
253 let next = eval_residual(function, &perturbed, shape, scalar).await?;
254 *evals += 1;
255 if next.len() != m {
256 return Err(optim_error(
257 NAME,
258 "fsolve: function output size changed during finite differencing",
259 ));
260 }
261 for row in 0..m {
262 jacobian[row * n + col] = (next[row] - residual[row]) / step;
263 }
264 }
265
266 Ok(jacobian)
267}
268
/// Euclidean (2-)norm of a slice; 0.0 for an empty slice.
fn norm2(values: &[f64]) -> f64 {
    let sum_of_squares = values.iter().fold(0.0_f64, |acc, v| acc + v * v);
    sum_of_squares.sqrt()
}
272
/// Infinity-norm (largest absolute entry) of a slice; 0.0 for an empty slice.
fn residual_norm_inf(values: &[f64]) -> f64 {
    let mut largest = 0.0_f64;
    for value in values {
        largest = largest.max(value.abs());
    }
    largest
}
278
#[cfg(test)]
mod tests {
    use super::*;
    use futures::executor::block_on;
    use runmat_builtins::Tensor;
    use std::sync::{Arc, Mutex};

    // Scalar root of sin(x) starting at 3.0 should converge to pi via the
    // built-in "sin" function handle.
    #[test]
    fn fsolve_scalar_builtin_handle() {
        let root = block_on(fsolve_builtin(
            Value::FunctionHandle("sin".into()),
            Value::Num(3.0),
            Vec::new(),
        ))
        .unwrap();
        match root {
            Value::Num(n) => assert!((n - std::f64::consts::PI).abs() < 1.0e-5),
            other => panic!("unexpected value {other:?}"),
        }
    }

    // 2x2 nonlinear system supplied through the user-function invoker hook:
    // x^2 + y^2 = 4 and x*y = 1. Verifies both equations at the solution.
    #[test]
    fn fsolve_vector_system_via_named_user_invoker() {
        let _guard = crate::user_functions::install_user_function_invoker(Some(
            std::sync::Arc::new(|_name, args| {
                let x = match &args[0] {
                    Value::Tensor(t) => t.data.clone(),
                    _ => panic!("expected tensor input"),
                };
                Box::pin(async move {
                    Ok(Value::Tensor(
                        Tensor::new(
                            vec![x[0] * x[0] + x[1] * x[1] - 4.0, x[0] * x[1] - 1.0],
                            vec![2, 1],
                        )
                        .unwrap(),
                    ))
                })
            }),
        ));
        let x0 = Tensor::new(vec![1.0, 1.0], vec![2, 1]).unwrap();
        let root = block_on(fsolve_builtin(
            Value::FunctionHandle("system".into()),
            Value::Tensor(x0),
            Vec::new(),
        ))
        .unwrap();
        match root {
            Value::Tensor(t) => {
                assert!((t.data[0] * t.data[0] + t.data[1] * t.data[1] - 4.0).abs() < 1.0e-5);
                assert!((t.data[0] * t.data[1] - 1.0).abs() < 1.0e-5);
            }
            other => panic!("unexpected value {other:?}"),
        }
    }

    // A row-vector (1x2) starting point must be handed to the callback with
    // shape [1, 2] on every evaluation, and the solution keeps that shape.
    #[test]
    fn fsolve_preserves_row_vector_shape_for_callback() {
        // Records every shape the callback observes so the test can assert
        // the callback actually ran.
        let seen_shapes = Arc::new(Mutex::new(Vec::new()));
        let seen_shapes_for_invoker = Arc::clone(&seen_shapes);
        let _guard = crate::user_functions::install_user_function_invoker(Some(Arc::new(
            move |_name, args| {
                let (x, shape) = match &args[0] {
                    Value::Tensor(t) => (t.data.clone(), t.shape.clone()),
                    other => panic!("expected tensor input, got {other:?}"),
                };
                assert_eq!(shape, vec![1, 2]);
                seen_shapes_for_invoker.lock().unwrap().push(shape.clone());
                Box::pin(async move {
                    Ok(Value::Tensor(
                        Tensor::new(vec![x[0] - 3.0, x[1] - 4.0], shape).unwrap(),
                    ))
                })
            },
        )));
        let x0 = Tensor::new(vec![0.0, 0.0], vec![1, 2]).unwrap();
        let root = block_on(fsolve_builtin(
            Value::FunctionHandle("row_system".into()),
            Value::Tensor(x0),
            Vec::new(),
        ))
        .unwrap();
        match root {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![1, 2]);
                assert!((t.data[0] - 3.0).abs() < 1.0e-5);
                assert!((t.data[1] - 4.0).abs() < 1.0e-5);
            }
            other => panic!("unexpected value {other:?}"),
        }
        assert!(!seen_shapes.lock().unwrap().is_empty());
    }

    // Same shape-preservation check for a 2x2 matrix starting point: the
    // callback sees [2, 2] and the solution is returned as 2x2.
    #[test]
    fn fsolve_preserves_matrix_shape_for_callback() {
        let seen_shapes = Arc::new(Mutex::new(Vec::new()));
        let seen_shapes_for_invoker = Arc::clone(&seen_shapes);
        let _guard = crate::user_functions::install_user_function_invoker(Some(Arc::new(
            move |_name, args| {
                let (x, shape) = match &args[0] {
                    Value::Tensor(t) => (t.data.clone(), t.shape.clone()),
                    other => panic!("expected tensor input, got {other:?}"),
                };
                assert_eq!(shape, vec![2, 2]);
                seen_shapes_for_invoker.lock().unwrap().push(shape.clone());
                Box::pin(async move {
                    Ok(Value::Tensor(
                        Tensor::new(vec![x[0] - 1.0, x[1] - 2.0, x[2] - 3.0, x[3] - 4.0], shape)
                            .unwrap(),
                    ))
                })
            },
        )));
        let x0 = Tensor::new(vec![0.0, 0.0, 0.0, 0.0], vec![2, 2]).unwrap();
        let root = block_on(fsolve_builtin(
            Value::FunctionHandle("matrix_system".into()),
            Value::Tensor(x0),
            Vec::new(),
        ))
        .unwrap();
        match root {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 2]);
                assert!((t.data[0] - 1.0).abs() < 1.0e-5);
                assert!((t.data[1] - 2.0).abs() < 1.0e-5);
                assert!((t.data[2] - 3.0).abs() < 1.0e-5);
                assert!((t.data[3] - 4.0).abs() < 1.0e-5);
            }
            other => panic!("unexpected value {other:?}"),
        }
        assert!(!seen_shapes.lock().unwrap().is_empty());
    }
}