scirs2_optimize/constrained/
trust_constr.rs

1//! Trust-region algorithm for constrained optimization
2
3use crate::constrained::{Constraint, ConstraintFn, ConstraintKind, Options};
4use crate::error::{OptimizeError, OptimizeResult};
5use crate::result::OptimizeResults;
6use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1};
7
8#[allow(clippy::many_single_char_names)]
9pub fn minimize_trust_constr<F, S>(
10    func: F,
11    x0: &ArrayBase<S, Ix1>,
12    constraints: &[Constraint<ConstraintFn>],
13    options: &Options,
14) -> OptimizeResult<OptimizeResults<f64>>
15where
16    F: Fn(&[f64]) -> f64,
17    S: Data<Elem = f64>,
18{
19    // Get options or use defaults
20    let ftol = options.ftol.unwrap_or(1e-8);
21    let gtol = options.gtol.unwrap_or(1e-8);
22    let ctol = options.ctol.unwrap_or(1e-8);
23    let maxiter = options.maxiter.unwrap_or(100 * x0.len());
24    let eps = options.eps.unwrap_or(1e-8);
25
26    // Initialize variables
27    let n = x0.len();
28    let mut x = x0.to_owned();
29    let mut f = func(x.as_slice().unwrap());
30    let mut nfev = 1;
31
32    // Initialize the Lagrange multipliers
33    let mut lambda = Array1::zeros(constraints.len());
34
35    // Calculate initial gradient using finite differences
36    let mut g = Array1::zeros(n);
37    for i in 0..n {
38        let mut x_h = x.clone();
39        x_h[i] += eps;
40        let f_h = func(x_h.as_slice().unwrap());
41        g[i] = (f_h - f) / eps;
42        nfev += 1;
43    }
44
45    // Evaluate initial constraints
46    let mut c = Array1::zeros(constraints.len());
47    for (i, constraint) in constraints.iter().enumerate() {
48        if !constraint.is_bounds() {
49            let val = (constraint.fun)(x.as_slice().unwrap());
50
51            match constraint.kind {
52                ConstraintKind::Inequality => {
53                    c[i] = val; // g(x) >= 0 constraint
54                }
55                ConstraintKind::Equality => {
56                    // For simplicity, we don't fully support equality constraints yet
57                    return Err(OptimizeError::NotImplementedError(
58                        "Equality constraints not fully implemented in Trust-Region yet"
59                            .to_string(),
60                    ));
61                }
62            }
63        }
64    }
65
66    // Calculate constraint Jacobian
67    let mut a = Array2::zeros((constraints.len(), n));
68    for (i, constraint) in constraints.iter().enumerate() {
69        if !constraint.is_bounds() {
70            for j in 0..n {
71                let mut x_h = x.clone();
72                x_h[j] += eps;
73                let c_h = (constraint.fun)(x_h.as_slice().unwrap());
74                a[[i, j]] = (c_h - c[i]) / eps;
75                nfev += 1;
76            }
77        }
78    }
79
80    // Initialize trust region radius
81    let mut delta = 1.0;
82
83    // Initialize approximation of the Hessian of the Lagrangian
84    let mut b = Array2::eye(n);
85
86    // Main optimization loop
87    let mut iter = 0;
88
89    while iter < maxiter {
90        // Check constraint violation
91        let mut max_constraint_violation = 0.0;
92        for (i, &ci) in c.iter().enumerate() {
93            if constraints[i].kind == ConstraintKind::Inequality && ci < -ctol {
94                max_constraint_violation = f64::max(max_constraint_violation, -ci);
95            }
96        }
97
98        // Check convergence on gradient and constraints
99        if g.iter().all(|&gi| gi.abs() < gtol) && max_constraint_violation < ctol {
100            break;
101        }
102
103        // Compute the Lagrangian gradient
104        let mut lag_grad = g.clone();
105        for (i, &li) in lambda.iter().enumerate() {
106            if li > 0.0 || c[i] < ctol {
107                // Active or violated constraint
108                for j in 0..n {
109                    // L = f - sum(lambda_i * c_i) for inequality constraints
110                    // So gradient of L is grad(f) - sum(lambda_i * grad(c_i))
111                    lag_grad[j] -= li * a[[i, j]];
112                }
113            }
114        }
115
116        // Compute the step using a trust-region approach
117        // Solve the constrained quadratic subproblem:
118        // min 0.5 * p^T B p + g^T p  s.t. ||p|| <= delta and linearized constraints
119
120        let (p, predicted_reduction) =
121            compute_trust_region_step_constrained(&lag_grad, &b, &a, &c, delta, constraints, ctol);
122
123        // Try the step
124        let x_new = &x + &p;
125
126        // Evaluate function and constraints at new point
127        let f_new = func(x_new.as_slice().unwrap());
128        nfev += 1;
129
130        let mut c_new = Array1::zeros(constraints.len());
131        for (i, constraint) in constraints.iter().enumerate() {
132            if !constraint.is_bounds() {
133                c_new[i] = (constraint.fun)(x_new.as_slice().unwrap());
134                nfev += 1;
135            }
136        }
137
138        // Compute change in merit function (includes constraint violation)
139        let mut merit = f;
140        let mut merit_new = f_new;
141
142        // Add constraint violation penalty
143        let penalty = 10.0; // Simple fixed penalty parameter
144        for (i, &ci) in c.iter().enumerate() {
145            if constraints[i].kind == ConstraintKind::Inequality {
146                merit += penalty * f64::max(0.0, -ci);
147                merit_new += penalty * f64::max(0.0, -c_new[i]);
148            }
149        }
150
151        // Compute actual reduction in merit function
152        let actual_reduction = merit - merit_new;
153
154        // Compute ratio of actual to predicted reduction
155        let rho = if predicted_reduction > 0.0 {
156            actual_reduction / predicted_reduction
157        } else {
158            0.0
159        };
160
161        // Update trust region radius based on the quality of the step
162        if rho < 0.25 {
163            delta *= 0.5;
164        } else if rho > 0.75 && p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt() >= 0.9 * delta {
165            delta *= 2.0;
166        }
167
168        // Accept or reject the step
169        if rho > 0.1 {
170            // Accept the step
171            x = x_new;
172            f = f_new;
173            c = c_new;
174
175            // Check convergence on function value
176            if (merit - merit_new).abs() < ftol * (1.0 + merit.abs()) {
177                break;
178            }
179
180            // Compute new gradient
181            let mut g_new = Array1::zeros(n);
182            for i in 0..n {
183                let mut x_h = x.clone();
184                x_h[i] += eps;
185                let f_h = func(x_h.as_slice().unwrap());
186                g_new[i] = (f_h - f) / eps;
187                nfev += 1;
188            }
189
190            // Compute new constraint Jacobian
191            let mut a_new = Array2::zeros((constraints.len(), n));
192            for (i, constraint) in constraints.iter().enumerate() {
193                if !constraint.is_bounds() {
194                    for j in 0..n {
195                        let mut x_h = x.clone();
196                        x_h[j] += eps;
197                        let c_h = (constraint.fun)(x_h.as_slice().unwrap());
198                        a_new[[i, j]] = (c_h - c[i]) / eps;
199                        nfev += 1;
200                    }
201                }
202            }
203
204            // Update Lagrange multipliers using projected gradient method
205            for (i, constraint) in constraints.iter().enumerate() {
206                if constraint.kind == ConstraintKind::Inequality {
207                    if c[i] < ctol {
208                        // Active or violated constraint
209                        // Increase multiplier if constraint is violated
210                        lambda[i] = f64::max(0.0, lambda[i] - c[i] * penalty);
211                    } else {
212                        // Decrease multiplier towards zero
213                        lambda[i] = f64::max(0.0, lambda[i] - 0.1 * lambda[i]);
214                    }
215                }
216            }
217
218            // Update Hessian approximation using BFGS or SR1
219            let s = &p;
220            let y = &g_new - &g;
221
222            // Simple BFGS update for the Hessian approximation
223            let s_dot_y = s.dot(&y);
224            if s_dot_y > 1e-10 {
225                let s_col = s.clone().insert_axis(Axis(1));
226                let s_row = s.clone().insert_axis(Axis(0));
227
228                let bs = b.dot(s);
229                let bs_col = bs.clone().insert_axis(Axis(1));
230                let bs_row = bs.clone().insert_axis(Axis(0));
231
232                let term1 = s_dot_y + s.dot(&bs);
233                let term2 = &s_col.dot(&s_row) * (term1 / (s_dot_y * s_dot_y));
234
235                let term3 = &bs_col.dot(&s_row) + &s_col.dot(&bs_row);
236
237                b = &b + &term2 - &(&term3 / s_dot_y);
238            }
239
240            // Update variables for next iteration
241            g = g_new;
242            a = a_new;
243        }
244
245        iter += 1;
246    }
247
248    // Prepare constraint values for the result
249    let mut c_result = Array1::zeros(constraints.len());
250    for (i, constraint) in constraints.iter().enumerate() {
251        if !constraint.is_bounds() {
252            c_result[i] = c[i];
253        }
254    }
255
256    // Create and return result
257    let mut result = OptimizeResults::default();
258    result.x = x;
259    result.fun = f;
260    result.jac = Some(g.into_raw_vec_and_offset().0);
261    result.constr = Some(c_result);
262    result.nfev = nfev;
263    result.nit = iter;
264    result.success = iter < maxiter;
265
266    if result.success {
267        result.message = "Optimization terminated successfully.".to_string();
268    } else {
269        result.message = "Maximum iterations reached.".to_string();
270    }
271
272    Ok(result)
273}
274
275/// Compute a trust-region step for constrained optimization
276#[allow(clippy::many_single_char_names)]
277fn compute_trust_region_step_constrained(
278    g: &Array1<f64>,
279    b: &Array2<f64>,
280    a: &Array2<f64>,
281    c: &Array1<f64>,
282    delta: f64,
283    constraints: &[Constraint<ConstraintFn>],
284    ctol: f64,
285) -> (Array1<f64>, f64) {
286    let n = g.len();
287    let n_constr = constraints.len();
288
289    // Compute the unconstrained Cauchy point (steepest descent direction)
290    let p_unconstrained = compute_unconstrained_cauchy_point(g, b, delta);
291
292    // Check if unconstrained Cauchy point satisfies linearized constraints
293    let mut constraint_violated = false;
294    for i in 0..n_constr {
295        if constraints[i].kind == ConstraintKind::Inequality {
296            let grad_c_dot_p = (0..n).map(|j| a[[i, j]] * p_unconstrained[j]).sum::<f64>();
297            if c[i] + grad_c_dot_p < -ctol {
298                constraint_violated = true;
299                break;
300            }
301        }
302    }
303
304    // If unconstrained point is feasible, use it
305    if !constraint_violated {
306        // Compute predicted reduction
307        let g_dot_p = g.dot(&p_unconstrained);
308        let bp = b.dot(&p_unconstrained);
309        let p_dot_bp = p_unconstrained.dot(&bp);
310        let predicted_reduction = -g_dot_p - 0.5 * p_dot_bp;
311
312        return (p_unconstrained, predicted_reduction);
313    }
314
315    // Otherwise, project onto the linearized feasible region
316    // This is a simplified approach - in practice, you would solve a CQP
317
318    // Start with the steepest descent direction
319    let mut p = Array1::zeros(n);
320    for i in 0..n {
321        p[i] = -g[i];
322    }
323
324    // Normalize to trust region radius
325    let p_norm = p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt();
326    if p_norm > 1e-10 {
327        p = &p * (delta / p_norm);
328    }
329
330    // Project onto each constraint
331    for _iter in 0..5 {
332        // Limited iterations for projection
333        let mut max_viol = 0.0;
334        let mut most_violated = 0;
335
336        // Find most violated constraint
337        for i in 0..n_constr {
338            if constraints[i].kind == ConstraintKind::Inequality {
339                let grad_c_dot_p = (0..n).map(|j| a[[i, j]] * p[j]).sum::<f64>();
340                let viol = -(c[i] + grad_c_dot_p);
341                if viol > max_viol {
342                    max_viol = viol;
343                    most_violated = i;
344                }
345            }
346        }
347
348        if max_viol < ctol {
349            break;
350        }
351
352        // Project p onto the constraint
353        let mut a_norm_sq = 0.0;
354        for j in 0..n {
355            a_norm_sq += a[[most_violated, j]] * a[[most_violated, j]];
356        }
357
358        if a_norm_sq > 1e-10 {
359            let grad_c_dot_p = (0..n).map(|j| a[[most_violated, j]] * p[j]).sum::<f64>();
360            let proj_dist = (c[most_violated] + grad_c_dot_p) / a_norm_sq;
361
362            // Project p
363            for j in 0..n {
364                p[j] += a[[most_violated, j]] * proj_dist;
365            }
366
367            // Rescale to trust region
368            let p_norm = p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt();
369            if p_norm > delta {
370                p = &p * (delta / p_norm);
371            }
372        }
373    }
374
375    // Compute predicted reduction
376    let g_dot_p = g.dot(&p);
377    let bp = b.dot(&p);
378    let p_dot_bp = p.dot(&bp);
379    let predicted_reduction = -g_dot_p - 0.5 * p_dot_bp;
380
381    (p, predicted_reduction)
382}
383
384/// Compute the unconstrained Cauchy point (steepest descent to trust region boundary)
385fn compute_unconstrained_cauchy_point(g: &Array1<f64>, b: &Array2<f64>, delta: f64) -> Array1<f64> {
386    let n = g.len();
387
388    // Compute the steepest descent direction: -g
389    let mut p = Array1::zeros(n);
390    for i in 0..n {
391        p[i] = -g[i];
392    }
393
394    // Compute g^T B g and ||g||^2
395    let bg = b.dot(g);
396    let g_dot_bg = g.dot(&bg);
397    let g_norm_sq = g.dot(g);
398
399    // Check if the gradient is practically zero
400    if g_norm_sq < 1e-10 {
401        // If gradient is practically zero, don't move
402        return Array1::zeros(n);
403    }
404
405    // Compute tau (step to the boundary)
406    let tau = if g_dot_bg <= 0.0 {
407        // Negative curvature or zero curvature case
408        delta / g_norm_sq.sqrt()
409    } else {
410        // Positive curvature case
411        f64::min(delta / g_norm_sq.sqrt(), g_norm_sq / g_dot_bg)
412    };
413
414    // Scale the direction by tau
415    for i in 0..n {
416        p[i] *= tau;
417    }
418
419    p
420}