scirs2_optimize/constrained/
trust_constr.rs

1//! Trust-region algorithm for constrained optimization
2
3use crate::constrained::{Constraint, ConstraintFn, ConstraintKind, Options};
4use crate::error::{OptimizeError, OptimizeResult};
5use crate::result::OptimizeResults;
6use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1};
7
8pub fn minimize_trust_constr<F, S>(
9    func: F,
10    x0: &ArrayBase<S, Ix1>,
11    constraints: &[Constraint<ConstraintFn>],
12    options: &Options,
13) -> OptimizeResult<OptimizeResults<f64>>
14where
15    F: Fn(&[f64]) -> f64,
16    S: Data<Elem = f64>,
17{
18    // Get options or use defaults
19    let ftol = options.ftol.unwrap_or(1e-8);
20    let gtol = options.gtol.unwrap_or(1e-8);
21    let ctol = options.ctol.unwrap_or(1e-8);
22    let maxiter = options.maxiter.unwrap_or(100 * x0.len());
23    let eps = options.eps.unwrap_or(1e-8);
24
25    // Initialize variables
26    let n = x0.len();
27    let mut x = x0.to_owned();
28    let mut f = func(x.as_slice().unwrap());
29    let mut nfev = 1;
30
31    // Initialize the Lagrange multipliers
32    let mut lambda = Array1::zeros(constraints.len());
33
34    // Calculate initial gradient using finite differences
35    let mut g = Array1::zeros(n);
36    for i in 0..n {
37        let mut x_h = x.clone();
38        x_h[i] += eps;
39        let f_h = func(x_h.as_slice().unwrap());
40        g[i] = (f_h - f) / eps;
41        nfev += 1;
42    }
43
44    // Evaluate initial constraints
45    let mut c = Array1::zeros(constraints.len());
46    for (i, constraint) in constraints.iter().enumerate() {
47        if !constraint.is_bounds() {
48            let val = (constraint.fun)(x.as_slice().unwrap());
49
50            match constraint.kind {
51                ConstraintKind::Inequality => {
52                    c[i] = val; // g(x) >= 0 constraint
53                }
54                ConstraintKind::Equality => {
55                    // For simplicity, we don't fully support equality constraints yet
56                    return Err(OptimizeError::NotImplementedError(
57                        "Equality constraints not fully implemented in Trust-Region yet"
58                            .to_string(),
59                    ));
60                }
61            }
62        }
63    }
64
65    // Calculate constraint Jacobian
66    let mut a = Array2::zeros((constraints.len(), n));
67    for (i, constraint) in constraints.iter().enumerate() {
68        if !constraint.is_bounds() {
69            for j in 0..n {
70                let mut x_h = x.clone();
71                x_h[j] += eps;
72                let c_h = (constraint.fun)(x_h.as_slice().unwrap());
73                a[[i, j]] = (c_h - c[i]) / eps;
74                nfev += 1;
75            }
76        }
77    }
78
79    // Initialize trust region radius
80    let mut delta = 1.0;
81
82    // Initialize approximation of the Hessian of the Lagrangian
83    let mut b = Array2::eye(n);
84
85    // Main optimization loop
86    let mut iter = 0;
87
88    while iter < maxiter {
89        // Check constraint violation
90        let mut max_constraint_violation = 0.0;
91        for (i, &ci) in c.iter().enumerate() {
92            if constraints[i].kind == ConstraintKind::Inequality && ci < -ctol {
93                max_constraint_violation = f64::max(max_constraint_violation, -ci);
94            }
95        }
96
97        // Check convergence on gradient and constraints
98        if g.iter().all(|&gi| gi.abs() < gtol) && max_constraint_violation < ctol {
99            break;
100        }
101
102        // Compute the Lagrangian gradient
103        let mut lag_grad = g.clone();
104        for (i, &li) in lambda.iter().enumerate() {
105            if li > 0.0 || c[i] < ctol {
106                // Active or violated constraint
107                for j in 0..n {
108                    // L = f - sum(lambda_i * c_i) for inequality constraints
109                    // So gradient of L is grad(f) - sum(lambda_i * grad(c_i))
110                    lag_grad[j] -= li * a[[i, j]];
111                }
112            }
113        }
114
115        // Compute the step using a trust-region approach
116        // Solve the constrained quadratic subproblem:
117        // min 0.5 * p^T B p + g^T p  s.t. ||p|| <= delta and linearized constraints
118
119        let (p, predicted_reduction) =
120            compute_trust_region_step_constrained(&lag_grad, &b, &a, &c, delta, constraints, ctol);
121
122        // Try the step
123        let x_new = &x + &p;
124
125        // Evaluate function and constraints at new point
126        let f_new = func(x_new.as_slice().unwrap());
127        nfev += 1;
128
129        let mut c_new = Array1::zeros(constraints.len());
130        for (i, constraint) in constraints.iter().enumerate() {
131            if !constraint.is_bounds() {
132                c_new[i] = (constraint.fun)(x_new.as_slice().unwrap());
133                nfev += 1;
134            }
135        }
136
137        // Compute change in merit function (includes constraint violation)
138        let mut merit = f;
139        let mut merit_new = f_new;
140
141        // Add constraint violation penalty
142        let penalty = 10.0; // Simple fixed penalty parameter
143        for (i, &ci) in c.iter().enumerate() {
144            if constraints[i].kind == ConstraintKind::Inequality {
145                merit += penalty * f64::max(0.0, -ci);
146                merit_new += penalty * f64::max(0.0, -c_new[i]);
147            }
148        }
149
150        // Compute actual reduction in merit function
151        let actual_reduction = merit - merit_new;
152
153        // Compute ratio of actual to predicted reduction
154        let rho = if predicted_reduction > 0.0 {
155            actual_reduction / predicted_reduction
156        } else {
157            0.0
158        };
159
160        // Update trust region radius based on the quality of the step
161        if rho < 0.25 {
162            delta *= 0.5;
163        } else if rho > 0.75 && p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt() >= 0.9 * delta {
164            delta *= 2.0;
165        }
166
167        // Accept or reject the step
168        if rho > 0.1 {
169            // Accept the step
170            x = x_new;
171            f = f_new;
172            c = c_new;
173
174            // Check convergence on function value
175            if (merit - merit_new).abs() < ftol * (1.0 + merit.abs()) {
176                break;
177            }
178
179            // Compute new gradient
180            let mut g_new = Array1::zeros(n);
181            for i in 0..n {
182                let mut x_h = x.clone();
183                x_h[i] += eps;
184                let f_h = func(x_h.as_slice().unwrap());
185                g_new[i] = (f_h - f) / eps;
186                nfev += 1;
187            }
188
189            // Compute new constraint Jacobian
190            let mut a_new = Array2::zeros((constraints.len(), n));
191            for (i, constraint) in constraints.iter().enumerate() {
192                if !constraint.is_bounds() {
193                    for j in 0..n {
194                        let mut x_h = x.clone();
195                        x_h[j] += eps;
196                        let c_h = (constraint.fun)(x_h.as_slice().unwrap());
197                        a_new[[i, j]] = (c_h - c[i]) / eps;
198                        nfev += 1;
199                    }
200                }
201            }
202
203            // Update Lagrange multipliers using projected gradient method
204            for (i, constraint) in constraints.iter().enumerate() {
205                if constraint.kind == ConstraintKind::Inequality {
206                    if c[i] < ctol {
207                        // Active or violated constraint
208                        // Increase multiplier if constraint is violated
209                        lambda[i] = f64::max(0.0, lambda[i] - c[i] * penalty);
210                    } else {
211                        // Decrease multiplier towards zero
212                        lambda[i] = f64::max(0.0, lambda[i] - 0.1 * lambda[i]);
213                    }
214                }
215            }
216
217            // Update Hessian approximation using BFGS or SR1
218            let s = &p;
219            let y = &g_new - &g;
220
221            // Simple BFGS update for the Hessian approximation
222            let s_dot_y = s.dot(&y);
223            if s_dot_y > 1e-10 {
224                let s_col = s.clone().insert_axis(Axis(1));
225                let s_row = s.clone().insert_axis(Axis(0));
226
227                let bs = b.dot(s);
228                let bs_col = bs.clone().insert_axis(Axis(1));
229                let bs_row = bs.clone().insert_axis(Axis(0));
230
231                let term1 = s_dot_y + s.dot(&bs);
232                let term2 = &s_col.dot(&s_row) * (term1 / (s_dot_y * s_dot_y));
233
234                let term3 = &bs_col.dot(&s_row) + &s_col.dot(&bs_row);
235
236                b = &b + &term2 - &(&term3 / s_dot_y);
237            }
238
239            // Update variables for next iteration
240            g = g_new;
241            a = a_new;
242        }
243
244        iter += 1;
245    }
246
247    // Prepare constraint values for the result
248    let mut c_result = Array1::zeros(constraints.len());
249    for (i, constraint) in constraints.iter().enumerate() {
250        if !constraint.is_bounds() {
251            c_result[i] = c[i];
252        }
253    }
254
255    // Create and return result
256    let mut result = OptimizeResults::default();
257    result.x = x;
258    result.fun = f;
259    result.jac = Some(g.into_raw_vec_and_offset().0);
260    result.constr = Some(c_result);
261    result.nfev = nfev;
262    result.nit = iter;
263    result.success = iter < maxiter;
264
265    if result.success {
266        result.message = "Optimization terminated successfully.".to_string();
267    } else {
268        result.message = "Maximum iterations reached.".to_string();
269    }
270
271    Ok(result)
272}
273
274/// Compute a trust-region step for constrained optimization
275fn compute_trust_region_step_constrained(
276    g: &Array1<f64>,
277    b: &Array2<f64>,
278    a: &Array2<f64>,
279    c: &Array1<f64>,
280    delta: f64,
281    constraints: &[Constraint<ConstraintFn>],
282    ctol: f64,
283) -> (Array1<f64>, f64) {
284    let n = g.len();
285    let n_constr = constraints.len();
286
287    // Compute the unconstrained Cauchy point (steepest descent direction)
288    let p_unconstrained = compute_unconstrained_cauchy_point(g, b, delta);
289
290    // Check if unconstrained Cauchy point satisfies linearized constraints
291    let mut constraint_violated = false;
292    for i in 0..n_constr {
293        if constraints[i].kind == ConstraintKind::Inequality {
294            let grad_c_dot_p = (0..n).map(|j| a[[i, j]] * p_unconstrained[j]).sum::<f64>();
295            if c[i] + grad_c_dot_p < -ctol {
296                constraint_violated = true;
297                break;
298            }
299        }
300    }
301
302    // If unconstrained point is feasible, use it
303    if !constraint_violated {
304        // Compute predicted reduction
305        let g_dot_p = g.dot(&p_unconstrained);
306        let bp = b.dot(&p_unconstrained);
307        let p_dot_bp = p_unconstrained.dot(&bp);
308        let predicted_reduction = -g_dot_p - 0.5 * p_dot_bp;
309
310        return (p_unconstrained, predicted_reduction);
311    }
312
313    // Otherwise, project onto the linearized feasible region
314    // This is a simplified approach - in practice, you would solve a CQP
315
316    // Start with the steepest descent direction
317    let mut p = Array1::zeros(n);
318    for i in 0..n {
319        p[i] = -g[i];
320    }
321
322    // Normalize to trust region radius
323    let p_norm = p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt();
324    if p_norm > 1e-10 {
325        p = &p * (delta / p_norm);
326    }
327
328    // Project onto each constraint
329    for _iter in 0..5 {
330        // Limited iterations for projection
331        let mut max_viol = 0.0;
332        let mut most_violated = 0;
333
334        // Find most violated constraint
335        for i in 0..n_constr {
336            if constraints[i].kind == ConstraintKind::Inequality {
337                let grad_c_dot_p = (0..n).map(|j| a[[i, j]] * p[j]).sum::<f64>();
338                let viol = -(c[i] + grad_c_dot_p);
339                if viol > max_viol {
340                    max_viol = viol;
341                    most_violated = i;
342                }
343            }
344        }
345
346        if max_viol < ctol {
347            break;
348        }
349
350        // Project p onto the constraint
351        let mut a_norm_sq = 0.0;
352        for j in 0..n {
353            a_norm_sq += a[[most_violated, j]] * a[[most_violated, j]];
354        }
355
356        if a_norm_sq > 1e-10 {
357            let grad_c_dot_p = (0..n).map(|j| a[[most_violated, j]] * p[j]).sum::<f64>();
358            let proj_dist = (c[most_violated] + grad_c_dot_p) / a_norm_sq;
359
360            // Project p
361            for j in 0..n {
362                p[j] += a[[most_violated, j]] * proj_dist;
363            }
364
365            // Rescale to trust region
366            let p_norm = p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt();
367            if p_norm > delta {
368                p = &p * (delta / p_norm);
369            }
370        }
371    }
372
373    // Compute predicted reduction
374    let g_dot_p = g.dot(&p);
375    let bp = b.dot(&p);
376    let p_dot_bp = p.dot(&bp);
377    let predicted_reduction = -g_dot_p - 0.5 * p_dot_bp;
378
379    (p, predicted_reduction)
380}
381
382/// Compute the unconstrained Cauchy point (steepest descent to trust region boundary)
383fn compute_unconstrained_cauchy_point(g: &Array1<f64>, b: &Array2<f64>, delta: f64) -> Array1<f64> {
384    let n = g.len();
385
386    // Compute the steepest descent direction: -g
387    let mut p = Array1::zeros(n);
388    for i in 0..n {
389        p[i] = -g[i];
390    }
391
392    // Compute g^T B g and ||g||^2
393    let bg = b.dot(g);
394    let g_dot_bg = g.dot(&bg);
395    let g_norm_sq = g.dot(g);
396
397    // Check if the gradient is practically zero
398    if g_norm_sq < 1e-10 {
399        // If gradient is practically zero, don't move
400        return Array1::zeros(n);
401    }
402
403    // Compute tau (step to the boundary)
404    let tau = if g_dot_bg <= 0.0 {
405        // Negative curvature or zero curvature case
406        delta / g_norm_sq.sqrt()
407    } else {
408        // Positive curvature case
409        f64::min(delta / g_norm_sq.sqrt(), g_norm_sq / g_dot_bg)
410    };
411
412    // Scale the direction by tau
413    for i in 0..n {
414        p[i] *= tau;
415    }
416
417    p
418}