clarabel/solver/core/cones/
powcone.rs

1use super::*;
2use crate::algebra::*;
3
4// -------------------------------------
5// Power Cone
6// -------------------------------------
7
8pub struct PowerCone<T> {
9    // power defining the cone
10    α: T,
11    // Hessian of the dual barrier at z
12    H_dual: DenseMatrixSym3<T>,
13
14    // scaling matrix, i.e. μH(z)
15    Hs: DenseMatrixSym3<T>,
16
17    // gradient of the dual barrier at z
18    grad: [T; 3],
19
20    // holds copy of z at scaling point
21    z: [T; 3],
22}
23
24impl<T> PowerCone<T>
25where
26    T: FloatT,
27{
28    pub fn new(α: T) -> Self {
29        Self {
30            α,
31            H_dual: DenseMatrixSym3::zeros(),
32            Hs: DenseMatrixSym3::zeros(),
33            grad: [T::zero(); 3],
34            z: [T::zero(); 3],
35        }
36    }
37}
38
39impl<T> Cone<T> for PowerCone<T>
40where
41    T: FloatT,
42{
43    fn degree(&self) -> usize {
44        3
45    }
46
47    fn numel(&self) -> usize {
48        3
49    }
50
51    fn is_symmetric(&self) -> bool {
52        false
53    }
54
55    fn is_sparse_expandable(&self) -> bool {
56        false
57    }
58
59    fn allows_primal_dual_scaling(&self) -> bool {
60        true
61    }
62
63    fn rectify_equilibration(&self, δ: &mut [T], e: &[T]) -> bool {
64        δ.copy_from(e).recip().scale(e.mean());
65        true // scalar equilibration
66    }
67
68    fn margins(&mut self, _z: &mut [T], _pd: PrimalOrDualCone) -> (T, T) {
69        // We should never end up shifting to this cone, since
70        // asymmetric problems should always use unit_initialization
71        unreachable!();
72    }
73    fn scaled_unit_shift(&self, _z: &mut [T], _α: T, _pd: PrimalOrDualCone) {
74        // We should never end up shifting to this cone, since
75        // asymmetric problems should always use unit_initialization
76        unreachable!();
77    }
78
79    fn unit_initialization(&self, z: &mut [T], s: &mut [T]) {
80        let α = self.α;
81
82        s[0] = (T::one() + α).sqrt();
83        s[1] = (T::one() + (T::one() - α)).sqrt();
84        s[2] = T::zero();
85
86        (z[0], z[1], z[2]) = (s[0], s[1], s[2]);
87    }
88
89    fn set_identity_scaling(&mut self) {
90        // We should never use identity scaling because
91        // we never want to allow symmetric initialization
92        unreachable!();
93    }
94
95    fn update_scaling(
96        &mut self,
97        s: &[T],
98        z: &[T],
99        μ: T,
100        scaling_strategy: ScalingStrategy,
101    ) -> bool {
102        // update both gradient and Hessian for function f*(z) at the point z
103        self.update_dual_grad_H(z);
104
105        // update the scaling matrix Hs
106        self.update_Hs(s, z, μ, scaling_strategy);
107
108        // K.z .= z
109        self.z.copy_from(z);
110
111        true
112    }
113
114    fn Hs_is_diagonal(&self) -> bool {
115        false
116    }
117
118    fn get_Hs(&self, Hsblock: &mut [T]) {
119        // Hs data is already in packed triu form, so just copy
120        Hsblock.copy_from(&self.Hs.data);
121    }
122
123    fn mul_Hs(&mut self, y: &mut [T], x: &[T], _work: &mut [T]) {
124        self.Hs.mul(y, x);
125    }
126
127    fn affine_ds(&self, ds: &mut [T], s: &[T]) {
128        ds.copy_from(s);
129    }
130
131    fn combined_ds_shift(&mut self, shift: &mut [T], step_z: &mut [T], step_s: &mut [T], σμ: T) {
132        //3rd order correction requires input variables.z
133
134        let mut η = [T::zero(); 3];
135        self.higher_correction(&mut η, step_s, step_z);
136
137        for i in 0..3 {
138            shift[i] = self.grad[i] * σμ - η[i];
139        }
140    }
141
142    fn Δs_from_Δz_offset(&mut self, out: &mut [T], ds: &[T], _work: &mut [T], _z: &[T]) {
143        out.copy_from(ds);
144    }
145
146    fn step_length(
147        &mut self,
148        dz: &[T],
149        ds: &[T],
150        z: &[T],
151        s: &[T],
152        settings: &CoreSettings<T>,
153        αmax: T,
154    ) -> (T, T) {
155        let step = settings.linesearch_backtrack_step;
156        let αmin = settings.min_terminate_step_length;
157        let mut work = [T::zero(); 3];
158
159        let _is_prim_feasible_fcn = |s: &[T]| -> bool { self.is_primal_feasible(s) };
160        let _is_dual_feasible_fcn = |s: &[T]| -> bool { self.is_dual_feasible(s) };
161
162        let αz = backtrack_search(dz, z, αmax, αmin, step, _is_dual_feasible_fcn, &mut work);
163        let αs = backtrack_search(ds, s, αmax, αmin, step, _is_prim_feasible_fcn, &mut work);
164
165        (αz, αs)
166    }
167
168    fn compute_barrier(&mut self, z: &[T], s: &[T], dz: &[T], ds: &[T], α: T) -> T {
169        let mut barrier = T::zero();
170
171        let cur_z = [z[0] + α * dz[0], z[1] + α * dz[1], z[2] + α * dz[2]];
172        let cur_s = [s[0] + α * ds[0], s[1] + α * ds[1], s[2] + α * ds[2]];
173
174        barrier += self.barrier_dual(&cur_z);
175        barrier += self.barrier_primal(&cur_s);
176
177        barrier
178    }
179}
180
181//-------------------------------------
182// primal-dual scaling
183//-------------------------------------
184
185impl<T> NonsymmetricCone<T> for PowerCone<T>
186where
187    T: FloatT,
188{
189    // Returns true if s is primal feasible
190    fn is_primal_feasible(&self, s: &[T]) -> bool
191    where
192        T: FloatT,
193    {
194        let α = self.α;
195        let two: T = (2f64).as_T();
196        if s[0] > T::zero() && s[1] > T::zero() {
197            let res = T::exp(two * α * s[0].logsafe() + two * (T::one() - α) * s[1].logsafe())
198                - s[2] * s[2];
199            if res > T::zero() {
200                return true;
201            }
202        }
203        false
204    }
205
206    // Returns true if z is dual feasible
207    fn is_dual_feasible(&self, z: &[T]) -> bool
208    where
209        T: FloatT,
210    {
211        let α = self.α;
212        let two: T = (2.).as_T();
213
214        if z[0] > T::zero() && z[1] > T::zero() {
215            let res = T::exp(
216                (α * two) * (z[0] / α).logsafe()
217                    + (T::one() - α) * (z[1] / (T::one() - α)).logsafe() * two,
218            ) - z[2] * z[2];
219            if res > T::zero() {
220                return true;
221            }
222        }
223        false
224    }
225
226    fn barrier_primal(&mut self, s: &[T]) -> T
227    where
228        T: FloatT,
229    {
230        // Primal barrier: f(s) = ⟨s,g(s)⟩ - f*(-g(s))
231        // NB: ⟨s,g(s)⟩ = -3 = - ν
232
233        let α = self.α;
234        let two: T = (2.).as_T();
235        let three: T = (3.).as_T();
236
237        let g = self.gradient_primal(s);
238
239        let mut out = T::zero();
240
241        out += ((-g[0] / α).powf(two * α) * (-g[1] / (T::one() - α)).powf(two - α * two)
242            - g[2] * g[2])
243            .logsafe();
244        out += (T::one() - α) * (-g[0]).logsafe();
245        out += α * (-g[1]).logsafe() - three;
246        out
247    }
248
249    fn barrier_dual(&mut self, z: &[T]) -> T
250    where
251        T: FloatT,
252    {
253        // Dual barrier:
254        // f*(z) = -log((z1/α)^{2α} * (z2/(1-α))^{2(1-α)} - z3*z3) - (1-α)*log(z1) - α*log(z2):
255        let α = self.α;
256        let two: T = (2.).as_T();
257        let arg1 =
258            (z[0] / α).powf(two * α) * (z[1] / (T::one() - α)).powf(two - two * α) - z[2] * z[2];
259
260        -arg1.logsafe() - (T::one() - α) * z[0].logsafe() - α * z[1].logsafe()
261    }
262
263    fn higher_correction(&mut self, η: &mut [T], ds: &[T], v: &[T])
264    where
265        T: FloatT,
266    {
267        // u for H^{-1}*Δs
268        let H = &self.H_dual;
269        let mut u = [T::zero(); 3];
270        let z = &self.z;
271
272        //Fine to use symmetric here because the upper
273        //triangle is ignored anyway
274        let mut cholH = DenseMatrixSym3::zeros();
275
276        // solve H*u = ds
277        let is_success = cholH.cholesky_3x3_explicit_factor(H).is_ok();
278        if is_success {
279            cholH.cholesky_3x3_explicit_solve(&mut u[..], ds);
280        } else {
281            η.set(T::zero());
282            return;
283        }
284
285        let α = self.α;
286        let two: T = (2.).as_T();
287        let four: T = (4.).as_T();
288
289        let phi = (z[0] / α).powf(two * α) * (z[1] / (T::one() - α)).powf(two - two * α);
290        let ψ = phi - z[2] * z[2];
291
292        // Reuse cholH memory for further computation
293        let Hψ = &mut cholH;
294
295        η[0] = two * α * phi / z[0];
296        η[1] = two * (T::one() - α) * phi / z[1];
297        η[2] = -two * z[2];
298
299        // we only need to assign the upper triangle
300        // for our 3x3 symmetric type
301        Hψ[(0, 1)] = four * α * (T::one() - α) * phi / (z[0] * z[1]);
302        Hψ[(0, 0)] = two * α * (two * α - T::one()) * phi / (z[0] * z[0]);
303        Hψ[(0, 2)] = T::zero();
304        Hψ[(1, 1)] = two * (T::one() - α) * (T::one() - two * α) * phi / (z[1] * z[1]);
305        Hψ[(1, 2)] = T::zero();
306        Hψ[(2, 2)] = -two;
307
308        let dotψu = u.dot(η);
309        let dotψv = v.dot(η);
310
311        let mut Hψv = [T::zero(); 3];
312        Hψ.mul(&mut Hψv, v);
313
314        let coef = (u.dot(&Hψv) * ψ - two * dotψu * dotψv) / (ψ * ψ * ψ);
315        let coef2 = four
316            * α
317            * (two * α - T::one())
318            * (T::one() - α)
319            * phi
320            * (u[0] / z[0] - u[1] / z[1])
321            * (v[0] / z[0] - v[1] / z[1])
322            / ψ;
323        let inv_ψ2 = (ψ * ψ).recip();
324
325        η[0] = coef * η[0] - two * (T::one() - α) * u[0] * v[0] / (z[0] * z[0] * z[0])
326            + coef2 / z[0]
327            + Hψv[0] * dotψu * inv_ψ2;
328
329        η[1] = coef * η[1] - two * α * u[1] * v[1] / (z[1] * z[1] * z[1]) - coef2 / z[1]
330            + Hψv[1] * dotψu * inv_ψ2;
331
332        η[2] = coef * η[2] + Hψv[2] * dotψu * inv_ψ2;
333
334        // reuse vector Hψv
335        let Hψu = &mut Hψv;
336        Hψ.mul(Hψu, &u);
337
338        // @. η <= (η + Hψu*dotψv*inv_ψ2)/2
339        η[..].axpby(dotψv * inv_ψ2, Hψu, T::one());
340        η[..].scale((0.5).as_T());
341    }
342
343    // 3rd-order correction at the point z.  Output is η.
344    //
345    // 3rd order correction:
346    // η = -0.5*[(dot(u,Hψ,v)*ψ - 2*dotψu*dotψv)/(ψ*ψ*ψ)*gψ +
347    //            dotψu/(ψ*ψ)*Hψv + dotψv/(ψ*ψ)*Hψu -
348    //            dotψuv/ψ + dothuv]
349    // where:
350    // Hψ = [  2*α*(2*α-1)*ϕ/(z1*z1)     4*α*(1-α)*ϕ/(z1*z2)       0;
351    //         4*α*(1-α)*ϕ/(z1*z2)     2*(1-α)*(1-2*α)*ϕ/(z2*z2)   0;
352    //         0                       0                          -2;]
353
354    fn update_dual_grad_H(&mut self, z: &[T]) {
355        let H = &mut self.H_dual;
356        let α = self.α;
357        let two: T = (2.).as_T();
358        let four: T = (4.).as_T();
359
360        let phi = (z[0] / α).powf(two * α) * (z[1] / (T::one() - α)).powf(two - two * α);
361        let ψ = phi - z[2] * z[2];
362
363        // use K.grad as a temporary workspace
364        let gψ = &mut self.grad;
365        gψ[0] = two * α * phi / (z[0] * ψ);
366        gψ[1] = two * (T::one() - α) * phi / (z[1] * ψ);
367        gψ[2] = -two * z[2] / ψ;
368
369        // compute_Hessian(K,z,H).   Type is symmetric, so
370        // only need to assign upper triangle.
371        H[(0, 0)] = gψ[0] * gψ[0] - two * α * (two * α - T::one()) * phi / (z[0] * z[0] * ψ)
372            + (T::one() - α) / (z[0] * z[0]);
373        H[(0, 1)] = gψ[0] * gψ[1] - four * α * (T::one() - α) * phi / (z[0] * z[1] * ψ);
374        H[(1, 1)] = gψ[1] * gψ[1]
375            - two * (T::one() - α) * (T::one() - two * α) * phi / (z[1] * z[1] * ψ)
376            + α / (z[1] * z[1]);
377        H[(0, 2)] = gψ[0] * gψ[2];
378        H[(1, 2)] = gψ[1] * gψ[2];
379        H[(2, 2)] = gψ[2] * gψ[2] + two / ψ;
380
381        // compute the gradient at z
382        let grad = &mut self.grad;
383        grad[0] = -two * α * phi / (z[0] * ψ) - (T::one() - α) / z[0];
384        grad[1] = -two * (T::one() - α) * phi / (z[1] * ψ) - α / z[1];
385        grad[2] = two * z[2] / ψ;
386    }
387}
388
389impl<T> Nonsymmetric3DCone<T> for PowerCone<T>
390where
391    T: FloatT,
392{
393    // Compute the primal gradient of f(s) at s
394    fn gradient_primal(&self, s: &[T]) -> [T; 3]
395    where
396        T: FloatT,
397    {
398        let α = self.α;
399        let mut g = [T::zero(); 3];
400        let two: T = (2.).as_T();
401
402        // unscaled ϕ
403        let phi = (s[0]).powf(two * α) * (s[1]).powf(two - α * two);
404
405        // obtain last element of g from the Newton-Raphson method
406        let abs_s = s[2].abs();
407        if abs_s > T::epsilon() {
408            g[2] = _newton_raphson_powcone(abs_s, phi, α);
409            if s[2] < T::zero() {
410                g[2] = -g[2];
411            }
412            g[0] = -(α * g[2] * s[2] + T::one() + α) / s[0];
413            g[1] = -((T::one() - α) * g[2] * s[2] + two - α) / s[1];
414        } else {
415            g[2] = T::zero();
416            g[0] = -(T::one() + α) / s[0];
417            g[1] = -(two - α) / s[1];
418        }
419        g
420    }
421
422    //getters
423    fn split_borrow_mut(
424        &mut self,
425    ) -> (
426        &mut DenseMatrixSym3<T>,
427        &mut DenseMatrixSym3<T>,
428        &mut [T; 3],
429        &mut [T; 3],
430    ) {
431        (&mut self.H_dual, &mut self.Hs, &mut self.grad, &mut self.z)
432    }
433}
434
435// ----------------------------------------------
436//  internal operations for power cones
437//
438// Primal Power cone: s1^{α}s2^{1-α} ≥ s3, s1,s2 ≥ 0
439// Dual Power cone: (z1/α)^{α} * (z2/(1-α))^{1-α} ≥ z3, z1,z2 ≥ 0
440
441// Newton-Raphson method:
442// solve a one-dimensional equation f(x) = 0
443// x(k+1) = x(k) - f(x(k))/f'(x(k))
444// When we initialize x0 such that 0 < x0 < x*,
445// the Newton-Raphson method converges quadratically
446
447fn _newton_raphson_powcone<T>(s3: T, phi: T, α: T) -> T
448where
449    T: FloatT,
450{
451    let two: T = (2.).as_T();
452    let three: T = (3.).as_T();
453
454    // init point x0: since our dual barrier has an additional
455    // shift -2α*log(α) - 2(1-α)*log(1-α) > 0 in f(x),
456    // the previous selection is still feasible, i.e. f(x0) > 0
457
458    let x0 =
459        -s3.recip() + (s3 * two + T::sqrt((phi * phi) / (s3 * s3) + phi * three)) / (phi - s3 * s3);
460
461    // additional shift due to the choice of dual barrier
462    let t0 = -two * α * (α.logsafe()) - two * (T::one() - α) * (T::one() - α).logsafe();
463
464    // function for f(x) = 0
465    let f0 = {
466        |x: T| -> T {
467            let two = (2.).as_T();
468            let t1 = x * x;
469            let t2 = (x * two) / s3;
470            two * α * (two * α * t1 + (T::one() + α) * t2).logsafe()
471                + two * (T::one() - α) * (two * (T::one() - α) * t1 + (two - α) * t2).logsafe()
472                - phi.logsafe()
473                - (t1 + t2).logsafe()
474                - two * t2.logsafe()
475                + t0
476        }
477    };
478
479    // first derivative
480    let f1 = {
481        |x: T| -> T {
482            let two = (2.).as_T();
483            let t1 = x * x;
484            let t2 = (two * x) / s3;
485            (α * α * two) / (α * x + (T::one() + α) / s3)
486                + ((T::one() - α) * two) * (T::one() - α) / ((T::one() - α) * x + (two - α) / s3)
487                - ((x + s3.recip()) * two) / (t1 + t2)
488        }
489    };
490    newton_raphson_onesided(x0, f0, f1)
491}