1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
//! SIMD-parallel state structs for the Ehlers TrendMode.
//!
//! Two sub-modules are provided for the two SIMD parallelism modes:
//!
//! - [`assets`] — `N` independent assets with the same α. Each lane has its own
//! HD pipeline (`HdSimdState<N>`), its own CC pipeline, and its own price input.
//! - [`options`] — 1 asset with `N` different α values. HD is a single shared
//! scalar state (all lanes see the same price); CC runs in SIMD with per-lane
//! multipliers.
//!
//! ## Why two separate `SimdState` structs?
//!
//! The only structural difference is the `hd` field:
//! - `assets` needs `HdSimdState<N>` (N independent HD pipelines).
//! - `options` needs a scalar `homodynediscriminator::State` (one shared HD).
//!
//! ## Shared computation
//!
//! Everything after the CC step — peak envelope update and TrendMode classification
//! — is identical in both modes and is factored into the module-level
//! [`trendmode_pipeline`] function.
//!
//! `pk` is `Simd<f64, N>` so the peak update and mode detection are fully
//! vectorised with no per-lane scalar loop.
#[cfg(feature = "simd_assets")]
pub use crate::indicators::simd_indicators::by_asset::trendmode::indicator_by_assets;
#[cfg(feature = "simd_options")]
pub use crate::indicators::simd_indicators::by_option::trendmode::indicator_by_options;
use std::simd::{cmp::SimdPartialOrd, num::SimdFloat, Select, Simd};
/// Post-CC stage of TrendMode, shared by the `assets` and `options` modes.
///
/// Refreshes the decaying peak envelope in place and classifies every lane
/// using SIMD operations only — there is no per-lane loop.
///
/// Steps:
/// 1. `pk ← max(pk × 0.991, |cycle|)`, written back through `pk`.
/// 2. A lane yields `1.0` when `pk > 0` and `|cycle| < 0.2 × pk`, otherwise `0.0`.
#[inline(always)]
fn trendmode_pipeline<const N: usize>(cycle: Simd<f64, N>, pk: &mut Simd<f64, N>) -> Simd<f64, N> {
    // |cycle| feeds both the envelope update and the classification.
    let amplitude = cycle.abs();

    let envelope = (*pk * Simd::splat(0.991)).simd_max(amplitude);
    *pk = envelope;

    let envelope_positive = envelope.simd_gt(Simd::splat(0.0));
    let amplitude_is_small = amplitude.simd_lt(Simd::splat(0.2) * envelope);
    let is_trend = envelope_positive & amplitude_is_small;

    is_trend.select(Simd::splat(1.0_f64), Simd::splat(0.0_f64))
}
// ─────────────────────────────────────────────────────────────────────────────
// assets — N assets, same alpha
// ─────────────────────────────────────────────────────────────────────────────
/// SIMD state for `N` assets with the same α (used by `indicator_by_assets`).
pub mod assets {
use super::trendmode_pipeline;
use crate::indicators::simd_indicators::cybercycle_simd::SimdState as CcSimdState;
use crate::indicators::simd_indicators::homodynediscriminator_simd::SimdState as HdSimdState;
use crate::indicators::trendmode;
use std::simd::{num::SimdFloat, Simd};
/// SIMD state for `N` assets that share one α.
///
/// `hd` is `HdSimdState<N>` because each asset has an independent price
/// history requiring its own HD pipeline. `pk` is `Simd<f64, N>` — the peak
/// envelope update and mode detection are fully vectorised.
///
/// Instances are built from per-asset scalar states with `new` and copied
/// back with `write_states`; lane `j` of `pk` mirrors `states[j].pk`.
pub struct SimdState<const N: usize> {
/// N independent HD pipelines — one per asset.
pub hd: HdSimdState<N>,
/// N independent CC pipelines — one per asset.
pub cc: CcSimdState<N>,
/// Per-asset decaying peak amplitude, refreshed every bar as
/// `max(previous pk × 0.991, |cycle|)`.
pub pk: Simd<f64, N>,
}
impl<const N: usize> SimdState<N> {
    /// Builds a `SimdState` by gathering `N` scalar [`trendmode::State`]s, one per lane.
    ///
    /// # Panics
    ///
    /// Panics if `states` holds fewer than `N` entries, because the peak
    /// gather indexes `states[0]` through `states[N - 1]`.
    pub fn new(states: &mut [&mut trendmode::State]) -> Self {
        let peaks: [f64; N] = std::array::from_fn(|lane| states[lane].pk);

        let hd_refs: Vec<&mut _> = states.iter_mut().map(|state| &mut state.hd).collect();
        let hd = HdSimdState::new(&hd_refs);

        let mut cc_refs: Vec<&mut _> = states.iter_mut().map(|state| &mut state.cc).collect();
        let cc = CcSimdState::new(&mut cc_refs);

        Self {
            hd,
            cc,
            pk: Simd::from_array(peaks),
        }
    }

    /// Copies the SIMD state back out into `N` scalar [`trendmode::State`]s.
    ///
    /// HD is written first, then CC, then the per-lane peak values.
    ///
    /// # Panics
    ///
    /// Panics if `states` holds fewer than `N` entries.
    pub fn write_states(&self, states: &mut [&mut trendmode::State]) {
        let mut hd_refs: Vec<&mut _> = states.iter_mut().map(|state| &mut state.hd).collect();
        self.hd.write_states(&mut hd_refs);

        let mut cc_refs: Vec<&mut _> = states.iter_mut().map(|state| &mut state.cc).collect();
        self.cc.write_states(&mut cc_refs);

        let peaks = self.pk.to_array();
        for (lane, &peak) in peaks.iter().enumerate() {
            states[lane].pk = peak;
        }
    }

    /// Processes one bar of TrendMode for all `N` assets at once.
    ///
    /// HD and CC both advance in SIMD; the peak envelope and the
    /// classification are then handled by [`trendmode_pipeline`], so no
    /// scalar loop is involved.
    ///
    /// Returns a `Simd<f64, N>` holding `1.0` (Trend) or `0.0` (Cycle) per lane.
    ///
    /// # Safety
    ///
    /// All HD and CC ring buffers must be full. Guaranteed after
    /// [`trendmode::State::init_state`] for every lane.
    #[inline(always)]
    pub unsafe fn calc_simd_unchecked(
        &mut self,
        real: Simd<f64, N>,
        multipliers: (Simd<f64, N>, Simd<f64, N>, Simd<f64, N>),
    ) -> Simd<f64, N> {
        // The HD result is not consumed on this path; the call only advances HD state.
        self.hd.calc_simd_unchecked(real);
        trendmode_pipeline(self.cc.calc_simd_unchecked(real, multipliers), &mut self.pk)
    }

    /// Processes one bar of TrendMode for `N` assets with an **adaptive α per lane**.
    ///
    /// After HD advances, each lane's α is derived from its own
    /// `smooth_period` as `2 / (max(smooth_period, 3) + 1)`. The three CC
    /// multipliers `((1 − α/2)², 2·(1 − α), (1 − α)²)` are then computed per
    /// lane and passed to CC, followed by [`trendmode_pipeline`].
    ///
    /// # Safety
    ///
    /// All HD and CC ring buffers must be full. Guaranteed after
    /// [`trendmode::State::init_state`] for every lane.
    #[inline(always)]
    pub unsafe fn calc_simd_unchecked_adaptive(&mut self, real: Simd<f64, N>) -> Simd<f64, N> {
        self.hd.calc_simd_unchecked(real);

        let one: Simd<f64, N> = Simd::splat(1.0_f64);
        let two: Simd<f64, N> = Simd::splat(2.0_f64);

        // Periods below 3 are clamped up to 3 before deriving α.
        let period = self.hd.smooth_period.simd_max(Simd::splat(3.0_f64));
        let alpha = two / (period + one);

        let one_minus_half_alpha = one - Simd::splat(0.5_f64) * alpha;
        let one_minus_alpha = one - alpha;
        let multipliers = (
            one_minus_half_alpha * one_minus_half_alpha,
            two * one_minus_alpha,
            one_minus_alpha * one_minus_alpha,
        );

        let cycle = self.cc.calc_simd_unchecked(real, multipliers);
        trendmode_pipeline(cycle, &mut self.pk)
    }
}
}
// ─────────────────────────────────────────────────────────────────────────────
// options — 1 asset, N alpha values
// ─────────────────────────────────────────────────────────────────────────────
/// SIMD state for 1 asset with `N` different α values (used by `indicator_by_options`).
pub mod options {
use super::trendmode_pipeline;
use crate::indicators::homodynediscriminator;
use crate::indicators::simd_indicators::cybercycle_simd::SimdState as CcSimdState;
use crate::indicators::trendmode;
use std::simd::Simd;
/// SIMD state for 1 asset with N different α values.
///
/// `hd` is a single scalar state because all N option lanes process the same
/// price series — they share one HD output. `pk` is `Simd<f64, N>` as in the
/// assets case.
///
/// `new` clones the shared HD from `states[0]`, and `write_states` clones it
/// back into each of the first `N` states.
pub struct SimdState<const N: usize> {
/// Single shared HD state — same price input for all N lanes.
pub hd: homodynediscriminator::State,
/// N CC pipelines with per-lane α multipliers.
pub cc: CcSimdState<N>,
/// Per-lane decaying peak amplitude, refreshed every bar as
/// `max(previous pk × 0.991, |cycle|)`.
pub pk: Simd<f64, N>,
}
impl<const N: usize> SimdState<N> {
    /// Builds a `SimdState` by gathering `N` scalar [`trendmode::State`]s, one per lane.
    ///
    /// Every lane sees the same price, so the HD states are identical and
    /// `states[0].hd` is cloned to serve as the single shared scalar HD.
    ///
    /// # Panics
    ///
    /// Panics if `states` holds fewer than `N` entries (or is empty).
    pub fn new(states: &mut [&mut trendmode::State]) -> Self {
        let hd = states[0].hd.clone();
        let peaks: [f64; N] = std::array::from_fn(|lane| states[lane].pk);

        let mut cc_refs: Vec<&mut _> = states.iter_mut().map(|state| &mut state.cc).collect();
        let cc = CcSimdState::new(&mut cc_refs);

        Self {
            hd,
            cc,
            pk: Simd::from_array(peaks),
        }
    }

    /// Copies the SIMD state back out into `N` scalar [`trendmode::State`]s.
    ///
    /// CC is written first; each of the first `N` states then receives its
    /// own clone of the shared HD together with its lane's peak value.
    ///
    /// # Panics
    ///
    /// Panics if `states` holds fewer than `N` entries.
    pub fn write_states(&self, states: &mut [&mut trendmode::State]) {
        let mut cc_refs: Vec<&mut _> = states.iter_mut().map(|state| &mut state.cc).collect();
        self.cc.write_states(&mut cc_refs);

        let peaks = self.pk.to_array();
        for (lane, &peak) in peaks.iter().enumerate() {
            states[lane].hd = self.hd.clone();
            states[lane].pk = peak;
        }
    }

    /// Processes one bar of TrendMode for all `N` α-option lanes at once.
    ///
    /// The shared HD advances exactly once, fed with `real[0]`. CC runs in
    /// SIMD with per-lane multipliers, and the result goes through
    /// [`trendmode_pipeline`].
    ///
    /// Returns a `Simd<f64, N>` holding `1.0` (Trend) or `0.0` (Cycle) per lane.
    ///
    /// # Safety
    ///
    /// All HD and CC ring buffers must be full. Guaranteed after
    /// [`trendmode::State::init_state`] for every lane.
    #[inline(always)]
    pub unsafe fn calc_simd_unchecked(
        &mut self,
        real: Simd<f64, N>,
        multipliers: (Simd<f64, N>, Simd<f64, N>, Simd<f64, N>),
    ) -> Simd<f64, N> {
        // Every lane carries the same price, so lane 0 drives the scalar HD.
        let shared_price = real[0];
        self.hd.calc_unchecked(shared_price);

        trendmode_pipeline(self.cc.calc_simd_unchecked(real, multipliers), &mut self.pk)
    }

    /// Steps the shared scalar HD by one bar and returns its updated `smooth_period`.
    ///
    /// Intended to be called before [`Self::advance_cc`] when the multipliers
    /// are recomputed adaptively on every bar.
    ///
    /// # Safety
    ///
    /// All HD ring buffers must be full on entry.
    #[inline(always)]
    pub unsafe fn advance_hd(&mut self, price: f64) -> f64 {
        self.hd.calc_unchecked(price);
        self.hd.smooth_period
    }

    /// Steps CC and the peak pipeline by one bar using per-lane `multipliers`.
    ///
    /// Counterpart of [`Self::advance_hd`]: the caller derives the per-lane
    /// SIMD multipliers (e.g. via adaptive mask+select) and supplies them here.
    ///
    /// # Safety
    ///
    /// CC ring buffers must be full on entry.
    #[inline(always)]
    pub unsafe fn advance_cc(
        &mut self,
        real: Simd<f64, N>,
        multipliers: (Simd<f64, N>, Simd<f64, N>, Simd<f64, N>),
    ) -> Simd<f64, N> {
        trendmode_pipeline(self.cc.calc_simd_unchecked(real, multipliers), &mut self.pk)
    }
}
}