1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
use crate::indicators::hilberttransform::{State, C0, C1, C2, C3};
#[cfg(feature = "simd_assets")]
pub use crate::indicators::simd_indicators::by_asset::hilberttransform::indicator_by_assets;
#[cfg(feature = "simd_options")]
pub use crate::indicators::simd_indicators::by_option::hilberttransform::indicator_by_options;
use crate::indicators::simd_indicators::roofingfilter_simd::SimdState as RfSimdState;
use crate::ring_buffer::fixed_single_buffer::{FixedRingBuffer, FixedSimdRingBuffer};
use std::simd::{Simd, StdFloat};
/// Pre-broadcast SIMD constants for the seven-tap Hilbert kernel.
///
/// Stores [`C0`]–[`C3`] as `Simd::splat` values so that per-bar coefficient
/// multiplies are pure vector operations with no scalar-to-vector broadcast cost.
pub struct COEFS<const N: usize>;
impl<const N: usize> COEFS<N> {
pub const C0: Simd<f64, N> = Simd::splat(C0);
pub const C1: Simd<f64, N> = Simd::splat(C1);
pub const C2: Simd<f64, N> = Simd::splat(C2);
pub const C3: Simd<f64, N> = Simd::splat(C3);
pub const GAIN: Simd<f64, N> = Simd::splat(1.0);
}
/// Applies the 7-tap Hilbert kernel to a full ring buffer with an optional gain.
///
/// Returns `(center_tap, hilbert_sum × gain)` — i.e. `(I, Q)` semantics:
/// * `I` = `buf[3]` — the 3-bar-ago in-phase tap (not scaled by gain).
/// * `Q` = `(0.0962·buf[0] + 0.5769·buf[2] − 0.5769·buf[4] − 0.0962·buf[6]) × gain`.
///
/// Pass `gain = 1.0` for a plain (non-adaptive) transform; the compiler folds
/// the `× 1.0` away. Pass `0.075 · Period[1] + 0.54` for the Ehlers adaptive
/// variant used by the Homodyne Discriminator.
///
/// The buffer must be full (`buf.is_full() == true`) before calling.
#[inline(always)]
pub fn ht_kernel<const N: usize>(
buf: &FixedRingBuffer<Simd<f64, N>, 7>,
gain: Simd<f64, N>,
) -> (Simd<f64, N>, Simd<f64, N>) {
let q = ht_kernel_base(buf, gain);
(buf[3], q)
}
#[inline(always)]
pub fn ht_kernel_base<const N: usize>(
buf: &FixedRingBuffer<Simd<f64, N>, 7>,
gain: Simd<f64, N>,
) -> Simd<f64, N> {
let q = (COEFS::<N>::C0.mul_add(buf[0], COEFS::<N>::C1 * buf[2])
+ COEFS::<N>::C2.mul_add(buf[4], COEFS::<N>::C3 * buf[6]))
* gain;
q
}
/// Applies the 7-tap Hilbert kernel to two independent full SIMD ring buffers simultaneously.
///
/// SIMD equivalent of the scalar [`ht_kernel_pair`](crate::indicators::hilberttransform::ht_kernel_pair).
/// Interleaves the four FMAs across `buf_a` and `buf_b` so the CPU sees them as
/// independent operations across both the SIMD width and the two buffers.
///
/// Returns `(I_a, Q_a, I_b, Q_b)` where `I = buf[3]` and `Q = hilbert_sum × gain`.
/// Both buffers must be full before calling.
#[inline(always)]
pub fn ht_kernel_pair<const N: usize>(
buf_a: &FixedRingBuffer<Simd<f64, N>, 7>,
buf_b: &FixedRingBuffer<Simd<f64, N>, 7>,
gain: Simd<f64, N>,
) -> (Simd<f64, N>, Simd<f64, N>, Simd<f64, N>, Simd<f64, N>) {
let qa_hi = COEFS::<N>::C0.mul_add(buf_a[0], COEFS::<N>::C1 * buf_a[2]);
let qb_hi = COEFS::<N>::C0.mul_add(buf_b[0], COEFS::<N>::C1 * buf_b[2]);
let qa_lo = COEFS::<N>::C2.mul_add(buf_a[4], COEFS::<N>::C3 * buf_a[6]);
let qb_lo = COEFS::<N>::C2.mul_add(buf_b[4], COEFS::<N>::C3 * buf_b[6]);
(
buf_a[3],
(qa_hi + qa_lo) * gain,
buf_b[3],
(qb_hi + qb_lo) * gain,
)
}
/// SIMD-parallel state for computing the Hilbert Transform across `N` assets simultaneously.
///
/// Holds two pieces of state:
/// * `buffer` — seven-slot ring buffer of SIMD vectors fed by the roofing filter output;
/// used to compute the in-phase (`I`) and quadrature (`Q`) components.
/// * `rf_state` — cascaded roofing filter (HighPass → SuperSmoother) that band-limits
/// the input before the Hilbert kernel.
pub struct SimdState<const N: usize> {
buffer: FixedRingBuffer<Simd<f64, N>, 7>,
rf_state: RfSimdState<N>,
}
impl<const N: usize> SimdState<N> {
/// Gathers `N` scalar [`State`] references into a single [`SimdState`],
/// packing each asset's ring buffer and roofing-filter state into their
/// respective SIMD lanes.
pub fn new(states: &mut [&mut State]) -> Self {
let mut buffer_refs = Vec::with_capacity(N);
let mut rf_state = Vec::with_capacity(N);
for state in states.iter_mut() {
buffer_refs.push(&state.buffer);
rf_state.push(&mut state.rf_state);
}
let rf_state = RfSimdState::new(&mut rf_state);
let buffer = FixedSimdRingBuffer::from_f64_buffers(&buffer_refs);
Self { rf_state, buffer }
}
/// Writes the SIMD state back into `N` existing mutable scalar [`State`] references in place,
/// scattering each lane's ring buffer and roofing-filter state back to its corresponding asset.
pub fn write_states(&self, states: &mut [&mut State]) {
let mut rf_refs = Vec::with_capacity(N);
let buffers = self.buffer.to_f64_buffers();
// Collect references and values
for (buffer, state) in buffers.into_iter().zip(states.iter_mut()) {
state.buffer = buffer;
rf_refs.push(&mut state.rf_state);
}
self.rf_state.write_states(&mut rf_refs);
}
/// Applies the seven-tap Hilbert kernel across all `N` assets simultaneously.
///
/// Pushes `real` (the roofing-filter output) into the ring buffer, then computes:
/// * **Q** (quadrature) — two independent FMAs on the even-offset taps
/// (`buf[0]`, `buf[2]`, `buf[4]`, `buf[6]`) with coefficients `C0`–`C3`.
/// * **I** (in-phase) — the center tap `buf[3]` (a 3-bar-ago sample).
///
/// # Returns
///
/// `(in_phase, quadrature)` as a pair of `Simd<f64, N>` vectors, one value per asset lane.
#[inline(always)]
pub fn calc_transform_simd(&mut self, real: Simd<f64, N>) -> (Simd<f64, N>, Simd<f64, N>) {
self.buffer.push(real);
ht_kernel(&self.buffer, COEFS::GAIN)
}
/// Unsafe variant of [`calc_transform`](Self::calc_transform) that skips the
/// ring-buffer fullness check on push.
///
/// # Safety
///
/// The caller must ensure the buffer is full (`buffer.len() == 7`) before calling.
#[inline(always)]
pub unsafe fn calc_transform_simd_unchecked(
&mut self,
real: Simd<f64, N>,
) -> (Simd<f64, N>, Simd<f64, N>) {
self.buffer.push_unchecked(real);
ht_kernel(&self.buffer, COEFS::<N>::GAIN)
}
/// Advances the full Hilbert Transform pipeline by one bar across all `N` assets simultaneously.
///
/// Applies the roofing filter (HighPass → SuperSmoother) to `real`, then passes
/// the band-limited result through the Hilbert kernel, matching the scalar `State::calc` logic.
///
/// # Arguments
///
/// * `real` - SIMD vector of current input prices, one per asset lane.
/// * `multipliers` - Pre-broadcast roofing-filter coefficients `((ss_a1, ss_a2, ss_b0), (hp_a1, hp_a2))`.
///
/// # Returns
///
/// `(in_phase, quadrature, roofing, highpass)` — four `Simd<f64, N>` vectors, one value per asset lane.
#[inline(always)]
pub fn calc_simd(
&mut self,
real: Simd<f64, N>,
multipliers: (
(Simd<f64, N>, Simd<f64, N>, Simd<f64, N>),
(Simd<f64, N>, Simd<f64, N>),
),
) -> (Simd<f64, N>, Simd<f64, N>, Simd<f64, N>, Simd<f64, N>) {
let (rf, hp) = self.rf_state.calc_simd(real, multipliers);
let (i, q) = self.calc_transform_simd(rf);
(i, q, rf, hp)
}
/// Unsafe variant of [`calc`](Self::calc) that skips the ring-buffer fullness check.
///
/// # Safety
///
/// The caller must ensure the buffer is full (`buffer.len() == 7`) before calling.
#[inline(always)]
pub unsafe fn calc_simd_unchecked(
&mut self,
real: Simd<f64, N>,
multipliers: (
(Simd<f64, N>, Simd<f64, N>, Simd<f64, N>),
(Simd<f64, N>, Simd<f64, N>),
),
) -> (Simd<f64, N>, Simd<f64, N>, Simd<f64, N>, Simd<f64, N>) {
let (rf, hp) = self.rf_state.calc_simd(real, multipliers);
let (i, q) = self.calc_transform_simd_unchecked(rf);
(i, q, rf, hp)
}
}