// shape_runtime/simd_forward_fill.rs
//! SIMD-optimized forward-fill operations for table alignment
//!
//! This module provides high-performance forward-fill operations using
//! SIMD instructions when available.

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Forward-fill data using SIMD instructions for x86_64
///
/// Propagates the last non-NaN value forward to fill NaN gaps in `data`.
/// Positions before the first non-NaN value are left as NaN.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 (e.g. via
/// `is_x86_feature_detected!("avx2")`).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[allow(unused_assignments)] // the final carry assignment may never be read
unsafe fn forward_fill_avx2(data: &mut [f64]) {
    if data.is_empty() {
        return;
    }

    let mut last_valid = f64::NAN;
    let mut i = 0;

    // Skip the leading NaN run: there is nothing to fill it with.
    while i < data.len() && data[i].is_nan() {
        i += 1;
    }

    if i < data.len() {
        last_valid = data[i];
    }

    // Process 4 lanes at a time. A chunk is handled vectorized only when it
    // is uniformly NaN or uniformly valid; a mixed chunk must be handled
    // scalar so that a valid value propagates to NaNs *within* the same
    // chunk (a plain blend with `last_valid` would fill those lanes with a
    // stale value from before the chunk).
    while i + 4 <= data.len() {
        // SAFETY: i + 4 <= data.len(), so the 4-lane unaligned load is in bounds.
        let chunk = unsafe { _mm256_loadu_pd(data.as_ptr().add(i)) };

        // Lane-wise NaN test: NaN never compares ordered with itself.
        let nan_mask = _mm256_cmp_pd::<_CMP_UNORD_Q>(chunk, chunk);
        let nan_bits = _mm256_movemask_pd(nan_mask);

        if nan_bits == 0b1111 {
            // Entire chunk is NaN: broadcast the carried value into all lanes.
            let fill = _mm256_set1_pd(last_valid);
            // SAFETY: same bounds argument as the load above.
            unsafe { _mm256_storeu_pd(data.as_mut_ptr().add(i), fill) };
        } else if nan_bits == 0 {
            // No NaNs: nothing to fill, only the carry needs updating.
            last_valid = data[i + 3];
        } else {
            // Mixed chunk: scalar fill preserves intra-chunk propagation.
            for j in i..i + 4 {
                if data[j].is_nan() {
                    data[j] = last_valid;
                } else {
                    last_valid = data[j];
                }
            }
        }

        i += 4;
    }

    // Scalar tail for the remaining 0-3 elements.
    while i < data.len() {
        if data[i].is_nan() {
            data[i] = last_valid;
        } else {
            last_valid = data[i];
        }
        i += 1;
    }
}
70
/// Forward-fill data using SIMD instructions for x86_64 with SSE2
///
/// Same contract as [`forward_fill_avx2`], processing 2 lanes per step:
/// the last non-NaN value is propagated forward into NaN gaps, and
/// positions before the first non-NaN value are left as NaN.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
#[allow(unused_assignments)] // the final carry assignment may never be read
unsafe fn forward_fill_sse2(data: &mut [f64]) {
    if data.is_empty() {
        return;
    }

    let mut last_valid = f64::NAN;
    let mut i = 0;

    // Skip the leading NaN run: there is nothing to fill it with.
    while i < data.len() && data[i].is_nan() {
        i += 1;
    }

    if i < data.len() {
        last_valid = data[i];
    }

    // Process 2 lanes at a time. Uniform chunks (all-NaN or no-NaN) are
    // handled vectorized; a mixed chunk is filled scalar so a valid value
    // in lane 0 propagates to a NaN in lane 1 of the *same* chunk (a plain
    // mask-blend with `last_valid` would use a stale pre-chunk value).
    while i + 2 <= data.len() {
        // SAFETY: i + 2 <= data.len(), so the 2-lane unaligned load is in bounds.
        let chunk = unsafe { _mm_loadu_pd(data.as_ptr().add(i)) };

        // Lane-wise NaN test: NaN never compares ordered with itself.
        let nan_mask = _mm_cmpunord_pd(chunk, chunk);
        let nan_bits = _mm_movemask_pd(nan_mask);

        if nan_bits == 0b11 {
            // Both lanes NaN: broadcast the carried value.
            let fill = _mm_set1_pd(last_valid);
            // SAFETY: same bounds argument as the load above.
            unsafe { _mm_storeu_pd(data.as_mut_ptr().add(i), fill) };
        } else if nan_bits == 0 {
            // No NaNs: only the carry needs updating.
            last_valid = data[i + 1];
        } else {
            // Mixed chunk: scalar fill preserves intra-chunk propagation.
            for j in i..i + 2 {
                if data[j].is_nan() {
                    data[j] = last_valid;
                } else {
                    last_valid = data[j];
                }
            }
        }

        i += 2;
    }

    // At most one trailing element remains.
    if i < data.len() && data[i].is_nan() {
        data[i] = last_valid;
    }
}
131
/// Fallback scalar implementation for forward-fill
///
/// Walks the slice once, carrying the most recent non-NaN value and
/// writing it into every NaN slot. Positions before the first non-NaN
/// value remain NaN (they are overwritten with the initial NaN carry).
fn forward_fill_scalar(data: &mut [f64]) {
    let mut carry = f64::NAN;
    data.iter_mut().for_each(|slot| {
        if slot.is_nan() {
            *slot = carry;
        } else {
            carry = *slot;
        }
    });
}
148
149/// Forward-fill data with automatic SIMD detection
150///
151/// This function automatically selects the best available SIMD
152/// implementation based on CPU features.
153pub fn forward_fill(data: &mut [f64]) {
154    #[cfg(target_arch = "x86_64")]
155    {
156        if is_x86_feature_detected!("avx2") {
157            unsafe { forward_fill_avx2(data) }
158        } else if is_x86_feature_detected!("sse2") {
159            unsafe { forward_fill_sse2(data) }
160        } else {
161            forward_fill_scalar(data)
162        }
163    }
164
165    #[cfg(not(target_arch = "x86_64"))]
166    {
167        forward_fill_scalar(data)
168    }
169}
170
171/// Forward-fill with interpolation for upsampling
172///
173/// This function performs forward-fill with optional linear interpolation
174/// between known values for smoother upsampling.
175pub fn forward_fill_interpolate(
176    source: &[f64],
177    source_indices: &[usize],
178    target_size: usize,
179    interpolate: bool,
180) -> Vec<f64> {
181    let mut result = vec![f64::NAN; target_size];
182
183    if source.is_empty() || source_indices.is_empty() {
184        return result;
185    }
186
187    // Place source values at their indices
188    for (&idx, &value) in source_indices.iter().zip(source.iter()) {
189        if idx < target_size {
190            result[idx] = value;
191        }
192    }
193
194    if interpolate {
195        // Linear interpolation between known values
196        let mut prev_idx = None;
197        let mut prev_val = f64::NAN;
198
199        for (i, &idx) in source_indices.iter().enumerate() {
200            if idx >= target_size {
201                break;
202            }
203
204            let val = source[i];
205
206            if let Some(p_idx) = prev_idx {
207                // Interpolate between prev_idx and idx
208                let gap = idx - p_idx;
209                if gap > 1 {
210                    let step = (val - prev_val) / gap as f64;
211                    for j in 1..gap {
212                        result[p_idx + j] = prev_val + step * j as f64;
213                    }
214                }
215            }
216
217            prev_idx = Some(idx);
218            prev_val = val;
219        }
220
221        // Forward-fill remaining NaN values
222        forward_fill(&mut result);
223    } else {
224        // Simple forward-fill without interpolation
225        forward_fill(&mut result);
226    }
227
228    result
229}
230
231/// Batch forward-fill for multiple series
232///
233/// This function performs forward-fill on multiple series in parallel
234/// using SIMD instructions for maximum performance.
235pub fn batch_forward_fill(series: &mut [Vec<f64>]) {
236    // Process each series
237    for data in series.iter_mut() {
238        forward_fill(data);
239    }
240}