// runmat_runtime/builtins/math/poly/polyval.rs
1//! MATLAB-compatible `polyval` builtin with GPU-aware semantics for RunMat.
2
3use log::debug;
4use num_complex::Complex64;
5use runmat_accelerate_api::{HostTensorView, ProviderPolyvalMu, ProviderPolyvalOptions};
6use runmat_builtins::{ComplexTensor, LogicalArray, Tensor, Value};
7use runmat_macros::runtime_builtin;
8
9use crate::builtins::common::spec::{
10    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
11    ProviderHook, ReductionNaN, ResidencyPolicy, ScalarType, ShapeRequirements,
12};
13use crate::builtins::common::{gpu_helpers, tensor};
14use crate::builtins::math::poly::type_resolvers::polyval_type;
15use crate::{build_runtime_error, dispatcher::download_handle_async, BuiltinResult, RuntimeError};
16
/// Tolerance below which an imaginary component is treated as zero.
const EPS: f64 = 1.0e-12;
/// Builtin name attached to every runtime error raised from this module.
const BUILTIN_NAME: &str = "polyval";
19
/// GPU capability declaration for `polyval`, registered with the accelerate
/// layer. Real f32/f64 workloads run through the provider's custom "polyval"
/// hook; complex data and delta/prediction-interval paths fall back to host
/// evaluation (see `notes`).
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::math::poly::polyval")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "polyval",
    op_kind: GpuOpKind::Custom("polyval"),
    supported_precisions: &[ScalarType::F32, ScalarType::F64],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[ProviderHook::Custom("polyval")],
    constant_strategy: ConstantStrategy::UniformBuffer,
    // Results come back as freshly-allocated device handles.
    residency: ResidencyPolicy::NewHandle,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Uses provider-level Horner kernels for real coefficients/inputs; falls back to host evaluation (with upload) for complex or prediction-interval paths.",
};
36
37fn polyval_error(message: impl Into<String>) -> RuntimeError {
38    build_runtime_error(message)
39        .with_builtin(BUILTIN_NAME)
40        .build()
41}
42
/// Fusion-planner declaration for `polyval`. The builtin acts as a sink
/// (no elementwise/reduction body to fuse into) and may emit NaN.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::math::poly::polyval")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "polyval",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::UniformBuffer,
    elementwise: None,
    reduction: None,
    emits_nan: true,
    notes: "Acts as a fusion sink; real-valued workloads stay on device, while complex/delta paths gather to the host.",
};
53
// Dispatcher entry point. Implements MATLAB nargout semantics:
// `y = polyval(...)` or `[y, delta] = polyval(...)`.
#[runtime_builtin(
    name = "polyval",
    category = "math/poly",
    summary = "Evaluate a polynomial at given points with MATLAB-compatible options.",
    keywords = "polyval,polynomial,polyfit,delta,gpu",
    accel = "sink",
    sink = true,
    type_resolver(polyval_type),
    builtin_path = "crate::builtins::math::poly::polyval"
)]
async fn polyval_builtin(p: Value, x: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
    if let Some(out_count) = crate::output_count::current_output_count() {
        // delta is only computed when the caller requested >= 2 outputs.
        let eval = evaluate(p, x, &rest, out_count >= 2).await?;
        if out_count == 0 {
            // Statement position: no outputs requested.
            return Ok(Value::OutputList(Vec::new()));
        }
        let mut outputs = vec![eval.value()];
        if out_count >= 2 {
            outputs.push(eval.delta()?);
        }
        return Ok(crate::output_count::output_list_with_padding(
            out_count, outputs,
        ));
    }
    // No output-count context available: single-output path, no delta.
    let eval = evaluate(p, x, &rest, false).await?;
    Ok(eval.value())
}
81
/// Evaluate `polyval`, optionally computing the prediction interval.
///
/// `rest` holds the optional trailing arguments `S` (structure returned by
/// `polyfit`) and `mu` (two-element centring/scaling vector). When
/// `want_delta` is true, `S` is mandatory, mirroring MATLAB's
/// `[y, delta] = polyval(p, x, S)` form.
///
/// Residency: if either input arrived as a GPU tensor, the output prefers
/// device residency where the data allows it.
pub async fn evaluate(
    coefficients: Value,
    points: Value,
    rest: &[Value],
    want_delta: bool,
) -> BuiltinResult<PolyvalEval> {
    let options = parse_option_values(rest).await?;

    let coeff_clone = coefficients.clone();
    let points_clone = points.clone();

    // Record residency before conversion gathers everything to the host.
    let coeff_was_gpu = matches!(coefficients, Value::GpuTensor(_));
    let (coeffs, coeff_real) = convert_coefficients(coeff_clone).await?;

    let (mut inputs, prefer_gpu_points) = convert_points(points_clone).await?;
    let prefer_gpu_output = prefer_gpu_points || coeff_was_gpu;

    let mu = match options.mu.clone() {
        Some(mu_value) => Some(parse_mu(mu_value).await?),
        None => None,
    };

    // Fast path: all-real data, no delta and no S can run on the device.
    // `mu` is forwarded into the provider kernel, so the host-side
    // `apply_mu` below must only run when this path is skipped or declines.
    if prefer_gpu_output && !want_delta && options.s.is_none() {
        if let Some(value) =
            try_gpu_polyval(&coeffs, coeff_real, &inputs, mu, prefer_gpu_output).await?
        {
            return Ok(PolyvalEval::new(value, None));
        }
    }

    // Host path: centre/scale the evaluation points up front.
    if let Some(mu_val) = mu {
        apply_mu(&mut inputs.data, mu_val)?;
    }

    let stats = if let Some(s_value) = options.s {
        parse_stats(s_value, coeffs.len()).await?
    } else {
        None
    };

    if want_delta && stats.is_none() {
        return Err(polyval_error(
            "polyval: S input (structure returned by polyfit) is required for delta output",
        ));
    }

    // Empty X: result (and delta, if requested) is all zeros in X's shape.
    if inputs.data.is_empty() {
        let y = zeros_like(&inputs.shape, prefer_gpu_output)?;
        let delta = if want_delta {
            Some(zeros_like(&inputs.shape, prefer_gpu_output)?)
        } else {
            None
        };
        return Ok(PolyvalEval::new(y, delta));
    }

    // Empty coefficient vector: treat as the zero polynomial.
    if coeffs.is_empty() {
        let zeros = zeros_like(&inputs.shape, prefer_gpu_output)?;
        let delta = if want_delta {
            Some(zeros_like(&inputs.shape, prefer_gpu_output)?)
        } else {
            None
        };
        return Ok(PolyvalEval::new(zeros, delta));
    }

    let output_real = coeff_real && inputs.all_real;
    let values = evaluate_polynomial(&coeffs, &inputs.data);
    let result_value = finalize_values(
        &values,
        &inputs.shape,
        prefer_gpu_output,
        // Real output only when inputs were real AND no roundoff imaginaries.
        output_real && values_are_real(&values),
    )?;

    let delta_value = if want_delta {
        let stats = stats.expect("delta requires stats");
        let delta = compute_prediction_interval(&coeffs, &inputs.data, &stats)?;
        // delta is real-valued; keep it on device only when S.R was real.
        let prefer = prefer_gpu_output && stats.is_real;
        Some(finalize_delta(delta, &inputs.shape, prefer)?)
    } else {
        None
    };

    Ok(PolyvalEval::new(result_value, delta_value))
}
169
/// Attempt the device-resident fast path for real coefficients and points.
///
/// Returns `Ok(None)` whenever a precondition fails (complex data, empty
/// inputs, no registered provider, or any provider call erroring) so the
/// caller can fall back to host evaluation. Handle frees are best-effort
/// (`let _ =`) because a failed free must not mask the primary outcome.
async fn try_gpu_polyval(
    coeffs: &[Complex64],
    coeff_real: bool,
    inputs: &NumericArray,
    mu: Option<Mu>,
    prefer_gpu_output: bool,
) -> BuiltinResult<Option<Value>> {
    // Provider kernels only cover real-valued data.
    if !coeff_real || !inputs.all_real {
        return Ok(None);
    }
    if coeffs.is_empty() || inputs.data.is_empty() {
        return Ok(None);
    }
    let Some(provider) = runmat_accelerate_api::provider() else {
        return Ok(None);
    };

    // Upload coefficients as a 1xN row vector of their real parts.
    let coeff_data: Vec<f64> = coeffs.iter().map(|c| c.re).collect();
    let coeff_shape = vec![1usize, coeffs.len()];
    let coeff_view = HostTensorView {
        data: &coeff_data,
        shape: &coeff_shape,
    };
    let coeff_handle = match provider.upload(&coeff_view) {
        Ok(handle) => handle,
        Err(err) => {
            debug!("polyval: GPU upload of coefficients failed, falling back: {err}");
            return Ok(None);
        }
    };

    // Upload evaluation points preserving their original shape.
    let input_data: Vec<f64> = inputs.data.iter().map(|c| c.re).collect();
    let input_shape = inputs.shape.clone();
    let input_view = HostTensorView {
        data: &input_data,
        shape: &input_shape,
    };
    let input_handle = match provider.upload(&input_view) {
        Ok(handle) => handle,
        Err(err) => {
            debug!("polyval: GPU upload of evaluation points failed, falling back: {err}");
            // Release the already-uploaded coefficients before bailing.
            let _ = provider.free(&coeff_handle);
            return Ok(None);
        }
    };

    // `mu` centring/scaling is applied inside the provider kernel.
    let options = ProviderPolyvalOptions {
        mu: mu.map(|m| ProviderPolyvalMu {
            mean: m.mean,
            scale: m.scale,
        }),
    };

    let result_handle = match provider.polyval(&coeff_handle, &input_handle, &options) {
        Ok(handle) => handle,
        Err(err) => {
            debug!("polyval: GPU kernel execution failed, falling back: {err}");
            let _ = provider.free(&coeff_handle);
            let _ = provider.free(&input_handle);
            return Ok(None);
        }
    };

    // Input handles are no longer needed regardless of how we return.
    let _ = provider.free(&coeff_handle);
    let _ = provider.free(&input_handle);

    if prefer_gpu_output {
        return Ok(Some(Value::GpuTensor(result_handle)));
    }

    // Caller wants a host value: download, then free the device result.
    let host = match download_handle_async(provider, &result_handle).await {
        Ok(host) => host,
        Err(err) => {
            debug!("polyval: GPU download failed, falling back: {err}");
            let _ = provider.free(&result_handle);
            return Ok(None);
        }
    };
    let _ = provider.free(&result_handle);

    let tensor =
        Tensor::new(host.data, host.shape).map_err(|e| polyval_error(format!("polyval: {e}")))?;
    Ok(Some(tensor::tensor_into_value(tensor)))
}
254
/// Result object for polyval evaluation.
#[derive(Debug)]
pub struct PolyvalEval {
    /// Primary output `y`.
    value: Value,
    /// Prediction interval `delta`; present only when requested and computed.
    delta: Option<Value>,
}
261
262impl PolyvalEval {
263    fn new(value: Value, delta: Option<Value>) -> Self {
264        Self { value, delta }
265    }
266
267    /// Primary output (`y`).
268    pub fn value(&self) -> Value {
269        self.value.clone()
270    }
271
272    /// Optional prediction interval (`delta`).
273    pub fn delta(&self) -> BuiltinResult<Value> {
274        self.delta
275            .clone()
276            .ok_or_else(|| polyval_error("polyval: delta output not computed"))
277    }
278
279    /// Consume into the main value.
280    pub fn into_value(self) -> Value {
281        self.value
282    }
283
284    /// Consume into `(value, delta)` pair.
285    pub fn into_pair(self) -> BuiltinResult<(Value, Value)> {
286        match self.delta {
287            Some(delta) => Ok((self.value, delta)),
288            None => Err(polyval_error("polyval: delta output not computed")),
289        }
290    }
291}
292
/// Centring/scaling pair from the optional `mu` argument: points are
/// transformed as `(x - mean) / scale` before evaluation.
#[derive(Clone, Copy)]
struct Mu {
    mean: f64,
    scale: f64,
}
298
299impl Mu {
300    fn new(mean: f64, scale: f64) -> BuiltinResult<Self> {
301        if !mean.is_finite() || !scale.is_finite() {
302            return Err(polyval_error("polyval: mu values must be finite"));
303        }
304        if scale.abs() <= EPS {
305            return Err(polyval_error("polyval: mu(2) must be non-zero"));
306        }
307        Ok(Self { mean, scale })
308    }
309}
310
/// Evaluation points promoted to complex, plus their original array shape.
#[derive(Clone)]
struct NumericArray {
    /// Flattened element data (same ordering as the source tensor).
    data: Vec<Complex64>,
    /// Original shape of `x`, reused when shaping the output.
    shape: Vec<usize>,
    /// True when every element's imaginary part is within `EPS` of zero.
    all_real: bool,
}
317
/// Subset of polyfit's `S` structure needed for the delta computation.
#[derive(Clone)]
struct PolyfitStats {
    /// Triangular factor `S.R`.
    r: Matrix,
    /// Degrees of freedom `S.df`.
    df: f64,
    /// Residual norm `S.normr`.
    normr: f64,
    /// True when `S.R` carried no significant imaginary parts.
    is_real: bool,
}

impl PolyfitStats {
    /// A meaningful interval needs a non-empty `R`, positive degrees of
    /// freedom, and a finite residual norm; otherwise the caller produces an
    /// all-zero delta (see `compute_prediction_interval`).
    fn is_effective(&self) -> bool {
        self.r.len() > 0 && self.df > 0.0 && self.normr.is_finite()
    }
}
331
/// Dense 2-D matrix stored in column-major (MATLAB) order.
#[derive(Clone)]
struct Matrix {
    rows: usize,
    cols: usize,
    data: Vec<Complex64>,
}

impl Matrix {
    /// Element at (`row`, `col`); column-major index = row + col * rows.
    fn get(&self, row: usize, col: usize) -> Complex64 {
        self.data[row + col * self.rows]
    }

    /// Total number of elements (`rows * cols`).
    fn len(&self) -> usize {
        self.rows * self.cols
    }
}
348
/// Optional trailing arguments of `polyval(p, x, S, mu)`.
struct ParsedOptions {
    /// `S` structure from polyfit; `None` when omitted or passed as `[]`.
    s: Option<Value>,
    /// Raw `mu` argument, validated later by `parse_mu`.
    mu: Option<Value>,
}
353
354async fn parse_option_values(rest: &[Value]) -> BuiltinResult<ParsedOptions> {
355    match rest.len() {
356        0 => Ok(ParsedOptions { s: None, mu: None }),
357        1 => Ok(ParsedOptions {
358            s: if is_empty_value(&rest[0]).await? {
359                None
360            } else {
361                Some(rest[0].clone())
362            },
363            mu: None,
364        }),
365        2 => Ok(ParsedOptions {
366            s: if is_empty_value(&rest[0]).await? {
367                None
368            } else {
369                Some(rest[0].clone())
370            },
371            mu: Some(rest[1].clone()),
372        }),
373        _ => Err(polyval_error("polyval: too many input arguments")),
374    }
375}
376
377#[async_recursion::async_recursion(?Send)]
378async fn convert_coefficients(value: Value) -> BuiltinResult<(Vec<Complex64>, bool)> {
379    match value {
380        Value::GpuTensor(handle) => {
381            let gathered =
382                gpu_helpers::gather_value_async(&Value::GpuTensor(handle.clone())).await?;
383            convert_coefficients(gathered).await
384        }
385        Value::Tensor(mut tensor) => {
386            ensure_vector_shape("polyval", &tensor.shape)?;
387            let data = tensor
388                .data
389                .drain(..)
390                .map(|re| Complex64::new(re, 0.0))
391                .collect();
392            Ok((data, true))
393        }
394        Value::ComplexTensor(mut tensor) => {
395            ensure_vector_shape("polyval", &tensor.shape)?;
396            let all_real = tensor.data.iter().all(|&(_, im)| im.abs() <= EPS);
397            let data = tensor
398                .data
399                .drain(..)
400                .map(|(re, im)| Complex64::new(re, im))
401                .collect();
402            Ok((data, all_real))
403        }
404        Value::LogicalArray(mut array) => {
405            ensure_vector_data_shape("polyval", &array.shape)?;
406            let data = array
407                .data
408                .drain(..)
409                .map(|bit| Complex64::new(if bit != 0 { 1.0 } else { 0.0 }, 0.0))
410                .collect();
411            Ok((data, true))
412        }
413        Value::Num(n) => Ok((vec![Complex64::new(n, 0.0)], true)),
414        Value::Int(i) => Ok((vec![Complex64::new(i.to_f64(), 0.0)], true)),
415        Value::Bool(flag) => Ok((
416            vec![Complex64::new(if flag { 1.0 } else { 0.0 }, 0.0)],
417            true,
418        )),
419        Value::Complex(re, im) => Ok((vec![Complex64::new(re, im)], im.abs() <= EPS)),
420        other => Err(polyval_error(format!(
421            "polyval: coefficients must be numeric, got {other:?}"
422        ))),
423    }
424}
425
426async fn convert_points(value: Value) -> BuiltinResult<(NumericArray, bool)> {
427    match value {
428        Value::GpuTensor(handle) => {
429            let tensor = gpu_helpers::gather_tensor_async(&handle).await?;
430            let array = NumericArray {
431                data: tensor
432                    .data
433                    .iter()
434                    .map(|&re| Complex64::new(re, 0.0))
435                    .collect(),
436                shape: tensor.shape.clone(),
437                all_real: true,
438            };
439            Ok((array, true))
440        }
441        Value::Tensor(tensor) => Ok((
442            NumericArray {
443                data: tensor
444                    .data
445                    .iter()
446                    .map(|&re| Complex64::new(re, 0.0))
447                    .collect(),
448                shape: tensor.shape.clone(),
449                all_real: true,
450            },
451            false,
452        )),
453        Value::ComplexTensor(tensor) => Ok((
454            NumericArray {
455                data: tensor
456                    .data
457                    .iter()
458                    .map(|&(re, im)| Complex64::new(re, im))
459                    .collect(),
460                shape: tensor.shape.clone(),
461                all_real: tensor.data.iter().all(|&(_, im)| im.abs() <= EPS),
462            },
463            false,
464        )),
465        Value::LogicalArray(array) => Ok((
466            NumericArray {
467                data: array
468                    .data
469                    .iter()
470                    .map(|&bit| Complex64::new(if bit != 0 { 1.0 } else { 0.0 }, 0.0))
471                    .collect(),
472                shape: array.shape.clone(),
473                all_real: true,
474            },
475            false,
476        )),
477        Value::Num(n) => Ok((
478            NumericArray {
479                data: vec![Complex64::new(n, 0.0)],
480                shape: vec![1, 1],
481                all_real: true,
482            },
483            false,
484        )),
485        Value::Int(i) => Ok((
486            NumericArray {
487                data: vec![Complex64::new(i.to_f64(), 0.0)],
488                shape: vec![1, 1],
489                all_real: true,
490            },
491            false,
492        )),
493        Value::Bool(flag) => Ok((
494            NumericArray {
495                data: vec![Complex64::new(if flag { 1.0 } else { 0.0 }, 0.0)],
496                shape: vec![1, 1],
497                all_real: true,
498            },
499            false,
500        )),
501        Value::Complex(re, im) => Ok((
502            NumericArray {
503                data: vec![Complex64::new(re, im)],
504                shape: vec![1, 1],
505                all_real: im.abs() <= EPS,
506            },
507            false,
508        )),
509        other => Err(polyval_error(format!(
510            "polyval: X must be numeric, got {other:?}"
511        ))),
512    }
513}
514
/// Parse the `mu` argument into a validated [`Mu`] pair.
///
/// Accepts numeric/logical vectors with at least two elements; GPU tensors
/// are gathered first, and complex values are allowed only when their
/// imaginary parts are within `EPS`. Plain scalars are rejected because `mu`
/// must supply both a mean and a scale.
#[async_recursion::async_recursion(?Send)]
async fn parse_mu(value: Value) -> BuiltinResult<Mu> {
    match value {
        Value::GpuTensor(handle) => {
            let gathered = gpu_helpers::gather_tensor_async(&handle).await?;
            parse_mu(Value::Tensor(gathered)).await
        }
        Value::Tensor(tensor) => {
            if tensor.data.len() < 2 {
                return Err(polyval_error(
                    "polyval: mu must contain at least two elements",
                ));
            }
            // mu(1) = mean, mu(2) = scale; extra elements are ignored.
            Mu::new(tensor.data[0], tensor.data[1])
        }
        Value::LogicalArray(array) => {
            if array.data.len() < 2 {
                return Err(polyval_error(
                    "polyval: mu must contain at least two elements",
                ));
            }
            // Logical values promote to 0/1 (a zero scale is then rejected
            // by Mu::new).
            let mean = if array.data[0] != 0 { 1.0 } else { 0.0 };
            let scale = if array.data[1] != 0 { 1.0 } else { 0.0 };
            Mu::new(mean, scale)
        }
        Value::Num(_) | Value::Int(_) | Value::Bool(_) | Value::Complex(_, _) => Err(
            polyval_error("polyval: mu must be a numeric vector with at least two values"),
        ),
        Value::ComplexTensor(tensor) => {
            if tensor.data.len() < 2 {
                return Err(polyval_error(
                    "polyval: mu must contain at least two elements",
                ));
            }
            let (mean_re, mean_im) = tensor.data[0];
            let (scale_re, scale_im) = tensor.data[1];
            if mean_im.abs() > EPS || scale_im.abs() > EPS {
                return Err(polyval_error("polyval: mu values must be real"));
            }
            Mu::new(mean_re, scale_re)
        }
        _ => Err(polyval_error(
            "polyval: mu must be a numeric vector with at least two values",
        )),
    }
}
561
/// Parse the `S` argument (polyfit statistics structure) when present.
///
/// Returns `Ok(None)` for an empty placeholder value. Otherwise requires the
/// fields `R` (square, side `coeff_len`), `df`, and `normr`.
#[async_recursion::async_recursion(?Send)]
async fn parse_stats(value: Value, coeff_len: usize) -> BuiltinResult<Option<PolyfitStats>> {
    if is_empty_value(&value).await? {
        return Ok(None);
    }
    let struct_value = match value {
        Value::Struct(s) => s,
        Value::GpuTensor(handle) => {
            // Gather to the host and retry on the materialised value.
            let gathered = gpu_helpers::gather_value_async(&Value::GpuTensor(handle)).await?;
            return parse_stats(gathered, coeff_len).await;
        }
        other => {
            return Err(polyval_error(format!(
                "polyval: S input must be the structure returned by polyfit, got {other:?}"
            )))
        }
    };
    let r_value = struct_value
        .fields
        .get("R")
        .cloned()
        .ok_or_else(|| polyval_error("polyval: S input is missing the field 'R'"))?;
    let df_value = struct_value
        .fields
        .get("df")
        .cloned()
        .ok_or_else(|| polyval_error("polyval: S input is missing the field 'df'"))?;
    let normr_value = struct_value
        .fields
        .get("normr")
        .cloned()
        .ok_or_else(|| polyval_error("polyval: S input is missing the field 'normr'"))?;

    let (matrix, is_real) = convert_matrix(r_value, coeff_len).await?;
    let df = scalar_to_f64(df_value, "polyval: S.df").await?;
    let normr = scalar_to_f64(normr_value, "polyval: S.normr").await?;

    Ok(Some(PolyfitStats {
        r: matrix,
        df,
        normr,
        is_real,
    }))
}
606
607#[async_recursion::async_recursion(?Send)]
608async fn convert_matrix(value: Value, coeff_len: usize) -> BuiltinResult<(Matrix, bool)> {
609    match value {
610        Value::GpuTensor(handle) => {
611            let tensor = gpu_helpers::gather_tensor_async(&handle).await?;
612            convert_matrix(Value::Tensor(tensor), coeff_len).await
613        }
614        Value::Tensor(tensor) => {
615            let Tensor {
616                data, rows, cols, ..
617            } = tensor;
618            if rows != coeff_len || cols != coeff_len {
619                return Err(polyval_error("polyval: size of S.R must match the coefficient vector"));
620            }
621            let data = data.into_iter().map(|re| Complex64::new(re, 0.0)).collect();
622            Ok((Matrix { rows, cols, data }, true))
623        }
624        Value::ComplexTensor(tensor) => {
625            let ComplexTensor {
626                data, rows, cols, ..
627            } = tensor;
628            if rows != coeff_len || cols != coeff_len {
629                return Err(polyval_error("polyval: size of S.R must match the coefficient vector"));
630            }
631            let imag_small = data.iter().all(|&(_, im)| im.abs() <= EPS);
632            let data = data
633                .into_iter()
634                .map(|(re, im)| Complex64::new(re, im))
635                .collect();
636            Ok((Matrix { rows, cols, data }, imag_small))
637        }
638        Value::LogicalArray(array) => {
639            let LogicalArray { data, shape } = array;
640            let rows = shape.first().copied().unwrap_or(0);
641            let cols = shape.get(1).copied().unwrap_or(0);
642            if rows != coeff_len || cols != coeff_len {
643                return Err(polyval_error("polyval: size of S.R must match the coefficient vector"));
644            }
645            let data = data
646                .into_iter()
647                .map(|bit| Complex64::new(if bit != 0 { 1.0 } else { 0.0 }, 0.0))
648                .collect();
649            Ok((Matrix { rows, cols, data }, true))
650        }
651        Value::Num(_) | Value::Int(_) | Value::Bool(_) | Value::Complex(_, _) => Err(
652            polyval_error(
653                "polyval: S.R must be a square numeric matrix matching the coefficient vector length",
654            ),
655        ),
656        Value::Struct(_)
657        | Value::Cell(_)
658        | Value::String(_)
659        | Value::StringArray(_)
660        | Value::CharArray(_) => Err(
661            polyval_error(
662                "polyval: S.R must be a square numeric matrix matching the coefficient vector length",
663            ),
664        ),
665        _ => Err(
666            polyval_error(
667                "polyval: S.R must be a square numeric matrix matching the coefficient vector length",
668            ),
669        ),
670    }
671}
672
/// Coerce a scalar-valued `Value` to `f64`; `context` names the field being
/// parsed in error messages (e.g. "polyval: S.df").
///
/// One-element tensors and logical arrays are accepted; complex values are
/// rejected; GPU scalars are gathered to the host first.
#[async_recursion::async_recursion(?Send)]
async fn scalar_to_f64(value: Value, context: &str) -> BuiltinResult<f64> {
    match value {
        Value::Num(n) => Ok(n),
        Value::Int(i) => Ok(i.to_f64()),
        Value::Bool(flag) => Ok(if flag { 1.0 } else { 0.0 }),
        Value::Tensor(tensor) => {
            if tensor.data.len() != 1 {
                return Err(polyval_error(format!("{context} must be a scalar")));
            }
            Ok(tensor.data[0])
        }
        Value::LogicalArray(array) => {
            if array.data.len() != 1 {
                return Err(polyval_error(format!("{context} must be a scalar")));
            }
            Ok(if array.data[0] != 0 { 1.0 } else { 0.0 })
        }
        Value::GpuTensor(handle) => {
            let tensor = gpu_helpers::gather_tensor_async(&handle).await?;
            scalar_to_f64(Value::Tensor(tensor), context).await
        }
        Value::Complex(_, _) | Value::ComplexTensor(_) => {
            Err(polyval_error(format!("{context} must be real-valued")))
        }
        other => Err(polyval_error(format!(
            "{context} must be a scalar, got {other:?}"
        ))),
    }
}
703
704fn apply_mu(values: &mut [Complex64], mu: Mu) -> BuiltinResult<()> {
705    let mean = Complex64::new(mu.mean, 0.0);
706    let scale = Complex64::new(mu.scale, 0.0);
707    for v in values.iter_mut() {
708        *v = (*v - mean) / scale;
709    }
710    Ok(())
711}
712
713fn evaluate_polynomial(coeffs: &[Complex64], inputs: &[Complex64]) -> Vec<Complex64> {
714    let mut outputs = Vec::with_capacity(inputs.len());
715    for &x in inputs {
716        let mut acc = Complex64::new(0.0, 0.0);
717        for &c in coeffs {
718            acc = acc * x + c;
719        }
720        outputs.push(acc);
721    }
722    outputs
723}
724
/// Prediction interval for each evaluation point, as in MATLAB's
/// `[y, delta] = polyval(p, x, S)`:
/// `delta_i = sqrt(1 + sum_sq_i) * normr / sqrt(df)`, where `sum_sq_i` is the
/// squared norm of the Vandermonde row for `x_i` solved against `S.R`.
///
/// Ineffective stats (empty `R`, non-positive `df`, or non-finite `normr`)
/// yield an all-zero interval rather than an error.
fn compute_prediction_interval(
    coeffs: &[Complex64],
    inputs: &[Complex64],
    stats: &PolyfitStats,
) -> BuiltinResult<Vec<f64>> {
    if !stats.is_effective() {
        return Ok(vec![0.0; inputs.len()]);
    }
    let n = coeffs.len();
    let mut delta = Vec::with_capacity(inputs.len());
    for &x in inputs {
        let row = vandermonde_row(x, n);
        let solved = solve_row_against_upper(&row, &stats.r)?;
        let sum_sq: f64 = solved.iter().map(|c| c.norm_sqr()).sum();
        let interval = (1.0 + sum_sq).sqrt() * (stats.normr / stats.df.sqrt());
        delta.push(interval);
    }
    Ok(delta)
}
744
745fn vandermonde_row(x: Complex64, len: usize) -> Vec<Complex64> {
746    if len == 0 {
747        return vec![Complex64::new(1.0, 0.0)];
748    }
749    let degree = len - 1;
750    let mut powers = vec![Complex64::new(1.0, 0.0); degree + 1];
751    for idx in 1..=degree {
752        powers[idx] = powers[idx - 1] * x;
753    }
754    let mut row = vec![Complex64::new(0.0, 0.0); degree + 1];
755    for (i, value) in powers.into_iter().enumerate() {
756        row[degree - i] = value;
757    }
758    row
759}
760
/// Back-substitution of `row` against the triangular factor `S.R`.
///
/// Solves, column by column from the last backwards,
/// `sum_{k >= j} result[k] * R[k][j] = row[j]`, dividing by the diagonal
/// entry `R[j][j]` and rejecting (near-)singular diagonals.
///
/// NOTE(review): with column-major `Matrix::get(row, col)`, the inner loop
/// reads `R[k][j]` for `k > j` — the *lower* triangle of `R`. For the
/// upper-triangular `R` produced by a QR-based polyfit those entries are
/// zero, so the off-diagonal corrections would never contribute. Confirm the
/// intended orientation of `S.R` (or that callers supply it transposed).
fn solve_row_against_upper(row: &[Complex64], matrix: &Matrix) -> BuiltinResult<Vec<Complex64>> {
    let n = row.len();
    if matrix.rows != n || matrix.cols != n {
        return Err(polyval_error(
            "polyval: size of S.R must match the coefficient vector",
        ));
    }
    let mut result = vec![Complex64::new(0.0, 0.0); n];
    for j in (0..n).rev() {
        let mut acc = row[j];
        // Subtract contributions from the already-solved entries (k > j).
        for (k, value) in result.iter().enumerate().skip(j + 1) {
            acc -= *value * matrix.get(k, j);
        }
        let diag = matrix.get(j, j);
        if diag.norm() <= EPS {
            return Err(polyval_error("polyval: S.R is singular"));
        }
        result[j] = acc / diag;
    }
    Ok(result)
}
782
/// Convert evaluated values into a runtime `Value`.
///
/// `real_only` selects the real path (which may upload to the GPU when
/// `prefer_gpu` is set); otherwise a complex scalar or complex tensor is
/// produced. Complex results always stay on the host — `prefer_gpu` only
/// affects the real branch.
fn finalize_values(
    data: &[Complex64],
    shape: &[usize],
    prefer_gpu: bool,
    real_only: bool,
) -> BuiltinResult<Value> {
    if real_only {
        let real_data: Vec<f64> = data.iter().map(|c| c.re).collect();
        finalize_real(real_data, shape, prefer_gpu)
    } else if data.len() == 1 {
        // Single complex result: return a complex scalar, not a 1x1 tensor.
        let value = data[0];
        Ok(Value::Complex(value.re, value.im))
    } else {
        let complex_data: Vec<(f64, f64)> = data.iter().map(|c| (c.re, c.im)).collect();
        let tensor = ComplexTensor::new(complex_data, shape.to_vec())
            .map_err(|e| polyval_error(format!("polyval: failed to build complex tensor: {e}")))?;
        Ok(Value::ComplexTensor(tensor))
    }
}
802
/// Wrap the (always real) delta vector like any other real result.
fn finalize_delta(data: Vec<f64>, shape: &[usize], prefer_gpu: bool) -> BuiltinResult<Value> {
    finalize_real(data, shape, prefer_gpu)
}
806
/// Build a real result tensor, uploading it to the GPU when requested.
///
/// The upload is best-effort: with no registered provider, or on upload
/// failure, the host value is returned instead via `tensor_into_value`
/// (which appears to collapse 1x1 tensors to scalars — see the
/// `polyval_scalar` test below).
fn finalize_real(data: Vec<f64>, shape: &[usize], prefer_gpu: bool) -> BuiltinResult<Value> {
    let tensor = Tensor::new(data, shape.to_vec())
        .map_err(|e| polyval_error(format!("polyval: failed to build tensor: {e}")))?;
    if prefer_gpu {
        if let Some(provider) = runmat_accelerate_api::provider() {
            let view = HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            if let Ok(handle) = provider.upload(&view) {
                return Ok(Value::GpuTensor(handle));
            }
        }
    }
    Ok(tensor::tensor_into_value(tensor))
}
823
824fn zeros_like(shape: &[usize], prefer_gpu: bool) -> BuiltinResult<Value> {
825    let len = shape.iter().product();
826    finalize_real(vec![0.0; len], shape, prefer_gpu)
827}
828
829fn ensure_vector_shape(name: &str, shape: &[usize]) -> BuiltinResult<()> {
830    if !is_vector_shape(shape) {
831        Err(polyval_error(format!(
832            "{name}: coefficients must be a scalar, row vector, or column vector"
833        )))
834    } else {
835        Ok(())
836    }
837}
838
839fn ensure_vector_data_shape(name: &str, shape: &[usize]) -> BuiltinResult<()> {
840    if !is_vector_shape(shape) {
841        Err(polyval_error(format!(
842            "{name}: inputs must be vectors or scalars"
843        )))
844    } else {
845        Ok(())
846    }
847}
848
/// A shape is vector-like when at most one dimension exceeds 1 — this covers
/// scalars, row vectors, column vectors, and empty arrays.
fn is_vector_shape(shape: &[usize]) -> bool {
    let mut saw_extent = false;
    for &dim in shape {
        if dim > 1 {
            if saw_extent {
                return false;
            }
            saw_extent = true;
        }
    }
    true
}
852
853#[async_recursion::async_recursion(?Send)]
854async fn is_empty_value(value: &Value) -> BuiltinResult<bool> {
855    match value {
856        Value::Tensor(t) => Ok(t.data.is_empty()),
857        Value::LogicalArray(l) => Ok(l.data.is_empty()),
858        Value::Cell(ca) => Ok(ca.data.is_empty()),
859        Value::GpuTensor(handle) => {
860            let gathered =
861                gpu_helpers::gather_value_async(&Value::GpuTensor(handle.clone())).await?;
862            is_empty_value(&gathered).await
863        }
864        _ => Ok(false),
865    }
866}
867
868fn values_are_real(values: &[Complex64]) -> bool {
869    values.iter().all(|c| c.im.abs() <= EPS)
870}
871
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use futures::executor::block_on;
    use runmat_builtins::StructValue;

    // Asserts that the runtime error's message contains `needle`,
    // printing both on failure for easy diagnosis.
    fn assert_error_contains(err: crate::RuntimeError, needle: &str) {
        assert!(
            err.message().contains(needle),
            "expected error containing '{needle}', got '{}'",
            err.message()
        );
    }

    // Scalar point: polyval(2x^2 - 3x + 5, 4) = 25, returned as Value::Num.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_scalar() {
        let coeffs = Tensor::new(vec![2.0, -3.0, 5.0], vec![1, 3]).unwrap();
        let value =
            polyval_builtin(Value::Tensor(coeffs), Value::Num(4.0), Vec::new()).expect("polyval");
        match value {
            Value::Num(n) => assert!((n - (2.0 * 16.0 - 12.0 + 5.0)).abs() < 1e-12),
            other => panic!("expected scalar, got {other:?}"),
        }
    }

    // Column-vector points: x^3 - 2x + 1 evaluated elementwise; output shape
    // must match the input points' shape (5x1).
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_matrix_input() {
        let coeffs = Tensor::new(vec![1.0, 0.0, -2.0, 1.0], vec![1, 4]).unwrap();
        let points = Tensor::new(vec![-2.0, -1.0, 0.0, 1.0, 2.0], vec![5, 1]).unwrap();
        let value = polyval_builtin(
            Value::Tensor(coeffs),
            Value::Tensor(points.clone()),
            Vec::new(),
        )
        .expect("polyval");
        match value {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, points.shape);
                let expected = vec![-3.0, 2.0, 1.0, 0.0, 5.0];
                assert_eq!(tensor.data, expected);
            }
            other => panic!("expected tensor output, got {other:?}"),
        }
    }

    // Complex coefficients AND complex points: only shape/length are checked
    // here; exact values are covered by the host evaluation path elsewhere.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_complex_inputs() {
        let coeffs =
            ComplexTensor::new(vec![(1.0, 2.0), (-3.0, 0.0), (0.0, 4.0)], vec![1, 3]).unwrap();
        let points =
            ComplexTensor::new(vec![(-1.0, 1.0), (0.0, 0.0), (1.0, -2.0)], vec![1, 3]).unwrap();
        let value = polyval_builtin(
            Value::ComplexTensor(coeffs),
            Value::ComplexTensor(points.clone()),
            Vec::new(),
        )
        .expect("polyval");
        match value {
            Value::ComplexTensor(tensor) => {
                assert_eq!(tensor.shape, points.shape);
                assert_eq!(tensor.data.len(), 3);
            }
            other => panic!("expected complex tensor, got {other:?}"),
        }
    }

    // mu = [1, 2] centers/scales the points: p((x-1)/2) with p = x^2,
    // so [0,1,2] -> [0.25, 0, 0.25]. The empty tensor is the placeholder
    // for the (unused) S argument slot.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_with_mu() {
        let coeffs = Tensor::new(vec![1.0, 0.0, 0.0], vec![1, 3]).unwrap();
        let points = Tensor::new(vec![0.0, 1.0, 2.0], vec![1, 3]).unwrap();
        let mu = Tensor::new(vec![1.0, 2.0], vec![1, 2]).unwrap();
        let value = polyval_builtin(
            Value::Tensor(coeffs),
            Value::Tensor(points),
            vec![
                Value::Tensor(Tensor::new(vec![], vec![0, 0]).unwrap()),
                Value::Tensor(mu),
            ],
        )
        .expect("polyval");
        match value {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.data, vec![0.25, 0.0, 0.25]);
            }
            other => panic!("expected tensor output, got {other:?}"),
        }
    }

    // Two-output form: with an S struct (R = identity, df, normr) the
    // prediction-interval delta should come back as a tensor matching the
    // points' shape. Only shape/length are asserted.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_delta_computation() {
        let coeffs = Tensor::new(vec![1.0, -3.0, 2.0], vec![1, 3]).unwrap();
        let points = Tensor::new(vec![0.0, 1.0, 2.0], vec![1, 3]).unwrap();
        let mut st = StructValue::new();
        let r = Tensor::new(
            vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0],
            vec![3, 3],
        )
        .unwrap();
        st.fields.insert("R".to_string(), Value::Tensor(r));
        st.fields.insert("df".to_string(), Value::Num(4.0));
        st.fields.insert("normr".to_string(), Value::Num(2.0));
        let stats = Value::Struct(st);
        let eval = futures::executor::block_on(evaluate(
            Value::Tensor(coeffs),
            Value::Tensor(points),
            &[stats],
            true,
        ))
        .expect("polyval");
        let (_, delta) = eval.into_pair().expect("delta available");
        match delta {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![1, 3]);
                assert_eq!(tensor.data.len(), 3);
            }
            other => panic!("expected tensor delta, got {other:?}"),
        }
    }

    // Requesting the delta output (want_delta = true) without passing the
    // S struct must be rejected with an error mentioning the S input.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_delta_requires_stats() {
        let coeffs = Tensor::new(vec![1.0, 0.0], vec![1, 2]).unwrap();
        let points = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
        let err = futures::executor::block_on(evaluate(
            Value::Tensor(coeffs),
            Value::Tensor(points),
            &[],
            true,
        ))
        .expect_err("expected error");
        assert_error_contains(err, "S input");
    }

    // mu with a single element is invalid: MATLAB requires [mean, std].
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_invalid_mu_length_errors() {
        let coeffs = Tensor::new(vec![1.0, 0.0], vec![1, 2]).unwrap();
        let points = Tensor::new(vec![0.0], vec![1, 1]).unwrap();
        let mu = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
        let placeholder = Tensor::new(vec![], vec![0, 0]).unwrap();
        let err = polyval_builtin(
            Value::Tensor(coeffs),
            Value::Tensor(points),
            vec![Value::Tensor(placeholder), Value::Tensor(mu)],
        )
        .expect_err("expected mu length error");
        assert_error_contains(err, "mu must contain at least two elements");
    }

    // mu with a non-negligible imaginary part must be rejected.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_complex_mu_rejected() {
        let coeffs = Tensor::new(vec![1.0, 0.0], vec![1, 2]).unwrap();
        let points = Tensor::new(vec![0.0], vec![1, 1]).unwrap();
        let complex_mu =
            ComplexTensor::new(vec![(0.0, 0.0), (1.0, 0.5)], vec![1, 2]).expect("complex mu");
        let placeholder = Tensor::new(vec![], vec![0, 0]).unwrap();
        let err = polyval_builtin(
            Value::Tensor(coeffs),
            Value::Tensor(points),
            vec![Value::Tensor(placeholder), Value::ComplexTensor(complex_mu)],
        )
        .expect_err("expected complex mu error");
        assert_error_contains(err, "mu values must be real");
    }

    // An S struct missing its 'R' field must produce a descriptive error.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_invalid_stats_missing_r() {
        let coeffs = Tensor::new(vec![1.0, -3.0, 2.0], vec![1, 3]).unwrap();
        let points = Tensor::new(vec![0.0], vec![1, 1]).unwrap();
        let mut st = StructValue::new();
        st.fields.insert("df".to_string(), Value::Num(1.0));
        st.fields.insert("normr".to_string(), Value::Num(1.0));
        let stats = Value::Struct(st);
        let err = polyval_builtin(Value::Tensor(coeffs), Value::Tensor(points), vec![stats])
            .expect_err("expected missing R error");
        assert_error_contains(err, "missing the field 'R'");
    }

    // End-to-end with the in-process test provider: GPU inputs in, GPU handle
    // out, then gather and compare. x^2 + 1 at [-1, 0, 1] -> [2, 1, 2].
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polyval_gpu_roundtrip() {
        test_support::with_test_provider(|provider| {
            let coeffs = Tensor::new(vec![1.0, 0.0, 1.0], vec![1, 3]).unwrap();
            let points = Tensor::new(vec![-1.0, 0.0, 1.0], vec![3, 1]).unwrap();
            let coeff_handle = provider
                .upload(&HostTensorView {
                    data: &coeffs.data,
                    shape: &coeffs.shape,
                })
                .expect("upload coeff");
            let point_handle = provider
                .upload(&HostTensorView {
                    data: &points.data,
                    shape: &points.shape,
                })
                .expect("upload points");
            let value = polyval_builtin(
                Value::GpuTensor(coeff_handle),
                Value::GpuTensor(point_handle),
                Vec::new(),
            )
            .expect("polyval");
            match value {
                Value::GpuTensor(handle) => {
                    let gathered = test_support::gather(Value::GpuTensor(handle)).expect("gather");
                    assert_eq!(gathered.data, vec![2.0, 1.0, 2.0]);
                }
                other => panic!("expected gpu tensor, got {other:?}"),
            }
        });
    }

    // Cross-checks the wgpu provider's Horner kernel against the host
    // `evaluate_polynomial` path on real inputs. Gated on the `wgpu` feature.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    #[cfg(feature = "wgpu")]
    fn polyval_wgpu_matches_cpu_real_inputs() {
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let coeffs = Tensor::new(vec![1.0, -3.0, 2.0], vec![1, 3]).unwrap();
        let points = Tensor::new(vec![-2.0, -1.0, 0.5, 2.5], vec![4, 1]).unwrap();

        let provider = runmat_accelerate_api::provider().expect("wgpu provider");
        let coeff_handle = provider
            .upload(&HostTensorView {
                data: &coeffs.data,
                shape: &coeffs.shape,
            })
            .expect("upload coeffs");
        let point_handle = provider
            .upload(&HostTensorView {
                data: &points.data,
                shape: &points.shape,
            })
            .expect("upload points");

        let gpu_value = polyval_builtin(
            Value::GpuTensor(coeff_handle.clone()),
            Value::GpuTensor(point_handle.clone()),
            Vec::new(),
        )
        .expect("polyval gpu");

        // Free the input handles before gathering; the result holds its own.
        let _ = provider.free(&coeff_handle);
        let _ = provider.free(&point_handle);

        let gathered = test_support::gather(gpu_value).expect("gather");

        // Reference result computed via the host complex evaluator with
        // zero imaginary parts, then projected back to the real parts.
        let coeff_complex: Vec<Complex64> = coeffs
            .data
            .iter()
            .map(|&c| Complex64::new(c, 0.0))
            .collect();
        let point_complex: Vec<Complex64> = points
            .data
            .iter()
            .map(|&x| Complex64::new(x, 0.0))
            .collect();
        let expected_vals = evaluate_polynomial(&coeff_complex, &point_complex);
        let expected: Vec<f64> = expected_vals.iter().map(|c| c.re).collect();

        assert_eq!(gathered.shape, vec![4, 1]);
        assert_eq!(gathered.data, expected);
    }

    // Sync shim over the async builtin so tests can call it directly.
    fn polyval_builtin(p: Value, x: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
        block_on(super::polyval_builtin(p, x, rest))
    }
}
1149}