1use runmat_accelerate_api::{CovNormalization, CovRows, CovarianceOptions};
4use runmat_builtins::{Tensor, Value};
5use runmat_macros::runtime_builtin;
6
7use crate::builtins::common::gpu_helpers;
8use crate::builtins::common::spec::{
9 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
10 ProviderHook, ReductionNaN, ResidencyPolicy, ScalarType, ShapeRequirements,
11};
12use crate::builtins::common::tensor;
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};
16
17#[cfg(feature = "doc_export")]
18pub const DOC_MD: &str = r#"---
19title: "cov"
20category: "stats/summary"
21keywords: ["cov", "covariance", "statistics", "weighted covariance", "gpu"]
22summary: "Compute covariance matrices for vectors, matrices, or paired data sets."
23references:
24 - https://www.mathworks.com/help/matlab/ref/cov.html
25gpu_support:
26 elementwise: false
27 reduction: false
28 precisions: ["f32", "f64"]
29 broadcasting: "none"
30 notes: "Runs on the GPU when rows='all' and no weight vector is supplied; other modes transparently fall back to the CPU reference path."
31fusion:
32 elementwise: false
33 reduction: false
34 max_inputs: 2
35 constants: "inline"
36requires_feature: null
37tested:
38 unit: "builtins::stats::summary::cov::tests"
39 integration: "builtins::stats::summary::cov::tests::cov_gpu_roundtrip"
40---
41
42# What does the `cov` function do in MATLAB / RunMat?
43`cov` returns covariance matrices for numeric data. Columns represent variables and rows are
44observations. You can pass in one matrix, two matching data sets, or supply observation weights
45and row-handling options that mirror MATLAB.
46
47## How does the `cov` function behave in MATLAB / RunMat?
48- `cov(X)` treats each column of `X` as a variable and returns a square covariance matrix.
49- `cov(X, Y)` concatenates `X` and `Y` column-wise (they must have the same number of rows) before computing the covariance.
50- The second argument can be the normalization flag `0` (default) or `1`, matching MATLAB's unbiased and biased estimators.
51- You can pass a weight vector to obtain frequency-weighted covariance.
52- `'omitrows'` drops rows containing `NaN` or `Inf` before the covariance is computed.
53- `'partialrows'` performs pairwise deletion: each covariance entry uses only the rows that contain finite values for that column pair.
54
55## `cov` Function GPU Execution Behaviour
56RunMat invokes provider-specific GPU kernels when:
57
581. All inputs already reside on the GPU;
592. No weight vector is supplied;
603. The rows option is `'all'`; and
614. The active provider exposes the custom `covariance` hook.
62
63If any of these conditions is not met, RunMat gathers the data to the host, evaluates the
64reference implementation, and returns a dense host tensor. This guarantees MATLAB-compatible
65behaviour regardless of GPU support.
66
67## Examples of using the `cov` function in MATLAB / RunMat
68
69### Computing covariance of columns in a matrix
70```matlab
71X = [4.0 2.0 0.60;
72 4.2 2.1 0.59;
73 3.9 2.0 0.58;
74 4.3 2.1 0.62;
75 4.1 2.2 0.63];
76C = cov(X);
77```
78Expected output:
79```matlab
80C =
81 0.0250 0.0075 0.0018
82 0.0075 0.0070 0.0014
83 0.0018 0.0014 0.0004
84```
85
86### Covariance between two vectors
87```matlab
88x = [1 2 3 4]';
89y = [10 11 9 12]';
90C = cov(x, y);
91```
92Expected output:
93```matlab
94C =
95 1.6667 1.5000
96 1.5000 1.6667
97```
98
99### Weighted covariance with observation weights
100```matlab
101X = [4.0 2.0;
102 4.2 2.1;
103 3.9 2.0;
104 4.3 2.1;
105 4.1 2.2];
106w = [1 1 1 2 2];
107Cw = cov(X, w);
108```
109Expected output:
110```matlab
111Cw =
112 0.0224 0.0050
113 0.0050 0.0067
114```
115
116### Ignoring rows that contain missing values
117```matlab
118X = [1 NaN 2;
119 3 4 5;
120 NaN 6 7;
121 8 9 10];
122C = cov(X, 'omitrows');
123```
124Expected output:
125```matlab
126C =
127 12.5000 12.5000 12.5000
128 12.5000 12.5000 12.5000
129 12.5000 12.5000 12.5000
130```
131
132### Pairwise covariance with staggered NaNs
133```matlab
134X = [ 1 2 NaN;
135 4 NaN 6;
136 7 8 9];
137C = cov(X, 'partialrows');
138```
139Expected output:
140```matlab
141C =
142 9.0000 18.0000 4.5000
143 18.0000 18.0000 NaN
144 4.5000 NaN 4.5000
145```
146
147### Running covariance on `gpuArray` inputs
148```matlab
149G = gpuArray(X); % reuse matrix from earlier examples
150CG = cov(G);
151CG_host = gather(CG);
152```
153Expected output:
154```matlab
155CG_host =
156 0.0250 0.0075 0.0018
157 0.0075 0.0070 0.0014
158 0.0018 0.0014 0.0004
159```
160
161## GPU residency in RunMat (Do I need `gpuArray`?)
162You usually do **not** need to call `gpuArray`. Expressions such as `cov(sin(X))` keep temporary
163results on the GPU as long as the active provider handles the operation. The builtin gathers to
164the CPU only when weights, `'omitrows'`, or `'partialrows'` are requested, or when the provider
165does not implement the covariance hook. Explicitly calling `gpuArray` remains supported for
166MATLAB compatibility and to seed GPU residency when you are unsure about planner decisions.
167
168## FAQ
169
170### Does `cov` support biased and unbiased estimators?
171Yes. The default is the unbiased estimator (divide by *N - 1*). Passing `1` as the second argument
172switches to the biased estimator (divide by *N*), matching MATLAB.
173
174### How do I provide observation weights?
175Supply a weight vector whose length equals the number of observations. The covariance is frequency-weighted using the MATLAB formula. Weighted covariance currently falls back to the CPU implementation when running on the GPU.
176
177### What happens when columns contain constant values?
178The diagonal entries become zero, and off-diagonal entries involving the constant column are zero. Any slight negative values caused by floating-point noise are clamped to zero.
179
180### How are `NaN` and `Inf` handled?
181By default (`'all'`), non-finite values propagate `NaN` into the affected covariance entries.
182`'omitrows'` drops rows containing non-finite values, while `'partialrows'` recomputes each
183covariance entry using only rows that are finite for the relevant column pair.
184
185### Can I call `cov` on logical inputs?
186Yes. Logical arrays are converted to double precision (`true → 1.0`, `false → 0.0`) before the
187covariance is computed, matching MATLAB's behaviour.
188
189## See Also
190[corrcoef](./corrcoef), [mean](../../math/reduction/mean), [sum](../../math/reduction/sum), [gpuArray](../../acceleration/gpu/gpuArray), [gather](../../acceleration/gpu/gather)
191
192## Source & Feedback
193- The full source code for the implementation of the `cov` function is available at: [`crates/runmat-runtime/src/builtins/stats/summary/cov.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/stats/summary/cov.rs)
194- Found a bug or behavioural difference? Please [open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with details and a minimal repro.
195"#;
196
197pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
198 name: "cov",
199 op_kind: GpuOpKind::Custom("summary-stats"),
200 supported_precisions: &[ScalarType::F32, ScalarType::F64],
201 broadcast: BroadcastSemantics::None,
202 provider_hooks: &[ProviderHook::Custom("covariance")],
203 constant_strategy: ConstantStrategy::InlineLiteral,
204 residency: ResidencyPolicy::NewHandle,
205 nan_mode: ReductionNaN::Include,
206 two_pass_threshold: None,
207 workgroup_size: None,
208 accepts_nan_mode: false,
209 notes: "GPU execution is available when rows='all' and no weight vector is supplied; other cases fall back to the CPU path.",
210};
211
212register_builtin_gpu_spec!(GPU_SPEC);
213
214pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
215 name: "cov",
216 shape: ShapeRequirements::Any,
217 constant_strategy: ConstantStrategy::InlineLiteral,
218 elementwise: None,
219 reduction: None,
220 emits_nan: true,
221 notes: "The covariance builtin is treated as a fusion boundary and executes via dedicated kernels or the host reference.",
222};
223
224register_builtin_fusion_spec!(FUSION_SPEC);
225
226#[cfg(feature = "doc_export")]
227register_builtin_doc_text!("cov", DOC_MD);
228
229#[runtime_builtin(
230 name = "cov",
231 category = "stats/summary",
232 summary = "Compute covariance matrices for vectors, matrices, or paired data sets.",
233 keywords = "cov,covariance,statistics,weights,gpu",
234 accel = "reduction"
235)]
236fn cov_builtin(value: Value, rest: Vec<Value>) -> Result<Value, String> {
237 let args = CovArgs::parse(value, rest)?;
238 if let Some(result) = cov_try_gpu(&args)? {
239 return Ok(result);
240 }
241 cov_host(args)
242}
243
244pub fn cov_from_tensors(
246 left: Tensor,
247 right: Option<Tensor>,
248 rows: CovRows,
249 weight: CovWeightSpec,
250) -> Result<Tensor, String> {
251 let matrix = combine_tensors(left, right)?;
252 if let CovWeightSpec::Vector(ref vec) = weight {
253 if matrix.rows != vec.len() {
254 return Err(format!(
255 "cov: weight vector must contain {} elements",
256 matrix.rows
257 ));
258 }
259 }
260 match rows {
261 CovRows::All => covariance_dense(&matrix, &weight),
262 CovRows::OmitRows => {
263 let (filtered, filtered_weight) = filter_complete_rows(&matrix, weight);
264 covariance_dense(&filtered, &filtered_weight)
265 }
266 CovRows::PartialRows => covariance_pairwise(&matrix, &weight),
267 }
268}
269
270#[derive(Debug)]
271struct CovArgs {
272 first: Value,
273 second: Option<Value>,
274 normalization: CovNormalization,
275 rows: CovRows,
276 weight_vector: Option<Value>,
277}
278
279impl CovArgs {
280 fn parse(first: Value, rest: Vec<Value>) -> Result<Self, String> {
281 let mut second_candidate: Option<Value> = None;
282 let mut weight_candidate: Option<Value> = None;
283 let mut normalization = CovNormalization::Unbiased;
284 let mut normalization_explicit = false;
285 let mut rows = CovRows::All;
286
287 let iter = rest.into_iter();
288 for arg in iter {
289 match arg {
290 Value::String(_) | Value::StringArray(_) | Value::CharArray(_) => {
291 let key = tensor::value_to_string(&arg)
292 .ok_or_else(|| "cov: expected string option".to_string())?;
293 let lowered = key.trim().to_ascii_lowercase();
294 rows = parse_rows_option(&lowered)?;
295 }
296 Value::Tensor(_) | Value::LogicalArray(_) | Value::GpuTensor(_) => {
297 if second_candidate.is_none() {
298 second_candidate = Some(arg);
299 } else if weight_candidate.is_none() {
300 weight_candidate = Some(arg);
301 } else {
302 return Err("cov: too many array arguments".to_string());
303 }
304 }
305 Value::Num(_) | Value::Int(_) | Value::Bool(_) => {
306 if normalization_explicit || weight_candidate.is_some() {
307 return Err("cov: normalization flag specified more than once".to_string());
308 }
309 normalization = parse_normalization(arg)?;
310 normalization_explicit = true;
311 }
312 Value::ComplexTensor(_) => {
313 return Err("cov: complex inputs are not supported yet".to_string());
314 }
315 other => {
316 return Err(format!("cov: unsupported argument type {:?}", other));
317 }
318 }
319 }
320
321 if let Some(weight_array) = weight_candidate {
322 return Ok(Self {
324 first,
325 second: second_candidate,
326 normalization,
327 rows,
328 weight_vector: Some(weight_array),
329 });
330 }
331
332 let mut second = second_candidate;
333 let mut weight_vector: Option<Value> = None;
334
335 if let Some(candidate) = second.take() {
336 if should_treat_as_weight(&first, &candidate, normalization_explicit, rows)? {
337 weight_vector = Some(candidate);
338 } else {
339 second = Some(candidate);
340 }
341 }
342
343 Ok(Self {
344 first,
345 second,
346 normalization,
347 rows,
348 weight_vector,
349 })
350 }
351}
352
353#[derive(Debug, Clone)]
354pub enum CovWeightSpec {
355 Scalar(CovNormalization),
356 Vector(Vec<f64>),
357}
358
359fn cov_try_gpu(args: &CovArgs) -> Result<Option<Value>, String> {
360 if args.rows != CovRows::All || args.weight_vector.is_some() {
361 return Ok(None);
362 }
363
364 let provider = match runmat_accelerate_api::provider() {
365 Some(p) => p,
366 None => return Ok(None),
367 };
368
369 let first_handle = match &args.first {
370 Value::GpuTensor(handle) => handle,
371 _ => return Ok(None),
372 };
373
374 let maybe_second_handle = match &args.second {
375 Some(Value::GpuTensor(handle)) => Some(handle),
376 Some(_) => return Ok(None),
377 None => None,
378 };
379
380 let options = CovarianceOptions {
381 normalization: args.normalization,
382 rows: args.rows,
383 has_weight_vector: false,
384 };
385
386 match provider.covariance(first_handle, maybe_second_handle, None, &options) {
387 Ok(result) => Ok(Some(Value::GpuTensor(result))),
388 Err(_) => Ok(None),
389 }
390}
391
392fn cov_host(args: CovArgs) -> Result<Value, String> {
393 let CovArgs {
394 first,
395 second,
396 normalization,
397 rows,
398 weight_vector,
399 } = args;
400
401 let left = value_to_tensor_gather(first)?;
402 let right = match second {
403 Some(value) => Some(value_to_tensor_gather(value)?),
404 None => None,
405 };
406
407 let weight_spec = if let Some(weight_value) = weight_vector {
408 let vector = value_to_weight_vector(weight_value, left.rows())?;
409 CovWeightSpec::Vector(vector)
410 } else {
411 CovWeightSpec::Scalar(normalization)
412 };
413
414 let tensor = cov_from_tensors(left, right, rows, weight_spec)?;
415 Ok(Value::Tensor(tensor))
416}
417
418fn value_to_tensor_gather(value: Value) -> Result<Tensor, String> {
419 match value {
420 Value::GpuTensor(handle) => gpu_helpers::gather_tensor(&handle),
421 Value::LogicalArray(logical) => tensor::logical_to_tensor(&logical),
422 other => tensor::value_into_tensor_for("cov", other),
423 }
424}
425
426fn value_to_weight_vector(value: Value, expected_rows: usize) -> Result<Vec<f64>, String> {
427 let tensor = match value {
428 Value::GpuTensor(handle) => gpu_helpers::gather_tensor(&handle)?,
429 Value::LogicalArray(logical) => tensor::logical_to_tensor(&logical)?,
430 other => tensor::value_into_tensor_for("cov", other)?,
431 };
432 if tensor.shape.len() > 2 {
433 return Err("cov: weight vector must be one-dimensional".to_string());
434 }
435 if tensor.rows() != expected_rows && tensor.cols() != expected_rows {
436 return Err(format!(
437 "cov: weight vector must contain {} elements",
438 expected_rows
439 ));
440 }
441 for (idx, weight) in tensor.data.iter().enumerate() {
442 if !weight.is_finite() || *weight < 0.0 {
443 return Err(format!(
444 "cov: weights must be non-negative finite values (index {idx})"
445 ));
446 }
447 }
448 if tensor.data.is_empty() {
449 return Err("cov: weight vector cannot be empty".to_string());
450 }
451 Ok(tensor.data)
452}
453
454fn parse_rows_option(value: &str) -> Result<CovRows, String> {
455 match value {
456 "all" => Ok(CovRows::All),
457 "omitrows" | "omit" => Ok(CovRows::OmitRows),
458 "partialrows" | "partial" | "pairwise" => Ok(CovRows::PartialRows),
459 other => Err(format!("cov: unknown rows option '{other}'")),
460 }
461}
462
463fn parse_normalization(value: Value) -> Result<CovNormalization, String> {
464 match value {
465 Value::Int(i) => match i.to_i64() {
466 0 => Ok(CovNormalization::Unbiased),
467 1 => Ok(CovNormalization::Biased),
468 other => Err(format!(
469 "cov: normalization flag must be 0 or 1, received {other}"
470 )),
471 },
472 Value::Num(n) => {
473 if !n.is_finite() {
474 return Err("cov: normalization flag must be finite".to_string());
475 }
476 let rounded = n.round();
477 if (rounded - n).abs() > 1.0e-12 {
478 return Err("cov: normalization flag must be an integer".to_string());
479 }
480 match rounded as i64 {
481 0 => Ok(CovNormalization::Unbiased),
482 1 => Ok(CovNormalization::Biased),
483 other => Err(format!(
484 "cov: normalization flag must be 0 or 1, received {other}"
485 )),
486 }
487 }
488 Value::Bool(flag) => {
489 if flag {
490 Ok(CovNormalization::Biased)
491 } else {
492 Ok(CovNormalization::Unbiased)
493 }
494 }
495 other => Err(format!(
496 "cov: normalization flag must be numeric, received {:?}",
497 other
498 )),
499 }
500}
501
502fn should_treat_as_weight(
503 first: &Value,
504 candidate: &Value,
505 normalization_explicit: bool,
506 rows_option: CovRows,
507) -> Result<bool, String> {
508 let (rows_first, cols_first) = value_rows_cols(first)?;
509 let (rows_candidate, cols_candidate) = value_rows_cols(candidate)?;
510
511 let is_vector = rows_candidate == 1
512 || cols_candidate == 1
513 || rows_candidate * cols_candidate == rows_candidate
514 && (rows_candidate == rows_first || cols_candidate == rows_first);
515
516 if !is_vector {
517 return Ok(false);
518 }
519
520 if rows_candidate != rows_first && cols_candidate != rows_first {
521 return Ok(false);
523 }
524
525 if cols_first == 1 && !normalization_explicit && matches!(rows_option, CovRows::All) {
526 return Ok(false);
528 }
529
530 Ok(true)
531}
532
533fn value_rows_cols(value: &Value) -> Result<(usize, usize), String> {
534 match value {
535 Value::Tensor(tensor) => Ok((tensor.rows(), tensor.cols())),
536 Value::LogicalArray(array) => {
537 if array.shape.len() > 2 {
538 return Err("cov: inputs must be 2-D matrices or vectors".to_string());
539 }
540 let rows = if array.shape.is_empty() {
541 1
542 } else {
543 array.shape[0]
544 };
545 let cols = if array.shape.len() >= 2 {
546 array.shape[1]
547 } else {
548 1
549 };
550 Ok((rows, cols))
551 }
552 Value::GpuTensor(handle) => {
553 if handle.shape.len() > 2 {
554 return Err("cov: inputs must be 2-D matrices or vectors".to_string());
555 }
556 let rows = if handle.shape.is_empty() {
557 1
558 } else {
559 handle.shape[0]
560 };
561 let cols = if handle.shape.len() >= 2 {
562 handle.shape[1]
563 } else {
564 1
565 };
566 Ok((rows, cols))
567 }
568 Value::Num(_) | Value::Int(_) | Value::Bool(_) => Ok((1, 1)),
569 other => Err(format!(
570 "cov: unsupported input type for shape inspection: {:?}",
571 other
572 )),
573 }
574}
575
576#[derive(Debug, Clone)]
577struct Matrix {
578 data: Vec<f64>,
579 rows: usize,
580 cols: usize,
581}
582
583impl Matrix {
584 fn from_tensor(name: &str, tensor: Tensor) -> Result<Self, String> {
585 if tensor.shape.len() > 2 {
586 return Err(format!("{name}: inputs must be 2-D matrices or vectors"));
587 }
588 Ok(Self {
589 rows: tensor.rows(),
590 cols: tensor.cols(),
591 data: tensor.data,
592 })
593 }
594
595 #[inline]
596 fn get(&self, row: usize, col: usize) -> f64 {
597 self.data[row + col * self.rows]
598 }
599
600 #[inline]
601 fn column(&self, col: usize) -> &[f64] {
602 let start = col * self.rows;
603 let end = start + self.rows;
604 &self.data[start..end]
605 }
606}
607
608fn combine_tensors(left: Tensor, right: Option<Tensor>) -> Result<Matrix, String> {
609 let mut matrix = Matrix::from_tensor("cov", left)?;
610 if let Some(second) = right {
611 let right_matrix = Matrix::from_tensor("cov", second)?;
612 if matrix.rows != right_matrix.rows {
613 return Err("cov: inputs must have the same number of rows".to_string());
614 }
615 matrix.cols += right_matrix.cols;
616 matrix
617 .data
618 .extend_from_slice(&right_matrix.data[..right_matrix.rows * right_matrix.cols]);
619 }
620 Ok(matrix)
621}
622
623fn covariance_dense(matrix: &Matrix, weight: &CovWeightSpec) -> Result<Tensor, String> {
624 let cols = matrix.cols;
625 let rows = matrix.rows;
626
627 if cols == 0 {
628 return Tensor::new(Vec::new(), vec![0, 0]).map_err(|e| format!("cov: {e}"));
629 }
630
631 let mut result = vec![f64::NAN; cols * cols];
632
633 match weight {
634 CovWeightSpec::Scalar(normalization) => {
635 let denom = match normalization {
636 CovNormalization::Unbiased => (rows as f64) - 1.0,
637 CovNormalization::Biased => rows as f64,
638 };
639 if denom <= 0.0 {
640 return Tensor::new(result, vec![cols, cols]).map_err(|e| format!("cov: {e}"));
641 }
642
643 let mut means = vec![0.0; cols];
644 for (col, mean_slot) in means.iter_mut().enumerate() {
645 let column = matrix.column(col);
646 let mut sum = 0.0;
647 let mut valid = true;
648 for &value in column {
649 if !value.is_finite() {
650 valid = false;
651 break;
652 }
653 sum += value;
654 }
655 *mean_slot = if valid { sum / (rows as f64) } else { f64::NAN };
656 }
657
658 for i in 0..cols {
659 for j in i..cols {
660 let value = covariance_unweighted_pair(matrix, i, j, &means, denom);
661 set_entry(&mut result, cols, i, j, sanitize_covariance(i == j, value));
662 }
663 }
664 }
665 CovWeightSpec::Vector(weights) => {
666 if weights.len() != rows {
667 return Err(format!("cov: weight vector must contain {} elements", rows));
668 }
669 let sum_w: f64 = weights.iter().sum();
670 if sum_w <= 0.0 {
671 return Tensor::new(result, vec![cols, cols]).map_err(|e| format!("cov: {e}"));
672 }
673 let denom = sum_w - 1.0;
674 if denom <= 0.0 {
675 return Tensor::new(result, vec![cols, cols]).map_err(|e| format!("cov: {e}"));
676 }
677
678 let mut means = vec![0.0; cols];
679 for (col, mean_slot) in means.iter_mut().enumerate() {
680 let column = matrix.column(col);
681 let mut weighted_sum = 0.0;
682 let mut valid = true;
683 for (row, &value) in column.iter().enumerate() {
684 if !value.is_finite() {
685 valid = false;
686 break;
687 }
688 weighted_sum += weights[row] * value;
689 }
690 *mean_slot = if valid {
691 weighted_sum / sum_w
692 } else {
693 f64::NAN
694 };
695 }
696
697 for i in 0..cols {
698 for j in i..cols {
699 let value = covariance_weighted_pair(matrix, i, j, weights, &means, denom);
700 set_entry(&mut result, cols, i, j, sanitize_covariance(i == j, value));
701 }
702 }
703 }
704 }
705
706 Tensor::new(result, vec![cols, cols]).map_err(|e| format!("cov: {e}"))
707}
708
709fn filter_complete_rows(matrix: &Matrix, weight: CovWeightSpec) -> (Matrix, CovWeightSpec) {
710 if matrix.rows == 0 {
711 return (
712 Matrix {
713 data: Vec::new(),
714 rows: 0,
715 cols: matrix.cols,
716 },
717 weight,
718 );
719 }
720
721 let mut valid_rows = Vec::new();
722 for row in 0..matrix.rows {
723 let mut is_valid = true;
724 for col in 0..matrix.cols {
725 if !matrix.get(row, col).is_finite() {
726 is_valid = false;
727 break;
728 }
729 }
730 if is_valid {
731 valid_rows.push(row);
732 }
733 }
734
735 if valid_rows.len() == matrix.rows {
736 return (matrix.clone(), weight);
738 }
739
740 let mut data = Vec::with_capacity(valid_rows.len() * matrix.cols);
741 for col in 0..matrix.cols {
742 for &row in &valid_rows {
743 data.push(matrix.get(row, col));
744 }
745 }
746
747 let filtered_matrix = Matrix {
748 data,
749 rows: valid_rows.len(),
750 cols: matrix.cols,
751 };
752
753 let filtered_weight = match weight {
754 CovWeightSpec::Scalar(norm) => CovWeightSpec::Scalar(norm),
755 CovWeightSpec::Vector(vec) => {
756 let mut filtered = Vec::with_capacity(valid_rows.len());
757 for &row in &valid_rows {
758 filtered.push(vec[row]);
759 }
760 CovWeightSpec::Vector(filtered)
761 }
762 };
763
764 (filtered_matrix, filtered_weight)
765}
766
767fn covariance_pairwise(matrix: &Matrix, weight: &CovWeightSpec) -> Result<Tensor, String> {
768 let cols = matrix.cols;
769 if cols == 0 {
770 return Tensor::new(Vec::new(), vec![0, 0]).map_err(|e| format!("cov: {e}"));
771 }
772 let mut result = vec![f64::NAN; cols * cols];
773 for i in 0..cols {
774 let variance = covariance_pair(matrix, i, i, weight);
775 set_entry(&mut result, cols, i, i, sanitize_covariance(true, variance));
776 for j in (i + 1)..cols {
777 let value = covariance_pair(matrix, i, j, weight);
778 set_entry(&mut result, cols, i, j, sanitize_covariance(false, value));
779 }
780 }
781 Tensor::new(result, vec![cols, cols]).map_err(|e| format!("cov: {e}"))
782}
783
784fn covariance_unweighted_pair(
785 matrix: &Matrix,
786 lhs: usize,
787 rhs: usize,
788 means: &[f64],
789 denom: f64,
790) -> f64 {
791 if !means[lhs].is_finite() || !means[rhs].is_finite() {
792 return f64::NAN;
793 }
794 let mut accumulator = 0.0;
795 for row in 0..matrix.rows {
796 let x = matrix.get(row, lhs);
797 let y = matrix.get(row, rhs);
798 if !x.is_finite() || !y.is_finite() {
799 return f64::NAN;
800 }
801 accumulator += (x - means[lhs]) * (y - means[rhs]);
802 }
803 accumulator / denom
804}
805
806fn covariance_weighted_pair(
807 matrix: &Matrix,
808 lhs: usize,
809 rhs: usize,
810 weights: &[f64],
811 means: &[f64],
812 denom: f64,
813) -> f64 {
814 if !means[lhs].is_finite() || !means[rhs].is_finite() {
815 return f64::NAN;
816 }
817 let mut accumulator = 0.0;
818 for (row, &weight) in weights.iter().enumerate().take(matrix.rows) {
819 if weight == 0.0 {
820 continue;
821 }
822 let x = matrix.get(row, lhs);
823 let y = matrix.get(row, rhs);
824 if !x.is_finite() || !y.is_finite() {
825 return f64::NAN;
826 }
827 accumulator += weight * (x - means[lhs]) * (y - means[rhs]);
828 }
829 accumulator / denom
830}
831
832fn covariance_pair(matrix: &Matrix, lhs: usize, rhs: usize, weight: &CovWeightSpec) -> f64 {
833 match weight {
834 CovWeightSpec::Scalar(normalization) => {
835 let mut xs = Vec::new();
836 let mut ys = Vec::new();
837 for row in 0..matrix.rows {
838 let x = matrix.get(row, lhs);
839 let y = matrix.get(row, rhs);
840 if x.is_finite() && y.is_finite() {
841 xs.push(x);
842 ys.push(y);
843 }
844 }
845 covariance_unweighted_slice(&xs, &ys, *normalization)
846 }
847 CovWeightSpec::Vector(weights) => {
848 let mut xs = Vec::new();
849 let mut ys = Vec::new();
850 let mut ws = Vec::new();
851 for (row, &weight) in weights.iter().enumerate().take(matrix.rows) {
852 let x = matrix.get(row, lhs);
853 let y = matrix.get(row, rhs);
854 if x.is_finite() && y.is_finite() {
855 xs.push(x);
856 ys.push(y);
857 ws.push(weight);
858 }
859 }
860 covariance_weighted_slice(&xs, &ys, &ws)
861 }
862 }
863}
864
865fn covariance_unweighted_slice(xs: &[f64], ys: &[f64], normalization: CovNormalization) -> f64 {
866 if xs.is_empty() || ys.is_empty() {
867 return f64::NAN;
868 }
869 let n = xs.len().min(ys.len());
870 if n == 0 {
871 return f64::NAN;
872 }
873 let denom = match normalization {
874 CovNormalization::Unbiased => (n as f64) - 1.0,
875 CovNormalization::Biased => n as f64,
876 };
877 if denom <= 0.0 {
878 return f64::NAN;
879 }
880 let sum_x: f64 = xs.iter().take(n).sum();
881 let sum_y: f64 = ys.iter().take(n).sum();
882 let mean_x = sum_x / (n as f64);
883 let mean_y = sum_y / (n as f64);
884 let mut accumulator = 0.0;
885 for idx in 0..n {
886 accumulator += (xs[idx] - mean_x) * (ys[idx] - mean_y);
887 }
888 accumulator / denom
889}
890
891fn covariance_weighted_slice(xs: &[f64], ys: &[f64], weights: &[f64]) -> f64 {
892 if xs.is_empty() || ys.is_empty() || weights.is_empty() {
893 return f64::NAN;
894 }
895 let n = xs.len().min(ys.len()).min(weights.len());
896 if n == 0 {
897 return f64::NAN;
898 }
899 let sum_w: f64 = weights.iter().take(n).sum();
900 if sum_w <= 0.0 {
901 return f64::NAN;
902 }
903 let denom = sum_w - 1.0;
904 if denom <= 0.0 {
905 return f64::NAN;
906 }
907 let mut mean_x = 0.0;
908 let mut mean_y = 0.0;
909 for idx in 0..n {
910 mean_x += weights[idx] * xs[idx];
911 mean_y += weights[idx] * ys[idx];
912 }
913 mean_x /= sum_w;
914 mean_y /= sum_w;
915 let mut accumulator = 0.0;
916 for idx in 0..n {
917 accumulator += weights[idx] * (xs[idx] - mean_x) * (ys[idx] - mean_y);
918 }
919 accumulator / denom
920}
921
922fn sanitize_covariance(is_diag: bool, value: f64) -> f64 {
923 if !value.is_finite() {
924 return value;
925 }
926 if is_diag && value < 0.0 && value > -1.0e-12 {
927 0.0
928 } else {
929 value
930 }
931}
932
933fn set_entry(buffer: &mut [f64], dim: usize, row: usize, col: usize, value: f64) {
934 let idx = row + col * dim;
935 buffer[idx] = value;
936 if row != col {
937 let symmetrical = col + row * dim;
938 buffer[symmetrical] = value;
939 }
940}
941
942#[cfg(test)]
943mod tests {
944 use super::*;
945 use crate::builtins::common::test_support;
946 use runmat_builtins::Tensor;
947
948 fn assert_tensor_close(actual: &Tensor, expected: &[f64], tol: f64) {
949 let dim = (expected.len() as f64).sqrt() as usize;
950 assert_eq!(actual.shape, vec![dim, dim], "unexpected tensor shape");
951 for (idx, (&got, &want)) in actual.data.iter().zip(expected.iter()).enumerate() {
952 if want.is_nan() {
953 assert!(
954 got.is_nan(),
955 "expected NaN at linear index {idx}, found {got}"
956 );
957 } else {
958 assert!(
959 (got - want).abs() <= tol,
960 "mismatch at linear index {idx}: got {got}, expected {want}"
961 );
962 }
963 }
964 }
965
966 #[test]
967 fn cov_matrix_basic() {
968 let tensor = Tensor::new(
969 vec![
970 4.0, 4.2, 3.9, 4.3, 4.1, 2.0, 2.1, 2.0, 2.1, 2.2, 0.60, 0.59, 0.58, 0.62, 0.63,
973 ],
974 vec![5, 3],
975 )
976 .unwrap();
977 let result = cov_builtin(Value::Tensor(tensor), Vec::new()).expect("cov");
978 let tensor = match result {
979 Value::Tensor(t) => t,
980 other => panic!("expected tensor result, got {other:?}"),
981 };
982 let expected = [
983 0.0250, 0.0075, 0.00175, 0.0075, 0.0070, 0.00135, 0.00175, 0.00135, 0.00043,
986 ];
987 assert_tensor_close(&tensor, &expected, 1.0e-6);
988 }
989
990 #[test]
991 fn cov_two_vectors() {
992 let x = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![4, 1]).unwrap();
993 let y = Tensor::new(vec![10.0, 11.0, 9.0, 12.0], vec![4, 1]).unwrap();
994 let result = cov_builtin(Value::Tensor(x), vec![Value::Tensor(y)]).expect("cov");
995 let tensor = match result {
996 Value::Tensor(t) => t,
997 other => panic!("expected tensor result, got {other:?}"),
998 };
999 let expected = [
1000 1.6666666666666667,
1001 0.6666666666666666, 0.6666666666666666,
1003 1.6666666666666667,
1004 ];
1005 assert_tensor_close(&tensor, &expected, 1.0e-6);
1006 }
1007
1008 #[test]
1009 fn cov_weighted_vector() {
1010 let tensor = Tensor::new(
1011 vec![
1012 4.0, 4.2, 3.9, 4.3, 4.1, 2.0, 2.1, 2.0, 2.1, 2.2,
1014 ],
1015 vec![5, 2],
1016 )
1017 .unwrap();
1018 let weights = Tensor::new(vec![1.0, 1.0, 1.0, 2.0, 2.0], vec![5, 1]).unwrap();
1019 let result = cov_builtin(Value::Tensor(tensor), vec![Value::Tensor(weights)]).expect("cov");
1020 let tensor = match result {
1021 Value::Tensor(t) => t,
1022 other => panic!("expected tensor result, got {other:?}"),
1023 };
1024 let expected = [
1025 0.022380952380952376,
1026 0.004999999999999994, 0.004999999999999994,
1028 0.006666666666666678,
1029 ];
1030 assert_tensor_close(&tensor, &expected, 1.0e-6);
1031 }
1032
1033 #[test]
1034 fn cov_omitrows() {
1035 let tensor = Tensor::new(
1036 vec![
1037 1.0,
1038 3.0,
1039 f64::NAN,
1040 8.0, f64::NAN,
1042 4.0,
1043 6.0,
1044 9.0, 2.0,
1046 5.0,
1047 7.0,
1048 10.0,
1049 ],
1050 vec![4, 3],
1051 )
1052 .unwrap();
1053 let result =
1054 cov_builtin(Value::Tensor(tensor), vec![Value::from("omitrows")]).expect("cov");
1055 let tensor = match result {
1056 Value::Tensor(t) => t,
1057 other => panic!("expected tensor result, got {other:?}"),
1058 };
1059 let expected = [
1060 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
1063 ];
1064 assert_tensor_close(&tensor, &expected, 1.0e-6);
1065 }
1066
1067 #[test]
1068 fn cov_partialrows() {
1069 let tensor = Tensor::new(
1070 vec![
1071 1.0,
1072 4.0,
1073 7.0, 2.0,
1075 f64::NAN,
1076 8.0, f64::NAN,
1078 6.0,
1079 9.0,
1080 ],
1081 vec![3, 3],
1082 )
1083 .unwrap();
1084 let result =
1085 cov_builtin(Value::Tensor(tensor), vec![Value::from("partialrows")]).expect("cov");
1086 let tensor = match result {
1087 Value::Tensor(t) => t,
1088 other => panic!("expected tensor result, got {other:?}"),
1089 };
1090 let expected = [
1091 9.0,
1092 18.0,
1093 4.5, 18.0,
1095 18.0,
1096 f64::NAN, 4.5,
1098 f64::NAN,
1099 4.5,
1100 ];
1101 assert_tensor_close(&tensor, &expected, 1.0e-6);
1102 }
1103
1104 #[test]
1105 fn cov_gpu_roundtrip() {
1106 test_support::with_test_provider(|provider| {
1107 let tensor = Tensor::new(
1108 vec![
1109 4.0, 4.2, 3.9, 4.3, 4.1, 2.0, 2.1, 2.0, 2.1, 2.2,
1111 ],
1112 vec![5, 2],
1113 )
1114 .unwrap();
1115 let view = runmat_accelerate_api::HostTensorView {
1116 data: &tensor.data,
1117 shape: &tensor.shape,
1118 };
1119 let handle = provider.upload(&view).expect("upload");
1120 let result = cov_builtin(Value::GpuTensor(handle), Vec::new()).expect("cov");
1121 let gathered = test_support::gather(result).expect("gather");
1122 let expected = [
1123 0.0250, 0.0075, 0.0075, 0.0070,
1125 ];
1126 assert_tensor_close(&gathered, &expected, 1.0e-6);
1127 });
1128 }
1129
1130 #[test]
1131 #[cfg(feature = "doc_export")]
1132 fn doc_examples_present() {
1133 let blocks = test_support::doc_examples(DOC_MD);
1134 assert!(!blocks.is_empty());
1135 }
1136
1137 #[test]
1138 #[cfg(feature = "wgpu")]
1139 fn cov_wgpu_matches_cpu() {
1140 let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
1141 runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
1142 );
1143
1144 let tensor = Tensor::new(
1145 vec![
1146 4.0, 4.2, 3.9, 4.3, 4.1, 2.0, 2.1, 2.0, 2.1, 2.2,
1148 ],
1149 vec![5, 2],
1150 )
1151 .unwrap();
1152
1153 let cpu_result = cov_builtin(Value::Tensor(tensor.clone()), Vec::new()).expect("cov");
1154 let cpu_tensor = match cpu_result {
1155 Value::Tensor(t) => t,
1156 other => panic!("expected tensor result, got {other:?}"),
1157 };
1158
1159 let provider = runmat_accelerate_api::provider().expect("wgpu provider");
1160 let view = runmat_accelerate_api::HostTensorView {
1161 data: &tensor.data,
1162 shape: &tensor.shape,
1163 };
1164 let handle = provider.upload(&view).expect("upload");
1165
1166 let gpu_value = cov_builtin(Value::GpuTensor(handle), Vec::new()).expect("cov");
1167 let gathered = test_support::gather(gpu_value).expect("gather");
1168
1169 assert_tensor_close(&gathered, &cpu_tensor.data, 1.0e-6);
1170 }
1171}