Skip to main content

floe_core/checks/
unique.rs

1use std::collections::HashSet;
2
3use polars::prelude::{AnyValue, DataFrame};
4
5use super::RowError;
6use crate::{config, ConfigError, FloeResult};
7
8pub fn unique_errors(
9    df: &DataFrame,
10    columns: &[config::ColumnConfig],
11) -> FloeResult<Vec<Vec<RowError>>> {
12    let mut errors_per_row = vec![Vec::new(); df.height()];
13    let unique_columns: Vec<&config::ColumnConfig> = columns
14        .iter()
15        .filter(|col| col.unique == Some(true))
16        .collect();
17    if unique_columns.is_empty() {
18        return Ok(errors_per_row);
19    }
20
21    for column in unique_columns {
22        let series = df.column(&column.name).map_err(|err| {
23            Box::new(ConfigError(format!(
24                "unique column {} not found: {err}",
25                column.name
26            )))
27        })?;
28        let mut seen = HashSet::new();
29        for (row_idx, errors) in errors_per_row.iter_mut().enumerate() {
30            let value = series.get(row_idx).map_err(|err| {
31                Box::new(ConfigError(format!(
32                    "unique column {} read failed: {err}",
33                    column.name
34                )))
35            })?;
36            if matches!(value, AnyValue::Null) {
37                continue;
38            }
39            let key = value.to_string();
40            if !seen.insert(key) {
41                errors.push(RowError::new("unique", &column.name, "duplicate value"));
42            }
43        }
44    }
45
46    Ok(errors_per_row)
47}