floe_core/checks/
unique.rs1use std::collections::HashSet;
2
3use polars::prelude::{AnyValue, DataFrame};
4
5use super::RowError;
6use crate::{config, ConfigError, FloeResult};
7
8pub fn unique_errors(
9 df: &DataFrame,
10 columns: &[config::ColumnConfig],
11) -> FloeResult<Vec<Vec<RowError>>> {
12 let mut errors_per_row = vec![Vec::new(); df.height()];
13 let unique_columns: Vec<&config::ColumnConfig> = columns
14 .iter()
15 .filter(|col| col.unique == Some(true))
16 .collect();
17 if unique_columns.is_empty() {
18 return Ok(errors_per_row);
19 }
20
21 for column in unique_columns {
22 let series = df.column(&column.name).map_err(|err| {
23 Box::new(ConfigError(format!(
24 "unique column {} not found: {err}",
25 column.name
26 )))
27 })?;
28 let mut seen = HashSet::new();
29 for (row_idx, errors) in errors_per_row.iter_mut().enumerate() {
30 let value = series.get(row_idx).map_err(|err| {
31 Box::new(ConfigError(format!(
32 "unique column {} read failed: {err}",
33 column.name
34 )))
35 })?;
36 if matches!(value, AnyValue::Null) {
37 continue;
38 }
39 let key = value.to_string();
40 if !seen.insert(key) {
41 errors.push(RowError::new("unique", &column.name, "duplicate value"));
42 }
43 }
44 }
45
46 Ok(errors_per_row)
47}