floe_core/checks/
unique.rs1use std::collections::HashSet;
2
3use polars::prelude::{AnyValue, DataFrame};
4
5use crate::{config, ConfigError, FloeResult};
6use super::RowError;
7
8pub fn unique_errors(
9 df: &DataFrame,
10 columns: &[config::ColumnConfig],
11) -> FloeResult<Vec<Vec<RowError>>> {
12 let mut errors_per_row = vec![Vec::new(); df.height()];
13 let unique_columns: Vec<&config::ColumnConfig> = columns
14 .iter()
15 .filter(|col| col.unique == Some(true))
16 .collect();
17 if unique_columns.is_empty() {
18 return Ok(errors_per_row);
19 }
20
21 for column in unique_columns {
22 let series = df
23 .column(&column.name)
24 .map_err(|err| {
25 Box::new(ConfigError(format!(
26 "unique column {} not found: {err}",
27 column.name
28 )))
29 })?;
30 let mut seen = HashSet::new();
31 for row_idx in 0..df.height() {
32 let value = series.get(row_idx).map_err(|err| {
33 Box::new(ConfigError(format!(
34 "unique column {} read failed: {err}",
35 column.name
36 )))
37 })?;
38 if matches!(value, AnyValue::Null) {
39 continue;
40 }
41 let key = value.to_string();
42 if !seen.insert(key) {
43 errors_per_row[row_idx].push(RowError::new(
44 "unique",
45 &column.name,
46 "duplicate value",
47 ));
48 }
49 }
50 }
51
52 Ok(errors_per_row)
53}