Skip to main content

floe_core/run/
normalize.rs

1use std::collections::HashMap;
2
3use polars::prelude::DataFrame;
4
5use crate::{config, ConfigError, FloeResult};
6
7pub fn resolve_normalize_strategy(entity: &config::EntityConfig) -> FloeResult<Option<String>> {
8    let normalize = match &entity.schema.normalize_columns {
9        Some(config) => config.enabled.unwrap_or(false),
10        None => false,
11    };
12    if !normalize {
13        return Ok(None);
14    }
15    let raw = entity
16        .schema
17        .normalize_columns
18        .as_ref()
19        .and_then(|config| config.strategy.as_deref())
20        .unwrap_or("snake_case");
21    let normalized = normalize_strategy_name(raw);
22    match normalized.as_str() {
23        "snakecase" | "lower" | "camelcase" | "none" => Ok(Some(normalized)),
24        _ => Err(Box::new(ConfigError(format!(
25            "unsupported normalize_columns.strategy: {raw}"
26        )))),
27    }
28}
29
30pub fn normalize_schema_columns(
31    columns: &[config::ColumnConfig],
32    strategy: &str,
33) -> FloeResult<Vec<config::ColumnConfig>> {
34    let mut normalized = Vec::with_capacity(columns.len());
35    let mut seen = HashMap::new();
36    for column in columns {
37        let normalized_name = normalize_name(&column.name, strategy);
38        if let Some(existing) = seen.insert(normalized_name.clone(), column.name.clone()) {
39            return Err(Box::new(ConfigError(format!(
40                "normalized column name collision: {} and {} -> {}",
41                existing, column.name, normalized_name
42            ))));
43        }
44        normalized.push(config::ColumnConfig {
45            name: normalized_name,
46            column_type: column.column_type.clone(),
47            nullable: column.nullable,
48            unique: column.unique,
49        });
50    }
51    Ok(normalized)
52}
53
54pub fn normalize_dataframe_columns(df: &mut DataFrame, strategy: &str) -> FloeResult<()> {
55    let names = df.get_column_names();
56    let mut normalized_names = Vec::with_capacity(names.len());
57    let mut seen = HashMap::new();
58    for name in names {
59        let normalized = normalize_name(name, strategy);
60        if let Some(existing) = seen.insert(normalized.clone(), name.to_string()) {
61            return Err(Box::new(ConfigError(format!(
62                "normalized input column collision: {} and {} -> {}",
63                existing, name, normalized
64            ))));
65        }
66        normalized_names.push(normalized);
67    }
68    df.set_column_names(normalized_names.iter())
69        .map_err(|err| {
70            Box::new(ConfigError(format!(
71                "failed to normalize column names: {err}"
72            )))
73        })?;
74    Ok(())
75}
76
77fn normalize_strategy_name(value: &str) -> String {
78    value.to_ascii_lowercase().replace(['-', '_'], "")
79}
80
81pub fn normalize_name(value: &str, strategy: &str) -> String {
82    match normalize_strategy_name(strategy).as_str() {
83        "snakecase" => to_snake_case(value),
84        "lower" => value.to_ascii_lowercase(),
85        "camelcase" => to_camel_case(value),
86        "none" => value.to_string(),
87        _ => value.to_string(),
88    }
89}
90
91fn to_snake_case(value: &str) -> String {
92    split_words(value).join("_")
93}
94
95fn to_camel_case(value: &str) -> String {
96    let words = split_words(value);
97    if words.is_empty() {
98        return String::new();
99    }
100    let mut out = String::new();
101    out.push_str(&words[0]);
102    for word in words.iter().skip(1) {
103        out.push_str(&capitalize(word));
104    }
105    out
106}
107
/// Splits an identifier into lowercase ASCII words.
///
/// Any character that is not ASCII alphanumeric acts as a separator and is
/// dropped. Within a run of alphanumerics, a new word starts at an uppercase
/// letter that either follows a lowercase letter or digit (`fooBar`,
/// `col1Name`) or follows an uppercase letter and precedes a lowercase one
/// (the `S` in `HTTPServer`).
fn split_words(value: &str) -> Vec<String> {
    let mut words: Vec<String> = Vec::new();
    let mut buffer = String::new();
    // Character immediately before the current one in the raw input.
    let mut previous: Option<char> = None;
    let mut rest = value.chars().peekable();

    while let Some(ch) = rest.next() {
        let before = previous.replace(ch);

        if !ch.is_ascii_alphanumeric() {
            // Separator: close the word in progress, if there is one.
            if !buffer.is_empty() {
                words.push(std::mem::take(&mut buffer));
            }
            continue;
        }

        if ch.is_ascii_uppercase() && !buffer.is_empty() {
            let starts_word = match before {
                Some(p) if p.is_ascii_lowercase() || p.is_ascii_digit() => true,
                // Inside an acronym, only break before its last capital when
                // that capital begins a regular word.
                Some(p) if p.is_ascii_uppercase() => {
                    matches!(rest.peek(), Some(following) if following.is_ascii_lowercase())
                }
                _ => false,
            };
            if starts_word {
                words.push(std::mem::take(&mut buffer));
            }
        }

        buffer.push(ch.to_ascii_lowercase());
    }

    if !buffer.is_empty() {
        words.push(buffer);
    }
    words
}
146
/// Returns `value` with its first character ASCII-uppercased and the rest
/// left as is. An empty input yields an empty string.
fn capitalize(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    let mut rest = value.chars();
    if let Some(initial) = rest.next() {
        out.push(initial.to_ascii_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
153}