floe_core/run/
normalize.rs1use std::collections::HashMap;
2
3use polars::prelude::DataFrame;
4
5use crate::{config, ConfigError, FloeResult};
6
7pub fn resolve_normalize_strategy(entity: &config::EntityConfig) -> FloeResult<Option<String>> {
8 let normalize = match &entity.schema.normalize_columns {
9 Some(config) => config.enabled.unwrap_or(false),
10 None => false,
11 };
12 if !normalize {
13 return Ok(None);
14 }
15 let raw = entity
16 .schema
17 .normalize_columns
18 .as_ref()
19 .and_then(|config| config.strategy.as_deref())
20 .unwrap_or("snake_case");
21 let normalized = normalize_strategy_name(raw);
22 match normalized.as_str() {
23 "snakecase" | "lower" | "camelcase" | "none" => Ok(Some(normalized)),
24 _ => Err(Box::new(ConfigError(format!(
25 "unsupported normalize_columns.strategy: {raw}"
26 )))),
27 }
28}
29
30pub fn normalize_schema_columns(
31 columns: &[config::ColumnConfig],
32 strategy: &str,
33) -> FloeResult<Vec<config::ColumnConfig>> {
34 let mut normalized = Vec::with_capacity(columns.len());
35 let mut seen = HashMap::new();
36 for column in columns {
37 let normalized_name = normalize_name(&column.name, strategy);
38 if let Some(existing) = seen.insert(normalized_name.clone(), column.name.clone()) {
39 return Err(Box::new(ConfigError(format!(
40 "normalized column name collision: {} and {} -> {}",
41 existing, column.name, normalized_name
42 ))));
43 }
44 normalized.push(config::ColumnConfig {
45 name: normalized_name,
46 column_type: column.column_type.clone(),
47 nullable: column.nullable,
48 unique: column.unique,
49 });
50 }
51 Ok(normalized)
52}
53
54pub fn normalize_dataframe_columns(df: &mut DataFrame, strategy: &str) -> FloeResult<()> {
55 let names = df.get_column_names();
56 let mut normalized_names = Vec::with_capacity(names.len());
57 let mut seen = HashMap::new();
58 for name in names {
59 let normalized = normalize_name(name, strategy);
60 if let Some(existing) = seen.insert(normalized.clone(), name.to_string()) {
61 return Err(Box::new(ConfigError(format!(
62 "normalized input column collision: {} and {} -> {}",
63 existing, name, normalized
64 ))));
65 }
66 normalized_names.push(normalized);
67 }
68 df.set_column_names(normalized_names.iter())
69 .map_err(|err| {
70 Box::new(ConfigError(format!(
71 "failed to normalize column names: {err}"
72 )))
73 })?;
74 Ok(())
75}
76
/// Reduces a strategy name to its canonical spelling: ASCII letters are
/// lowercased and every `-` and `_` is dropped, so `"snake_case"`,
/// `"Snake-Case"` and `"SNAKECASE"` all become `"snakecase"`.
fn normalize_strategy_name(value: &str) -> String {
    value
        .chars()
        .filter(|&ch| ch != '-' && ch != '_')
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
80
81pub fn normalize_name(value: &str, strategy: &str) -> String {
82 match normalize_strategy_name(strategy).as_str() {
83 "snakecase" => to_snake_case(value),
84 "lower" => value.to_ascii_lowercase(),
85 "camelcase" => to_camel_case(value),
86 "none" => value.to_string(),
87 _ => value.to_string(),
88 }
89}
90
91fn to_snake_case(value: &str) -> String {
92 split_words(value).join("_")
93}
94
95fn to_camel_case(value: &str) -> String {
96 let words = split_words(value);
97 if words.is_empty() {
98 return String::new();
99 }
100 let mut out = String::new();
101 out.push_str(&words[0]);
102 for word in words.iter().skip(1) {
103 out.push_str(&capitalize(word));
104 }
105 out
106}
107
/// Splits an identifier into lowercase words.
///
/// Any character that is not an ASCII letter or digit acts as a separator and
/// is discarded. Inside a run of ASCII alphanumerics a new word starts at an
/// uppercase letter when either
///
/// * the preceding character is a lowercase letter or a digit
///   (`fooBar` -> `foo`, `bar`; `col1Name` -> `col1`, `name`), or
/// * the preceding character is uppercase and the following one is lowercase,
///   which ends an acronym (`HTTPServer` -> `http`, `server`).
///
/// Empty words are never produced, so an input without ASCII alphanumerics
/// yields an empty vector.
fn split_words(value: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut before: Option<char> = None;
    let mut remaining = value.chars().peekable();

    while let Some(ch) = remaining.next() {
        // Remember `ch` for the next iteration and fetch the char before it.
        let prev = before.replace(ch);

        if !ch.is_ascii_alphanumeric() {
            if !pending.is_empty() {
                pieces.push(std::mem::take(&mut pending));
            }
            continue;
        }

        let starts_new_word = ch.is_ascii_uppercase()
            && !pending.is_empty()
            && match prev {
                Some(p) if p.is_ascii_lowercase() || p.is_ascii_digit() => true,
                // Inside an uppercase run, break only before the last capital
                // when it begins a regular word.
                Some(p) if p.is_ascii_uppercase() => {
                    matches!(remaining.peek(), Some(next) if next.is_ascii_lowercase())
                }
                _ => false,
            };
        if starts_new_word {
            pieces.push(std::mem::take(&mut pending));
        }

        pending.push(ch.to_ascii_lowercase());
    }

    if !pending.is_empty() {
        pieces.push(pending);
    }

    pieces
}
146
/// Returns `value` with its first character converted to ASCII uppercase and
/// the remainder left untouched. An empty input yields an empty string; a
/// leading non-ASCII or non-letter character is kept as is.
fn capitalize(value: &str) -> String {
    let mut rest = value.chars();
    let mut out = String::with_capacity(value.len());
    if let Some(initial) = rest.next() {
        out.push(initial.to_ascii_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
153}