sql_cli/refactoring/
banding.rs

1// Banding and bucketing refactoring tools
2
3use anyhow::Result;
4use serde::{Deserialize, Serialize};
5
6pub use super::{Band, BandingConfig};
7
8#[derive(Debug, Serialize, Deserialize)]
9pub enum CaseStyle {
10    Range,  // Numeric ranges like 0-10, 11-20
11    Values, // Specific values like 'A', 'B', 'C'
12    Custom, // Custom conditions
13}
14
15#[derive(Debug, Serialize, Deserialize)]
16pub struct CaseGenerator {
17    pub column: String,
18    pub style: CaseStyle,
19    pub conditions: Vec<CaseCondition>,
20    pub else_clause: Option<String>,
21    pub alias: Option<String>,
22}
23
24#[derive(Debug, Serialize, Deserialize)]
25pub struct CaseCondition {
26    pub condition: String,
27    pub label: String,
28}
29
30impl CaseGenerator {
31    /// Create a generator from range specifications
32    pub fn from_ranges(column: &str, ranges: &str, labels: Option<Vec<String>>) -> Result<Self> {
33        let config = BandingConfig::from_string(column, ranges)?;
34        let mut conditions = Vec::new();
35
36        for (i, band) in config.bands.iter().enumerate() {
37            let label = if let Some(ref labels) = labels {
38                labels.get(i).unwrap_or(&band.label).clone()
39            } else {
40                band.label.clone()
41            };
42
43            let condition = if let Some(min) = band.min {
44                if let Some(max) = band.max {
45                    if i == 0 {
46                        format!("{} <= {}", column, max)
47                    } else {
48                        format!("{} > {} AND {} <= {}", column, min, column, max)
49                    }
50                } else {
51                    format!("{} > {}", column, min)
52                }
53            } else if let Some(max) = band.max {
54                format!("{} <= {}", column, max)
55            } else {
56                continue;
57            };
58
59            conditions.push(CaseCondition { condition, label });
60        }
61
62        Ok(CaseGenerator {
63            column: column.to_string(),
64            style: CaseStyle::Range,
65            conditions,
66            else_clause: config.else_label,
67            alias: config.alias,
68        })
69    }
70
71    /// Create a generator from specific values
72    pub fn from_values(column: &str, value_mappings: Vec<(String, String)>) -> Self {
73        let conditions = value_mappings
74            .into_iter()
75            .map(|(value, label)| CaseCondition {
76                condition: format!("{} = '{}'", column, value),
77                label,
78            })
79            .collect();
80
81        CaseGenerator {
82            column: column.to_string(),
83            style: CaseStyle::Values,
84            conditions,
85            else_clause: Some("Other".to_string()),
86            alias: Some(format!("{}_category", column)),
87        }
88    }
89
90    /// Generate the SQL CASE statement
91    pub fn to_sql(&self) -> String {
92        let mut sql = String::from("CASE");
93
94        for condition in &self.conditions {
95            sql.push_str(&format!(
96                "\n    WHEN {} THEN '{}'",
97                condition.condition, condition.label
98            ));
99        }
100
101        if let Some(ref else_clause) = self.else_clause {
102            sql.push_str(&format!("\n    ELSE '{}'", else_clause));
103        }
104
105        sql.push_str("\nEND");
106
107        if let Some(ref alias) = self.alias {
108            sql.push_str(&format!(" AS {}", alias));
109        }
110
111        sql
112    }
113
114    /// Generate a pretty-printed version with indentation
115    pub fn to_sql_formatted(&self, indent: usize) -> String {
116        let indent_str = " ".repeat(indent);
117        let mut sql = format!("{}CASE", indent_str);
118
119        for condition in &self.conditions {
120            sql.push_str(&format!(
121                "\n{}    WHEN {} THEN '{}'",
122                indent_str, condition.condition, condition.label
123            ));
124        }
125
126        if let Some(ref else_clause) = self.else_clause {
127            sql.push_str(&format!("\n{}    ELSE '{}'", indent_str, else_clause));
128        }
129
130        sql.push_str(&format!("\n{}END", indent_str));
131
132        if let Some(ref alias) = self.alias {
133            sql.push_str(&format!(" AS {}", alias));
134        }
135
136        sql
137    }
138}
139
140impl BandingConfig {
141    /// Generate percentile-based bands
142    pub fn from_percentiles(column: &str, percentiles: Vec<f64>) -> Self {
143        let mut bands = Vec::new();
144
145        for i in 0..percentiles.len() {
146            let label = if i == 0 {
147                format!("P0-P{}", (percentiles[i] * 100.0) as i32)
148            } else if i == percentiles.len() - 1 {
149                format!("P{}+", (percentiles[i - 1] * 100.0) as i32)
150            } else {
151                format!(
152                    "P{}-P{}",
153                    (percentiles[i - 1] * 100.0) as i32,
154                    (percentiles[i] * 100.0) as i32
155                )
156            };
157
158            bands.push(Band {
159                min: if i == 0 {
160                    None
161                } else {
162                    Some(percentiles[i - 1])
163                },
164                max: if i == percentiles.len() - 1 {
165                    None
166                } else {
167                    Some(percentiles[i])
168                },
169                label,
170            });
171        }
172
173        BandingConfig {
174            column: column.to_string(),
175            bands,
176            else_label: None,
177            alias: Some(format!("{}_percentile", column)),
178        }
179    }
180
181    /// Generate equal-width bands
182    pub fn from_equal_width(
183        column: &str,
184        num_bands: usize,
185        min_val: f64,
186        max_val: f64,
187        labels: Option<Vec<String>>,
188    ) -> Self {
189        let width = (max_val - min_val) / num_bands as f64;
190        let mut bands = Vec::new();
191
192        for i in 0..num_bands {
193            let band_min = min_val + (i as f64 * width);
194            let band_max = if i == num_bands - 1 {
195                max_val
196            } else {
197                min_val + ((i + 1) as f64 * width)
198            };
199
200            let label = if let Some(ref labels) = labels {
201                labels.get(i).map(|s| s.clone()).unwrap_or_else(|| {
202                    if i == num_bands - 1 {
203                        format!("{:.0}+", band_min)
204                    } else {
205                        format!("{:.0}-{:.0}", band_min, band_max)
206                    }
207                })
208            } else {
209                if i == num_bands - 1 {
210                    format!("{:.0}+", band_min)
211                } else {
212                    format!("{:.0}-{:.0}", band_min, band_max)
213                }
214            };
215
216            bands.push(Band {
217                min: if i == 0 { None } else { Some(band_min) },
218                max: if i == num_bands - 1 {
219                    None
220                } else {
221                    Some(band_max)
222                },
223                label,
224            });
225        }
226
227        BandingConfig {
228            column: column.to_string(),
229            bands,
230            else_label: None,
231            alias: Some(format!("{}_band", column)),
232        }
233    }
234}