sql_cli/ui/utils/
column_utils.rs1use crate::buffer::ColumnStatistics;
4use crate::data::data_provider::DataProvider;
5use crate::data_analyzer::{self, DataAnalyzer};
6use std::collections::HashMap;
7
8pub fn calculate_column_statistics(
10 provider: &dyn DataProvider,
11 analyzer: &mut DataAnalyzer,
12 column_index: usize,
13) -> Option<ColumnStatistics> {
14 let headers = provider.get_column_names();
15 if headers.is_empty() || column_index >= headers.len() {
16 return None;
17 }
18
19 let column_name = headers[column_index].clone();
20 let row_count = provider.get_row_count();
21
22 let mut column_data = Vec::with_capacity(row_count);
24 for row_idx in 0..row_count {
25 if let Some(row) = provider.get_row(row_idx) {
26 if column_index < row.len() {
27 column_data.push(row[column_index].clone());
28 } else {
29 column_data.push(String::new());
30 }
31 }
32 }
33
34 let data_refs: Vec<&str> = column_data
36 .iter()
37 .map(std::string::String::as_str)
38 .collect();
39
40 let analyzer_stats = analyzer.calculate_column_statistics(&column_name, &data_refs);
42
43 Some(ColumnStatistics {
45 column_name: analyzer_stats.column_name,
46 column_type: match analyzer_stats.data_type {
47 data_analyzer::ColumnType::Integer | data_analyzer::ColumnType::Float => {
48 crate::buffer::ColumnType::Numeric
49 }
50 data_analyzer::ColumnType::String
51 | data_analyzer::ColumnType::Boolean
52 | data_analyzer::ColumnType::Date
53 | data_analyzer::ColumnType::Unknown => crate::buffer::ColumnType::String,
54 data_analyzer::ColumnType::Mixed => crate::buffer::ColumnType::Mixed,
55 },
56 total_count: analyzer_stats.total_values,
57 null_count: analyzer_stats.null_values,
58 unique_count: analyzer_stats.unique_values,
59 frequency_map: analyzer_stats.frequency_map,
60 min: analyzer_stats.min_value.and_then(|s| s.parse::<f64>().ok()),
61 max: analyzer_stats.max_value.and_then(|s| s.parse::<f64>().ok()),
62 sum: analyzer_stats.sum_value,
63 mean: analyzer_stats.avg_value,
64 median: analyzer_stats.median_value,
65 })
66}
67
68pub fn calculate_optimal_column_widths(
70 provider: &dyn DataProvider,
71 max_sample_rows: usize,
72) -> Vec<u16> {
73 let column_count = provider.get_column_count();
74 let row_count = provider.get_row_count();
75 let sample_size = row_count.min(max_sample_rows);
76
77 let mut column_widths = vec![0u16; column_count];
78
79 let headers = provider.get_column_names();
81 for (i, header) in headers.iter().enumerate() {
82 if i < column_widths.len() {
83 column_widths[i] = header.len() as u16;
84 }
85 }
86
87 for row_idx in 0..sample_size {
89 if let Some(row) = provider.get_row(row_idx) {
90 for (col_idx, cell) in row.iter().enumerate() {
91 if col_idx < column_widths.len() {
92 column_widths[col_idx] = column_widths[col_idx].max(cell.len() as u16);
93 }
94 }
95 }
96 }
97
98 const MIN_WIDTH: u16 = 5;
100 const MAX_WIDTH: u16 = 50;
101
102 for width in &mut column_widths {
103 *width = (*width).clamp(MIN_WIDTH, MAX_WIDTH);
104 }
105
106 column_widths
107}
108
109pub fn calculate_viewport_column_widths(
111 provider: &dyn DataProvider,
112 viewport_start: usize,
113 viewport_end: usize,
114 max_sample_rows: usize,
115) -> HashMap<usize, u16> {
116 let mut widths = HashMap::new();
117 let row_count = provider.get_row_count();
118 let sample_size = row_count.min(max_sample_rows);
119
120 let headers = provider.get_column_names();
122
123 for col_idx in viewport_start..viewport_end.min(headers.len()) {
125 widths.insert(col_idx, headers[col_idx].len() as u16);
126 }
127
128 for row_idx in 0..sample_size {
130 if let Some(row) = provider.get_row(row_idx) {
131 for col_idx in viewport_start..viewport_end.min(row.len()) {
132 let current_width = widths.get(&col_idx).copied().unwrap_or(0);
133 let cell_width = row[col_idx].len() as u16;
134 widths.insert(col_idx, current_width.max(cell_width));
135 }
136 }
137 }
138
139 const MIN_WIDTH: u16 = 5;
141 const MAX_WIDTH: u16 = 50;
142
143 for width in widths.values_mut() {
144 *width = (*width).clamp(MIN_WIDTH, MAX_WIDTH);
145 }
146
147 widths
148}
149
150pub fn extract_column_values(provider: &dyn DataProvider, column_index: usize) -> Vec<String> {
152 let row_count = provider.get_row_count();
153 let mut values = Vec::with_capacity(row_count);
154
155 for row_idx in 0..row_count {
156 if let Some(row) = provider.get_row(row_idx) {
157 if column_index < row.len() {
158 values.push(row[column_index].clone());
159 } else {
160 values.push(String::new());
161 }
162 }
163 }
164
165 values
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal in-memory `DataProvider` backed by owned header and row vectors.
    #[derive(Debug)]
    struct MockDataProvider {
        headers: Vec<String>,
        rows: Vec<Vec<String>>,
    }

    impl DataProvider for MockDataProvider {
        fn get_column_count(&self) -> usize {
            self.headers.len()
        }

        fn get_row_count(&self) -> usize {
            self.rows.len()
        }

        fn get_column_names(&self) -> Vec<String> {
            self.headers.clone()
        }

        fn get_row(&self, index: usize) -> Option<Vec<String>> {
            self.rows.get(index).cloned()
        }

        fn get_cell_value(&self, row: usize, col: usize) -> Option<String> {
            self.rows.get(row).and_then(|r| r.get(col)).cloned()
        }
    }

    /// Covers both ends of the clamp: short columns are raised to the minimum
    /// width and a cell longer than the maximum is capped.
    #[test]
    fn test_calculate_optimal_widths() {
        let provider = MockDataProvider {
            headers: vec![
                "ID".to_string(),
                "Name".to_string(),
                "Description".to_string(),
            ],
            rows: vec![
                vec!["1".to_string(), "Alice".to_string(), "Short".to_string()],
                vec![
                    "2".to_string(),
                    "Bob".to_string(),
                    // 67 bytes: longer than the 50-column maximum, so the
                    // upper clamp is actually exercised.
                    "A very long description that should be clamped to the maximum width"
                        .to_string(),
                ],
            ],
        };

        let widths = calculate_optimal_column_widths(&provider, 10);
        // "ID" (2) is raised to the minimum of 5; "Alice" (5) already equals
        // the minimum; the long description is capped at 50.
        assert_eq!(widths, vec![5, 5, 50]);
    }
}