sql_cli/ui/utils/
column_utils.rs

//! Column utilities extracted from `enhanced_tui`.
//!
//! Contains column statistics, width calculations, and column data extraction.
3use crate::buffer::ColumnStatistics;
4use crate::data::data_provider::DataProvider;
5use crate::data_analyzer::{self, DataAnalyzer};
6use std::collections::HashMap;
7
8/// Calculate statistics for a specific column
9pub fn calculate_column_statistics(
10    provider: &dyn DataProvider,
11    analyzer: &mut DataAnalyzer,
12    column_index: usize,
13) -> Option<ColumnStatistics> {
14    let headers = provider.get_column_names();
15    if headers.is_empty() || column_index >= headers.len() {
16        return None;
17    }
18
19    let column_name = headers[column_index].clone();
20    let row_count = provider.get_row_count();
21
22    // Extract column data
23    let mut column_data = Vec::with_capacity(row_count);
24    for row_idx in 0..row_count {
25        if let Some(row) = provider.get_row(row_idx) {
26            if column_index < row.len() {
27                column_data.push(row[column_index].clone());
28            } else {
29                column_data.push(String::new());
30            }
31        }
32    }
33
34    // Convert to references for the analyzer
35    let data_refs: Vec<&str> = column_data
36        .iter()
37        .map(std::string::String::as_str)
38        .collect();
39
40    // Calculate statistics
41    let analyzer_stats = analyzer.calculate_column_statistics(&column_name, &data_refs);
42
43    // Convert to buffer's ColumnStatistics format
44    Some(ColumnStatistics {
45        column_name: analyzer_stats.column_name,
46        column_type: match analyzer_stats.data_type {
47            data_analyzer::ColumnType::Integer | data_analyzer::ColumnType::Float => {
48                crate::buffer::ColumnType::Numeric
49            }
50            data_analyzer::ColumnType::String
51            | data_analyzer::ColumnType::Boolean
52            | data_analyzer::ColumnType::Date
53            | data_analyzer::ColumnType::Unknown => crate::buffer::ColumnType::String,
54            data_analyzer::ColumnType::Mixed => crate::buffer::ColumnType::Mixed,
55        },
56        total_count: analyzer_stats.total_values,
57        null_count: analyzer_stats.null_values,
58        unique_count: analyzer_stats.unique_values,
59        frequency_map: analyzer_stats.frequency_map,
60        min: analyzer_stats.min_value.and_then(|s| s.parse::<f64>().ok()),
61        max: analyzer_stats.max_value.and_then(|s| s.parse::<f64>().ok()),
62        sum: analyzer_stats.sum_value,
63        mean: analyzer_stats.avg_value,
64        median: analyzer_stats.median_value,
65    })
66}
67
68/// Calculate optimal column widths based on content
69pub fn calculate_optimal_column_widths(
70    provider: &dyn DataProvider,
71    max_sample_rows: usize,
72) -> Vec<u16> {
73    let column_count = provider.get_column_count();
74    let row_count = provider.get_row_count();
75    let sample_size = row_count.min(max_sample_rows);
76
77    let mut column_widths = vec![0u16; column_count];
78
79    // Start with header widths
80    let headers = provider.get_column_names();
81    for (i, header) in headers.iter().enumerate() {
82        if i < column_widths.len() {
83            column_widths[i] = header.len() as u16;
84        }
85    }
86
87    // Sample rows to find max widths
88    for row_idx in 0..sample_size {
89        if let Some(row) = provider.get_row(row_idx) {
90            for (col_idx, cell) in row.iter().enumerate() {
91                if col_idx < column_widths.len() {
92                    column_widths[col_idx] = column_widths[col_idx].max(cell.len() as u16);
93                }
94            }
95        }
96    }
97
98    // Apply constraints
99    const MIN_WIDTH: u16 = 5;
100    const MAX_WIDTH: u16 = 50;
101
102    for width in &mut column_widths {
103        *width = (*width).clamp(MIN_WIDTH, MAX_WIDTH);
104    }
105
106    column_widths
107}
108
109/// Calculate column widths for a specific viewport range
110pub fn calculate_viewport_column_widths(
111    provider: &dyn DataProvider,
112    viewport_start: usize,
113    viewport_end: usize,
114    max_sample_rows: usize,
115) -> HashMap<usize, u16> {
116    let mut widths = HashMap::new();
117    let row_count = provider.get_row_count();
118    let sample_size = row_count.min(max_sample_rows);
119
120    // Get headers
121    let headers = provider.get_column_names();
122
123    // Initialize with header widths for viewport columns
124    for col_idx in viewport_start..viewport_end.min(headers.len()) {
125        widths.insert(col_idx, headers[col_idx].len() as u16);
126    }
127
128    // Sample rows to find max widths
129    for row_idx in 0..sample_size {
130        if let Some(row) = provider.get_row(row_idx) {
131            for col_idx in viewport_start..viewport_end.min(row.len()) {
132                let current_width = widths.get(&col_idx).copied().unwrap_or(0);
133                let cell_width = row[col_idx].len() as u16;
134                widths.insert(col_idx, current_width.max(cell_width));
135            }
136        }
137    }
138
139    // Apply constraints
140    const MIN_WIDTH: u16 = 5;
141    const MAX_WIDTH: u16 = 50;
142
143    for width in widths.values_mut() {
144        *width = (*width).clamp(MIN_WIDTH, MAX_WIDTH);
145    }
146
147    widths
148}
149
150/// Extract all values from a specific column
151pub fn extract_column_values(provider: &dyn DataProvider, column_index: usize) -> Vec<String> {
152    let row_count = provider.get_row_count();
153    let mut values = Vec::with_capacity(row_count);
154
155    for row_idx in 0..row_count {
156        if let Some(row) = provider.get_row(row_idx) {
157            if column_index < row.len() {
158                values.push(row[column_index].clone());
159            } else {
160                values.push(String::new());
161            }
162        }
163    }
164
165    values
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// In-memory `DataProvider` backed by plain vectors, used to drive the helpers.
    #[derive(Debug)]
    struct MockDataProvider {
        headers: Vec<String>,
        rows: Vec<Vec<String>>,
    }

    impl DataProvider for MockDataProvider {
        fn get_column_count(&self) -> usize {
            self.headers.len()
        }

        fn get_row_count(&self) -> usize {
            self.rows.len()
        }

        fn get_column_names(&self) -> Vec<String> {
            self.headers.clone()
        }

        fn get_row(&self, index: usize) -> Option<Vec<String>> {
            self.rows.get(index).cloned()
        }

        fn get_cell_value(&self, row: usize, col: usize) -> Option<String> {
            self.rows.get(row).and_then(|r| r.get(col)).cloned()
        }
    }

    /// Three-column fixture whose second row carries `description` in the last column.
    fn provider_with_description(description: &str) -> MockDataProvider {
        MockDataProvider {
            headers: vec![
                "ID".to_string(),
                "Name".to_string(),
                "Description".to_string(),
            ],
            rows: vec![
                vec!["1".to_string(), "Alice".to_string(), "Short".to_string()],
                vec![
                    "2".to_string(),
                    "Bob".to_string(),
                    description.to_string(),
                ],
            ],
        }
    }

    #[test]
    fn test_calculate_optimal_widths() {
        // 46 characters: wider than the header, but still below the 50-column cap.
        let provider =
            provider_with_description("A very long description that should be clamped");

        let widths = calculate_optimal_column_widths(&provider, 10);
        // "ID" (2) is raised to the minimum, "Alice" (5) sits exactly on the minimum,
        // and the description column takes the width of its longest cell.
        assert_eq!(widths, vec![5, 5, 46]);
    }

    #[test]
    fn test_calculate_optimal_widths_clamps_long_content() {
        // 80 characters: genuinely exceeds the cap, so the clamp path is exercised.
        let provider = provider_with_description(&"x".repeat(80));

        let widths = calculate_optimal_column_widths(&provider, 10);
        assert_eq!(widths, vec![5, 5, 50]);
    }

    #[test]
    fn test_calculate_optimal_widths_respects_sample_limit() {
        let provider = provider_with_description(&"x".repeat(80));

        // Only the first row is sampled, so the long cell in row two is never seen
        // and the description column keeps its header width (11).
        let widths = calculate_optimal_column_widths(&provider, 1);
        assert_eq!(widths, vec![5, 5, 11]);
    }

    #[test]
    fn test_calculate_viewport_widths() {
        let provider =
            provider_with_description("A very long description that should be clamped");

        let widths = calculate_viewport_column_widths(&provider, 1, 3, 10);
        assert_eq!(widths.len(), 2);
        assert_eq!(widths.get(&1), Some(&5));
        assert_eq!(widths.get(&2), Some(&46));
        assert_eq!(widths.get(&0), None);
    }

    #[test]
    fn test_extract_column_values() {
        let provider = provider_with_description("Short too");

        assert_eq!(
            extract_column_values(&provider, 1),
            vec!["Alice".to_string(), "Bob".to_string()]
        );
        // A column index past the end of every row yields empty strings.
        assert_eq!(
            extract_column_values(&provider, 9),
            vec![String::new(), String::new()]
        );
    }
}