sql_cli/utils/
memory_audit.rs

1use crate::data::data_view::DataView;
2use crate::data::datatable::DataTable;
3use std::sync::Arc;
4use tracing::info;
5
/// One line item of a memory audit: a named component and the bytes attributed to it.
///
/// Entries are plain data; they are produced by the audit functions in this
/// module and can be compared, cloned, and debug-printed freely.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryAudit {
    /// Short label identifying what was measured.
    pub component: String,
    /// Size attributed to the component, in bytes.
    pub bytes: usize,
    /// Free-form, human-readable context for the measurement.
    pub description: String,
}

impl MemoryAudit {
    /// Creates an audit entry, copying `component` and `description` into owned strings.
    #[must_use]
    pub fn new(component: &str, bytes: usize, description: &str) -> Self {
        Self {
            component: component.to_string(),
            bytes,
            description: description.to_string(),
        }
    }

    /// Returns the entry's size in mebibytes (`bytes / (1024 * 1024)`).
    #[must_use]
    pub fn mb(&self) -> f64 {
        // Cast first so the division is done in floating point and keeps fractions.
        self.bytes as f64 / (1024.0 * 1024.0)
    }
}
28
29/// Estimate memory usage of a `DataTable`
30#[must_use]
31pub fn estimate_datatable_memory(table: &DataTable) -> usize {
32    let mut total_bytes = 0;
33
34    // Base struct size
35    total_bytes += std::mem::size_of::<DataTable>();
36
37    // Column metadata
38    total_bytes += table.column_count() * std::mem::size_of::<crate::data::datatable::DataColumn>();
39    for col_name in table.column_names() {
40        total_bytes += col_name.len();
41    }
42
43    // Row data - this is the big one
44    let rows = table.row_count();
45    let cols = table.column_count();
46
47    // Estimate based on data types
48    // Each DataValue enum takes up space for the discriminant + largest variant
49    let value_size = std::mem::size_of::<crate::data::datatable::DataValue>();
50    total_bytes += rows * cols * value_size;
51
52    // String data (not in enum, heap allocated)
53    // Sample first 100 rows to estimate string sizes
54    let sample_rows = std::cmp::min(100, rows);
55    let mut string_bytes = 0;
56
57    for row_idx in 0..sample_rows {
58        if let Some(row) = table.get_row(row_idx) {
59            for col_idx in 0..cols {
60                if let Some(value) = row.get(col_idx) {
61                    use crate::data::datatable::DataValue;
62                    match value {
63                        DataValue::String(s) => string_bytes += s.len(),
64                        DataValue::InternedString(s) => string_bytes += s.len(),
65                        _ => {}
66                    }
67                }
68            }
69        }
70    }
71
72    // Extrapolate string usage
73    if sample_rows > 0 {
74        let avg_string_per_row = string_bytes / sample_rows;
75        total_bytes += avg_string_per_row * rows;
76    }
77
78    total_bytes
79}
80
81/// Estimate memory usage of a `DataView`
82#[must_use]
83pub fn estimate_dataview_memory(view: &DataView) -> usize {
84    let mut total_bytes = 0;
85
86    // Base struct size
87    total_bytes += std::mem::size_of::<DataView>();
88
89    // The view holds an Arc to the DataTable (just a pointer, 8 bytes)
90    total_bytes += std::mem::size_of::<Arc<DataTable>>();
91
92    // Visible row indices
93    let visible_rows = view.row_count();
94    total_bytes += visible_rows * std::mem::size_of::<usize>();
95
96    // Visible column indices
97    let visible_cols = view.column_count();
98    total_bytes += visible_cols * std::mem::size_of::<usize>();
99
100    // Note: The actual DataTable is NOT counted here as it's shared via Arc
101
102    total_bytes
103}
104
105/// Perform a comprehensive memory audit
106#[must_use]
107pub fn perform_memory_audit(
108    datatable: Option<&DataTable>,
109    original_source: Option<&DataTable>,
110    dataview: Option<&DataView>,
111) -> Vec<MemoryAudit> {
112    let mut audits = Vec::new();
113
114    // Track current process memory
115    if let Some(kb) = crate::utils::memory_tracker::get_process_memory_kb() {
116        audits.push(MemoryAudit::new(
117            "Process Total",
118            kb * 1024,
119            "Total process memory (RSS)",
120        ));
121    }
122
123    // Track DataTable memory
124    if let Some(dt) = datatable {
125        let bytes = estimate_datatable_memory(dt);
126        audits.push(MemoryAudit::new(
127            "DataTable",
128            bytes,
129            &format!("{} rows x {} cols", dt.row_count(), dt.column_count()),
130        ));
131    }
132
133    // Track original source memory (this is the duplication!)
134    if let Some(original) = original_source {
135        let bytes = estimate_datatable_memory(original);
136        audits.push(MemoryAudit::new(
137            "Original Source (DUPLICATE!)",
138            bytes,
139            &format!(
140                "{} rows x {} cols",
141                original.row_count(),
142                original.column_count()
143            ),
144        ));
145    }
146
147    // Track DataView memory (should be small)
148    if let Some(view) = dataview {
149        let bytes = estimate_dataview_memory(view);
150        audits.push(MemoryAudit::new(
151            "DataView",
152            bytes,
153            &format!("{} visible rows", view.row_count()),
154        ));
155    }
156
157    audits
158}
159
160/// Log memory audit results
161pub fn log_memory_audit(audits: &[MemoryAudit]) {
162    info!("=== MEMORY AUDIT ===");
163    let mut total_tracked = 0;
164
165    for audit in audits {
166        info!(
167            "  {}: {:.2} MB - {}",
168            audit.component,
169            audit.mb(),
170            audit.description
171        );
172        if !audit.component.contains("Process") {
173            total_tracked += audit.bytes;
174        }
175    }
176
177    info!(
178        "  Total Tracked: {:.2} MB",
179        total_tracked as f64 / (1024.0 * 1024.0)
180    );
181
182    // Check for duplication
183    if audits.iter().any(|a| a.component.contains("DUPLICATE")) {
184        info!("  ⚠️  WARNING: Memory duplication detected!");
185    }
186}