sql_cli/utils/
memory_audit.rs

1use crate::data::data_view::DataView;
2use crate::data::datatable::DataTable;
3use std::sync::Arc;
4use tracing::info;
5
/// One line item of a memory audit: how many bytes a named component accounts for.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so entries can be logged with
/// `{:?}`, copied into reports, and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryAudit {
    /// Human-readable name of the component being measured.
    pub component: String,
    /// Size attributed to the component, in bytes.
    pub bytes: usize,
    /// Free-form detail shown next to the figure when the entry is reported.
    pub description: String,
}

impl MemoryAudit {
    /// Number of bytes in one mebibyte, as a float for unit conversion.
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    /// Build an audit entry, copying `component` and `description` into owned strings.
    #[must_use]
    pub fn new(component: &str, bytes: usize, description: &str) -> Self {
        Self {
            component: component.to_string(),
            bytes,
            description: description.to_string(),
        }
    }

    /// Return the entry's size in mebibytes (`bytes / 1024 / 1024`).
    #[must_use]
    pub fn mb(&self) -> f64 {
        self.bytes as f64 / Self::BYTES_PER_MB
    }
}
26
27/// Estimate memory usage of a DataTable
28pub fn estimate_datatable_memory(table: &DataTable) -> usize {
29    let mut total_bytes = 0;
30
31    // Base struct size
32    total_bytes += std::mem::size_of::<DataTable>();
33
34    // Column metadata
35    total_bytes += table.column_count() * std::mem::size_of::<crate::data::datatable::DataColumn>();
36    for col_name in table.column_names() {
37        total_bytes += col_name.len();
38    }
39
40    // Row data - this is the big one
41    let rows = table.row_count();
42    let cols = table.column_count();
43
44    // Estimate based on data types
45    // Each DataValue enum takes up space for the discriminant + largest variant
46    let value_size = std::mem::size_of::<crate::data::datatable::DataValue>();
47    total_bytes += rows * cols * value_size;
48
49    // String data (not in enum, heap allocated)
50    // Sample first 100 rows to estimate string sizes
51    let sample_rows = std::cmp::min(100, rows);
52    let mut string_bytes = 0;
53
54    for row_idx in 0..sample_rows {
55        if let Some(row) = table.get_row(row_idx) {
56            for col_idx in 0..cols {
57                if let Some(value) = row.get(col_idx) {
58                    use crate::data::datatable::DataValue;
59                    match value {
60                        DataValue::String(s) => string_bytes += s.len(),
61                        DataValue::InternedString(s) => string_bytes += s.len(),
62                        _ => {}
63                    }
64                }
65            }
66        }
67    }
68
69    // Extrapolate string usage
70    if sample_rows > 0 {
71        let avg_string_per_row = string_bytes / sample_rows;
72        total_bytes += avg_string_per_row * rows;
73    }
74
75    total_bytes
76}
77
78/// Estimate memory usage of a DataView
79pub fn estimate_dataview_memory(view: &DataView) -> usize {
80    let mut total_bytes = 0;
81
82    // Base struct size
83    total_bytes += std::mem::size_of::<DataView>();
84
85    // The view holds an Arc to the DataTable (just a pointer, 8 bytes)
86    total_bytes += std::mem::size_of::<Arc<DataTable>>();
87
88    // Visible row indices
89    let visible_rows = view.row_count();
90    total_bytes += visible_rows * std::mem::size_of::<usize>();
91
92    // Visible column indices
93    let visible_cols = view.column_count();
94    total_bytes += visible_cols * std::mem::size_of::<usize>();
95
96    // Note: The actual DataTable is NOT counted here as it's shared via Arc
97
98    total_bytes
99}
100
101/// Perform a comprehensive memory audit
102pub fn perform_memory_audit(
103    datatable: Option<&DataTable>,
104    original_source: Option<&DataTable>,
105    dataview: Option<&DataView>,
106) -> Vec<MemoryAudit> {
107    let mut audits = Vec::new();
108
109    // Track current process memory
110    if let Some(kb) = crate::utils::memory_tracker::get_process_memory_kb() {
111        audits.push(MemoryAudit::new(
112            "Process Total",
113            kb * 1024,
114            "Total process memory (RSS)",
115        ));
116    }
117
118    // Track DataTable memory
119    if let Some(dt) = datatable {
120        let bytes = estimate_datatable_memory(dt);
121        audits.push(MemoryAudit::new(
122            "DataTable",
123            bytes,
124            &format!("{} rows x {} cols", dt.row_count(), dt.column_count()),
125        ));
126    }
127
128    // Track original source memory (this is the duplication!)
129    if let Some(original) = original_source {
130        let bytes = estimate_datatable_memory(original);
131        audits.push(MemoryAudit::new(
132            "Original Source (DUPLICATE!)",
133            bytes,
134            &format!(
135                "{} rows x {} cols",
136                original.row_count(),
137                original.column_count()
138            ),
139        ));
140    }
141
142    // Track DataView memory (should be small)
143    if let Some(view) = dataview {
144        let bytes = estimate_dataview_memory(view);
145        audits.push(MemoryAudit::new(
146            "DataView",
147            bytes,
148            &format!("{} visible rows", view.row_count()),
149        ));
150    }
151
152    audits
153}
154
155/// Log memory audit results
156pub fn log_memory_audit(audits: &[MemoryAudit]) {
157    info!("=== MEMORY AUDIT ===");
158    let mut total_tracked = 0;
159
160    for audit in audits {
161        info!(
162            "  {}: {:.2} MB - {}",
163            audit.component,
164            audit.mb(),
165            audit.description
166        );
167        if !audit.component.contains("Process") {
168            total_tracked += audit.bytes;
169        }
170    }
171
172    info!(
173        "  Total Tracked: {:.2} MB",
174        total_tracked as f64 / (1024.0 * 1024.0)
175    );
176
177    // Check for duplication
178    if audits.iter().any(|a| a.component.contains("DUPLICATE")) {
179        info!("  ⚠️  WARNING: Memory duplication detected!");
180    }
181}