tacet/data/mod.rs
1//! Data loading utilities for pre-collected timing measurements.
2//!
3//! This module provides utilities for loading timing data from external sources,
4//! enabling analysis of measurements collected by other tools (SILENT, dudect, etc.)
5//! or historical data.
6//!
7//! # Supported Formats
8//!
9//! - **SILENT format**: CSV with `V1,V2` header, group labels (X/Y) in first column
10//! - **Generic two-column**: Any CSV with group label and timing value columns
11//! - **Separate files**: Two files, one per group
12//!
13//! # Example
14//!
15//! ```ignore
16//! use tacet::data::{load_silent_csv, TimeUnit};
17//! use std::path::Path;
18//!
19//! // Load SILENT-format data
20//! let data = load_silent_csv(Path::new("measurements.csv"))?;
21//! println!("Loaded {} baseline, {} test samples",
22//! data.baseline_samples.len(),
23//! data.test_samples.len());
24//! ```
25
26mod csv;
27mod units;
28
29pub use csv::{load_separate_files, load_silent_csv, load_two_column_csv};
30pub use units::{to_nanoseconds, TimeUnit};
31
32use std::fmt;
33
/// Failure modes reported while loading timing data.
///
/// Each variant carries the details needed to build a diagnostic message:
/// the underlying I/O error, the offending line, or the sample counts.
#[derive(Debug)]
pub enum DataError {
    /// The underlying file could not be read; wraps the original I/O error.
    Io(std::io::Error),

    /// A line of CSV input could not be parsed.
    Parse {
        /// 1-indexed line number at which parsing failed.
        line: usize,
        /// Human-readable explanation of what went wrong.
        message: String,
    },

    /// A required group label was absent from the data.
    MissingGroup {
        /// The label that was looked for.
        expected: String,
        /// Every label that was actually present.
        found: Vec<String>,
    },

    /// A group holds fewer samples than the analysis requires.
    InsufficientSamples {
        /// Name of the undersized group.
        group: String,
        /// How many samples the group actually contains.
        got: usize,
        /// The smallest acceptable sample count.
        min: usize,
    },

    /// A timing value was not valid.
    InvalidValue {
        /// 1-indexed line number holding the bad value.
        line: usize,
        /// The raw text of the rejected value.
        value: String,
    },
}
74
75impl fmt::Display for DataError {
76 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77 match self {
78 DataError::Io(e) => write!(f, "IO error: {}", e),
79 DataError::Parse { line, message } => {
80 write!(f, "Parse error at line {}: {}", line, message)
81 }
82 DataError::MissingGroup { expected, found } => {
83 write!(
84 f,
85 "Missing group '{}' in data. Found groups: {:?}",
86 expected, found
87 )
88 }
89 DataError::InsufficientSamples { group, got, min } => {
90 write!(
91 f,
92 "Insufficient samples for group '{}': got {}, need at least {}",
93 group, got, min
94 )
95 }
96 DataError::InvalidValue { line, value } => {
97 write!(f, "Invalid timing value at line {}: '{}'", line, value)
98 }
99 }
100 }
101}
102
103impl std::error::Error for DataError {
104 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
105 match self {
106 DataError::Io(e) => Some(e),
107 _ => None,
108 }
109 }
110}
111
112impl From<std::io::Error> for DataError {
113 fn from(e: std::io::Error) -> Self {
114 DataError::Io(e)
115 }
116}
117
118/// Loaded timing data with two sample groups.
119///
120/// Represents timing measurements split into two groups for comparison:
121/// - `baseline_samples`: Control/reference measurements (e.g., "X" group in SILENT)
122/// - `test_samples`: Test/treatment measurements (e.g., "Y" group in SILENT)
123#[derive(Debug, Clone)]
124pub struct TimingData {
125 /// Samples for the baseline/control group.
126 pub baseline_samples: Vec<u64>,
127
128 /// Samples for the test/treatment group.
129 pub test_samples: Vec<u64>,
130
131 /// Time unit of the samples.
132 pub unit: TimeUnit,
133
134 /// Optional metadata about the data source.
135 pub metadata: Option<DataMetadata>,
136}
137
138impl TimingData {
139 /// Create new timing data from two sample vectors.
140 pub fn new(baseline: Vec<u64>, test: Vec<u64>, unit: TimeUnit) -> Self {
141 Self {
142 baseline_samples: baseline,
143 test_samples: test,
144 unit,
145 metadata: None,
146 }
147 }
148
149 /// Create timing data with metadata.
150 pub fn with_metadata(
151 baseline: Vec<u64>,
152 test: Vec<u64>,
153 unit: TimeUnit,
154 metadata: DataMetadata,
155 ) -> Self {
156 Self {
157 baseline_samples: baseline,
158 test_samples: test,
159 unit,
160 metadata: Some(metadata),
161 }
162 }
163
164 /// Get the number of samples in the smaller group.
165 pub fn min_samples(&self) -> usize {
166 self.baseline_samples.len().min(self.test_samples.len())
167 }
168
169 /// Get total number of samples across both groups.
170 pub fn total_samples(&self) -> usize {
171 self.baseline_samples.len() + self.test_samples.len()
172 }
173
174 /// Check if there are enough samples for analysis.
175 ///
176 /// Returns `Ok(())` if both groups have at least `min_samples`,
177 /// or an appropriate `DataError` otherwise.
178 pub fn validate(&self, min_samples: usize) -> Result<(), DataError> {
179 if self.baseline_samples.len() < min_samples {
180 return Err(DataError::InsufficientSamples {
181 group: "baseline".to_string(),
182 got: self.baseline_samples.len(),
183 min: min_samples,
184 });
185 }
186 if self.test_samples.len() < min_samples {
187 return Err(DataError::InsufficientSamples {
188 group: "test".to_string(),
189 got: self.test_samples.len(),
190 min: min_samples,
191 });
192 }
193 Ok(())
194 }
195
196 /// Convert samples to nanoseconds using the specified conversion factor.
197 ///
198 /// # Arguments
199 /// * `ns_per_unit` - Nanoseconds per sample unit (e.g., 0.33 for cycles at 3GHz)
200 ///
201 /// # Returns
202 /// Tuple of (baseline_ns, test_ns) as f64 vectors.
203 pub fn to_nanoseconds(&self, ns_per_unit: f64) -> (Vec<f64>, Vec<f64>) {
204 let baseline_ns: Vec<f64> = self
205 .baseline_samples
206 .iter()
207 .map(|&s| s as f64 * ns_per_unit)
208 .collect();
209 let test_ns: Vec<f64> = self
210 .test_samples
211 .iter()
212 .map(|&s| s as f64 * ns_per_unit)
213 .collect();
214 (baseline_ns, test_ns)
215 }
216}
217
/// Descriptive information about where a data set came from.
///
/// Every field is optional; `DataMetadata::default()` yields a value with all
/// fields set to `None`. The type implements `PartialEq`/`Eq`, so two metadata
/// values can be compared directly (for example with `assert_eq!`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DataMetadata {
    /// Original filename or other identifier of the source.
    pub source: Option<String>,

    /// The pair of labels used for the two groups in the source file.
    pub group_labels: Option<(String, String)>,

    /// Free-form additional context (e.g., from SILENT summary JSON).
    pub context: Option<String>,
}
230
#[cfg(test)]
mod tests {
    use super::*;

    /// `validate` accepts a minimum both groups satisfy and rejects one the
    /// smaller group cannot meet.
    #[test]
    fn test_timing_data_validation() {
        let baseline = vec![1, 2, 3];
        let test = vec![4, 5];
        let data = TimingData::new(baseline, test, TimeUnit::Cycles);

        // Both groups hold at least two samples.
        let with_min_two = data.validate(2);
        assert!(with_min_two.is_ok());

        // The test group has only two samples, so a minimum of three fails.
        let with_min_three = data.validate(3);
        assert!(with_min_three.is_err());
    }

    /// `to_nanoseconds` multiplies every sample in both groups by the factor.
    #[test]
    fn test_timing_data_to_nanoseconds() {
        let data = TimingData::new(vec![100, 200], vec![150, 250], TimeUnit::Cycles);

        let converted = data.to_nanoseconds(0.5);
        let expected = (vec![50.0, 100.0], vec![75.0, 125.0]);

        assert_eq!(converted.0, expected.0);
        assert_eq!(converted.1, expected.1);
    }
}