// tacet/data/mod.rs

//! Data loading utilities for pre-collected timing measurements.
//!
//! This module provides utilities for loading timing data from external sources,
//! enabling analysis of measurements collected by other tools (SILENT, dudect, etc.)
//! or of historical data.
//!
//! # Supported Formats
//!
//! - **SILENT format**: CSV with `V1,V2` header, group labels (X/Y) in first column
//! - **Generic two-column**: Any CSV with group label and timing value columns
//! - **Separate files**: Two files, one per group
//!
//! # Example
//!
//! ```ignore
//! use tacet::data::{load_silent_csv, TimeUnit};
//! use std::path::Path;
//!
//! // Load SILENT-format data
//! let data = load_silent_csv(Path::new("measurements.csv"))?;
//! println!("Loaded {} baseline, {} test samples",
//!          data.baseline_samples.len(),
//!          data.test_samples.len());
//! ```

26mod csv;
27mod units;
28
29pub use csv::{load_separate_files, load_silent_csv, load_two_column_csv};
30pub use units::{to_nanoseconds, TimeUnit};
31
32use std::fmt;
33
/// Errors that can occur during data loading.
///
/// Line numbers carried by [`DataError::Parse`] and [`DataError::InvalidValue`]
/// are 1-indexed.
#[derive(Debug)]
pub enum DataError {
    /// IO error reading a file.
    Io(std::io::Error),

    /// CSV parse error at a specific line.
    Parse {
        /// Line number where the error occurred (1-indexed).
        line: usize,
        /// Description of the parse error.
        message: String,
    },

    /// A required group is missing from the data.
    MissingGroup {
        /// The group label that was expected but not found.
        expected: String,
        /// The group labels that were actually found in the data.
        found: Vec<String>,
    },

    /// A group has too few samples for analysis.
    InsufficientSamples {
        /// Name of the group with insufficient samples.
        group: String,
        /// Number of samples found.
        got: usize,
        /// Minimum number of samples required.
        min: usize,
    },

    /// A timing value could not be interpreted.
    InvalidValue {
        /// Line number where the invalid value was found (1-indexed).
        line: usize,
        /// The invalid value string, as it appeared in the input.
        value: String,
    },
}

75impl fmt::Display for DataError {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        match self {
78            DataError::Io(e) => write!(f, "IO error: {}", e),
79            DataError::Parse { line, message } => {
80                write!(f, "Parse error at line {}: {}", line, message)
81            }
82            DataError::MissingGroup { expected, found } => {
83                write!(
84                    f,
85                    "Missing group '{}' in data. Found groups: {:?}",
86                    expected, found
87                )
88            }
89            DataError::InsufficientSamples { group, got, min } => {
90                write!(
91                    f,
92                    "Insufficient samples for group '{}': got {}, need at least {}",
93                    group, got, min
94                )
95            }
96            DataError::InvalidValue { line, value } => {
97                write!(f, "Invalid timing value at line {}: '{}'", line, value)
98            }
99        }
100    }
101}
102
103impl std::error::Error for DataError {
104    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
105        match self {
106            DataError::Io(e) => Some(e),
107            _ => None,
108        }
109    }
110}
111
112impl From<std::io::Error> for DataError {
113    fn from(e: std::io::Error) -> Self {
114        DataError::Io(e)
115    }
116}
117
118/// Loaded timing data with two sample groups.
119///
120/// Represents timing measurements split into two groups for comparison:
121/// - `baseline_samples`: Control/reference measurements (e.g., "X" group in SILENT)
122/// - `test_samples`: Test/treatment measurements (e.g., "Y" group in SILENT)
123#[derive(Debug, Clone)]
124pub struct TimingData {
125    /// Samples for the baseline/control group.
126    pub baseline_samples: Vec<u64>,
127
128    /// Samples for the test/treatment group.
129    pub test_samples: Vec<u64>,
130
131    /// Time unit of the samples.
132    pub unit: TimeUnit,
133
134    /// Optional metadata about the data source.
135    pub metadata: Option<DataMetadata>,
136}
137
138impl TimingData {
139    /// Create new timing data from two sample vectors.
140    pub fn new(baseline: Vec<u64>, test: Vec<u64>, unit: TimeUnit) -> Self {
141        Self {
142            baseline_samples: baseline,
143            test_samples: test,
144            unit,
145            metadata: None,
146        }
147    }
148
149    /// Create timing data with metadata.
150    pub fn with_metadata(
151        baseline: Vec<u64>,
152        test: Vec<u64>,
153        unit: TimeUnit,
154        metadata: DataMetadata,
155    ) -> Self {
156        Self {
157            baseline_samples: baseline,
158            test_samples: test,
159            unit,
160            metadata: Some(metadata),
161        }
162    }
163
164    /// Get the number of samples in the smaller group.
165    pub fn min_samples(&self) -> usize {
166        self.baseline_samples.len().min(self.test_samples.len())
167    }
168
169    /// Get total number of samples across both groups.
170    pub fn total_samples(&self) -> usize {
171        self.baseline_samples.len() + self.test_samples.len()
172    }
173
174    /// Check if there are enough samples for analysis.
175    ///
176    /// Returns `Ok(())` if both groups have at least `min_samples`,
177    /// or an appropriate `DataError` otherwise.
178    pub fn validate(&self, min_samples: usize) -> Result<(), DataError> {
179        if self.baseline_samples.len() < min_samples {
180            return Err(DataError::InsufficientSamples {
181                group: "baseline".to_string(),
182                got: self.baseline_samples.len(),
183                min: min_samples,
184            });
185        }
186        if self.test_samples.len() < min_samples {
187            return Err(DataError::InsufficientSamples {
188                group: "test".to_string(),
189                got: self.test_samples.len(),
190                min: min_samples,
191            });
192        }
193        Ok(())
194    }
195
196    /// Convert samples to nanoseconds using the specified conversion factor.
197    ///
198    /// # Arguments
199    /// * `ns_per_unit` - Nanoseconds per sample unit (e.g., 0.33 for cycles at 3GHz)
200    ///
201    /// # Returns
202    /// Tuple of (baseline_ns, test_ns) as f64 vectors.
203    pub fn to_nanoseconds(&self, ns_per_unit: f64) -> (Vec<f64>, Vec<f64>) {
204        let baseline_ns: Vec<f64> = self
205            .baseline_samples
206            .iter()
207            .map(|&s| s as f64 * ns_per_unit)
208            .collect();
209        let test_ns: Vec<f64> = self
210            .test_samples
211            .iter()
212            .map(|&s| s as f64 * ns_per_unit)
213            .collect();
214        (baseline_ns, test_ns)
215    }
216}
217
/// Metadata about the data source.
///
/// Every field is optional; `DataMetadata::default()` leaves all of them `None`.
#[derive(Debug, Clone, Default)]
pub struct DataMetadata {
    /// Original filename or identifier.
    pub source: Option<String>,

    /// Labels used for the two groups in the source file.
    pub group_labels: Option<(String, String)>,

    /// Any additional context (e.g., from SILENT summary JSON).
    pub context: Option<String>,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `validate` passes when both groups meet the threshold and otherwise
    /// reports the short group together with its count and the minimum.
    #[test]
    fn test_timing_data_validation() {
        let data = TimingData::new(vec![1, 2, 3], vec![4, 5], TimeUnit::Cycles);

        assert!(data.validate(2).is_ok());

        // Only the test group (2 samples) falls short of 3.
        match data.validate(3) {
            Err(DataError::InsufficientSamples { group, got, min }) => {
                assert_eq!(group, "test");
                assert_eq!(got, 2);
                assert_eq!(min, 3);
            }
            other => panic!("expected InsufficientSamples, got {:?}", other),
        }

        // With both groups short, the baseline group is the one reported.
        match data.validate(4) {
            Err(DataError::InsufficientSamples { group, got, min }) => {
                assert_eq!(group, "baseline");
                assert_eq!(got, 3);
                assert_eq!(min, 4);
            }
            other => panic!("expected InsufficientSamples, got {:?}", other),
        }
    }

    /// Each sample is multiplied by the supplied factor, group by group.
    #[test]
    fn test_timing_data_to_nanoseconds() {
        let data = TimingData::new(vec![100, 200], vec![150, 250], TimeUnit::Cycles);

        let (baseline_ns, test_ns) = data.to_nanoseconds(0.5);

        // These products are exactly representable, so exact comparison is safe.
        assert_eq!(baseline_ns, vec![50.0, 100.0]);
        assert_eq!(test_ns, vec![75.0, 125.0]);
    }
}