#![cfg(feature = "std")]
extern crate std;
use std::string::{String, ToString};
use std::vec::Vec;
use crate::error::{DsfbError, Result};
/// Owned, fully validated output of [`parse_residual_projection`].
///
/// For a regular (non-sentinel) fixture the parser guarantees that
/// `data.len() == num_windows * num_signals`, that
/// `fault_labels.len() == num_windows`, and that `channels` is either empty
/// or has exactly `num_signals` entries. For a sentinel fixture every
/// dimension is zero and `data` / `fault_labels` are empty.
#[derive(Debug, Clone)]
pub struct OwnedResidualMatrix {
/// Parsed values, appended row by row in file order; each data row
/// contributes `num_signals` consecutive entries.
pub data: Vec<f64>,
/// Value of the `num_signals` header: the number of values required on
/// every data row.
pub num_signals: usize,
/// Value of the `num_windows` header; the parser requires it to equal the
/// number of data rows.
pub num_windows: usize,
/// Value of the `healthy_window_end` header; the parser rejects values
/// greater than `num_windows`.
/// NOTE(review): presumably the boundary of the healthy/baseline windows;
/// whether it is inclusive or exclusive is not established by the parser —
/// confirm against consumers.
pub healthy_window_end: usize,
/// One flag per window; `true` at every index listed in the
/// `fault_labels` header (indices `>= num_windows` are ignored).
pub fault_labels: Vec<bool>,
/// `true` when the fixture contained the `UPSTREAM_FIXTURE_NOT_VENDORED`
/// marker comment instead of real data.
pub is_sentinel: bool,
/// Every `#` comment line of the input, with the `#` and surrounding
/// whitespace removed, each terminated by `'\n'`.
pub header_provenance: String,
/// Trimmed names from the comma-separated `channels` header; empty when
/// the header is absent or has an empty value.
pub channels: Vec<String>,
}
/// Converts a per-row value count or index into the `u16` carried by
/// `DsfbError::ParseError`, saturating at `u16::MAX` instead of wrapping.
fn clamp_field_index(count: usize) -> u16 {
    // After `min` the value is at most `u16::MAX`, so the cast is lossless.
    count.min(usize::from(u16::MAX)) as u16
}

/// Parses a residual-projection fixture (UTF-8 text) into an
/// [`OwnedResidualMatrix`].
///
/// The input is processed line by line, after trimming surrounding
/// whitespace from each line:
///
/// * blank lines are skipped;
/// * lines starting with `#` are header comments. Every comment is appended
///   to `header_provenance`. A comment of the form `key=value` whose key is
///   one of `num_windows`, `num_signals`, `healthy_window_end`,
///   `fault_labels` (comma-separated window indices) or `channels`
///   (comma-separated names) also populates the matching field. The exact
///   comment `UPSTREAM_FIXTURE_NOT_VENDORED` marks the input as a sentinel;
/// * any other line is a data row of tab-separated `f64` values. Empty
///   tokens are ignored, and the row must contain exactly `num_signals`
///   values.
///
/// A sentinel input yields an empty matrix (all dimensions zero,
/// `is_sentinel == true`) that still carries `header_provenance` and
/// `channels`.
///
/// Lenient behaviour kept for compatibility: a numeric header whose value
/// does not parse is recorded as absent (replacing any earlier value), and
/// `fault_labels` tokens that do not parse, or that are `>= num_windows`,
/// are ignored.
///
/// # Errors
///
/// * `DsfbError::ParseError` — the input is not valid UTF-8 (`record` 0,
///   `field` 0); a data row appears after the sentinel marker or before a
///   positive `num_signals` header; a token is not a valid `f64` (`field` is
///   the index of that token among the row's non-empty tokens); or a row
///   holds the wrong number of values (`field` is the number found).
///   `record` is the zero-based index of the line in the input, and `field`
///   saturates at `u16::MAX`.
/// * `DsfbError::InvalidConfig` — a required header (`num_signals`,
///   `num_windows`, `healthy_window_end`) is missing,
///   `healthy_window_end > num_windows`, or `num_windows * num_signals`
///   does not fit in `usize`.
/// * `DsfbError::DimensionMismatch` — the number of data rows differs from
///   `num_windows`, the number of values differs from
///   `num_windows * num_signals`, or a non-empty `channels` list does not
///   have `num_signals` entries.
pub fn parse_residual_projection(bytes: &[u8]) -> Result<OwnedResidualMatrix> {
    let text = core::str::from_utf8(bytes)
        .map_err(|_| DsfbError::ParseError { record: 0, field: 0 })?;

    let mut num_windows: Option<usize> = None;
    let mut num_signals: Option<usize> = None;
    let mut healthy_window_end: Option<usize> = None;
    let mut fault_label_indices: Vec<usize> = Vec::new();
    let mut header_provenance = String::new();
    let mut is_sentinel = false;
    let mut data: Vec<f64> = Vec::new();
    let mut row_count: usize = 0;
    let mut channels: Vec<String> = Vec::new();

    for (line_no, raw_line) in text.lines().enumerate() {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }
        // Zero-based line index, reported in every `ParseError` for this line.
        let record = line_no as u64;

        if let Some(comment) = line.strip_prefix('#') {
            let comment = comment.trim();
            header_provenance.push_str(comment);
            header_provenance.push('\n');
            if comment == "UPSTREAM_FIXTURE_NOT_VENDORED" {
                is_sentinel = true;
                continue;
            }
            if let Some((key, value)) = comment.split_once('=') {
                let value = value.trim();
                match key.trim() {
                    "num_windows" => num_windows = value.parse::<usize>().ok(),
                    "num_signals" => num_signals = value.parse::<usize>().ok(),
                    "healthy_window_end" => {
                        healthy_window_end = value.parse::<usize>().ok();
                    }
                    "fault_labels" => {
                        if !value.is_empty() {
                            // Tokens that are not valid indices are skipped on purpose.
                            fault_label_indices.extend(
                                value
                                    .split(',')
                                    .filter_map(|tok| tok.trim().parse::<usize>().ok()),
                            );
                        }
                    }
                    "channels" => {
                        if !value.is_empty() {
                            channels.extend(value.split(',').map(|tok| tok.trim().to_string()));
                        }
                    }
                    // Unknown keys are provenance only.
                    _ => {}
                }
            }
            continue;
        }

        // A sentinel fixture must not carry data rows after the marker.
        if is_sentinel {
            return Err(DsfbError::ParseError { record, field: 0 });
        }
        // The row width has to be known (and non-zero) before the first row.
        let n_signals = match num_signals {
            Some(n) if n > 0 => n,
            _ => return Err(DsfbError::ParseError { record, field: 0 }),
        };

        // Counted in `usize` so an arbitrarily wide row can neither wrap nor
        // panic; the count is saturated only when it is put into an error.
        let mut field_count: usize = 0;
        for tok in line.split('\t').map(str::trim).filter(|tok| !tok.is_empty()) {
            let value = tok.parse::<f64>().map_err(|_| DsfbError::ParseError {
                record,
                field: clamp_field_index(field_count),
            })?;
            data.push(value);
            field_count += 1;
        }
        if field_count != n_signals {
            return Err(DsfbError::ParseError {
                record,
                field: clamp_field_index(field_count),
            });
        }
        row_count += 1;
    }

    if is_sentinel {
        return Ok(OwnedResidualMatrix {
            data: Vec::new(),
            num_signals: 0,
            num_windows: 0,
            healthy_window_end: 0,
            fault_labels: Vec::new(),
            is_sentinel: true,
            header_provenance,
            channels,
        });
    }

    let num_signals = num_signals.ok_or(DsfbError::InvalidConfig("missing num_signals header"))?;
    let num_windows = num_windows.ok_or(DsfbError::InvalidConfig("missing num_windows header"))?;
    let healthy_window_end = healthy_window_end
        .ok_or(DsfbError::InvalidConfig("missing healthy_window_end header"))?;

    if row_count != num_windows {
        return Err(DsfbError::DimensionMismatch {
            expected: num_windows,
            got: row_count,
        });
    }
    // `num_signals` may have been redeclared after the data rows, so the
    // product is attacker-controlled and must not be allowed to overflow.
    let expected_len = num_windows
        .checked_mul(num_signals)
        .ok_or(DsfbError::InvalidConfig("num_windows * num_signals overflows usize"))?;
    if data.len() != expected_len {
        return Err(DsfbError::DimensionMismatch {
            expected: expected_len,
            got: data.len(),
        });
    }
    if healthy_window_end > num_windows {
        return Err(DsfbError::InvalidConfig("healthy_window_end > num_windows"));
    }

    let mut fault_labels = std::vec![false; num_windows];
    for idx in fault_label_indices {
        // Out-of-range indices are ignored rather than rejected.
        if let Some(flag) = fault_labels.get_mut(idx) {
            *flag = true;
        }
    }

    if !channels.is_empty() && channels.len() != num_signals {
        return Err(DsfbError::DimensionMismatch {
            expected: num_signals,
            got: channels.len(),
        });
    }

    Ok(OwnedResidualMatrix {
        data,
        num_signals,
        num_windows,
        healthy_window_end,
        fault_labels,
        is_sentinel: false,
        header_provenance,
        channels,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A v1 fixture (no `channels` header) parses into the declared shape.
    #[test]
    fn parses_minimal_real_shape_extract() {
        let fixture = b"# residual-projection v1\n\
            # num_windows=4\n\
            # num_signals=2\n\
            # healthy_window_end=2\n\
            # fault_labels=3\n\
            # upstream_doi=test-only\n\
            # license=Apache-2.0\n\
            100.0\t0.001\n\
            101.5\t0.002\n\
            150.0\t0.040\n\
            155.2\t0.055\n";
        let matrix = parse_residual_projection(fixture).expect("parse should succeed");

        assert!(!matrix.is_sentinel);
        assert_eq!(
            (matrix.num_signals, matrix.num_windows, matrix.healthy_window_end),
            (2, 4, 2)
        );
        assert_eq!(matrix.data.len(), 8);
        let first_value = matrix.data[0];
        assert!((first_value - 100.0).abs() < 1e-12);
        assert_eq!(matrix.fault_labels, [false, false, false, true]);
        assert!(matrix.channels.is_empty(), "v1 fixture must yield empty channels");
    }

    /// The v2 `channels` header is split on commas, in order.
    #[test]
    fn parses_v2_channels_header() {
        let fixture = b"# residual-projection v2\n\
            # num_windows=2\n\
            # num_signals=3\n\
            # healthy_window_end=1\n\
            # fault_labels=\n\
            # channels=svc_a_latency_p50_ms,svc_a_error_rate,svc_a_log_volume\n\
            # license=Apache-2.0\n\
            100.0\t0.001\t42.0\n\
            150.0\t0.040\t73.0\n";
        let matrix = parse_residual_projection(fixture).expect("v2 parse should succeed");

        assert_eq!(
            matrix.channels,
            ["svc_a_latency_p50_ms", "svc_a_error_rate", "svc_a_log_volume"]
        );
    }

    /// Three channel names against two signals must be rejected.
    #[test]
    fn channels_count_must_match_num_signals() {
        let fixture = b"# num_windows=1\n\
            # num_signals=2\n\
            # healthy_window_end=0\n\
            # fault_labels=\n\
            # channels=a,b,c\n\
            100.0\t0.001\n";

        match parse_residual_projection(fixture) {
            Err(DsfbError::DimensionMismatch { .. }) => {}
            _ => panic!("channel count mismatch must surface as DimensionMismatch"),
        }
    }

    /// The sentinel marker yields an empty, flagged matrix.
    #[test]
    fn detects_sentinel_fixture() {
        let fixture = b"# residual-projection v1\n\
            # UPSTREAM_FIXTURE_NOT_VENDORED\n\
            # extraction_recipe=see data/README.md\n";
        let matrix = parse_residual_projection(fixture).expect("sentinel parse should succeed");

        assert!(matrix.is_sentinel);
        assert_eq!((matrix.num_signals, matrix.num_windows), (0, 0));
    }

    /// A row with fewer values than `num_signals` is an error.
    #[test]
    fn rejects_short_row() {
        let fixture = b"# num_windows=1\n# num_signals=2\n# healthy_window_end=0\n# fault_labels=\n100.0\n";
        let outcome = parse_residual_projection(fixture);
        assert!(outcome.is_err());
    }
}