Skip to main content

dsfb_add/analysis/
structural_law.rs

1use crate::AddError;
2
3/// Least-squares linear fit with a simple 95% slope confidence interval.
4#[derive(Debug, Clone, Copy)]
5pub struct LinearFit {
6    pub slope: f64,
7    pub intercept: f64,
8    pub r2: f64,
9    pub residual_variance: f64,
10    pub mse_resid: f64,
11    pub pearson_r: f64,
12    pub spearman_rho: f64,
13    pub slope_ci_low: f64,
14    pub slope_ci_high: f64,
15    pub sample_count: usize,
16}
17
18/// Residual and ratio diagnostics for the AET-IWLT structural law.
19#[derive(Debug, Clone, Copy)]
20pub struct StructuralLawDiagnostics {
21    pub residual_mean: f64,
22    pub residual_std: f64,
23    pub residual_skew_approx: f64,
24    pub residual_kurtosis_approx: f64,
25    pub ratio_mean: f64,
26    pub ratio_std: f64,
27    pub ratio_min: f64,
28    pub ratio_max: f64,
29}
30
31/// Fit `ys = slope * xs + intercept` and estimate a 95% confidence interval.
32pub fn fit_with_ci(xs: &[f64], ys: &[f64]) -> Result<LinearFit, AddError> {
33    if xs.len() != ys.len() {
34        return Err(AddError::LengthMismatch {
35            context: "structural law fit",
36            expected: xs.len(),
37            got: ys.len(),
38        });
39    }
40
41    if xs.len() < 2 {
42        return Err(AddError::InvalidConfig(
43            "structural law fit requires at least two samples".to_string(),
44        ));
45    }
46
47    let sample_count = xs.len();
48    let x_mean = mean(xs);
49    let y_mean = mean(ys);
50    let sxx = xs
51        .iter()
52        .map(|x| {
53            let dx = x - x_mean;
54            dx * dx
55        })
56        .sum::<f64>();
57    let sxy = xs
58        .iter()
59        .zip(ys.iter())
60        .map(|(x, y)| (x - x_mean) * (y - y_mean))
61        .sum::<f64>();
62
63    let slope = if sxx > f64::EPSILON { sxy / sxx } else { 0.0 };
64    let intercept = y_mean - slope * x_mean;
65
66    let residuals: Vec<f64> = xs
67        .iter()
68        .zip(ys.iter())
69        .map(|(x, y)| y - (slope * x + intercept))
70        .collect();
71    let sse = residuals.iter().map(|value| value * value).sum::<f64>();
72    let mse_resid = sse / sample_count as f64;
73    let residual_variance = if sample_count > 2 {
74        sse / (sample_count - 2) as f64
75    } else {
76        0.0
77    };
78    let sst = ys
79        .iter()
80        .map(|y| {
81            let dy = y - y_mean;
82            dy * dy
83        })
84        .sum::<f64>();
85    let r2 = if sst > f64::EPSILON {
86        1.0 - sse / sst
87    } else {
88        1.0
89    };
90
91    // The lambda grid has O(10^2) points, so using a fixed t ~= 2.0 is a
92    // reasonable 95% CI approximation for the slope.
93    let slope_std_error = if sxx > f64::EPSILON {
94        (residual_variance / sxx).sqrt()
95    } else {
96        0.0
97    };
98    let ci_half_width = 2.0 * slope_std_error;
99
100    Ok(LinearFit {
101        slope,
102        intercept,
103        r2,
104        residual_variance,
105        mse_resid,
106        pearson_r: correlation(xs, ys),
107        spearman_rho: spearman_correlation(xs, ys),
108        slope_ci_low: slope - ci_half_width,
109        slope_ci_high: slope + ci_half_width,
110        sample_count,
111    })
112}
113
114/// Compute residual and ratio diagnostics for a previously fitted law.
115pub fn diagnostics_from_fit(
116    xs: &[f64],
117    ys: &[f64],
118    fit: &LinearFit,
119) -> Result<StructuralLawDiagnostics, AddError> {
120    if xs.len() != ys.len() {
121        return Err(AddError::LengthMismatch {
122            context: "structural law diagnostics",
123            expected: xs.len(),
124            got: ys.len(),
125        });
126    }
127
128    let residuals: Vec<f64> = xs
129        .iter()
130        .zip(ys.iter())
131        .map(|(x, y)| y - (fit.slope * x + fit.intercept))
132        .collect();
133    let residual_mean = mean(&residuals);
134    let residual_std = stddev(&residuals, residual_mean);
135    let residual_skew_approx = standardized_moment(&residuals, residual_mean, residual_std, 3);
136    let residual_kurtosis_approx = standardized_moment(&residuals, residual_mean, residual_std, 4);
137
138    let ratios: Vec<f64> = xs
139        .iter()
140        .zip(ys.iter())
141        .filter_map(|(x, y)| {
142            if x.abs() <= f64::EPSILON {
143                None
144            } else {
145                let ratio = y / x;
146                ratio.is_finite().then_some(ratio)
147            }
148        })
149        .collect();
150
151    let ratio_mean = if ratios.is_empty() {
152        0.0
153    } else {
154        mean(&ratios)
155    };
156    let ratio_std = if ratios.is_empty() {
157        0.0
158    } else {
159        stddev(&ratios, ratio_mean)
160    };
161    let ratio_min = ratios.iter().copied().fold(f64::INFINITY, f64::min);
162    let ratio_max = ratios.iter().copied().fold(f64::NEG_INFINITY, f64::max);
163
164    Ok(StructuralLawDiagnostics {
165        residual_mean,
166        residual_std,
167        residual_skew_approx,
168        residual_kurtosis_approx,
169        ratio_mean,
170        ratio_std,
171        ratio_min: if ratios.is_empty() { 0.0 } else { ratio_min },
172        ratio_max: if ratios.is_empty() { 0.0 } else { ratio_max },
173    })
174}
175
176fn mean(values: &[f64]) -> f64 {
177    values.iter().sum::<f64>() / values.len() as f64
178}
179
180fn stddev(values: &[f64], mean_value: f64) -> f64 {
181    (values
182        .iter()
183        .map(|value| {
184            let delta = value - mean_value;
185            delta * delta
186        })
187        .sum::<f64>()
188        / values.len().max(1) as f64)
189        .sqrt()
190}
191
192fn correlation(xs: &[f64], ys: &[f64]) -> f64 {
193    let x_mean = mean(xs);
194    let y_mean = mean(ys);
195    let covariance = xs
196        .iter()
197        .zip(ys.iter())
198        .map(|(x, y)| (x - x_mean) * (y - y_mean))
199        .sum::<f64>();
200    let x_scale = xs
201        .iter()
202        .map(|x| {
203            let delta = x - x_mean;
204            delta * delta
205        })
206        .sum::<f64>()
207        .sqrt();
208    let y_scale = ys
209        .iter()
210        .map(|y| {
211            let delta = y - y_mean;
212            delta * delta
213        })
214        .sum::<f64>()
215        .sqrt();
216
217    if x_scale <= f64::EPSILON || y_scale <= f64::EPSILON {
218        1.0
219    } else {
220        covariance / (x_scale * y_scale)
221    }
222}
223
224fn spearman_correlation(xs: &[f64], ys: &[f64]) -> f64 {
225    let x_ranks = average_ranks(xs);
226    let y_ranks = average_ranks(ys);
227    correlation(&x_ranks, &y_ranks)
228}
229
230fn average_ranks(values: &[f64]) -> Vec<f64> {
231    let mut indexed: Vec<(usize, f64)> = values.iter().copied().enumerate().collect();
232    indexed.sort_by(|left, right| {
233        left.1
234            .partial_cmp(&right.1)
235            .unwrap_or(std::cmp::Ordering::Equal)
236    });
237
238    let mut ranks = vec![0.0; values.len()];
239    let mut start = 0_usize;
240    while start < indexed.len() {
241        let mut end = start + 1;
242        while end < indexed.len() && (indexed[end].1 - indexed[start].1).abs() <= 1.0e-12 {
243            end += 1;
244        }
245
246        let average_rank = (start + 1 + end) as f64 / 2.0;
247        for idx in start..end {
248            ranks[indexed[idx].0] = average_rank;
249        }
250
251        start = end;
252    }
253
254    ranks
255}
256
257fn standardized_moment(values: &[f64], mean_value: f64, std_value: f64, power: i32) -> f64 {
258    if std_value <= f64::EPSILON {
259        return 0.0;
260    }
261
262    values
263        .iter()
264        .map(|value| (value - mean_value).powi(power))
265        .sum::<f64>()
266        / values.len().max(1) as f64
267        / std_value.powi(power)
268}