dsfb_add/analysis/
structural_law.rs1use crate::AddError;
2
3#[derive(Debug, Clone, Copy)]
5pub struct LinearFit {
6 pub slope: f64,
7 pub intercept: f64,
8 pub r2: f64,
9 pub residual_variance: f64,
10 pub mse_resid: f64,
11 pub pearson_r: f64,
12 pub spearman_rho: f64,
13 pub slope_ci_low: f64,
14 pub slope_ci_high: f64,
15 pub sample_count: usize,
16}
17
18#[derive(Debug, Clone, Copy)]
20pub struct StructuralLawDiagnostics {
21 pub residual_mean: f64,
22 pub residual_std: f64,
23 pub residual_skew_approx: f64,
24 pub residual_kurtosis_approx: f64,
25 pub ratio_mean: f64,
26 pub ratio_std: f64,
27 pub ratio_min: f64,
28 pub ratio_max: f64,
29}
30
31pub fn fit_with_ci(xs: &[f64], ys: &[f64]) -> Result<LinearFit, AddError> {
33 if xs.len() != ys.len() {
34 return Err(AddError::LengthMismatch {
35 context: "structural law fit",
36 expected: xs.len(),
37 got: ys.len(),
38 });
39 }
40
41 if xs.len() < 2 {
42 return Err(AddError::InvalidConfig(
43 "structural law fit requires at least two samples".to_string(),
44 ));
45 }
46
47 let sample_count = xs.len();
48 let x_mean = mean(xs);
49 let y_mean = mean(ys);
50 let sxx = xs
51 .iter()
52 .map(|x| {
53 let dx = x - x_mean;
54 dx * dx
55 })
56 .sum::<f64>();
57 let sxy = xs
58 .iter()
59 .zip(ys.iter())
60 .map(|(x, y)| (x - x_mean) * (y - y_mean))
61 .sum::<f64>();
62
63 let slope = if sxx > f64::EPSILON { sxy / sxx } else { 0.0 };
64 let intercept = y_mean - slope * x_mean;
65
66 let residuals: Vec<f64> = xs
67 .iter()
68 .zip(ys.iter())
69 .map(|(x, y)| y - (slope * x + intercept))
70 .collect();
71 let sse = residuals.iter().map(|value| value * value).sum::<f64>();
72 let mse_resid = sse / sample_count as f64;
73 let residual_variance = if sample_count > 2 {
74 sse / (sample_count - 2) as f64
75 } else {
76 0.0
77 };
78 let sst = ys
79 .iter()
80 .map(|y| {
81 let dy = y - y_mean;
82 dy * dy
83 })
84 .sum::<f64>();
85 let r2 = if sst > f64::EPSILON {
86 1.0 - sse / sst
87 } else {
88 1.0
89 };
90
91 let slope_std_error = if sxx > f64::EPSILON {
94 (residual_variance / sxx).sqrt()
95 } else {
96 0.0
97 };
98 let ci_half_width = 2.0 * slope_std_error;
99
100 Ok(LinearFit {
101 slope,
102 intercept,
103 r2,
104 residual_variance,
105 mse_resid,
106 pearson_r: correlation(xs, ys),
107 spearman_rho: spearman_correlation(xs, ys),
108 slope_ci_low: slope - ci_half_width,
109 slope_ci_high: slope + ci_half_width,
110 sample_count,
111 })
112}
113
114pub fn diagnostics_from_fit(
116 xs: &[f64],
117 ys: &[f64],
118 fit: &LinearFit,
119) -> Result<StructuralLawDiagnostics, AddError> {
120 if xs.len() != ys.len() {
121 return Err(AddError::LengthMismatch {
122 context: "structural law diagnostics",
123 expected: xs.len(),
124 got: ys.len(),
125 });
126 }
127
128 let residuals: Vec<f64> = xs
129 .iter()
130 .zip(ys.iter())
131 .map(|(x, y)| y - (fit.slope * x + fit.intercept))
132 .collect();
133 let residual_mean = mean(&residuals);
134 let residual_std = stddev(&residuals, residual_mean);
135 let residual_skew_approx = standardized_moment(&residuals, residual_mean, residual_std, 3);
136 let residual_kurtosis_approx = standardized_moment(&residuals, residual_mean, residual_std, 4);
137
138 let ratios: Vec<f64> = xs
139 .iter()
140 .zip(ys.iter())
141 .filter_map(|(x, y)| {
142 if x.abs() <= f64::EPSILON {
143 None
144 } else {
145 let ratio = y / x;
146 ratio.is_finite().then_some(ratio)
147 }
148 })
149 .collect();
150
151 let ratio_mean = if ratios.is_empty() {
152 0.0
153 } else {
154 mean(&ratios)
155 };
156 let ratio_std = if ratios.is_empty() {
157 0.0
158 } else {
159 stddev(&ratios, ratio_mean)
160 };
161 let ratio_min = ratios.iter().copied().fold(f64::INFINITY, f64::min);
162 let ratio_max = ratios.iter().copied().fold(f64::NEG_INFINITY, f64::max);
163
164 Ok(StructuralLawDiagnostics {
165 residual_mean,
166 residual_std,
167 residual_skew_approx,
168 residual_kurtosis_approx,
169 ratio_mean,
170 ratio_std,
171 ratio_min: if ratios.is_empty() { 0.0 } else { ratio_min },
172 ratio_max: if ratios.is_empty() { 0.0 } else { ratio_max },
173 })
174}
175
176fn mean(values: &[f64]) -> f64 {
177 values.iter().sum::<f64>() / values.len() as f64
178}
179
180fn stddev(values: &[f64], mean_value: f64) -> f64 {
181 (values
182 .iter()
183 .map(|value| {
184 let delta = value - mean_value;
185 delta * delta
186 })
187 .sum::<f64>()
188 / values.len().max(1) as f64)
189 .sqrt()
190}
191
192fn correlation(xs: &[f64], ys: &[f64]) -> f64 {
193 let x_mean = mean(xs);
194 let y_mean = mean(ys);
195 let covariance = xs
196 .iter()
197 .zip(ys.iter())
198 .map(|(x, y)| (x - x_mean) * (y - y_mean))
199 .sum::<f64>();
200 let x_scale = xs
201 .iter()
202 .map(|x| {
203 let delta = x - x_mean;
204 delta * delta
205 })
206 .sum::<f64>()
207 .sqrt();
208 let y_scale = ys
209 .iter()
210 .map(|y| {
211 let delta = y - y_mean;
212 delta * delta
213 })
214 .sum::<f64>()
215 .sqrt();
216
217 if x_scale <= f64::EPSILON || y_scale <= f64::EPSILON {
218 1.0
219 } else {
220 covariance / (x_scale * y_scale)
221 }
222}
223
224fn spearman_correlation(xs: &[f64], ys: &[f64]) -> f64 {
225 let x_ranks = average_ranks(xs);
226 let y_ranks = average_ranks(ys);
227 correlation(&x_ranks, &y_ranks)
228}
229
230fn average_ranks(values: &[f64]) -> Vec<f64> {
231 let mut indexed: Vec<(usize, f64)> = values.iter().copied().enumerate().collect();
232 indexed.sort_by(|left, right| {
233 left.1
234 .partial_cmp(&right.1)
235 .unwrap_or(std::cmp::Ordering::Equal)
236 });
237
238 let mut ranks = vec![0.0; values.len()];
239 let mut start = 0_usize;
240 while start < indexed.len() {
241 let mut end = start + 1;
242 while end < indexed.len() && (indexed[end].1 - indexed[start].1).abs() <= 1.0e-12 {
243 end += 1;
244 }
245
246 let average_rank = (start + 1 + end) as f64 / 2.0;
247 for idx in start..end {
248 ranks[indexed[idx].0] = average_rank;
249 }
250
251 start = end;
252 }
253
254 ranks
255}
256
257fn standardized_moment(values: &[f64], mean_value: f64, std_value: f64, power: i32) -> f64 {
258 if std_value <= f64::EPSILON {
259 return 0.0;
260 }
261
262 values
263 .iter()
264 .map(|value| (value - mean_value).powi(power))
265 .sum::<f64>()
266 / values.len().max(1) as f64
267 / std_value.powi(power)
268}