Skip to main content

phop_core/
dimension.rs

1//! Buckingham-π dimensional reduction (a discovery *prior* for phop).
2//!
3//! In the pure EML grammar every internal node is `exp(x) − ln(y)`, so **every** argument to a
4//! node must be dimensionless. An in-loss dimensional penalty on single-variable leaves would
5//! therefore be degenerate (it could only forbid using a dimensioned variable at all). The
6//! productive, EML-appropriate way to bring dimensional analysis into the search — the same way
7//! AI-Feynman uses it — is as a **preprocessing reduction**: combine the dimensioned inputs into
8//! the maximal set of independent *dimensionless* groups (the Buckingham-π theorem) and let phop
9//! discover a law over those. The number of groups is `n_features − rank(D)`, where `D` is the
10//! dimension matrix.
11//!
12//! Each feature carries a [`Dimension`] — integer exponents over the seven SI base units
13//! `[s, m, kg, A, K, mol, cd]`. [`pi_groups`] returns an integer basis of the nullspace of `D`
14//! (each basis vector `e` gives a dimensionless monomial `∏ xᵢ^{eᵢ}`); [`DataSet::to_dimensionless`]
15//! materializes those monomials as the new feature columns.
16
17use crate::dataset::DataSet;
18use crate::error::{PhopError, Result};
19use scirs2_core::ndarray::Array2;
20
21/// Integer exponents over the seven SI base units `[s, m, kg, A, K, mol, cd]`.
22pub type Dimension = [i32; 7];
23
24/// The dimensionless dimension (all exponents zero).
25pub const DIMENSIONLESS: Dimension = [0; 7];
26/// Time `[s]`.
27pub const TIME: Dimension = [1, 0, 0, 0, 0, 0, 0];
28/// Length `[m]`.
29pub const LENGTH: Dimension = [0, 1, 0, 0, 0, 0, 0];
30/// Mass `[kg]`.
31pub const MASS: Dimension = [0, 0, 1, 0, 0, 0, 0];
32
33/// An exact rational (kept normalized: `den > 0`, `gcd(|num|, den) = 1`).
34#[derive(Clone, Copy)]
35struct Frac {
36    num: i64,
37    den: i64,
38}
39
40fn gcd(a: i64, b: i64) -> i64 {
41    let (mut a, mut b) = (a.abs(), b.abs());
42    while b != 0 {
43        let t = a % b;
44        a = b;
45        b = t;
46    }
47    a.max(1)
48}
49
50impl Frac {
51    fn new(num: i64, den: i64) -> Frac {
52        debug_assert!(den != 0);
53        let s = if den < 0 { -1 } else { 1 };
54        let (num, den) = (num * s, den * s);
55        let g = gcd(num, den);
56        Frac {
57            num: num / g,
58            den: den / g,
59        }
60    }
61    fn int(n: i64) -> Frac {
62        Frac { num: n, den: 1 }
63    }
64    fn zero() -> Frac {
65        Frac { num: 0, den: 1 }
66    }
67    fn is_zero(self) -> bool {
68        self.num == 0
69    }
70    fn sub(self, o: Frac) -> Frac {
71        Frac::new(self.num * o.den - o.num * self.den, self.den * o.den)
72    }
73    fn mul(self, o: Frac) -> Frac {
74        Frac::new(self.num * o.num, self.den * o.den)
75    }
76    fn div(self, o: Frac) -> Frac {
77        Frac::new(self.num * o.den, self.den * o.num)
78    }
79}
80
81/// Integer basis of the nullspace of the dimension matrix of `dims`.
82///
83/// Each returned vector `e` (length `dims.len()`) satisfies `∑ eᵢ · dim(xᵢ) = 0`, i.e. the monomial
84/// `∏ xᵢ^{eᵢ}` is dimensionless. The basis has `n − rank` vectors; for fully-dimensionally-independent
85/// inputs it is empty. Returned vectors are reduced (gcd 1) with a leading-positive sign convention.
86#[must_use]
87pub fn pi_groups(dims: &[Dimension]) -> Vec<Vec<i32>> {
88    let n = dims.len();
89    if n == 0 {
90        return Vec::new();
91    }
92    // Dimension matrix D: 7 rows (base units) × n cols (features), as exact rationals.
93    let mut m: Vec<Vec<Frac>> = (0..7)
94        .map(|r| (0..n).map(|c| Frac::int(i64::from(dims[c][r]))).collect())
95        .collect();
96
97    // Reduced row echelon form; record the pivot column of each row.
98    let rows = 7;
99    let mut pivot_col = vec![usize::MAX; rows];
100    let mut r = 0usize;
101    for c in 0..n {
102        if r >= rows {
103            break;
104        }
105        // Find a pivot in column c at or below row r.
106        let Some(p) = (r..rows).find(|&i| !m[i][c].is_zero()) else {
107            continue;
108        };
109        m.swap(r, p);
110        // Scale pivot row so the pivot is 1.
111        let pivot = m[r][c];
112        for cell in m[r].iter_mut() {
113            *cell = cell.div(pivot);
114        }
115        // Eliminate column c from all other rows (clone the pivot row to avoid aliasing).
116        let prow = m[r].clone();
117        for (i, mrow) in m.iter_mut().enumerate() {
118            if i != r && !mrow[c].is_zero() {
119                let factor = mrow[c];
120                for (cell, &pj) in mrow.iter_mut().zip(prow.iter()) {
121                    *cell = cell.sub(factor.mul(pj));
122                }
123            }
124        }
125        pivot_col[r] = c;
126        r += 1;
127    }
128
129    let pivots: Vec<usize> = pivot_col
130        .iter()
131        .copied()
132        .filter(|&c| c != usize::MAX)
133        .collect();
134    let is_pivot = |c: usize| pivots.contains(&c);
135
136    // One nullspace basis vector per free column.
137    let mut basis = Vec::new();
138    for free in (0..n).filter(|&c| !is_pivot(c)) {
139        let mut vec_q = vec![Frac::zero(); n];
140        vec_q[free] = Frac::int(1);
141        // For each pivot row with pivot column `pc`: x_pc = -m[row][free].
142        for (row, &pc) in pivot_col.iter().enumerate() {
143            if pc != usize::MAX {
144                vec_q[pc] = Frac::zero().sub(m[row][free]);
145            }
146        }
147        // Clear denominators → integers, reduce by gcd, fix sign (first nonzero positive).
148        let lcm = vec_q
149            .iter()
150            .fold(1i64, |acc, f| acc / gcd(acc, f.den) * f.den);
151        let mut ints: Vec<i32> = vec_q
152            .iter()
153            .map(|f| (f.num * (lcm / f.den)) as i32)
154            .collect();
155        let g = ints
156            .iter()
157            .fold(0i64, |acc, &v| gcd(acc, i64::from(v)))
158            .max(1) as i32;
159        for v in &mut ints {
160            *v /= g;
161        }
162        if let Some(&first) = ints.iter().find(|&&v| v != 0) {
163            if first < 0 {
164                for v in &mut ints {
165                    *v = -*v;
166                }
167            }
168        }
169        basis.push(ints);
170    }
171    basis
172}
173
174impl DataSet {
175    /// Reduce the features to their dimensionless Buckingham-π groups, given each feature's
176    /// [`Dimension`]. The new feature columns are the monomials `∏ xᵢ^{eᵢ}` for each π-group; the
177    /// target is left unchanged. The π-group exponent vectors are returned alongside.
178    ///
179    /// # Errors
180    /// Returns [`PhopError::ShapeMismatch`] if `feature_dims.len() != n_vars`, or
181    /// [`PhopError::NotConverged`] if the inputs are dimensionally independent (no π-groups exist).
182    pub fn to_dimensionless(&self, feature_dims: &[Dimension]) -> Result<(DataSet, Vec<Vec<i32>>)> {
183        let nv = self.n_vars();
184        if feature_dims.len() != nv {
185            return Err(PhopError::ShapeMismatch(format!(
186                "expected {nv} feature dimensions, got {}",
187                feature_dims.len()
188            )));
189        }
190        let groups = pi_groups(feature_dims);
191        if groups.is_empty() {
192            return Err(PhopError::NotConverged(
193                "no dimensionless π-groups: the features are dimensionally independent".to_string(),
194            ));
195        }
196        let n_rows = self.len();
197        let n_pi = groups.len();
198        let mut data = Vec::with_capacity(n_rows * n_pi);
199        for row in 0..n_rows {
200            for g in &groups {
201                let mut v = 1.0_f64;
202                for (j, &e) in g.iter().enumerate() {
203                    if e != 0 {
204                        v *= self.x[[row, j]].powi(e);
205                    }
206                }
207                data.push(v);
208            }
209        }
210        let x = Array2::from_shape_vec((n_rows, n_pi), data)
211            .map_err(|e| PhopError::ShapeMismatch(e.to_string()))?;
212        let feature_names = (0..n_pi).map(|k| format!("pi{k}")).collect();
213        let ds = DataSet {
214            x,
215            y: self.y.clone(),
216            feature_names,
217            target_name: self.target_name.clone(),
218        };
219        Ok((ds, groups))
220    }
221}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226    use scirs2_core::ndarray::{Array1, Array2};
227
228    #[test]
229    fn pi_group_for_velocity_time_length() {
230        // x0 = length [m], x1 = time [s], x2 = velocity [m/s]. The dimensionless group is
231        // x2 · x1 / x0  (velocity·time/length), i.e. exponents (x0:-1, x1:+1, x2:+1).
232        let dims = [
233            LENGTH,                 // x0
234            TIME,                   // x1
235            [-1, 1, 0, 0, 0, 0, 0], // x2 = m·s^-1
236        ];
237        let groups = pi_groups(&dims);
238        assert_eq!(
239            groups.len(),
240            1,
241            "expected exactly one π-group, got {groups:?}"
242        );
243        // The group must be proportional to (-1, +1, +1) (sign-normalized to leading positive).
244        let g = &groups[0];
245        // leading-positive convention → first nonzero is +1 (x0 coeff = -1 would flip): so expect
246        // either (-1,1,1) or its negation normalized; check the *ratios*.
247        let nonzero: Vec<i32> = g.clone();
248        // x0:x1:x2 should be -1:1:1 up to sign.
249        assert!(
250            nonzero == vec![-1, 1, 1] || nonzero == vec![1, -1, -1],
251            "unexpected π-group {nonzero:?}"
252        );
253    }
254
255    #[test]
256    fn to_dimensionless_yields_constant_group() {
257        // Build data where x2 = x0 / x1 exactly, so the group x2·x1/x0 = 1 for every row.
258        let n = 12;
259        let mut xd = Vec::new();
260        for i in 0..n {
261            let x0 = 1.0 + i as f64; // length
262            let x1 = 2.0 + i as f64; // time
263            let x2 = x0 / x1; // velocity
264            xd.extend_from_slice(&[x0, x1, x2]);
265        }
266        let x = Array2::from_shape_vec((n, 3), xd).unwrap();
267        let y = Array1::from(vec![0.0; n]);
268        let ds = DataSet::from_arrays(x, y).unwrap();
269
270        let dims = [LENGTH, TIME, [-1, 1, 0, 0, 0, 0, 0]];
271        let (red, groups) = ds.to_dimensionless(&dims).unwrap();
272        assert_eq!(groups.len(), 1);
273        assert_eq!(red.n_vars(), 1);
274        // Every π value should equal 1 (x2·x1/x0 = 1), regardless of the basis sign.
275        for r in 0..n {
276            let v = red.x[[r, 0]];
277            assert!((v - 1.0).abs() < 1e-9, "π not constant: {v}");
278        }
279    }
280
281    #[test]
282    fn independent_dims_have_no_group() {
283        // length and time are dimensionally independent → no dimensionless group.
284        assert!(pi_groups(&[LENGTH, TIME]).is_empty());
285    }
286}