phop_core/
discoverer.rs

1//! The top-level discovery engine.
2//!
3//! [`Discoverer::fit`] searches over EML tree topologies and, for each, fits real-valued
4//! constants by gradient descent (Layer A forward + Adam), scores candidates on
5//! (accuracy, complexity), and returns the Pareto front.
6//!
7//! Topology proposal currently uses `oxieml`'s structural enumeration (a strong, exact
8//! baseline at shallow depth). The differentiable Gumbel-Softmax topology relaxation
9//! (Layer B / M2) plugs in here as an alternative proposer for deeper search.
10
11use crate::affine::discover_affine_pareto;
12use crate::any_solution::{merge_pareto, AnySolution};
13use crate::config::Config;
14use crate::dataset::DataSet;
15use crate::error::{PhopError, Result};
16use crate::fit::{fit_constants, n_constants};
17use crate::forest::eval_tree;
18use crate::pareto::ParetoFront;
19use crate::solution::Solution;
20use oxieml::symreg::{dedupe_by_semantics, enumerate_topologies};
21use oxieml::{EmlNode, EmlTree};
22use std::sync::Arc;
23
24/// Crate-local fitting interface implemented by every GPU engine, so the discoverer can dispatch
25/// over `Box<dyn GpuFitEngine>` regardless of which backend feature(s) are compiled in. Each impl
26/// is a one-line forward to the engine's inherent `fit_constants`, so enabling exactly one backend
27/// is behaviorally identical to calling that engine directly.
28#[cfg(any(feature = "gpu-cuda", feature = "gpu-metal"))]
29pub(crate) trait GpuFitEngine {
30    /// Coarse single-precision constant fit on the GPU (the CPU LM polish sharpens it to f64).
31    fn gpu_fit_constants(
32        &self,
33        template: &EmlTree,
34        ds: &DataSet,
35        learning_rate: f64,
36        max_epochs: usize,
37    ) -> Result<(EmlTree, f64)>;
38}
39
40#[cfg(feature = "gpu-cuda")]
41impl GpuFitEngine for crate::gpu::CudaEmlEngine {
42    fn gpu_fit_constants(
43        &self,
44        template: &EmlTree,
45        ds: &DataSet,
46        learning_rate: f64,
47        max_epochs: usize,
48    ) -> Result<(EmlTree, f64)> {
49        self.fit_constants(template, ds, learning_rate, max_epochs)
50    }
51}
52
53#[cfg(feature = "gpu-metal")]
54impl GpuFitEngine for crate::metal::MetalEmlEngine {
55    fn gpu_fit_constants(
56        &self,
57        template: &EmlTree,
58        ds: &DataSet,
59        learning_rate: f64,
60        max_epochs: usize,
61    ) -> Result<(EmlTree, f64)> {
62        self.fit_constants(template, ds, learning_rate, max_epochs)
63    }
64}
65
66/// Maximum depth used for exact structural enumeration (deeper search is the M2 path).
67const MAX_ENUM_DEPTH: usize = 3;
68/// Hard cap on enumerated topologies considered, to bound wall-clock.
69const MAX_CANDIDATES: usize = 512;
70/// Maximum number of free constants a candidate may have to be worth fitting.
71const MAX_FIT_CONSTS: usize = 6;
72/// Number of most-promising topologies whose constants are actually fitted by Adam.
73///
74/// Constant fitting is the expensive step (one autograd loop per candidate). We rank
75/// topologies by their *raw* (unfitted) MSE and fit only the best few, which is both far
76/// cheaper and well-targeted. Joint fitting of the whole population in one graph is the
77/// M2/M4 (tensorized) upgrade.
78const FIT_BUDGET: usize = 24;
79
80/// Discovery engine configured by a [`Config`].
81#[derive(Debug, Clone)]
82pub struct Discoverer {
83    cfg: Config,
84}
85
86/// Replace every `One` leaf with a free `Const(1.0)`, turning literal ones into
87/// fittable parameters while preserving structure and variable placement.
88fn constantize(node: &EmlNode) -> Arc<EmlNode> {
89    match node {
90        EmlNode::One => Arc::new(EmlNode::Const(1.0)),
91        EmlNode::Const(c) => Arc::new(EmlNode::Const(*c)),
92        EmlNode::Var(i) => Arc::new(EmlNode::Var(*i)),
93        EmlNode::Eml { left, right } => Arc::new(EmlNode::Eml {
94            left: constantize(left),
95            right: constantize(right),
96        }),
97    }
98}
99
100/// Whether a solution's rendered LaTeX is a finite, valid closed form.
101fn latex_is_finite(sol: &Solution) -> bool {
102    let tex = sol.latex();
103    let lower = tex.to_ascii_lowercase();
104    !lower.contains("nan") && !lower.contains("inf")
105}
106
107/// Cheaply score one raw topology: forward-evaluate it and pair its MSE with a [`Solution`], or
108/// `None` if the forward pass fails or is non-finite.
109fn eval_one(topo: &EmlTree, ds: &DataSet) -> Option<(f64, Solution)> {
110    let pred = eval_tree(topo, &ds.x).ok()?;
111    let m = crate::fit::mse(&pred, &ds.y);
112    m.is_finite().then(|| (m, Solution::new(topo.clone(), m)))
113}
114
115/// Score every topology. With the `parallel` feature this fans the independent evaluations across
116/// cores via `scirs2-core`'s order-preserving parallel map; otherwise it is a plain sequential map.
117/// Either way the output order matches `topos`, keeping discovery deterministic.
118#[cfg(feature = "parallel")]
119fn raw_evals(topos: &[EmlTree], ds: &DataSet) -> Vec<Option<(f64, Solution)>> {
120    scirs2_core::parallel_ops::parallel_map(topos, |topo| eval_one(topo, ds))
121}
122
123/// Sequential fallback used when the `parallel` feature is disabled.
124#[cfg(not(feature = "parallel"))]
125fn raw_evals(topos: &[EmlTree], ds: &DataSet) -> Vec<Option<(f64, Solution)>> {
126    topos.iter().map(|topo| eval_one(topo, ds)).collect()
127}
128
129impl Discoverer {
130    /// Create a discoverer with the given configuration.
131    #[must_use]
132    pub fn new(cfg: Config) -> Self {
133        Self { cfg }
134    }
135
136    /// Run discovery on a dataset, returning the Pareto front of candidate expressions.
137    ///
138    /// # Errors
139    /// Returns [`PhopError::NotConverged`] if no finite-scoring candidate is found.
140    pub fn fit(&self, ds: &DataSet) -> Result<ParetoFront> {
141        if ds.is_empty() {
142            return Err(PhopError::ShapeMismatch("empty dataset".to_string()));
143        }
144        let n_vars = ds.n_vars();
145        let depth = self.cfg.max_depth.clamp(1, MAX_ENUM_DEPTH);
146
147        let mut topologies = dedupe_by_semantics(enumerate_topologies(depth, n_vars));
148        topologies.truncate(MAX_CANDIDATES);
149
150        let mut candidates: Vec<Solution> = Vec::new();
151
152        // 1. Cheaply evaluate every raw topology and record its MSE for ranking. This sweep is
153        //    embarrassingly parallel (each topology is scored independently); with the `parallel`
154        //    feature it fans out across cores while preserving input order, so the ranking — and
155        //    therefore the whole discovery — stays deterministic.
156        let mut ranked: Vec<(f64, usize)> = Vec::new();
157        for (i, eval) in raw_evals(&topologies, ds).into_iter().enumerate() {
158            if let Some((m, sol)) = eval {
159                candidates.push(sol);
160                ranked.push((m, i));
161            }
162        }
163
164        // Optional GPU backend for the (expensive) constant-fitting step. The GPU produces a coarse
165        // single-precision fit which the CPU Levenberg–Marquardt polish below sharpens to f64, so
166        // the final front stays exact regardless of backend. Falls back to CPU silently.
167        #[cfg(any(feature = "gpu-cuda", feature = "gpu-metal"))]
168        let gpu_engine: Option<Box<dyn GpuFitEngine>> = self.select_gpu_engine();
169
170        // 2. Fit constants only on the most promising topologies (bounded budget).
171        ranked.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
172        let mut fits_done = 0;
173        for (_, i) in ranked {
174            if fits_done >= FIT_BUDGET {
175                break;
176            }
177            let ct = EmlTree::from_node(constantize(&topologies[i].root));
178            let nc = n_constants(&ct);
179            if (1..=MAX_FIT_CONSTS).contains(&nc) {
180                let fitted_res = {
181                    #[cfg(any(feature = "gpu-cuda", feature = "gpu-metal"))]
182                    {
183                        self.fit_one(&ct, ds, gpu_engine.as_deref())
184                    }
185                    #[cfg(not(any(feature = "gpu-cuda", feature = "gpu-metal")))]
186                    {
187                        fit_constants(&ct, ds, &self.cfg)
188                    }
189                };
190                if let Ok((fitted, _)) = fitted_res {
191                    // Sharpen the coarse Adam constants with Levenberg–Marquardt, then snap to
192                    // named constants (π, e, …) when that does not worsen the fit.
193                    let (polished, _) = crate::polish::polish_constants(&fitted, ds, 40);
194                    let (snapped, m) = crate::polish::snap_constants(&polished, ds, 0.02);
195                    let sol = Solution::new(snapped, m);
196                    // Reject fits whose *symbolic* form is degenerate (e.g. a negative
197                    // constant in a `ln` position): the guarded numeric eval stays finite,
198                    // but the closed form is not a valid elementary expression.
199                    if m.is_finite() && latex_is_finite(&sol) {
200                        candidates.push(sol);
201                    }
202                }
203                fits_done += 1;
204            }
205        }
206
207        if candidates.is_empty() {
208            return Err(PhopError::NotConverged(
209                "no finite-scoring candidate expression found".to_string(),
210            ));
211        }
212        Ok(ParetoFront::from_candidates(candidates))
213    }
214
215    /// Choose the GPU fitting engine for the configured backend, or `None` to fit on the CPU.
216    /// Only the backend whose feature is compiled in can be selected; a missing device falls back
217    /// to `None` (CPU). With `--all-features` both arms are present and `cfg.backend` disambiguates.
218    #[cfg(any(feature = "gpu-cuda", feature = "gpu-metal"))]
219    fn select_gpu_engine(&self) -> Option<Box<dyn GpuFitEngine>> {
220        use crate::config::Backend;
221        match self.cfg.backend {
222            #[cfg(feature = "gpu-cuda")]
223            Backend::Cuda if crate::gpu::cuda_available() => crate::gpu::CudaEmlEngine::new()
224                .ok()
225                .map(|e| Box::new(e) as Box<dyn GpuFitEngine>),
226            #[cfg(feature = "gpu-metal")]
227            Backend::Metal if crate::metal::metal_available() => {
228                crate::metal::MetalEmlEngine::new()
229                    .ok()
230                    .map(|e| Box::new(e) as Box<dyn GpuFitEngine>)
231            }
232            _ => None,
233        }
234    }
235
236    /// Fit the constant leaves of `ct` on the GPU when an engine is supplied, else on the CPU.
237    #[cfg(any(feature = "gpu-cuda", feature = "gpu-metal"))]
238    fn fit_one(
239        &self,
240        ct: &EmlTree,
241        ds: &DataSet,
242        gpu: Option<&dyn GpuFitEngine>,
243    ) -> Result<(EmlTree, f64)> {
244        match gpu {
245            Some(engine) => {
246                engine.gpu_fit_constants(ct, ds, self.cfg.learning_rate, self.cfg.max_epochs)
247            }
248            None => fit_constants(ct, ds, &self.cfg),
249        }
250    }
251}
252
253/// Robust "just works" discovery: a **meta-ensemble** that runs algorithmically diverse searches
254/// and returns the **merged** Pareto front — the best across methods and depths without the caller
255/// picking one. It combines phop's own searches (structural enumeration, Gumbel-Softmax topology,
256/// gated depth learning) with the cool-japan `oxieml` symbolic-regression engine (its GA/beam/MCTS
257/// strategies). All `oxieml` candidates are re-scored with phop's evaluator so the front is
258/// consistent. This is the shape-mixture idea in spirit, and the diversity is something no single
259/// monolithic SR algorithm provides.
260///
261/// # Errors
262/// Returns [`PhopError::NotConverged`] only if *every* method fails to produce a finite solution.
263pub fn discover_auto(ds: &DataSet, cfg: &Config) -> Result<ParetoFront> {
264    if ds.is_empty() {
265        return Err(PhopError::ShapeMismatch("empty dataset".to_string()));
266    }
267    let mut cands: Vec<Solution> = Vec::new();
268    if let Ok(f) = Discoverer::new(cfg.clone()).fit(ds) {
269        cands.extend(f.solutions);
270    }
271    if let Ok(f) = crate::gumbel::discover_gumbel(ds, cfg) {
272        cands.extend(f.solutions);
273    }
274    if let Ok(f) = crate::gated::discover_gated(ds, cfg) {
275        cands.extend(f.solutions);
276    }
277    cands.extend(oxieml_symreg_candidates(ds));
278    if cands.is_empty() {
279        return Err(PhopError::NotConverged(
280            "no discovery method produced a finite solution".to_string(),
281        ));
282    }
283    Ok(ParetoFront::from_candidates(cands))
284}
285
286/// The **full** meta-ensemble: [`discover_auto`]'s EML-tree front merged with the **rich-leaf affine
287/// engine** ([`crate::discover_affine_pareto`]) into one Pareto front over both representations.
288///
289/// This is the discovery the CLI `auto` method runs, so the default search *reaches the affine
290/// engine* — the one that recovers product / power / ratio laws the shallow EML core cannot assemble.
291/// Without it, `auto` silently excludes phop's strongest recovery path. `max_internal` bounds the
292/// affine eml-skeleton size and `cand_cap` its candidate pool. Returns solutions sorted by MSE
293/// ascending (most accurate first); see [`AnySolution`].
294///
295/// # Errors
296/// Returns [`PhopError::ShapeMismatch`] only for an empty dataset. Individual searches that fail are
297/// omitted; if *every* contributor is empty the returned vector is empty (not an error).
298pub fn discover_auto_all(
299    ds: &DataSet,
300    cfg: &Config,
301    max_internal: usize,
302    cand_cap: usize,
303) -> Result<Vec<AnySolution>> {
304    if ds.is_empty() {
305        return Err(PhopError::ShapeMismatch("empty dataset".to_string()));
306    }
307    let mut cands: Vec<AnySolution> = Vec::new();
308    // EML-tree core (enumerate + Gumbel + gated + oxieml-symreg). It only errors when every method
309    // fails — in that case proceed with the affine engine alone.
310    if let Ok(front) = discover_auto(ds, cfg) {
311        cands.extend(front.solutions.into_iter().map(AnySolution::Eml));
312    }
313    // Rich-leaf affine engine (the product/power/ratio recovery path).
314    cands.extend(
315        discover_affine_pareto(&ds.x, &ds.y, max_internal, cand_cap)
316            .into_iter()
317            .map(AnySolution::Affine),
318    );
319    Ok(merge_pareto(cands))
320}
321
322/// Run the `oxieml` symbolic-regression engine and return its Pareto formulas as phop solutions,
323/// re-scored with phop's evaluator for a consistent front. Failures yield an empty vector (the
324/// meta-ensemble simply proceeds without this contributor).
325fn oxieml_symreg_candidates(ds: &DataSet) -> Vec<Solution> {
326    use oxieml::symreg::{SymRegConfig, SymRegEngine};
327    let inputs: Vec<Vec<f64>> = (0..ds.len())
328        .map(|i| (0..ds.n_vars()).map(|j| ds.x[[i, j]]).collect())
329        .collect();
330    let targets: Vec<f64> = ds.y.to_vec();
331    // Dependency searches can emit debug output; suppress it during the call.
332    let _silencer = crate::silence::SilenceStdout::new();
333    let engine = SymRegEngine::new(SymRegConfig::quick());
334    let result = engine.discover_pareto(&inputs, &targets, ds.n_vars());
335    drop(_silencer);
336    match result {
337        Ok(formulas) => formulas
338            .into_iter()
339            .filter_map(|f| {
340                let pred = eval_tree(&f.eml_tree, &ds.x).ok()?;
341                let m = crate::fit::mse(&pred, &ds.y);
342                m.is_finite().then(|| Solution::new(f.eml_tree, m))
343            })
344            .collect(),
345        Err(_) => Vec::new(),
346    }
347}
348
349#[cfg(test)]
350mod tests {
351    use super::*;
352    use scirs2_core::ndarray::{Array1, Array2};
353
354    #[test]
355    fn discovers_exp() {
356        // Target: y = exp(x0), recoverable exactly as eml(x0, 1) at depth 1.
357        let xs: Vec<f64> = (0..30).map(|i| f64::from(i) * 0.1).collect();
358        let ys: Vec<f64> = xs.iter().map(|&x| x.exp()).collect();
359        let x = Array2::from_shape_vec((xs.len(), 1), xs).unwrap();
360        let y = Array1::from(ys);
361        let ds = DataSet::from_arrays(x, y).unwrap();
362
363        let cfg = Config::default().max_depth(1).max_epochs(300);
364        let front = Discoverer::new(cfg).fit(&ds).unwrap();
365        assert!(!front.is_empty());
366        let best = front.best().unwrap();
367        assert!(
368            best.mse < 1e-6,
369            "best mse = {} ({})",
370            best.mse,
371            best.pretty()
372        );
373    }
374
375    #[test]
376    fn enumerate_discovery_is_deterministic() {
377        // A fixed seed must produce an identical Pareto front (same size, same MSEs/complexities).
378        let xs: Vec<f64> = (1..=30).map(|i| f64::from(i) * 0.1).collect();
379        let ys: Vec<f64> = xs.iter().map(|&x| x.exp() - 2.0_f64.ln()).collect();
380        let x = Array2::from_shape_vec((xs.len(), 1), xs).unwrap();
381        let ds = DataSet::from_arrays(x, Array1::from(ys)).unwrap();
382
383        let cfg = Config::default().max_depth(2).max_epochs(200).seed(5);
384        let a = Discoverer::new(cfg.clone()).fit(&ds).unwrap();
385        let b = Discoverer::new(cfg).fit(&ds).unwrap();
386        assert_eq!(a.len(), b.len(), "front sizes differ across identical runs");
387        for (sa, sb) in a.solutions.iter().zip(&b.solutions) {
388            assert_eq!(sa.complexity, sb.complexity);
389            assert!(
390                (sa.mse - sb.mse).abs() < 1e-9,
391                "non-deterministic MSE: {} vs {}",
392                sa.mse,
393                sb.mse
394            );
395        }
396    }
397
398    #[test]
399    fn discover_auto_merges_methods_and_recovers_exp() {
400        // y = exp(x): the merged front (enumerate + gumbel + gated) must contain the exact law.
401        let xs: Vec<f64> = (0..24).map(|i| f64::from(i) * 0.1).collect();
402        let ys: Vec<f64> = xs.iter().map(|&x| x.exp()).collect();
403        let x = Array2::from_shape_vec((xs.len(), 1), xs).unwrap();
404        let ds = DataSet::from_arrays(x, Array1::from(ys)).unwrap();
405
406        // Tiny config so the three methods together stay fast.
407        let cfg = Config::default()
408            .max_depth(2)
409            .population(2)
410            .max_epochs(120)
411            .seed(0);
412        let front = discover_auto(&ds, &cfg).unwrap();
413        assert!(!front.is_empty());
414        assert!(
415            front.best().unwrap().mse < 1e-6,
416            "discover_auto did not recover exp: best mse = {}",
417            front.best().unwrap().mse
418        );
419    }
420
421    #[cfg(feature = "gpu-cuda")]
422    #[test]
423    fn discovers_exp_on_cuda_backend() {
424        use crate::config::Backend;
425        if !crate::gpu::cuda_available() {
426            eprintln!("skipping CUDA-backend test: no device");
427            return;
428        }
429        // y = exp(x0): GPU does the coarse constant fit, CPU LM polish sharpens to f64.
430        let xs: Vec<f64> = (0..30).map(|i| f64::from(i) * 0.1).collect();
431        let ys: Vec<f64> = xs.iter().map(|&x| x.exp()).collect();
432        let x = Array2::from_shape_vec((xs.len(), 1), xs).unwrap();
433        let ds = DataSet::from_arrays(x, Array1::from(ys)).unwrap();
434
435        let cfg = Config::default()
436            .max_depth(1)
437            .max_epochs(300)
438            .backend(Backend::Cuda);
439        let front = Discoverer::new(cfg).fit(&ds).unwrap();
440        let best = front.best().unwrap();
441        assert!(
442            best.mse < 1e-6,
443            "CUDA-backend best mse = {} ({})",
444            best.mse,
445            best.pretty()
446        );
447    }
448
449    #[cfg(all(target_os = "macos", feature = "gpu-metal"))]
450    #[test]
451    fn discovers_exp_on_metal_backend() {
452        use crate::config::Backend;
453        if !crate::metal::metal_available() {
454            eprintln!("skipping Metal-backend test: no device");
455            return;
456        }
457        // y = exp(x0): Metal does the coarse constant fit, CPU LM polish sharpens to f64.
458        let xs: Vec<f64> = (0..30).map(|i| f64::from(i) * 0.1).collect();
459        let ys: Vec<f64> = xs.iter().map(|&x| x.exp()).collect();
460        let x = Array2::from_shape_vec((xs.len(), 1), xs).unwrap();
461        let y = Array1::from(ys);
462        let ds = DataSet::from_arrays(x, y).unwrap();
463
464        let cfg = Config::default()
465            .max_depth(1)
466            .max_epochs(300)
467            .backend(Backend::Metal);
468        let front = Discoverer::new(cfg).fit(&ds).unwrap();
469        let best = front.best().unwrap();
470        assert!(
471            best.mse < 1e-6,
472            "Metal-backend best mse = {} ({})",
473            best.mse,
474            best.pretty()
475        );
476    }
477}
phop_core/discoverer.rs

phop_core/
discoverer.rs