Skip to main content

phop_core/
distill.rs

1//! Layer D — symbolic distillation of a discovered EML tree.
2//!
3//! A raw EML tree is a composition of a single primitive (`eml(a, b) = exp(a) − ln(b)`). To be
4//! useful as a *discovered law* it must be turned into a recognizable closed form and emitted in
5//! the formats scientists actually use. This module is the single entry point for that:
6//!
7//! 1. **Canonicalize** by lowering the EML tree to the general elementary-function algebra and
8//!    running `oxieml`'s simplifier (`tree.lower().simplify()`), which folds `ln(1) = 0`,
9//!    `exp(ln(x)) = x`, constant arithmetic, etc. — so `eml(x, 1)` distills to `e^{x}`.
10//! 2. **Render** the canonical form to LaTeX and a pretty math string, and generate executable
11//!    code in Rust, NumPy, and SymPy directly from the AST.
12//!
13//! Named-constant *snapping* (π, e, √2, …) happens earlier, during fitting ([`crate::polish`]),
14//! so the constants reaching distillation are already recognizable where possible.
15
16use crate::codegen;
17use oxieml::EmlTree;
18
/// Every rendering phop produces for one discovered expression.
///
/// `latex` and `pretty` describe the canonical form (lowered, then algebraically simplified);
/// `rust`, `numpy`, and `sympy` carry generated code; `complexity` measures the original tree.
#[derive(Clone, Debug)]
pub struct Distilled {
    /// LaTeX for the canonical form, i.e. after lowering and algebraic simplification.
    pub latex: String,
    /// Simplified, human-readable math string (e.g. `exp(x0) - ln(2)`).
    pub pretty: String,
    /// Source of a standalone Rust function `f(x0, …) -> f64`.
    pub rust: String,
    /// Python lambda that is compatible with NumPy.
    pub numpy: String,
    /// SymPy expression string, preceded by a `symbols(...)` hint comment.
    pub sympy: String,
    /// EML node count of the original tree, used as its structural complexity.
    pub complexity: usize,
}
35
36/// Canonical LaTeX for a tree: lower to the elementary algebra and simplify before rendering.
37#[must_use]
38pub fn canonical_latex(tree: &EmlTree) -> String {
39    tree.lower().simplify().to_latex()
40}
41
42/// Canonical pretty math string for a tree (simplified rich-AST display).
43#[must_use]
44pub fn canonical_pretty(tree: &EmlTree) -> String {
45    format!("{}", tree.lower().simplify())
46}
47
48/// Distill a tree into all supported output formats.
49#[must_use]
50pub fn distill(tree: &EmlTree) -> Distilled {
51    Distilled {
52        latex: canonical_latex(tree),
53        pretty: canonical_pretty(tree),
54        rust: codegen::rust_code(tree),
55        numpy: codegen::numpy_code(tree),
56        sympy: codegen::sympy_code(tree),
57        complexity: tree.size(),
58    }
59}
60
#[cfg(test)]
mod tests {
    use super::*;

    /// `eml(x, 1) = exp(x) - ln(1) = exp(x)`: canonicalization should drop the `ln(1)` term.
    #[test]
    fn exp_distills_to_clean_forms() {
        let x0 = EmlTree::var(0);
        let one = EmlTree::one();
        let tree = EmlTree::eml(&x0, &one);
        let d = distill(&tree);

        // The LaTeX must still show an exponential …
        let shows_exponential = d.latex.contains("e^") || d.latex.contains("exp");
        assert!(shows_exponential, "latex: {}", d.latex);

        // … and must not carry a leftover logarithm of one.
        let shows_ln_of_one = d.latex.contains("\\ln\\left(1");
        assert!(!shows_ln_of_one, "ln(1) not simplified: {}", d.latex);

        // Each code target contains the marker expected for this expression.
        assert!(d.rust.contains("fn f("));
        assert!(d.numpy.contains("np.exp"));
        assert!(d.sympy.contains("exp("));

        // Complexity is reported for the original tree.
        assert_eq!(d.complexity, tree.size());
    }

    /// `canonical_latex` follows the lower → simplify → to_latex path (the same path
    /// `Solution::latex` uses).
    #[test]
    fn canonical_latex_matches_solution_latex() {
        let tree = oxieml::Canonical::exp(&EmlTree::var(0));
        let actual = canonical_latex(&tree);
        let expected = tree.lower().simplify().to_latex();
        assert_eq!(actual, expected);
    }
}