phop_core/distill.rs
1//! Layer D — symbolic distillation of a discovered EML tree.
2//!
3//! A raw EML tree is a composition of a single primitive (`eml(a, b) = exp(a) − ln(b)`). To be
4//! useful as a *discovered law* it must be turned into a recognizable closed form and emitted in
5//! the formats scientists actually use. This module is the single entry point for that:
6//!
7//! 1. **Canonicalize** by lowering the EML tree to the general elementary-function algebra and
8//! running `oxieml`'s simplifier (`tree.lower().simplify()`), which folds `ln(1) = 0`,
9//! `exp(ln(x)) = x`, constant arithmetic, etc. — so `eml(x, 1)` distills to `e^{x}`.
10//! 2. **Render** the canonical form to LaTeX and a pretty math string, and generate executable
11//! code in Rust, NumPy, and SymPy directly from the AST.
12//!
13//! Named-constant *snapping* (π, e, √2, …) happens earlier, during fitting ([`crate::polish`]),
14//! so the constants reaching distillation are already recognizable where possible.
15
16use crate::codegen;
17use oxieml::EmlTree;
18
/// One discovered expression, rendered into each output format phop emits.
///
/// Every field describes the same expression; only the target notation differs.
#[derive(Clone, Debug)]
pub struct Distilled {
    /// LaTeX of the canonical form, i.e. after lowering and algebraic simplification.
    pub latex: String,
    /// Simplified, human-readable math string of the canonical form (e.g. `exp(x0) - ln(2)`).
    pub pretty: String,
    /// Source text of a standalone Rust function `f(x0, …) -> f64`.
    pub rust: String,
    /// Python lambda that is compatible with NumPy.
    pub numpy: String,
    /// SymPy expression string, preceded by a `symbols(...)` hint comment.
    pub sympy: String,
    /// EML node count of the original tree, used as its structural complexity.
    pub complexity: usize,
}
35
36/// Canonical LaTeX for a tree: lower to the elementary algebra and simplify before rendering.
37#[must_use]
38pub fn canonical_latex(tree: &EmlTree) -> String {
39 tree.lower().simplify().to_latex()
40}
41
42/// Canonical pretty math string for a tree (simplified rich-AST display).
43#[must_use]
44pub fn canonical_pretty(tree: &EmlTree) -> String {
45 format!("{}", tree.lower().simplify())
46}
47
48/// Distill a tree into all supported output formats.
49#[must_use]
50pub fn distill(tree: &EmlTree) -> Distilled {
51 Distilled {
52 latex: canonical_latex(tree),
53 pretty: canonical_pretty(tree),
54 rust: codegen::rust_code(tree),
55 numpy: codegen::numpy_code(tree),
56 sympy: codegen::sympy_code(tree),
57 complexity: tree.size(),
58 }
59}
60
#[cfg(test)]
mod tests {
    use super::*;

    /// `eml(x, 1) = exp(x) - ln(1) = exp(x)`: canonicalization should drop the `ln(1)` term
    /// and every output format should be populated.
    #[test]
    fn exp_distills_to_clean_forms() {
        let tree = EmlTree::eml(&EmlTree::var(0), &EmlTree::one());
        let distilled = distill(&tree);

        // The exponential must survive simplification in the LaTeX output...
        let has_exponential = distilled.latex.contains("e^") || distilled.latex.contains("exp");
        assert!(has_exponential, "latex: {}", distilled.latex);

        // ...with no leftover logarithm of one.
        let has_ln_of_one = distilled.latex.contains("\\ln\\left(1");
        assert!(
            !has_ln_of_one,
            "ln(1) not simplified: {}",
            distilled.latex
        );

        // Each code target carries its expected marker.
        assert!(distilled.rust.contains("fn f("));
        assert!(distilled.numpy.contains("np.exp"));
        assert!(distilled.sympy.contains("exp("));

        // Complexity is the size of the tree that was passed in.
        assert_eq!(distilled.complexity, tree.size());
    }

    /// `canonical_latex` must agree with the explicit lower → simplify → LaTeX chain
    /// (the same path `Solution::latex` uses).
    #[test]
    fn canonical_latex_matches_solution_latex() {
        let tree = oxieml::Canonical::exp(&EmlTree::var(0));
        let actual = canonical_latex(&tree);
        let expected = tree.lower().simplify().to_latex();
        assert_eq!(actual, expected);
    }
}