Skip to main content

sim_lib_lang_genconf/
space.rs

1//! Shape-backed expression spaces for generative conformance.
2
3use std::sync::Arc;
4
5use sim_kernel::{Cx, Expr, NumberLiteral, ShapeDoc, Symbol};
6use sim_shape::{AnyShape, ExprKind, ExprKindShape, ListShape, OrShape, Shape};
7
8use crate::seed::r7rs_seed_corpus;
9
/// A deterministic, sized enumerator of `Expr` graphs drawn from a
/// Shape-described space.
///
/// The grammar shape is the membership oracle: an expression is in the space
/// exactly when `grammar.check_expr` accepts it.
pub struct ExprSpace {
    /// Membership oracle; consulted by `contains` and described by
    /// `describe_grammar`.
    grammar: Arc<dyn Shape>,
    /// Curated expressions that `enumerate` emits first, before any atoms.
    seed: Vec<Expr>,
    /// Leaf expressions: emitted after the seeds and used as the second
    /// element of every list that `enumerate` generates.
    atoms: Vec<Expr>,
    /// Bound on the list-building rounds run by `enumerate` (the round loop
    /// runs while its counter, starting at 1, is below this value).
    max_depth: usize,
}
21
22impl ExprSpace {
23    /// Builds the R7RS-small core expression space.
24    ///
25    /// The first space covers booleans, symbols, strings, integer literals, and
26    /// list compounds over those atoms, bounded by `max_depth`.
27    pub fn r7rs_core_space(max_depth: usize) -> Self {
28        Self {
29            grammar: r7rs_core_grammar(),
30            seed: r7rs_seed_corpus(),
31            atoms: r7rs_core_atoms(),
32            max_depth,
33        }
34    }
35
36    /// Builds the shared core expression space for codec-neutral round trips.
37    ///
38    /// This space uses the same grammar and atoms as [`ExprSpace::r7rs_core_space`],
39    /// but keeps the seed corpus to forms that general expression readers decode
40    /// back to the same `Expr` without syntax lowering.
41    pub fn core_round_trip_space(max_depth: usize) -> Self {
42        Self {
43            grammar: r7rs_core_grammar(),
44            seed: core_round_trip_seed_corpus(),
45            atoms: r7rs_core_atoms(),
46            max_depth,
47        }
48    }
49
50    /// Returns the Shape grammar used as this space's membership oracle.
51    pub fn grammar(&self) -> Arc<dyn Shape> {
52        Arc::clone(&self.grammar)
53    }
54
55    /// Returns the curated seed corpus this space contains.
56    pub fn seed_corpus(&self) -> Vec<Expr> {
57        self.seed.clone()
58    }
59
60    /// Returns a browsable description of this space's membership grammar.
61    pub fn describe_grammar(&self, cx: &mut Cx) -> sim_kernel::Result<ShapeDoc> {
62        self.grammar.describe(cx)
63    }
64
65    /// Returns the maximum expression depth used by enumeration.
66    pub fn max_depth(&self) -> usize {
67        self.max_depth
68    }
69
70    /// Returns true when `expr` is a member of this space.
71    pub fn contains(&self, cx: &mut Cx, expr: &Expr) -> bool {
72        matches!(self.grammar.check_expr(cx, expr), Ok(matched) if matched.accepted)
73    }
74
75    /// Deterministically enumerates distinct in-space expressions in size order.
76    ///
77    /// Enumeration is stable for the same `(space, budget)` input.
78    pub fn enumerate(&self, budget: usize) -> Vec<Expr> {
79        let mut out = Vec::new();
80        for seed in &self.seed {
81            push_unique(&mut out, seed.clone(), budget);
82        }
83        for atom in &self.atoms {
84            push_unique(&mut out, atom.clone(), budget);
85        }
86
87        let mut depth = 1;
88        while depth < self.max_depth && out.len() < budget {
89            let frontier = out.clone();
90            for head in &frontier {
91                for tail in &self.atoms {
92                    if out.len() >= budget {
93                        break;
94                    }
95                    push_unique(
96                        &mut out,
97                        Expr::List(vec![head.clone(), tail.clone()]),
98                        budget,
99                    );
100                }
101            }
102            depth += 1;
103        }
104        out.truncate(budget);
105        out
106    }
107}
108
109/// Builds the canonical R7RS-small core Shape grammar.
110pub fn r7rs_core_grammar() -> Arc<dyn Shape> {
111    Arc::new(OrShape::new(vec![
112        Arc::new(ExprKindShape::new(ExprKind::Bool)),
113        Arc::new(ExprKindShape::new(ExprKind::Symbol)),
114        Arc::new(ExprKindShape::new(ExprKind::String)),
115        Arc::new(ExprKindShape::new(ExprKind::Number)),
116        Arc::new(ListShape::with_rest(Vec::new(), Arc::new(AnyShape))),
117    ]))
118}
119
120fn push_unique(out: &mut Vec<Expr>, expr: Expr, budget: usize) {
121    if out.len() < budget && !out.iter().any(|existing| existing.canonical_eq(&expr)) {
122        out.push(expr);
123    }
124}
125
126fn r7rs_core_atoms() -> Vec<Expr> {
127    vec![
128        Expr::Bool(true),
129        Expr::Bool(false),
130        Expr::Symbol(Symbol::new("answer")),
131        Expr::String("sim".to_owned()),
132        Expr::Number(NumberLiteral {
133            domain: Symbol::qualified("numbers", "i64"),
134            canonical: "1".to_owned(),
135        }),
136    ]
137}
138
139fn core_round_trip_seed_corpus() -> Vec<Expr> {
140    vec![
141        Expr::Bool(true),
142        Expr::Bool(false),
143        Expr::Symbol(Symbol::new("answer")),
144        Expr::String("sim".to_owned()),
145    ]
146}
147
#[cfg(test)]
mod tests {
    use sim_kernel::testing::bare_cx as cx;

    use super::*;

    /// Two enumerations with the same budget agree, yield more than the five
    /// atoms' worth of expressions, and stay inside the grammar.
    #[test]
    fn r7rs_core_enumeration_is_stable_and_in_space() {
        let mut cx = cx();
        let space = ExprSpace::r7rs_core_space(3);
        let first = space.enumerate(64);
        let second = space.enumerate(64);

        assert_eq!(first, second);
        assert!(first.len() > 5);
        for expr in &first {
            assert!(space.contains(&mut cx, expr), "out-of-space: {expr:?}");
        }
    }

    /// The grammar description names the `or` shape and lists its bool and
    /// list alternatives among the details.
    #[test]
    fn grammar_description_is_browsable() {
        let mut cx = cx();
        let space = ExprSpace::r7rs_core_space(3);

        let doc = space.describe_grammar(&mut cx).unwrap();
        let has_detail = |wanted: &str| doc.details.iter().any(|detail| detail == wanted);

        assert_eq!(doc.name, "or shape");
        assert!(has_detail("expr-kind bool"));
        assert!(has_detail("list shape"));
    }

    /// Both the enumerated expressions and the raw seed corpus are accepted
    /// by the space's membership oracle.
    #[test]
    fn grammar_tests_enumerated_exprs_are_all_in_space() {
        let mut cx = cx();
        let space = ExprSpace::r7rs_core_space(3);

        let enumerated = space.enumerate(128);
        for expr in &enumerated {
            assert!(space.contains(&mut cx, expr), "out-of-space: {expr:?}");
        }

        let seeds = space.seed_corpus();
        for seed in &seeds {
            assert!(
                space.contains(&mut cx, seed),
                "seed not in space: {seed:?}"
            );
        }
    }

    /// The round-trip seed corpus keeps `true` but has no list headed by the
    /// `quote` symbol.
    #[test]
    fn round_trip_space_omits_lowering_sensitive_quote_seed() {
        let space = ExprSpace::core_round_trip_space(3);
        let seed = space.seed_corpus();

        let quote_head = Expr::Symbol(Symbol::new("quote"));
        let is_quote_form = |expr: &Expr| {
            matches!(
                expr,
                Expr::List(items) if items.first() == Some(&quote_head)
            )
        };

        assert!(seed.iter().any(|expr| matches!(expr, Expr::Bool(true))));
        assert!(!seed.iter().any(is_quote_form));
    }
}