Skip to main content

shifty_engine/
enumerate.rs

1//! The enumeration reference driver (`docs/07-repair-drivers.md` §4) and the
2//! `candidates` helper (`docs/06-repair.md` §3.3).
3//!
4//! This is the simplest driver: it walks the [`RepairTree`]'s finite choices —
5//! `Any` branches, `Repeat` counts, and per-hole candidate terms drawn from the
6//! data graph (reuse) or minted fresh — and returns the first plan whose `ΔG`
7//! passes the [`gate`]. It is primarily a correctness check on the IR and the
8//! gate; it cannot invent novel literals (those holes are filled only by reuse).
9//!
10//! Like every driver, it decides nothing the library doesn't expose: it only
11//! composes [`instantiate`], [`candidates`], and [`gate`].
12
13use crate::gate::{RepairOutcome, apply, gate};
14use crate::path::term_of;
15use crate::synthesize::synthesize;
16use crate::validate::NonStratifiable;
17use crate::value::value_type_holds;
18use crate::witness::witness_violations;
19use oxrdf::{BlankNode, Graph, Term};
20use shifty_algebra::Schema;
21use shifty_repair::{GraphDelta, HoleConstraint, NodeId, Plan, RepairTree, instantiate};
22use std::collections::{HashMap, HashSet};
23
24/// Knobs for the enumeration search.
25#[derive(Debug, Clone, Copy)]
26pub struct EnumOptions {
27    /// Maximum number of gated candidate `ΔG`s before giving up.
28    pub budget: usize,
29    /// Maximum candidate terms tried per hole (bounds branching).
30    pub max_candidates: usize,
31}
32
33impl Default for EnumOptions {
34    fn default() -> Self {
35        Self {
36            budget: 10_000,
37            max_candidates: 64,
38        }
39    }
40}
41
42/// A repair the enumeration driver found and verified.
43#[derive(Debug, Clone)]
44pub struct RepairSolution {
45    pub plan: Plan,
46    pub delta: GraphDelta,
47    pub outcome: RepairOutcome,
48}
49
50/// Search for the first sound, progress-making repair of `tree` against
51/// `(data, schema)`. `Ok(None)` means none was found within the budget.
52pub fn enumerate_repair(
53    tree: &RepairTree,
54    data: &Graph,
55    schema: &Schema,
56    opts: EnumOptions,
57) -> Result<Option<RepairSolution>, NonStratifiable> {
58    let mut choices = HashMap::new();
59    index_choices(tree, &mut choices);
60    let mut budget = opts.budget;
61    solve(
62        tree,
63        data,
64        schema,
65        &choices,
66        Plan::default(),
67        &mut budget,
68        opts,
69    )
70}
71
72/// The outcome of repairing a graph to a fixpoint.
73#[derive(Debug, Clone)]
74pub struct FixpointResult {
75    /// The repaired data graph (`G` with every applied `ΔG`).
76    pub graph: Graph,
77    /// The deltas applied, in order.
78    pub applied: Vec<GraphDelta>,
79    /// Iterations run (one per applied repair).
80    pub iterations: usize,
81    /// Violations still unrepaired when the loop stopped (0 ⟺ conforms).
82    pub remaining: usize,
83}
84
85/// The reference fixpoint driver (`docs/07-repair-drivers.md` §1): witness →
86/// synthesize → enumerate → gate → apply, re-witnessing after each accepted
87/// repair until the graph conforms or no focus admits a sound, progress-making
88/// repair. Each accepted `ΔG` is gated (introduces nothing) and strictly reduces
89/// the violation count, so the loop terminates.
90pub fn repair_to_fixpoint(
91    data: &Graph,
92    schema: &Schema,
93    opts: EnumOptions,
94) -> Result<FixpointResult, NonStratifiable> {
95    let mut graph = data.clone();
96    let mut applied = Vec::new();
97    let mut iterations = 0usize;
98    // A safety bound; progress guarantees far fewer iterations in practice.
99    let max_iterations = 100_000;
100
101    loop {
102        let witnesses = witness_violations(&graph, schema)?;
103        if witnesses.is_empty() {
104            return Ok(FixpointResult {
105                graph,
106                applied,
107                iterations,
108                remaining: 0,
109            });
110        }
111        if iterations >= max_iterations {
112            return Ok(FixpointResult {
113                remaining: witnesses.len(),
114                graph,
115                applied,
116                iterations,
117            });
118        }
119
120        let mut progressed = false;
121        for fw in &witnesses {
122            let tree = synthesize(&schema.arena, fw);
123            if let Some(sol) = enumerate_repair(&tree, &graph, schema, opts)? {
124                graph = apply(&graph, &sol.delta);
125                applied.push(sol.delta);
126                progressed = true;
127                break; // re-witness after each applied repair
128            }
129        }
130        if !progressed {
131            return Ok(FixpointResult {
132                remaining: witnesses.len(),
133                graph,
134                applied,
135                iterations,
136            });
137        }
138        iterations += 1;
139    }
140}
141
142#[derive(Clone, Copy)]
143enum Choice {
144    Any(usize),
145    Repeat(u64),
146}
147
148fn index_choices(tree: &RepairTree, out: &mut HashMap<NodeId, Choice>) {
149    match tree {
150        RepairTree::Any { id, children } => {
151            out.insert(*id, Choice::Any(children.len()));
152            for c in children {
153                index_choices(c, out);
154            }
155        }
156        RepairTree::Repeat { id, body, min, .. } => {
157            // Minimal repair: take exactly `min` instances.
158            out.insert(*id, Choice::Repeat(*min));
159            index_choices(body, out);
160        }
161        RepairTree::All { children, .. } => {
162            for c in children {
163                index_choices(c, out);
164            }
165        }
166        RepairTree::Noop(_) | RepairTree::Blocked(..) | RepairTree::Edits { .. } => {}
167    }
168}
169
170fn solve(
171    tree: &RepairTree,
172    data: &Graph,
173    schema: &Schema,
174    choices: &HashMap<NodeId, Choice>,
175    plan: Plan,
176    budget: &mut usize,
177    opts: EnumOptions,
178) -> Result<Option<RepairSolution>, NonStratifiable> {
179    if *budget == 0 {
180        return Ok(None);
181    }
182    let inst = instantiate(tree, &plan);
183
184    if let Some(node) = inst.open_choices.first().copied() {
185        return match choices.get(&node) {
186            Some(Choice::Any(n)) => {
187                for i in 0..*n {
188                    let mut p = plan.clone();
189                    p.branch.insert(node, i);
190                    if let Some(sol) = solve(tree, data, schema, choices, p, budget, opts)? {
191                        return Ok(Some(sol));
192                    }
193                }
194                Ok(None)
195            }
196            Some(Choice::Repeat(count)) => {
197                let mut p = plan;
198                p.count.insert(node, *count);
199                solve(tree, data, schema, choices, p, budget, opts)
200            }
201            None => Ok(None),
202        };
203    }
204
205    if let Some((hole, constraint)) = inst.open_holes.first().cloned() {
206        for cand in candidates(&constraint, data, opts.max_candidates) {
207            let mut p = plan.clone();
208            p.binding.insert(hole, cand);
209            if let Some(sol) = solve(tree, data, schema, choices, p, budget, opts)? {
210                return Ok(Some(sol));
211            }
212        }
213        return Ok(None);
214    }
215
216    // Fully resolved: gate it.
217    *budget -= 1;
218    let outcome = gate(data, schema, &inst.delta)?;
219    if outcome.is_progress() {
220        Ok(Some(RepairSolution {
221            plan,
222            delta: inst.delta,
223            outcome,
224        }))
225    } else {
226        Ok(None)
227    }
228}
229
230/// Existing terms in `data` that satisfy a hole's constraint, plus a fresh node
231/// where minting is allowed — truncated to `cap`. The contract places no
232/// requirement on where a binding comes from; this is the reuse-first default.
233pub fn candidates(constraint: &HoleConstraint, data: &Graph, cap: usize) -> Vec<Term> {
234    let mut out = match constraint {
235        HoleConstraint::Const(t) => vec![t.clone()],
236        HoleConstraint::OneOf(v) => v.clone(),
237        HoleConstraint::Fresh => vec![fresh()],
238        HoleConstraint::AnyNode => with_fresh(graph_nodes(data)),
239        HoleConstraint::ConformsTo(_) | HoleConstraint::ConformsToAll(_) => {
240            with_fresh(graph_nodes(data))
241        }
242        // Infinite domains: reuse only — enumeration cannot invent a novel literal.
243        HoleConstraint::Typed(t) => graph_terms(data)
244            .into_iter()
245            .filter(|x| value_type_holds(t, x))
246            .collect(),
247        HoleConstraint::Kind(k) => graph_terms(data)
248            .into_iter()
249            .filter(|x| k.matches(x))
250            .collect(),
251    };
252    out.truncate(cap);
253    out
254}
255
256fn fresh() -> Term {
257    Term::BlankNode(BlankNode::default())
258}
259
260fn with_fresh(mut nodes: Vec<Term>) -> Vec<Term> {
261    nodes.push(fresh());
262    nodes
263}
264
265/// All distinct terms in the graph, ordered for determinism.
266fn graph_terms(data: &Graph) -> Vec<Term> {
267    let mut seen = HashSet::new();
268    let mut out = Vec::new();
269    for t in data.iter() {
270        let s = term_of(t.subject.into_owned());
271        if seen.insert(s.clone()) {
272            out.push(s);
273        }
274        let o = t.object.into_owned();
275        if seen.insert(o.clone()) {
276            out.push(o);
277        }
278    }
279    out.sort_by_key(|t| t.to_string());
280    out
281}
282
283/// Graph terms usable as nodes (not literals).
284fn graph_nodes(data: &Graph) -> Vec<Term> {
285    graph_terms(data)
286        .into_iter()
287        .filter(|t| !matches!(t, Term::Literal(_)))
288        .collect()
289}
290
291#[cfg(test)]
292mod tests {
293    use super::*;
294    use crate::synthesize::synthesize;
295    use crate::witness::witness_violations;
296    use shifty_parse::{load_turtle, parse_turtle};
297
298    const PREFIXES: &str = r#"
299        @prefix sh:  <http://www.w3.org/ns/shacl#> .
300        @prefix ex:  <http://ex/> .
301        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
302    "#;
303
304    /// witness → synthesize → enumerate the first repair for the sole violation.
305    fn repair(ttl: &str) -> (Option<RepairSolution>, Schema, Graph) {
306        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
307        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
308        let ws = witness_violations(&loaded.graph, &parsed.schema).unwrap();
309        assert_eq!(ws.len(), 1);
310        let tree = synthesize(&parsed.schema.arena, &ws[0]);
311        let sol =
312            enumerate_repair(&tree, &loaded.graph, &parsed.schema, EnumOptions::default()).unwrap();
313        (sol, parsed.schema, loaded.graph)
314    }
315
316    #[test]
317    fn min_count_repaired_by_reusing_an_existing_node() {
318        let ttl = format!(
319            "{PREFIXES}
320            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
321                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
322            ex:x ex:other ex:y .
323            "
324        );
325        let (sol, _, _) = repair(&ttl);
326        let sol = sol.expect("a repair exists");
327        assert_eq!(sol.delta.add.len(), 1);
328        assert!(sol.outcome.is_progress());
329    }
330
331    #[test]
332    fn datatype_repaired_by_reusing_an_existing_integer() {
333        let ttl = format!(
334            "{PREFIXES}
335            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
336                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
337            ex:x ex:p \"hello\" .
338            ex:z ex:n 7 .
339            "
340        );
341        let (sol, _, _) = repair(&ttl);
342        let sol = sol.expect("reuse the integer 7");
343        // replace-in-place: delete the bad value, add a good one.
344        assert_eq!(sol.delta.delete.len(), 1);
345        assert_eq!(sol.delta.add.len(), 1);
346    }
347
348    #[test]
349    fn fixpoint_repairs_multiple_violations_to_conformance() {
350        // Two foci each missing an ex:p; reuse can satisfy both.
351        let ttl = format!(
352            "{PREFIXES}
353            ex:S a sh:NodeShape ; sh:targetNode ex:x, ex:y ;
354                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
355            ex:x ex:other ex:n .
356            ex:y ex:other ex:n .
357            "
358        );
359        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
360        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
361        // baseline: two violations.
362        assert_eq!(
363            witness_violations(&loaded.graph, &parsed.schema)
364                .unwrap()
365                .len(),
366            2
367        );
368        let result =
369            repair_to_fixpoint(&loaded.graph, &parsed.schema, EnumOptions::default()).unwrap();
370        assert_eq!(result.remaining, 0, "repaired to conformance");
371        assert_eq!(result.applied.len(), 2, "one repair per focus");
372        // the repaired graph genuinely conforms.
373        assert!(
374            witness_violations(&result.graph, &parsed.schema)
375                .unwrap()
376                .is_empty()
377        );
378    }
379
380    #[test]
381    fn unsatisfiable_when_no_reusable_value_exists() {
382        // datatype integer, but the graph has no integer literal to reuse.
383        let ttl = format!(
384            "{PREFIXES}
385            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
386                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
387            ex:x ex:p \"hello\" .
388            "
389        );
390        let (sol, _, _) = repair(&ttl);
391        assert!(
392            sol.is_none(),
393            "no integer to reuse ⇒ enumeration finds nothing"
394        );
395    }
396}