Skip to main content

shifty_engine/
enumerate.rs

1//! The enumeration reference driver (`docs/07-repair-drivers.md` §4) and the
2//! `candidates` helper (`docs/06-repair.md` §3.3).
3//!
4//! This is the simplest driver: it walks the [`RepairTree`]'s finite choices —
5//! `Any` branches, `Repeat` counts, and per-hole candidate terms drawn from the
6//! data graph (reuse) or minted fresh — and returns the first plan whose `ΔG`
7//! passes the [`gate`]. It is primarily a correctness check on the IR and the
8//! gate; it cannot invent novel literals (those holes are filled only by reuse).
9//!
10//! Like every driver, it decides nothing the library doesn't expose: it only
11//! composes [`instantiate`], [`candidates`], and [`gate`].
12
13use crate::gate::{RepairOutcome, apply, gate};
14use crate::path::term_of;
15use crate::synthesize::synthesize;
16use crate::validate::NonStratifiable;
17use crate::value::value_type_holds;
18use crate::witness::witness_violations;
19use oxrdf::{BlankNode, Graph, Term};
20use shifty_algebra::Schema;
21use shifty_repair::{GraphDelta, HoleConstraint, NodeId, Plan, RepairTree, instantiate};
22use std::collections::{HashMap, HashSet};
23
24/// Knobs for the enumeration search.
25#[derive(Debug, Clone, Copy)]
26pub struct EnumOptions {
27    /// Maximum number of gated candidate `ΔG`s before giving up.
28    pub budget: usize,
29    /// Maximum candidate terms tried per hole (bounds branching).
30    pub max_candidates: usize,
31}
32
33impl Default for EnumOptions {
34    fn default() -> Self {
35        Self {
36            budget: 10_000,
37            max_candidates: 64,
38        }
39    }
40}
41
42/// A repair the enumeration driver found and verified.
43#[derive(Debug, Clone)]
44pub struct RepairSolution {
45    pub plan: Plan,
46    pub delta: GraphDelta,
47    pub outcome: RepairOutcome,
48}
49
50/// Search for the first sound, progress-making repair of `tree` against
51/// `(data, schema)`. `Ok(None)` means none was found within the budget.
52pub fn enumerate_repair(
53    tree: &RepairTree,
54    data: &Graph,
55    schema: &Schema,
56    opts: EnumOptions,
57) -> Result<Option<RepairSolution>, NonStratifiable> {
58    let mut choices = HashMap::new();
59    index_choices(tree, &mut choices);
60    let mut budget = opts.budget;
61    solve(
62        tree,
63        data,
64        schema,
65        &choices,
66        Plan::default(),
67        &mut budget,
68        opts,
69    )
70}
71
72/// The outcome of repairing a graph to a fixpoint.
73#[derive(Debug, Clone)]
74pub struct FixpointResult {
75    /// The repaired data graph (`G` with every applied `ΔG`).
76    pub graph: Graph,
77    /// The deltas applied, in order.
78    pub applied: Vec<GraphDelta>,
79    /// Iterations run (one per applied repair).
80    pub iterations: usize,
81    /// Violations still unrepaired when the loop stopped (0 ⟺ conforms).
82    pub remaining: usize,
83}
84
85/// The reference fixpoint driver (`docs/07-repair-drivers.md` §1): witness →
86/// synthesize → enumerate → gate → apply, re-witnessing after each accepted
87/// repair until the graph conforms or no focus admits a sound, progress-making
88/// repair. Each accepted `ΔG` is gated (introduces nothing) and strictly reduces
89/// the violation count, so the loop terminates.
90pub fn repair_to_fixpoint(
91    data: &Graph,
92    schema: &Schema,
93    opts: EnumOptions,
94) -> Result<FixpointResult, NonStratifiable> {
95    let mut graph = data.clone();
96    let mut applied = Vec::new();
97    let mut iterations = 0usize;
98    // A safety bound; progress guarantees far fewer iterations in practice.
99    let max_iterations = 100_000;
100
101    loop {
102        let witnesses = witness_violations(&graph, schema)?;
103        if witnesses.is_empty() {
104            return Ok(FixpointResult {
105                graph,
106                applied,
107                iterations,
108                remaining: 0,
109            });
110        }
111        if iterations >= max_iterations {
112            return Ok(FixpointResult {
113                remaining: witnesses.len(),
114                graph,
115                applied,
116                iterations,
117            });
118        }
119
120        let mut progressed = false;
121        for fw in &witnesses {
122            let tree = synthesize(&schema.arena, fw);
123            if let Some(sol) = enumerate_repair(&tree, &graph, schema, opts)? {
124                graph = apply(&graph, &sol.delta);
125                applied.push(sol.delta);
126                progressed = true;
127                break; // re-witness after each applied repair
128            }
129        }
130        if !progressed {
131            return Ok(FixpointResult {
132                remaining: witnesses.len(),
133                graph,
134                applied,
135                iterations,
136            });
137        }
138        iterations += 1;
139    }
140}
141
142#[derive(Clone, Copy)]
143enum Choice {
144    Any(usize),
145    Repeat(u64),
146}
147
148fn index_choices(tree: &RepairTree, out: &mut HashMap<NodeId, Choice>) {
149    match tree {
150        RepairTree::Any { id, children } => {
151            out.insert(*id, Choice::Any(children.len()));
152            for c in children {
153                index_choices(c, out);
154            }
155        }
156        RepairTree::Repeat { id, body, min, .. } => {
157            // Minimal repair: take exactly `min` instances.
158            out.insert(*id, Choice::Repeat(*min));
159            index_choices(body, out);
160        }
161        RepairTree::All { children, .. } => {
162            for c in children {
163                index_choices(c, out);
164            }
165        }
166        RepairTree::Noop(_) | RepairTree::Blocked(..) | RepairTree::Edits { .. } => {}
167    }
168}
169
170fn solve(
171    tree: &RepairTree,
172    data: &Graph,
173    schema: &Schema,
174    choices: &HashMap<NodeId, Choice>,
175    plan: Plan,
176    budget: &mut usize,
177    opts: EnumOptions,
178) -> Result<Option<RepairSolution>, NonStratifiable> {
179    if *budget == 0 {
180        return Ok(None);
181    }
182    let inst = instantiate(tree, &plan);
183
184    if let Some(node) = inst.open_choices.first().copied() {
185        return match choices.get(&node) {
186            Some(Choice::Any(n)) => {
187                for i in 0..*n {
188                    let mut p = plan.clone();
189                    p.branch.insert(node, i);
190                    if let Some(sol) = solve(tree, data, schema, choices, p, budget, opts)? {
191                        return Ok(Some(sol));
192                    }
193                }
194                Ok(None)
195            }
196            Some(Choice::Repeat(count)) => {
197                let mut p = plan;
198                p.count.insert(node, *count);
199                solve(tree, data, schema, choices, p, budget, opts)
200            }
201            None => Ok(None),
202        };
203    }
204
205    if let Some((hole, constraint)) = inst.open_holes.first().cloned() {
206        for cand in candidates(&constraint, data, opts.max_candidates) {
207            let mut p = plan.clone();
208            p.binding.insert(hole, cand);
209            if let Some(sol) = solve(tree, data, schema, choices, p, budget, opts)? {
210                return Ok(Some(sol));
211            }
212        }
213        return Ok(None);
214    }
215
216    // Fully resolved: gate it.
217    *budget -= 1;
218    let outcome = gate(data, schema, &inst.delta)?;
219    if outcome.is_progress() {
220        Ok(Some(RepairSolution {
221            plan,
222            delta: inst.delta,
223            outcome,
224        }))
225    } else {
226        Ok(None)
227    }
228}
229
230/// Existing terms in `data` that satisfy a hole's constraint, plus a fresh node
231/// where minting is allowed — truncated to `cap`. The contract places no
232/// requirement on where a binding comes from; this is the reuse-first default.
233pub fn candidates(constraint: &HoleConstraint, data: &Graph, cap: usize) -> Vec<Term> {
234    let mut out = match constraint {
235        HoleConstraint::Const(t) => vec![t.clone()],
236        HoleConstraint::OneOf(v) => v.clone(),
237        HoleConstraint::Fresh => vec![fresh()],
238        HoleConstraint::AnyNode => with_fresh(graph_nodes(data)),
239        HoleConstraint::ConformsTo(_) => with_fresh(graph_nodes(data)),
240        // Infinite domains: reuse only — enumeration cannot invent a novel literal.
241        HoleConstraint::Typed(t) => graph_terms(data)
242            .into_iter()
243            .filter(|x| value_type_holds(t, x))
244            .collect(),
245        HoleConstraint::Kind(k) => graph_terms(data)
246            .into_iter()
247            .filter(|x| k.matches(x))
248            .collect(),
249    };
250    out.truncate(cap);
251    out
252}
253
254fn fresh() -> Term {
255    Term::BlankNode(BlankNode::default())
256}
257
258fn with_fresh(mut nodes: Vec<Term>) -> Vec<Term> {
259    nodes.push(fresh());
260    nodes
261}
262
263/// All distinct terms in the graph, ordered for determinism.
264fn graph_terms(data: &Graph) -> Vec<Term> {
265    let mut seen = HashSet::new();
266    let mut out = Vec::new();
267    for t in data.iter() {
268        let s = term_of(t.subject.into_owned());
269        if seen.insert(s.clone()) {
270            out.push(s);
271        }
272        let o = t.object.into_owned();
273        if seen.insert(o.clone()) {
274            out.push(o);
275        }
276    }
277    out.sort_by_key(|t| t.to_string());
278    out
279}
280
281/// Graph terms usable as nodes (not literals).
282fn graph_nodes(data: &Graph) -> Vec<Term> {
283    graph_terms(data)
284        .into_iter()
285        .filter(|t| !matches!(t, Term::Literal(_)))
286        .collect()
287}
288
289#[cfg(test)]
290mod tests {
291    use super::*;
292    use crate::synthesize::synthesize;
293    use crate::witness::witness_violations;
294    use shifty_parse::{load_turtle, parse_turtle};
295
296    const PREFIXES: &str = r#"
297        @prefix sh:  <http://www.w3.org/ns/shacl#> .
298        @prefix ex:  <http://ex/> .
299        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
300    "#;
301
302    /// witness → synthesize → enumerate the first repair for the sole violation.
303    fn repair(ttl: &str) -> (Option<RepairSolution>, Schema, Graph) {
304        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
305        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
306        let ws = witness_violations(&loaded.graph, &parsed.schema).unwrap();
307        assert_eq!(ws.len(), 1);
308        let tree = synthesize(&parsed.schema.arena, &ws[0]);
309        let sol =
310            enumerate_repair(&tree, &loaded.graph, &parsed.schema, EnumOptions::default()).unwrap();
311        (sol, parsed.schema, loaded.graph)
312    }
313
314    #[test]
315    fn min_count_repaired_by_reusing_an_existing_node() {
316        let ttl = format!(
317            "{PREFIXES}
318            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
319                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
320            ex:x ex:other ex:y .
321            "
322        );
323        let (sol, _, _) = repair(&ttl);
324        let sol = sol.expect("a repair exists");
325        assert_eq!(sol.delta.add.len(), 1);
326        assert!(sol.outcome.is_progress());
327    }
328
329    #[test]
330    fn datatype_repaired_by_reusing_an_existing_integer() {
331        let ttl = format!(
332            "{PREFIXES}
333            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
334                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
335            ex:x ex:p \"hello\" .
336            ex:z ex:n 7 .
337            "
338        );
339        let (sol, _, _) = repair(&ttl);
340        let sol = sol.expect("reuse the integer 7");
341        // replace-in-place: delete the bad value, add a good one.
342        assert_eq!(sol.delta.delete.len(), 1);
343        assert_eq!(sol.delta.add.len(), 1);
344    }
345
346    #[test]
347    fn fixpoint_repairs_multiple_violations_to_conformance() {
348        // Two foci each missing an ex:p; reuse can satisfy both.
349        let ttl = format!(
350            "{PREFIXES}
351            ex:S a sh:NodeShape ; sh:targetNode ex:x, ex:y ;
352                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
353            ex:x ex:other ex:n .
354            ex:y ex:other ex:n .
355            "
356        );
357        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
358        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
359        // baseline: two violations.
360        assert_eq!(
361            witness_violations(&loaded.graph, &parsed.schema)
362                .unwrap()
363                .len(),
364            2
365        );
366        let result =
367            repair_to_fixpoint(&loaded.graph, &parsed.schema, EnumOptions::default()).unwrap();
368        assert_eq!(result.remaining, 0, "repaired to conformance");
369        assert_eq!(result.applied.len(), 2, "one repair per focus");
370        // the repaired graph genuinely conforms.
371        assert!(
372            witness_violations(&result.graph, &parsed.schema)
373                .unwrap()
374                .is_empty()
375        );
376    }
377
378    #[test]
379    fn unsatisfiable_when_no_reusable_value_exists() {
380        // datatype integer, but the graph has no integer literal to reuse.
381        let ttl = format!(
382            "{PREFIXES}
383            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
384                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
385            ex:x ex:p \"hello\" .
386            "
387        );
388        let (sol, _, _) = repair(&ttl);
389        assert!(
390            sol.is_none(),
391            "no integer to reuse ⇒ enumeration finds nothing"
392        );
393    }
394}