Skip to main content

shifty_engine/
enumerate.rs

1//! The enumeration reference driver (`docs/07-repair-drivers.md` §4) and the
2//! `candidates` helper (`docs/06-repair.md` §3.3).
3//!
4//! This is the simplest driver: it walks the [`RepairTree`]'s finite choices —
5//! `Any` branches, `Repeat` counts, and per-hole candidate terms drawn from the
6//! data graph (reuse) or minted fresh — and returns the first plan whose `ΔG`
7//! passes the [`gate`]. It is primarily a correctness check on the
8//! IR and the gate; it cannot invent novel literals (those holes are filled only
9//! by reuse).
10//!
11//! Like every driver, it decides nothing the library doesn't expose: it only
12//! composes [`instantiate`], [`candidates`], and [`gate`].
13
14use crate::gate::{RepairOutcome, apply, gate};
15use crate::path::term_of;
16use crate::synthesize::synthesize;
17use crate::validate::NonStratifiable;
18use crate::value::value_type_holds;
19use crate::witness::witness_violations;
20use oxrdf::{BlankNode, Graph, Term};
21use shifty_algebra::Schema;
22use shifty_repair::{GraphDelta, HoleConstraint, NodeId, Plan, RepairTree, instantiate};
23use std::collections::{HashMap, HashSet};
24
25/// Knobs for the enumeration search.
26#[derive(Debug, Clone, Copy)]
27pub struct EnumOptions {
28    /// Maximum number of gated candidate `ΔG`s before giving up.
29    pub budget: usize,
30    /// Maximum candidate terms tried per hole (bounds branching).
31    pub max_candidates: usize,
32}
33
34impl Default for EnumOptions {
35    fn default() -> Self {
36        Self {
37            budget: 10_000,
38            max_candidates: 64,
39        }
40    }
41}
42
43/// A repair the enumeration driver found and verified.
44#[derive(Debug, Clone)]
45pub struct RepairSolution {
46    pub plan: Plan,
47    pub delta: GraphDelta,
48    pub outcome: RepairOutcome,
49}
50
51/// Search for the first sound, progress-making repair of `tree`. Reuse
52/// candidates are drawn from `data`, while gating is evaluated against `context`
53/// (which should contain `data`; pass `data` again when there is no separate
54/// shapes graph) — so a candidate typed with a subclass of a required class
55/// clears the gate. The enumeration dual of [`crate::validate_with_context`].
56pub fn enumerate_repair(
57    tree: &RepairTree,
58    data: &Graph,
59    context: &Graph,
60    schema: &Schema,
61    opts: EnumOptions,
62) -> Result<Option<RepairSolution>, NonStratifiable> {
63    let mut choices = HashMap::new();
64    index_choices(tree, &mut choices);
65    let mut budget = opts.budget;
66    solve(
67        tree,
68        data,
69        context,
70        schema,
71        &choices,
72        Plan::default(),
73        &mut budget,
74        opts,
75    )
76}
77
78/// The outcome of repairing a graph to a fixpoint.
79#[derive(Debug, Clone)]
80pub struct FixpointResult {
81    /// The repaired data graph (`G` with every applied `ΔG`).
82    pub graph: Graph,
83    /// The deltas applied, in order.
84    pub applied: Vec<GraphDelta>,
85    /// Iterations run (one per applied repair).
86    pub iterations: usize,
87    /// Violations still unrepaired when the loop stopped (0 ⟺ conforms).
88    pub remaining: usize,
89}
90
91/// The reference fixpoint driver (`docs/07-repair-drivers.md` §1): witness →
92/// synthesize → enumerate → gate → apply, re-witnessing after each accepted
93/// repair until the graph conforms or no focus admits a sound, progress-making
94/// repair. Each accepted `ΔG` is gated (introduces nothing) and strictly reduces
95/// the violation count, so the loop terminates.
96///
97/// Witnessing and gating are evaluated against `context` (which should contain
98/// `data`; pass `data` again when there is no separate shapes graph) while focus
99/// and the emitted graph stay the data graph. Each accepted `ΔG` is applied to
100/// both graphs so later iterations see `(data ⊕ …) ∪ shapes` for evaluation. The
101/// returned `graph` is the repaired *data* graph — the shapes/context triples are
102/// not emitted.
103pub fn repair_to_fixpoint(
104    data: &Graph,
105    context: &Graph,
106    schema: &Schema,
107    opts: EnumOptions,
108) -> Result<FixpointResult, NonStratifiable> {
109    let mut graph = data.clone();
110    let mut context = context.clone();
111    let mut applied = Vec::new();
112    let mut iterations = 0usize;
113    // A safety bound; progress guarantees far fewer iterations in practice.
114    let max_iterations = 100_000;
115
116    loop {
117        let witnesses = witness_violations(&graph, &context, schema)?;
118        if witnesses.is_empty() {
119            return Ok(FixpointResult {
120                graph,
121                applied,
122                iterations,
123                remaining: 0,
124            });
125        }
126        if iterations >= max_iterations {
127            return Ok(FixpointResult {
128                remaining: witnesses.len(),
129                graph,
130                applied,
131                iterations,
132            });
133        }
134
135        let mut progressed = false;
136        for fw in &witnesses {
137            let tree = synthesize(&schema.arena, fw);
138            if let Some(sol) = enumerate_repair(&tree, &graph, &context, schema, opts)? {
139                graph = apply(&graph, &sol.delta);
140                context = apply(&context, &sol.delta);
141                applied.push(sol.delta);
142                progressed = true;
143                break; // re-witness after each applied repair
144            }
145        }
146        if !progressed {
147            return Ok(FixpointResult {
148                remaining: witnesses.len(),
149                graph,
150                applied,
151                iterations,
152            });
153        }
154        iterations += 1;
155    }
156}
157
158#[derive(Clone, Copy)]
159enum Choice {
160    Any(usize),
161    Repeat(u64),
162}
163
164fn index_choices(tree: &RepairTree, out: &mut HashMap<NodeId, Choice>) {
165    match tree {
166        RepairTree::Any { id, children } => {
167            out.insert(*id, Choice::Any(children.len()));
168            for c in children {
169                index_choices(c, out);
170            }
171        }
172        RepairTree::Repeat { id, body, min, .. } => {
173            // Minimal repair: take exactly `min` instances.
174            out.insert(*id, Choice::Repeat(*min));
175            index_choices(body, out);
176        }
177        RepairTree::All { children, .. } => {
178            for c in children {
179                index_choices(c, out);
180            }
181        }
182        RepairTree::Noop(_) | RepairTree::Blocked(..) | RepairTree::Edits { .. } => {}
183    }
184}
185
186#[allow(clippy::too_many_arguments)]
187fn solve(
188    tree: &RepairTree,
189    data: &Graph,
190    context: &Graph,
191    schema: &Schema,
192    choices: &HashMap<NodeId, Choice>,
193    plan: Plan,
194    budget: &mut usize,
195    opts: EnumOptions,
196) -> Result<Option<RepairSolution>, NonStratifiable> {
197    if *budget == 0 {
198        return Ok(None);
199    }
200    let inst = instantiate(tree, &plan);
201
202    if let Some(node) = inst.open_choices.first().copied() {
203        return match choices.get(&node) {
204            Some(Choice::Any(n)) => {
205                for i in 0..*n {
206                    let mut p = plan.clone();
207                    p.branch.insert(node, i);
208                    if let Some(sol) = solve(tree, data, context, schema, choices, p, budget, opts)?
209                    {
210                        return Ok(Some(sol));
211                    }
212                }
213                Ok(None)
214            }
215            Some(Choice::Repeat(count)) => {
216                let mut p = plan;
217                p.count.insert(node, *count);
218                solve(tree, data, context, schema, choices, p, budget, opts)
219            }
220            None => Ok(None),
221        };
222    }
223
224    if let Some((hole, constraint)) = inst.open_holes.first().cloned() {
225        for cand in candidates(&constraint, data, opts.max_candidates) {
226            let mut p = plan.clone();
227            p.binding.insert(hole, cand);
228            if let Some(sol) = solve(tree, data, context, schema, choices, p, budget, opts)? {
229                return Ok(Some(sol));
230            }
231        }
232        return Ok(None);
233    }
234
235    // Fully resolved: gate it.
236    *budget -= 1;
237    let outcome = gate(data, context, schema, &inst.delta)?;
238    if outcome.is_progress() {
239        Ok(Some(RepairSolution {
240            plan,
241            delta: inst.delta,
242            outcome,
243        }))
244    } else {
245        Ok(None)
246    }
247}
248
249/// Existing terms in `data` that satisfy a hole's constraint, plus a fresh node
250/// where minting is allowed — truncated to `cap`. The contract places no
251/// requirement on where a binding comes from; this is the reuse-first default.
252pub fn candidates(constraint: &HoleConstraint, data: &Graph, cap: usize) -> Vec<Term> {
253    let mut out = match constraint {
254        HoleConstraint::Const(t) => vec![t.clone()],
255        HoleConstraint::OneOf(v) => v.clone(),
256        HoleConstraint::Fresh => vec![fresh()],
257        HoleConstraint::AnyNode => with_fresh(graph_nodes(data)),
258        HoleConstraint::ConformsTo(_) | HoleConstraint::ConformsToAll(_) => {
259            with_fresh(graph_nodes(data))
260        }
261        // Infinite domains: reuse only — enumeration cannot invent a novel literal.
262        HoleConstraint::Typed(t) => graph_terms(data)
263            .into_iter()
264            .filter(|x| value_type_holds(t, x))
265            .collect(),
266        HoleConstraint::Kind(k) => graph_terms(data)
267            .into_iter()
268            .filter(|x| k.matches(x))
269            .collect(),
270    };
271    out.truncate(cap);
272    out
273}
274
275fn fresh() -> Term {
276    Term::BlankNode(BlankNode::default())
277}
278
279fn with_fresh(mut nodes: Vec<Term>) -> Vec<Term> {
280    nodes.push(fresh());
281    nodes
282}
283
284/// All distinct terms in the graph, ordered for determinism.
285fn graph_terms(data: &Graph) -> Vec<Term> {
286    let mut seen = HashSet::new();
287    let mut out = Vec::new();
288    for t in data.iter() {
289        let s = term_of(t.subject.into_owned());
290        if seen.insert(s.clone()) {
291            out.push(s);
292        }
293        let o = t.object.into_owned();
294        if seen.insert(o.clone()) {
295            out.push(o);
296        }
297    }
298    out.sort_by_key(|t| t.to_string());
299    out
300}
301
302/// Graph terms usable as nodes (not literals).
303fn graph_nodes(data: &Graph) -> Vec<Term> {
304    graph_terms(data)
305        .into_iter()
306        .filter(|t| !matches!(t, Term::Literal(_)))
307        .collect()
308}
309
310#[cfg(test)]
311mod tests {
312    use super::*;
313    use crate::synthesize::synthesize;
314    use crate::witness::witness_violations;
315    use shifty_parse::{load_turtle, parse_turtle};
316
317    const PREFIXES: &str = r#"
318        @prefix sh:  <http://www.w3.org/ns/shacl#> .
319        @prefix ex:  <http://ex/> .
320        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
321    "#;
322
323    /// witness → synthesize → enumerate the first repair for the sole violation.
324    fn repair(ttl: &str) -> (Option<RepairSolution>, Schema, Graph) {
325        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
326        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
327        let ws = witness_violations(&loaded.graph, &loaded.graph, &parsed.schema).unwrap();
328        assert_eq!(ws.len(), 1);
329        let tree = synthesize(&parsed.schema.arena, &ws[0]);
330        let sol = enumerate_repair(
331            &tree,
332            &loaded.graph,
333            &loaded.graph,
334            &parsed.schema,
335            EnumOptions::default(),
336        )
337        .unwrap();
338        (sol, parsed.schema, loaded.graph)
339    }
340
341    #[test]
342    fn min_count_repaired_by_reusing_an_existing_node() {
343        let ttl = format!(
344            "{PREFIXES}
345            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
346                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
347            ex:x ex:other ex:y .
348            "
349        );
350        let (sol, _, _) = repair(&ttl);
351        let sol = sol.expect("a repair exists");
352        assert_eq!(sol.delta.add.len(), 1);
353        assert!(sol.outcome.is_progress());
354    }
355
356    #[test]
357    fn datatype_repaired_by_reusing_an_existing_integer() {
358        let ttl = format!(
359            "{PREFIXES}
360            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
361                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
362            ex:x ex:p \"hello\" .
363            ex:z ex:n 7 .
364            "
365        );
366        let (sol, _, _) = repair(&ttl);
367        let sol = sol.expect("reuse the integer 7");
368        // replace-in-place: delete the bad value, add a good one.
369        assert_eq!(sol.delta.delete.len(), 1);
370        assert_eq!(sol.delta.add.len(), 1);
371    }
372
373    #[test]
374    fn fixpoint_repairs_multiple_violations_to_conformance() {
375        // Two foci each missing an ex:p; reuse can satisfy both.
376        let ttl = format!(
377            "{PREFIXES}
378            ex:S a sh:NodeShape ; sh:targetNode ex:x, ex:y ;
379                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
380            ex:x ex:other ex:n .
381            ex:y ex:other ex:n .
382            "
383        );
384        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
385        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
386        // baseline: two violations.
387        assert_eq!(
388            witness_violations(&loaded.graph, &loaded.graph, &parsed.schema)
389                .unwrap()
390                .len(),
391            2
392        );
393        let result = repair_to_fixpoint(
394            &loaded.graph,
395            &loaded.graph,
396            &parsed.schema,
397            EnumOptions::default(),
398        )
399        .unwrap();
400        assert_eq!(result.remaining, 0, "repaired to conformance");
401        assert_eq!(result.applied.len(), 2, "one repair per focus");
402        // the repaired graph genuinely conforms.
403        assert!(
404            witness_violations(&result.graph, &result.graph, &parsed.schema)
405                .unwrap()
406                .is_empty()
407        );
408    }
409
410    #[test]
411    fn unsatisfiable_when_no_reusable_value_exists() {
412        // datatype integer, but the graph has no integer literal to reuse.
413        let ttl = format!(
414            "{PREFIXES}
415            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
416                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
417            ex:x ex:p \"hello\" .
418            "
419        );
420        let (sol, _, _) = repair(&ttl);
421        assert!(
422            sol.is_none(),
423            "no integer to reuse ⇒ enumeration finds nothing"
424        );
425    }
426}