Skip to main content

uni_locy/
config.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3use std::time::Duration;
4
5use uni_common::Value;
6
7use crate::neural::{ModelInvocationCache, NeuralClassifier, NeuralProvenanceStore};
8use crate::semiring::ResolvedSemiringConfig;
9use crate::types::SemiringKind;
10
11/// Type alias for the runtime registry mapping a Locy model name to its
12/// concrete classifier implementation. Populated by callers (Python /
13/// Rust application code, TCK steps) before invoking a Locy program that
14/// references neural models.
15pub type ClassifierRegistry = HashMap<String, Arc<dyn NeuralClassifier>>;
16
17/// Configuration error raised by [`LocyConfig::resolve`].
18#[derive(Debug, Clone, PartialEq)]
19pub enum ConfigError {
20    /// `exact_probability = true` combined with a non-`AddMultProb`
21    /// semiring is incoherent: weighted model counting requires the
22    /// independence semiring as its base. Most commonly hit by setting
23    /// `semiring = MaxMinProb` while `exact_probability` remains on.
24    IncoherentSemiring {
25        semiring: SemiringKind,
26        message: &'static str,
27    },
28}
29
30impl std::fmt::Display for ConfigError {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        match self {
33            Self::IncoherentSemiring { semiring, message } => {
34                write!(f, "incoherent semiring {semiring:?}: {message}")
35            }
36        }
37    }
38}
39
40impl std::error::Error for ConfigError {}
41
42/// Configuration for the Locy orchestrator.
43#[derive(Debug, Clone)]
44pub struct LocyConfig {
45    /// Maximum fixpoint iterations per recursive stratum.
46    pub max_iterations: usize,
47    /// Overall evaluation timeout.
48    pub timeout: Duration,
49    /// When `false` (default), an evaluation that exceeds `timeout` or
50    /// `max_iterations` returns [`UniError::LocyIncomplete`] rather than
51    /// silently yielding partial facts. Set to `true` for anytime / best-effort
52    /// semantics: the partial `LocyResult` is returned (`Ok`) with its
53    /// `incomplete` diagnostics populated, and the caller is responsible for
54    /// checking them.
55    ///
56    /// [`UniError::LocyIncomplete`]: uni_common::UniError::LocyIncomplete
57    pub allow_partial: bool,
58    /// Maximum recursion depth for EXPLAIN derivation trees.
59    pub max_explain_depth: usize,
60    /// Maximum recursion depth for SLG resolution.
61    pub max_slg_depth: usize,
62    /// Maximum candidate modifications to generate during ABDUCE.
63    pub max_abduce_candidates: usize,
64    /// Maximum validated results to return from ABDUCE.
65    pub max_abduce_results: usize,
66    /// Maximum bytes of derived facts to hold in memory per relation.
67    pub max_derived_bytes: usize,
68    /// When true, BEST BY applies a secondary sort on remaining columns for
69    /// deterministic tie-breaking. Set to false for non-deterministic (faster)
70    /// selection.
71    pub deterministic_best_by: bool,
72    /// When true, MNOR/MPROD reject values outside \[0,1\] with an error instead
73    /// of clamping. When false (default), values are clamped silently.
74    pub strict_probability_domain: bool,
75    /// Underflow threshold for MPROD log-space switch (spec §5.3).
76    ///
77    /// When the running product drops below this value, `product_f64`
78    /// switches to log-space accumulation to prevent floating-point
79    /// underflow.
80    pub probability_epsilon: f64,
81    /// When true, groups flagged with shared probabilistic dependencies use
82    /// exact BDD-based probability computation instead of the independence
83    /// assumption (MNOR/MPROD). Defaults to false (independence mode).
84    pub exact_probability: bool,
85    /// Maximum number of BDD variables (unique base facts) allowed per
86    /// aggregation group. If a group exceeds this limit, it falls back to
87    /// the independence-mode result and emits a `BddLimitExceeded` warning.
88    pub max_bdd_variables: usize,
89    /// Top-k proof filtering (Scallop, Huang et al. 2021): retain at most k
90    /// proofs per derived fact, ranked by proof probability. When 0 (default),
91    /// all proofs are retained (unlimited mode).
92    pub top_k_proofs: usize,
93    /// Override `top_k_proofs` during training. When `Some(k)`, training
94    /// evaluation uses this value instead of `top_k_proofs`. When `None`,
95    /// `top_k_proofs` is used for both training and inference.
96    pub top_k_proofs_training: Option<usize>,
97    /// Active probability semiring (rollout decision D-7). Defaults to
98    /// [`SemiringKind::AddMultProb`] which matches the Phase 1/2 noisy-OR
99    /// and product behavior byte-for-byte. Opting into
100    /// [`SemiringKind::MaxMinProb`] (Viterbi / fuzzy) triggers a
101    /// non-suppressible `FuzzyNotProbabilistic` warning on PROB-bearing
102    /// rules (D-9).
103    ///
104    /// When `exact_probability` is `true`, [`LocyConfig::resolve`]
105    /// promotes `AddMultProb` to [`SemiringKind::BddExact`] for the
106    /// duration of evaluation. A `MaxMinProb` + `exact_probability` combo
107    /// is rejected as incoherent.
108    pub semiring: SemiringKind,
109    /// When `true`, the compiler accepts `CREATE MODEL` statements.
110    /// Default flipped from `false` to `true` in the Phase C
111    /// gate-closure cycle — the neural stack is GA. The flag remains
112    /// settable for explicit opt-out: setting it to `false`
113    /// re-imposes the original Phase B compile-time rejection
114    /// (`LocyCompileError::NeuralPreviewDisabled`) for environments
115    /// that need to disable the feature surface entirely. The
116    /// grammar always parses neural syntax regardless — the gate
117    /// lives at compile time.
118    pub neural_predicates_preview: bool,
119    /// Phase B Slice 3 runtime registry mapping a model name (the
120    /// identifier from `CREATE MODEL <name> AS ...`) to its concrete
121    /// [`NeuralClassifier`]. Rule bodies that invoke `<name>(args)` will
122    /// dispatch through this map at runtime. Models referenced by name
123    /// in a rule body but absent from the registry produce a runtime
124    /// error (`UnknownNeuralModel`) at the first invocation attempt;
125    /// programs that declare models but never invoke them at runtime
126    /// don't need entries here.
127    ///
128    /// Defaults to an empty map. The registry is held by `Arc` so the
129    /// same shared map can flow through fixpoint executor clones without
130    /// per-iteration deep-copy.
131    pub classifier_registry: ClassifierRegistry,
132    /// Phase B Slice 4 (post-Slice-3 follow-up): optional shared
133    /// memoization cache for neural classifier outputs. When `None`
134    /// (default), the runtime materializes a fresh cache per query
135    /// sized to `classifier_cache_max`. When `Some`, the cache is
136    /// reused across queries — useful for batch evaluation pipelines.
137    pub classifier_cache: Option<Arc<ModelInvocationCache>>,
138    /// Maximum entries in the per-query memoization cache. Default
139    /// 100_000. Naive eviction (clear when full) — see
140    /// `ModelInvocationCache::insert` doc.
141    pub classifier_cache_max: usize,
142    /// Phase C B1-B3 follow-up: optional side-channel store
143    /// recording (raw, calibrated, confidence_band) per
144    /// classifier invocation. When `Some`, the runtime
145    /// `apply_model_invocations` writes one record per
146    /// (model, input_hash); EXPLAIN reads from this store to
147    /// populate `NeuralProvenance` entries on derivations.
148    /// When `None`, EXPLAIN's neural_calls fall back to the
149    /// (raw-only) yield-alias lookup path. The store flows
150    /// through alongside `classifier_cache`.
151    pub classifier_provenance_store: Option<Arc<NeuralProvenanceStore>>,
152    /// Parameters bound to `$name` references inside rules and QUERY/RETURN
153    /// expressions.  Equivalent to the parameter map passed to `db.query()`.
154    /// Keys are parameter names **without** the leading `$` (e.g., `"agent_id"`
155    /// binds `$agent_id`).
156    pub params: HashMap<String, Value>,
157}
158
159impl LocyConfig {
160    /// Resolve scattered probability-related fields into a single
161    /// [`ResolvedSemiringConfig`] for threading through the planner and
162    /// executors. Performs the `exact_probability` → `BddExact`
163    /// promotion (decision D-7) and rejects incoherent combinations
164    /// (decision D-9).
165    pub fn resolve(&self) -> Result<ResolvedSemiringConfig, ConfigError> {
166        let kind = match (self.semiring, self.exact_probability) {
167            (SemiringKind::AddMultProb, true) => SemiringKind::BddExact,
168            (SemiringKind::AddMultProb, false) => SemiringKind::AddMultProb,
169            (SemiringKind::MaxMinProb, true) => {
170                return Err(ConfigError::IncoherentSemiring {
171                    semiring: SemiringKind::MaxMinProb,
172                    message: "MaxMinProb cannot be combined with exact_probability=true; \
173                              weighted model counting requires the AddMultProb semiring",
174                });
175            }
176            (SemiringKind::MaxMinProb, false) => SemiringKind::MaxMinProb,
177            (SemiringKind::BddExact, _) => SemiringKind::BddExact,
178            // Phase C C0: TopKProofs is its own correction story
179            // (per-tag DNF inclusion-exclusion) and is incoherent
180            // with `exact_probability` — that knob means "promote to
181            // whole-group BDD" and conflicts with the per-tag form.
182            (SemiringKind::TopKProofs { k: 0 }, _) => {
183                return Err(ConfigError::IncoherentSemiring {
184                    semiring: SemiringKind::TopKProofs { k: 0 },
185                    message: "TopKProofs requires k > 0; \
186                              k=0 would retain no proofs and reduce to ⊥",
187                });
188            }
189            (SemiringKind::TopKProofs { .. }, true) => {
190                return Err(ConfigError::IncoherentSemiring {
191                    semiring: self.semiring,
192                    message: "TopKProofs cannot be combined with exact_probability=true; \
193                              TopKProofs already provides its own correction via \
194                              per-tag inclusion-exclusion (impl plan §3.0)",
195                });
196            }
197            (SemiringKind::TopKProofs { k }, false) => SemiringKind::TopKProofs { k },
198        };
199        Ok(ResolvedSemiringConfig {
200            kind,
201            strict_probability_domain: self.strict_probability_domain,
202            probability_epsilon: self.probability_epsilon,
203            max_bdd_variables: self.max_bdd_variables,
204        })
205    }
206}
207
208impl Default for LocyConfig {
209    fn default() -> Self {
210        Self {
211            max_iterations: 1000,
212            timeout: Duration::from_secs(300),
213            allow_partial: false,
214            max_explain_depth: 100,
215            max_slg_depth: 1000,
216            max_abduce_candidates: 20,
217            max_abduce_results: 10,
218            max_derived_bytes: 256 * 1024 * 1024,
219            deterministic_best_by: true,
220            strict_probability_domain: false,
221            probability_epsilon: 1e-15,
222            exact_probability: false,
223            max_bdd_variables: 1000,
224            top_k_proofs: 0,
225            top_k_proofs_training: None,
226            semiring: SemiringKind::AddMultProb,
227            // Phase C gate-closure: neural stack is GA; default flag
228            // flipped from false to true. Set to false for explicit
229            // opt-out (re-imposes the Phase B compile-time rejection).
230            neural_predicates_preview: true,
231            classifier_registry: HashMap::new(),
232            classifier_cache: None,
233            classifier_cache_max: 100_000,
234            classifier_provenance_store: None,
235            params: HashMap::new(),
236        }
237    }
238}
239
240#[cfg(test)]
241mod tests {
242    use super::*;
243
244    #[test]
245    fn resolve_default_is_add_mult_prob() {
246        let cfg = LocyConfig::default();
247        let resolved = cfg.resolve().unwrap();
248        assert_eq!(resolved.kind, SemiringKind::AddMultProb);
249    }
250
251    #[test]
252    fn resolve_promotes_exact_probability_to_bdd() {
253        let cfg = LocyConfig {
254            exact_probability: true,
255            ..Default::default()
256        };
257        let resolved = cfg.resolve().unwrap();
258        assert_eq!(resolved.kind, SemiringKind::BddExact);
259    }
260
261    #[test]
262    fn resolve_rejects_maxmin_plus_exact() {
263        let cfg = LocyConfig {
264            exact_probability: true,
265            semiring: SemiringKind::MaxMinProb,
266            ..Default::default()
267        };
268        assert!(matches!(
269            cfg.resolve(),
270            Err(ConfigError::IncoherentSemiring { .. })
271        ));
272    }
273
274    #[test]
275    fn resolve_passes_maxmin_through() {
276        let cfg = LocyConfig {
277            semiring: SemiringKind::MaxMinProb,
278            ..Default::default()
279        };
280        let resolved = cfg.resolve().unwrap();
281        assert_eq!(resolved.kind, SemiringKind::MaxMinProb);
282    }
283
284    #[test]
285    fn resolve_passes_topkproofs_through() {
286        let cfg = LocyConfig {
287            semiring: SemiringKind::TopKProofs { k: 4 },
288            ..Default::default()
289        };
290        let resolved = cfg.resolve().unwrap();
291        assert_eq!(resolved.kind, SemiringKind::TopKProofs { k: 4 });
292    }
293
294    #[test]
295    fn resolve_rejects_topkproofs_k_zero() {
296        let cfg = LocyConfig {
297            semiring: SemiringKind::TopKProofs { k: 0 },
298            ..Default::default()
299        };
300        assert!(matches!(
301            cfg.resolve(),
302            Err(ConfigError::IncoherentSemiring { .. })
303        ));
304    }
305
306    #[test]
307    fn resolve_rejects_topkproofs_plus_exact_probability() {
308        let cfg = LocyConfig {
309            semiring: SemiringKind::TopKProofs { k: 4 },
310            exact_probability: true,
311            ..Default::default()
312        };
313        assert!(matches!(
314            cfg.resolve(),
315            Err(ConfigError::IncoherentSemiring { .. })
316        ));
317    }
318}