uni_locy/config.rs
1use std::collections::HashMap;
2use std::sync::Arc;
3use std::time::Duration;
4
5use uni_common::Value;
6
7use crate::neural::{ModelInvocationCache, NeuralClassifier, NeuralProvenanceStore};
8use crate::semiring::ResolvedSemiringConfig;
9use crate::types::SemiringKind;
10
11/// Type alias for the runtime registry mapping a Locy model name to its
12/// concrete classifier implementation. Populated by callers (Python /
13/// Rust application code, TCK steps) before invoking a Locy program that
14/// references neural models.
15pub type ClassifierRegistry = HashMap<String, Arc<dyn NeuralClassifier>>;
16
17/// Configuration error raised by [`LocyConfig::resolve`].
18#[derive(Debug, Clone, PartialEq)]
19pub enum ConfigError {
20 /// `exact_probability = true` combined with a non-`AddMultProb`
21 /// semiring is incoherent: weighted model counting requires the
22 /// independence semiring as its base. Most commonly hit by setting
23 /// `semiring = MaxMinProb` while `exact_probability` remains on.
24 IncoherentSemiring {
25 semiring: SemiringKind,
26 message: &'static str,
27 },
28}
29
30impl std::fmt::Display for ConfigError {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 match self {
33 Self::IncoherentSemiring { semiring, message } => {
34 write!(f, "incoherent semiring {semiring:?}: {message}")
35 }
36 }
37 }
38}
39
40impl std::error::Error for ConfigError {}
41
42/// Configuration for the Locy orchestrator.
43#[derive(Debug, Clone)]
44pub struct LocyConfig {
45 /// Maximum fixpoint iterations per recursive stratum.
46 pub max_iterations: usize,
47 /// Overall evaluation timeout.
48 pub timeout: Duration,
49 /// When `false` (default), an evaluation that exceeds `timeout` or
50 /// `max_iterations` returns [`UniError::LocyIncomplete`] rather than
51 /// silently yielding partial facts. Set to `true` for anytime / best-effort
52 /// semantics: the partial `LocyResult` is returned (`Ok`) with its
53 /// `incomplete` diagnostics populated, and the caller is responsible for
54 /// checking them.
55 ///
56 /// [`UniError::LocyIncomplete`]: uni_common::UniError::LocyIncomplete
57 pub allow_partial: bool,
58 /// Maximum recursion depth for EXPLAIN derivation trees.
59 pub max_explain_depth: usize,
60 /// Maximum recursion depth for SLG resolution.
61 pub max_slg_depth: usize,
62 /// Maximum candidate modifications to generate during ABDUCE.
63 pub max_abduce_candidates: usize,
64 /// Maximum validated results to return from ABDUCE.
65 pub max_abduce_results: usize,
66 /// Maximum bytes of derived facts to hold in memory per relation.
67 pub max_derived_bytes: usize,
68 /// When true, BEST BY applies a secondary sort on remaining columns for
69 /// deterministic tie-breaking. Set to false for non-deterministic (faster)
70 /// selection.
71 pub deterministic_best_by: bool,
72 /// When true, MNOR/MPROD reject values outside \[0,1\] with an error instead
73 /// of clamping. When false (default), values are clamped silently.
74 pub strict_probability_domain: bool,
75 /// Underflow threshold for MPROD log-space switch (spec §5.3).
76 ///
77 /// When the running product drops below this value, `product_f64`
78 /// switches to log-space accumulation to prevent floating-point
79 /// underflow.
80 pub probability_epsilon: f64,
81 /// When true, groups flagged with shared probabilistic dependencies use
82 /// exact BDD-based probability computation instead of the independence
83 /// assumption (MNOR/MPROD). Defaults to false (independence mode).
84 pub exact_probability: bool,
85 /// Maximum number of BDD variables (unique base facts) allowed per
86 /// aggregation group. If a group exceeds this limit, it falls back to
87 /// the independence-mode result and emits a `BddLimitExceeded` warning.
88 pub max_bdd_variables: usize,
89 /// Top-k proof filtering (Scallop, Huang et al. 2021): retain at most k
90 /// proofs per derived fact, ranked by proof probability. When 0 (default),
91 /// all proofs are retained (unlimited mode).
92 pub top_k_proofs: usize,
93 /// Override `top_k_proofs` during training. When `Some(k)`, training
94 /// evaluation uses this value instead of `top_k_proofs`. When `None`,
95 /// `top_k_proofs` is used for both training and inference.
96 pub top_k_proofs_training: Option<usize>,
97 /// Active probability semiring (rollout decision D-7). Defaults to
98 /// [`SemiringKind::AddMultProb`] which matches the Phase 1/2 noisy-OR
99 /// and product behavior byte-for-byte. Opting into
100 /// [`SemiringKind::MaxMinProb`] (Viterbi / fuzzy) triggers a
101 /// non-suppressible `FuzzyNotProbabilistic` warning on PROB-bearing
102 /// rules (D-9).
103 ///
104 /// When `exact_probability` is `true`, [`LocyConfig::resolve`]
105 /// promotes `AddMultProb` to [`SemiringKind::BddExact`] for the
106 /// duration of evaluation. A `MaxMinProb` + `exact_probability` combo
107 /// is rejected as incoherent.
108 pub semiring: SemiringKind,
109 /// When `true`, the compiler accepts `CREATE MODEL` statements.
110 /// Default flipped from `false` to `true` in the Phase C
111 /// gate-closure cycle — the neural stack is GA. The flag remains
112 /// settable for explicit opt-out: setting it to `false`
113 /// re-imposes the original Phase B compile-time rejection
114 /// (`LocyCompileError::NeuralPreviewDisabled`) for environments
115 /// that need to disable the feature surface entirely. The
116 /// grammar always parses neural syntax regardless — the gate
117 /// lives at compile time.
118 pub neural_predicates_preview: bool,
119 /// Phase B Slice 3 runtime registry mapping a model name (the
120 /// identifier from `CREATE MODEL <name> AS ...`) to its concrete
121 /// [`NeuralClassifier`]. Rule bodies that invoke `<name>(args)` will
122 /// dispatch through this map at runtime. Models referenced by name
123 /// in a rule body but absent from the registry produce a runtime
124 /// error (`UnknownNeuralModel`) at the first invocation attempt;
125 /// programs that declare models but never invoke them at runtime
126 /// don't need entries here.
127 ///
128 /// Defaults to an empty map. The registry is held by `Arc` so the
129 /// same shared map can flow through fixpoint executor clones without
130 /// per-iteration deep-copy.
131 pub classifier_registry: ClassifierRegistry,
132 /// Phase B Slice 4 (post-Slice-3 follow-up): optional shared
133 /// memoization cache for neural classifier outputs. When `None`
134 /// (default), the runtime materializes a fresh cache per query
135 /// sized to `classifier_cache_max`. When `Some`, the cache is
136 /// reused across queries — useful for batch evaluation pipelines.
137 pub classifier_cache: Option<Arc<ModelInvocationCache>>,
138 /// Maximum entries in the per-query memoization cache. Default
139 /// 100_000. Naive eviction (clear when full) — see
140 /// `ModelInvocationCache::insert` doc.
141 pub classifier_cache_max: usize,
142 /// Phase C B1-B3 follow-up: optional side-channel store
143 /// recording (raw, calibrated, confidence_band) per
144 /// classifier invocation. When `Some`, the runtime
145 /// `apply_model_invocations` writes one record per
146 /// (model, input_hash); EXPLAIN reads from this store to
147 /// populate `NeuralProvenance` entries on derivations.
148 /// When `None`, EXPLAIN's neural_calls fall back to the
149 /// (raw-only) yield-alias lookup path. The store flows
150 /// through alongside `classifier_cache`.
151 pub classifier_provenance_store: Option<Arc<NeuralProvenanceStore>>,
152 /// Parameters bound to `$name` references inside rules and QUERY/RETURN
153 /// expressions. Equivalent to the parameter map passed to `db.query()`.
154 /// Keys are parameter names **without** the leading `$` (e.g., `"agent_id"`
155 /// binds `$agent_id`).
156 pub params: HashMap<String, Value>,
157}
158
159impl LocyConfig {
160 /// Resolve scattered probability-related fields into a single
161 /// [`ResolvedSemiringConfig`] for threading through the planner and
162 /// executors. Performs the `exact_probability` → `BddExact`
163 /// promotion (decision D-7) and rejects incoherent combinations
164 /// (decision D-9).
165 pub fn resolve(&self) -> Result<ResolvedSemiringConfig, ConfigError> {
166 let kind = match (self.semiring, self.exact_probability) {
167 (SemiringKind::AddMultProb, true) => SemiringKind::BddExact,
168 (SemiringKind::AddMultProb, false) => SemiringKind::AddMultProb,
169 (SemiringKind::MaxMinProb, true) => {
170 return Err(ConfigError::IncoherentSemiring {
171 semiring: SemiringKind::MaxMinProb,
172 message: "MaxMinProb cannot be combined with exact_probability=true; \
173 weighted model counting requires the AddMultProb semiring",
174 });
175 }
176 (SemiringKind::MaxMinProb, false) => SemiringKind::MaxMinProb,
177 (SemiringKind::BddExact, _) => SemiringKind::BddExact,
178 // Phase C C0: TopKProofs is its own correction story
179 // (per-tag DNF inclusion-exclusion) and is incoherent
180 // with `exact_probability` — that knob means "promote to
181 // whole-group BDD" and conflicts with the per-tag form.
182 (SemiringKind::TopKProofs { k: 0 }, _) => {
183 return Err(ConfigError::IncoherentSemiring {
184 semiring: SemiringKind::TopKProofs { k: 0 },
185 message: "TopKProofs requires k > 0; \
186 k=0 would retain no proofs and reduce to ⊥",
187 });
188 }
189 (SemiringKind::TopKProofs { .. }, true) => {
190 return Err(ConfigError::IncoherentSemiring {
191 semiring: self.semiring,
192 message: "TopKProofs cannot be combined with exact_probability=true; \
193 TopKProofs already provides its own correction via \
194 per-tag inclusion-exclusion (impl plan §3.0)",
195 });
196 }
197 (SemiringKind::TopKProofs { k }, false) => SemiringKind::TopKProofs { k },
198 };
199 Ok(ResolvedSemiringConfig {
200 kind,
201 strict_probability_domain: self.strict_probability_domain,
202 probability_epsilon: self.probability_epsilon,
203 max_bdd_variables: self.max_bdd_variables,
204 })
205 }
206}
207
208impl Default for LocyConfig {
209 fn default() -> Self {
210 Self {
211 max_iterations: 1000,
212 timeout: Duration::from_secs(300),
213 allow_partial: false,
214 max_explain_depth: 100,
215 max_slg_depth: 1000,
216 max_abduce_candidates: 20,
217 max_abduce_results: 10,
218 max_derived_bytes: 256 * 1024 * 1024,
219 deterministic_best_by: true,
220 strict_probability_domain: false,
221 probability_epsilon: 1e-15,
222 exact_probability: false,
223 max_bdd_variables: 1000,
224 top_k_proofs: 0,
225 top_k_proofs_training: None,
226 semiring: SemiringKind::AddMultProb,
227 // Phase C gate-closure: neural stack is GA; default flag
228 // flipped from false to true. Set to false for explicit
229 // opt-out (re-imposes the Phase B compile-time rejection).
230 neural_predicates_preview: true,
231 classifier_registry: HashMap::new(),
232 classifier_cache: None,
233 classifier_cache_max: 100_000,
234 classifier_provenance_store: None,
235 params: HashMap::new(),
236 }
237 }
238}
239
240#[cfg(test)]
241mod tests {
242 use super::*;
243
244 #[test]
245 fn resolve_default_is_add_mult_prob() {
246 let cfg = LocyConfig::default();
247 let resolved = cfg.resolve().unwrap();
248 assert_eq!(resolved.kind, SemiringKind::AddMultProb);
249 }
250
251 #[test]
252 fn resolve_promotes_exact_probability_to_bdd() {
253 let cfg = LocyConfig {
254 exact_probability: true,
255 ..Default::default()
256 };
257 let resolved = cfg.resolve().unwrap();
258 assert_eq!(resolved.kind, SemiringKind::BddExact);
259 }
260
261 #[test]
262 fn resolve_rejects_maxmin_plus_exact() {
263 let cfg = LocyConfig {
264 exact_probability: true,
265 semiring: SemiringKind::MaxMinProb,
266 ..Default::default()
267 };
268 assert!(matches!(
269 cfg.resolve(),
270 Err(ConfigError::IncoherentSemiring { .. })
271 ));
272 }
273
274 #[test]
275 fn resolve_passes_maxmin_through() {
276 let cfg = LocyConfig {
277 semiring: SemiringKind::MaxMinProb,
278 ..Default::default()
279 };
280 let resolved = cfg.resolve().unwrap();
281 assert_eq!(resolved.kind, SemiringKind::MaxMinProb);
282 }
283
284 #[test]
285 fn resolve_passes_topkproofs_through() {
286 let cfg = LocyConfig {
287 semiring: SemiringKind::TopKProofs { k: 4 },
288 ..Default::default()
289 };
290 let resolved = cfg.resolve().unwrap();
291 assert_eq!(resolved.kind, SemiringKind::TopKProofs { k: 4 });
292 }
293
294 #[test]
295 fn resolve_rejects_topkproofs_k_zero() {
296 let cfg = LocyConfig {
297 semiring: SemiringKind::TopKProofs { k: 0 },
298 ..Default::default()
299 };
300 assert!(matches!(
301 cfg.resolve(),
302 Err(ConfigError::IncoherentSemiring { .. })
303 ));
304 }
305
306 #[test]
307 fn resolve_rejects_topkproofs_plus_exact_probability() {
308 let cfg = LocyConfig {
309 semiring: SemiringKind::TopKProofs { k: 4 },
310 exact_probability: true,
311 ..Default::default()
312 };
313 assert!(matches!(
314 cfg.resolve(),
315 Err(ConfigError::IncoherentSemiring { .. })
316 ));
317 }
318}