1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use uni_common::Value;
use crate::neural::{ModelInvocationCache, NeuralClassifier, NeuralProvenanceStore};
use crate::semiring::ResolvedSemiringConfig;
use crate::types::SemiringKind;
/// Type alias for the runtime registry mapping a Locy model name to its
/// concrete classifier implementation. Populated by callers (Python /
/// Rust application code, TCK steps) before invoking a Locy program that
/// references neural models.
pub type ClassifierRegistry = HashMap<String, Arc<dyn NeuralClassifier>>;
/// Configuration error raised by [`LocyConfig::resolve`].
#[derive(Debug, Clone, PartialEq)]
pub enum ConfigError {
/// `exact_probability = true` combined with a non-`AddMultProb`
/// semiring is incoherent: weighted model counting requires the
/// independence semiring as its base. Most commonly hit by setting
/// `semiring = MaxMinProb` while `exact_probability` remains on.
IncoherentSemiring {
semiring: SemiringKind,
message: &'static str,
},
}
impl std::fmt::Display for ConfigError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::IncoherentSemiring { semiring, message } => {
write!(f, "incoherent semiring {semiring:?}: {message}")
}
}
}
}
impl std::error::Error for ConfigError {}
/// Configuration for the Locy orchestrator.
#[derive(Debug, Clone)]
pub struct LocyConfig {
/// Maximum fixpoint iterations per recursive stratum.
pub max_iterations: usize,
/// Overall evaluation timeout.
pub timeout: Duration,
/// When `false` (default), an evaluation that exceeds `timeout` or
/// `max_iterations` returns [`UniError::LocyIncomplete`] rather than
/// silently yielding partial facts. Set to `true` for anytime / best-effort
/// semantics: the partial `LocyResult` is returned (`Ok`) with its
/// `incomplete` diagnostics populated, and the caller is responsible for
/// checking them.
///
/// [`UniError::LocyIncomplete`]: uni_common::UniError::LocyIncomplete
pub allow_partial: bool,
/// Maximum recursion depth for EXPLAIN derivation trees.
pub max_explain_depth: usize,
/// Maximum recursion depth for SLG resolution.
pub max_slg_depth: usize,
/// Maximum candidate modifications to generate during ABDUCE.
pub max_abduce_candidates: usize,
/// Maximum validated results to return from ABDUCE.
pub max_abduce_results: usize,
/// Maximum bytes of derived facts to hold in memory per relation.
pub max_derived_bytes: usize,
/// When true, BEST BY applies a secondary sort on remaining columns for
/// deterministic tie-breaking. Set to false for non-deterministic (faster)
/// selection.
pub deterministic_best_by: bool,
/// When true, MNOR/MPROD reject values outside \[0,1\] with an error instead
/// of clamping. When false (default), values are clamped silently.
pub strict_probability_domain: bool,
/// Underflow threshold for MPROD log-space switch (spec §5.3).
///
/// When the running product drops below this value, `product_f64`
/// switches to log-space accumulation to prevent floating-point
/// underflow.
pub probability_epsilon: f64,
/// When true, groups flagged with shared probabilistic dependencies use
/// exact BDD-based probability computation instead of the independence
/// assumption (MNOR/MPROD). Defaults to false (independence mode).
pub exact_probability: bool,
/// Maximum number of BDD variables (unique base facts) allowed per
/// aggregation group. If a group exceeds this limit, it falls back to
/// the independence-mode result and emits a `BddLimitExceeded` warning.
pub max_bdd_variables: usize,
/// Top-k proof filtering (Scallop, Huang et al. 2021): retain at most k
/// proofs per derived fact, ranked by proof probability. When 0 (default),
/// all proofs are retained (unlimited mode).
pub top_k_proofs: usize,
/// Override `top_k_proofs` during training. When `Some(k)`, training
/// evaluation uses this value instead of `top_k_proofs`. When `None`,
/// `top_k_proofs` is used for both training and inference.
pub top_k_proofs_training: Option<usize>,
/// Active probability semiring (rollout decision D-7). Defaults to
/// [`SemiringKind::AddMultProb`] which matches the Phase 1/2 noisy-OR
/// and product behavior byte-for-byte. Opting into
/// [`SemiringKind::MaxMinProb`] (Viterbi / fuzzy) triggers a
/// non-suppressible `FuzzyNotProbabilistic` warning on PROB-bearing
/// rules (D-9).
///
/// When `exact_probability` is `true`, [`LocyConfig::resolve`]
/// promotes `AddMultProb` to [`SemiringKind::BddExact`] for the
/// duration of evaluation. A `MaxMinProb` + `exact_probability` combo
/// is rejected as incoherent.
pub semiring: SemiringKind,
/// When `true`, the compiler accepts `CREATE MODEL` statements.
/// Default flipped from `false` to `true` in the Phase C
/// gate-closure cycle — the neural stack is GA. The flag remains
/// settable for explicit opt-out: setting it to `false`
/// re-imposes the original Phase B compile-time rejection
/// (`LocyCompileError::NeuralPreviewDisabled`) for environments
/// that need to disable the feature surface entirely. The
/// grammar always parses neural syntax regardless — the gate
/// lives at compile time.
pub neural_predicates_preview: bool,
/// Phase B Slice 3 runtime registry mapping a model name (the
/// identifier from `CREATE MODEL <name> AS ...`) to its concrete
/// [`NeuralClassifier`]. Rule bodies that invoke `<name>(args)` will
/// dispatch through this map at runtime. Models referenced by name
/// in a rule body but absent from the registry produce a runtime
/// error (`UnknownNeuralModel`) at the first invocation attempt;
/// programs that declare models but never invoke them at runtime
/// don't need entries here.
///
/// Defaults to an empty map. The registry is held by `Arc` so the
/// same shared map can flow through fixpoint executor clones without
/// per-iteration deep-copy.
pub classifier_registry: ClassifierRegistry,
/// Phase B Slice 4 (post-Slice-3 follow-up): optional shared
/// memoization cache for neural classifier outputs. When `None`
/// (default), the runtime materializes a fresh cache per query
/// sized to `classifier_cache_max`. When `Some`, the cache is
/// reused across queries — useful for batch evaluation pipelines.
pub classifier_cache: Option<Arc<ModelInvocationCache>>,
/// Maximum entries in the per-query memoization cache. Default
/// 100_000. Naive eviction (clear when full) — see
/// `ModelInvocationCache::insert` doc.
pub classifier_cache_max: usize,
/// Phase C B1-B3 follow-up: optional side-channel store
/// recording (raw, calibrated, confidence_band) per
/// classifier invocation. When `Some`, the runtime
/// `apply_model_invocations` writes one record per
/// (model, input_hash); EXPLAIN reads from this store to
/// populate `NeuralProvenance` entries on derivations.
/// When `None`, EXPLAIN's neural_calls fall back to the
/// (raw-only) yield-alias lookup path. The store flows
/// through alongside `classifier_cache`.
pub classifier_provenance_store: Option<Arc<NeuralProvenanceStore>>,
/// Parameters bound to `$name` references inside rules and QUERY/RETURN
/// expressions. Equivalent to the parameter map passed to `db.query()`.
/// Keys are parameter names **without** the leading `$` (e.g., `"agent_id"`
/// binds `$agent_id`).
pub params: HashMap<String, Value>,
}
impl LocyConfig {
/// Resolve scattered probability-related fields into a single
/// [`ResolvedSemiringConfig`] for threading through the planner and
/// executors. Performs the `exact_probability` → `BddExact`
/// promotion (decision D-7) and rejects incoherent combinations
/// (decision D-9).
pub fn resolve(&self) -> Result<ResolvedSemiringConfig, ConfigError> {
let kind = match (self.semiring, self.exact_probability) {
(SemiringKind::AddMultProb, true) => SemiringKind::BddExact,
(SemiringKind::AddMultProb, false) => SemiringKind::AddMultProb,
(SemiringKind::MaxMinProb, true) => {
return Err(ConfigError::IncoherentSemiring {
semiring: SemiringKind::MaxMinProb,
message: "MaxMinProb cannot be combined with exact_probability=true; \
weighted model counting requires the AddMultProb semiring",
});
}
(SemiringKind::MaxMinProb, false) => SemiringKind::MaxMinProb,
(SemiringKind::BddExact, _) => SemiringKind::BddExact,
// Phase C C0: TopKProofs is its own correction story
// (per-tag DNF inclusion-exclusion) and is incoherent
// with `exact_probability` — that knob means "promote to
// whole-group BDD" and conflicts with the per-tag form.
(SemiringKind::TopKProofs { k: 0 }, _) => {
return Err(ConfigError::IncoherentSemiring {
semiring: SemiringKind::TopKProofs { k: 0 },
message: "TopKProofs requires k > 0; \
k=0 would retain no proofs and reduce to ⊥",
});
}
(SemiringKind::TopKProofs { .. }, true) => {
return Err(ConfigError::IncoherentSemiring {
semiring: self.semiring,
message: "TopKProofs cannot be combined with exact_probability=true; \
TopKProofs already provides its own correction via \
per-tag inclusion-exclusion (impl plan §3.0)",
});
}
(SemiringKind::TopKProofs { k }, false) => SemiringKind::TopKProofs { k },
};
Ok(ResolvedSemiringConfig {
kind,
strict_probability_domain: self.strict_probability_domain,
probability_epsilon: self.probability_epsilon,
max_bdd_variables: self.max_bdd_variables,
})
}
}
impl Default for LocyConfig {
fn default() -> Self {
Self {
max_iterations: 1000,
timeout: Duration::from_secs(300),
allow_partial: false,
max_explain_depth: 100,
max_slg_depth: 1000,
max_abduce_candidates: 20,
max_abduce_results: 10,
max_derived_bytes: 256 * 1024 * 1024,
deterministic_best_by: true,
strict_probability_domain: false,
probability_epsilon: 1e-15,
exact_probability: false,
max_bdd_variables: 1000,
top_k_proofs: 0,
top_k_proofs_training: None,
semiring: SemiringKind::AddMultProb,
// Phase C gate-closure: neural stack is GA; default flag
// flipped from false to true. Set to false for explicit
// opt-out (re-imposes the Phase B compile-time rejection).
neural_predicates_preview: true,
classifier_registry: HashMap::new(),
classifier_cache: None,
classifier_cache_max: 100_000,
classifier_provenance_store: None,
params: HashMap::new(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn resolve_default_is_add_mult_prob() {
let cfg = LocyConfig::default();
let resolved = cfg.resolve().unwrap();
assert_eq!(resolved.kind, SemiringKind::AddMultProb);
}
#[test]
fn resolve_promotes_exact_probability_to_bdd() {
let cfg = LocyConfig {
exact_probability: true,
..Default::default()
};
let resolved = cfg.resolve().unwrap();
assert_eq!(resolved.kind, SemiringKind::BddExact);
}
#[test]
fn resolve_rejects_maxmin_plus_exact() {
let cfg = LocyConfig {
exact_probability: true,
semiring: SemiringKind::MaxMinProb,
..Default::default()
};
assert!(matches!(
cfg.resolve(),
Err(ConfigError::IncoherentSemiring { .. })
));
}
#[test]
fn resolve_passes_maxmin_through() {
let cfg = LocyConfig {
semiring: SemiringKind::MaxMinProb,
..Default::default()
};
let resolved = cfg.resolve().unwrap();
assert_eq!(resolved.kind, SemiringKind::MaxMinProb);
}
#[test]
fn resolve_passes_topkproofs_through() {
let cfg = LocyConfig {
semiring: SemiringKind::TopKProofs { k: 4 },
..Default::default()
};
let resolved = cfg.resolve().unwrap();
assert_eq!(resolved.kind, SemiringKind::TopKProofs { k: 4 });
}
#[test]
fn resolve_rejects_topkproofs_k_zero() {
let cfg = LocyConfig {
semiring: SemiringKind::TopKProofs { k: 0 },
..Default::default()
};
assert!(matches!(
cfg.resolve(),
Err(ConfigError::IncoherentSemiring { .. })
));
}
#[test]
fn resolve_rejects_topkproofs_plus_exact_probability() {
let cfg = LocyConfig {
semiring: SemiringKind::TopKProofs { k: 4 },
exact_probability: true,
..Default::default()
};
assert!(matches!(
cfg.resolve(),
Err(ConfigError::IncoherentSemiring { .. })
));
}
}