elenchus_compiler/error.rs
1//! Compile errors and the shared "did you mean" suggestion helpers.
2use alloc::boxed::Box;
3use alloc::collections::BTreeMap;
4use alloc::string::String;
5use alloc::vec;
6use alloc::vec::Vec;
7use thiserror::Error;
8
9/// Anything that can go wrong while compiling (and resolving imports).
10#[derive(Debug, Error, PartialEq, Eq)]
11pub enum CompileError {
12 /// A source failed to parse; carries the full syntax diagnostics (every
13 /// error, each as a caret block with the keyword's correct syntax). The
14 /// source label is already inside the [`Diagnostics`] header.
15 #[error("{0}")]
16 Parse(elenchus_parser::Diagnostics),
17 /// A name was reused with a different body *within the same source*.
18 #[error("'{name}' redefined with a different body")]
19 PremiseRedefinition {
20 /// The clashing premise/rule name.
21 name: String,
22 },
23 /// A source did not declare its `DOMAIN` (required, once, as the first
24 /// statement).
25 #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
26 MissingDomain {
27 /// The source label that lacked a `DOMAIN`.
28 file: String,
29 },
30 /// A source declared `DOMAIN` more than once (a file has exactly one domain).
31 #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
32 DuplicateDomain {
33 /// The source label with the duplicate `DOMAIN`.
34 file: String,
35 },
36 /// An atom referenced a `domain.` prefix that is not the file's own domain and
37 /// was not imported in this file.
38 #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
39 UnknownDomain {
40 /// The unresolved domain prefix.
41 domain: String,
42 },
43 /// Two imports bound the same local domain name to different domains (use a
44 /// distinct `AS <alias>` to tell them apart).
45 #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
46 DomainAliasClash {
47 /// The clashing local domain name.
48 alias: String,
49 },
50 /// An `IMPORT` target could not be loaded by the [`Resolver`].
51 #[error("import not found: {0}")]
52 ImportNotFound(String),
53 /// Imports form a cycle (a source transitively imports itself).
54 #[error("circular import: {0}")]
55 CircularImport(String),
56 /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
57 /// disjunction (it would not know which literal to assert). Model it as a
58 /// `PREMISE` constraint instead.
59 #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
60 RuleDisjunctiveConsequent {
61 /// The offending rule name.
62 name: String,
63 },
64 /// A reference used a value outside the closed set an `ONEOF` declared for that
65 /// variable. Almost always a typo: the misspelling would otherwise mint a new
66 /// atom that hangs in the air as UNKNOWN. Closed-world is opt-in — it only
67 /// applies to a `(subject, predicate)` whose values an `ONEOF` enumerated.
68 /// Boxed so this comparatively large payload does not bloat every `Result`.
69 #[error(transparent)]
70 UnknownValue(Box<UnknownValue>),
71 /// A `FOR EACH … IN <set>` named a set that was never declared with `SET`.
72 /// Usually a typo in the set name; the suggestion offers the nearest declared
73 /// set when one is close.
74 #[error("{file}:{line}: FOR EACH ranges over '{set}', which is not a declared SET{suggestion}")]
75 UnknownSet {
76 /// The source the offending `FOR EACH` is in.
77 file: String,
78 /// 1-based line of the `FOR EACH`.
79 line: u32,
80 /// The undeclared set name that was referenced.
81 set: String,
82 /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
83 suggestion: String,
84 },
85 /// `CLOSE <relation> TRANSITIVE` found a cycle: a node transitively reaches
86 /// itself. Transitive closure requires a DAG (e.g. a dependency graph).
87 #[error(
88 "{file}:{line}: relation '{relation}' has a cycle (`{node}` reaches itself) \
89 — CLOSE … TRANSITIVE requires a DAG"
90 )]
91 CyclicRelation {
92 /// The source the `CLOSE` is in.
93 file: String,
94 /// 1-based line of the `CLOSE`.
95 line: u32,
96 /// The relation predicate being closed.
97 relation: String,
98 /// A node on the cycle (reaches itself).
99 node: String,
100 },
101 /// A bare proposition (a single-word atom like `db_ready`) was used in the
102 /// program but never declared with `VAR`. Almost always a typo or a forgotten
103 /// declaration; the suggestion offers the nearest declared port when close.
104 #[error(
105 "{file}:{line}: '{name}' is a bare proposition but no VAR declares it \
106 — add `VAR {name}`{suggestion}"
107 )]
108 UndeclaredPort {
109 /// The source the offending reference is in.
110 file: String,
111 /// 1-based line of the offending reference.
112 line: u32,
113 /// The undeclared bare-proposition name.
114 name: String,
115 /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
116 suggestion: String,
117 },
118 /// An external value (`--set`, API, or `PROVIDE`) named a port that no `VAR`
119 /// declares. Strict by design: silently ignoring an unknown key would hide a
120 /// mistake.
121 #[error("no VAR declares the port '{name}' that an external value sets{suggestion}")]
122 UnknownPort {
123 /// The supplied key that matches no declared port.
124 name: String,
125 /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
126 suggestion: String,
127 },
128 /// An external value named a bare target declared in more than one domain, so
129 /// which one it sets is ambiguous. Resolve it by qualifying the key with a
130 /// `domain.` prefix.
131 #[error(
132 "'{name}' is declared in multiple domains ({domains}); qualify which one as `<domain>.{name}`"
133 )]
134 AmbiguousPort {
135 /// The ambiguous bare name (port or atom subject).
136 name: String,
137 /// The domains that declare this name, comma-joined (sorted).
138 domains: String,
139 },
140 /// A multi-word external value (`PROVIDE engine has_fuel: true` or a `--set`/API
141 /// key with a predicate) named an atom that no statement in the program uses.
142 /// Strict by design — like [`CompileError::UnknownPort`], a typo must not be
143 /// silently injected as a new fact.
144 #[error(
145 "no atom '{name}' is used in the program, so an external value cannot set it \
146 (use it in a FACT/PREMISE/RULE, or fix a typo)"
147 )]
148 UnknownExternalAtom {
149 /// The atom reference (e.g. `engine has_fuel`) that matched nothing.
150 name: String,
151 },
152 /// Two sources supplied different values for the same port. Ambiguity is a hard
153 /// error: the engine is about determinism, so it never silently picks one.
154 #[error(
155 "the port '{name}' is set to two different values: {a_value} (from {a_origin}) \
156 and {b_value} (from {b_origin})"
157 )]
158 PortConflict {
159 /// The port set inconsistently.
160 name: String,
161 /// The first binding's value.
162 a_value: bool,
163 /// The first binding's origin tag.
164 a_origin: String,
165 /// The second (conflicting) binding's value.
166 b_value: bool,
167 /// The second binding's origin tag.
168 b_origin: String,
169 },
170 /// A data file (loaded via `--data`) contained a statement other than `PROVIDE`
171 /// (or `DOMAIN`). Data files carry only values, never logic.
172 #[error("{file}:{line}: a data file may only contain PROVIDE (and DOMAIN), not this statement")]
173 DataFileStatement {
174 /// The data file with the offending statement.
175 file: String,
176 /// 1-based line of the offending statement.
177 line: u32,
178 },
179}
180
181/// Details of a closed-world violation (see [`CompileError::UnknownValue`]). Kept
182/// in its own (boxed) struct so the common error path stays small.
183#[derive(Debug, Error, PartialEq, Eq)]
184#[error(
185 "{file}:{line}: '{value}' is not a declared value of '{subject} {predicate}' \
186 — ONEOF declares {{ {declared} }}{suggestion}"
187)]
188pub struct UnknownValue {
189 /// The source the offending reference is in.
190 pub file: String,
191 /// 1-based line of the offending reference.
192 pub line: u32,
193 /// The variable's subject.
194 pub subject: String,
195 /// The variable's predicate.
196 pub predicate: String,
197 /// The out-of-set value that was used.
198 pub value: String,
199 /// The declared legal values, comma-joined (sorted).
200 pub declared: String,
201 /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
202 pub suggestion: String,
203}
204
205// --- raw (key-based) intermediate, before interning ------------------------
206// While accumulating we key everything by `AtomKey` (the owned triple) rather
207// than by `AtomId`, because ids only become stable once *all* sources are merged
208// and the atom set is sorted in `finalize`. These mirror the public IR types but
209// hold keys instead of ids.
210
211/// `" — did you mean \`x\`?"` for an undeclared set name, or empty when no
212/// declared set name is close enough.
213pub(crate) fn nearest_set_suggestion(set: &str, sets: &BTreeMap<String, Vec<String>>) -> String {
214 let names: Vec<&str> = sets.keys().map(String::as_str).collect();
215 did_you_mean(set, &names)
216}
217
218// --- helpers ---------------------------------------------------------------
219
220/// Levenshtein edit distance over Unicode scalars (rolling two-row DP). Small
221/// inputs (atom/value names), so the simple DP is plenty. The one edit-distance
222/// implementation in the workspace: the compiler's "did you mean" suggestions
223/// (via [`nearest`]) and the solver's typo-hint lint both build on it.
224pub fn levenshtein(a: &[char], b: &[char]) -> usize {
225 let mut prev: Vec<usize> = (0..=b.len()).collect();
226 let mut cur = vec![0usize; b.len() + 1];
227 for (i, &ca) in a.iter().enumerate() {
228 cur[0] = i + 1;
229 for (j, &cb) in b.iter().enumerate() {
230 let cost = usize::from(ca != cb);
231 cur[j + 1] = (prev[j + 1] + 1).min(cur[j] + 1).min(prev[j] + cost);
232 }
233 core::mem::swap(&mut prev, &mut cur);
234 }
235 prev[b.len()]
236}
237
238/// The closest candidate to `word` within an edit-distance threshold, or `None`
239/// when nothing is close enough to be a useful "did you mean".
240///
241/// The threshold scales with length (`len / 3`, in Unicode scalars) and is **not**
242/// floored at 1: a value of 1–2 characters yields a budget of 0, so no suggestion
243/// is offered. This is deliberate — for very short tokens (a single CJK character,
244/// where one symbol is a whole word, or a two-letter code) every other short value
245/// sits at distance 1, so a "did you mean" there is pure noise, not a typo cue. The
246/// rejection itself is exact (set membership), so suppressing the guess never hides
247/// a real error; it only withholds a meaningless one. Longer names tolerate a slip
248/// or two, mirroring the spirit of the solver's typo-hint lint.
249pub(crate) fn nearest<'a>(word: &str, candidates: &[&'a str]) -> Option<&'a str> {
250 let budget = word.chars().count() / 3;
251 if budget == 0 {
252 return None;
253 }
254 let w: Vec<char> = word.chars().collect();
255 candidates
256 .iter()
257 .map(|&c| (levenshtein(&w, &c.chars().collect::<Vec<char>>()), c))
258 .filter(|&(d, _)| d <= budget)
259 .min_by_key(|&(d, _)| d)
260 .map(|(_, c)| c)
261}
262
263/// `" — did you mean `x`?"` for the nearest candidate to `word`, or empty when
264/// none is close enough. The single spelling of the suggestion suffix, shared by
265/// every "unknown name" diagnostic (values, sets, …).
266pub(crate) fn did_you_mean(word: &str, candidates: &[&str]) -> String {
267 match nearest(word, candidates) {
268 Some(s) => alloc::format!(" — did you mean `{s}`?"),
269 None => String::new(),
270 }
271}
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276 use alloc::vec::Vec;
277
278 #[test]
279 fn levenshtein_basics() {
280 // The canonical distance works on char slices; spell the string cases
281 // through a tiny adapter so the table below reads as before.
282 fn lev(a: &str, b: &str) -> usize {
283 levenshtein(
284 &a.chars().collect::<Vec<char>>(),
285 &b.chars().collect::<Vec<char>>(),
286 )
287 }
288 assert_eq!(lev("", ""), 0);
289 assert_eq!(lev("abc", "abc"), 0);
290 assert_eq!(lev("censoredmtp", "censored_mtp"), 1);
291 assert_eq!(lev("norml", "normal"), 1);
292 assert_eq!(lev("kitten", "sitting"), 3);
293 }
294
295 #[test]
296 fn nearest_respects_the_length_budget() {
297 let cands = ["censored", "censored_mtp", "uncensored"];
298 assert_eq!(nearest("censoredmtp", &cands), Some("censored_mtp"));
299 // "zzz" is far from all; no suggestion.
300 assert_eq!(nearest("zzz", &cands), None);
301 }
302
303 #[test]
304 fn nearest_offers_nothing_for_very_short_values() {
305 // 1–2 character values get a budget of 0: every other short token is at
306 // distance 1, so a "did you mean" carries no signal. True for single CJK
307 // characters (one symbol = a whole word) and for two-letter codes alike.
308 assert_eq!(nearest("七", &["一", "二", "三"]), None);
309 assert_eq!(nearest("us", &["uk", "eu", "jp"]), None);
310 // A multi-character CJK word still gets a sensible nearest (one wrong
311 // character = distance 1, budget = 3/3 = 1).
312 assert_eq!(nearest("中文字", &["中文学", "日本語"]), Some("中文学"));
313 }
314}