Skip to main content

elenchus_compiler/
error.rs

1//! Compile errors and the shared "did you mean" suggestion helpers.
2use alloc::boxed::Box;
3use alloc::collections::BTreeMap;
4use alloc::string::String;
5use alloc::vec;
6use alloc::vec::Vec;
7use thiserror::Error;
8
9/// Anything that can go wrong while compiling (and resolving imports).
10#[derive(Debug, Error, PartialEq, Eq)]
11pub enum CompileError {
12    /// A source failed to parse; carries the full syntax diagnostics (every
13    /// error, each as a caret block with the keyword's correct syntax). The
14    /// source label is already inside the [`Diagnostics`] header.
15    #[error("{0}")]
16    Parse(elenchus_parser::Diagnostics),
17    /// A name was reused with a different body *within the same source*.
18    #[error("'{name}' redefined with a different body")]
19    PremiseRedefinition {
20        /// The clashing premise/rule name.
21        name: String,
22    },
23    /// A source did not declare its `DOMAIN` (required, once, as the first
24    /// statement).
25    #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
26    MissingDomain {
27        /// The source label that lacked a `DOMAIN`.
28        file: String,
29    },
30    /// A source declared `DOMAIN` more than once (a file has exactly one domain).
31    #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
32    DuplicateDomain {
33        /// The source label with the duplicate `DOMAIN`.
34        file: String,
35    },
36    /// An atom referenced a `domain.` prefix that is not the file's own domain and
37    /// was not imported in this file.
38    #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
39    UnknownDomain {
40        /// The unresolved domain prefix.
41        domain: String,
42    },
43    /// Two imports bound the same local domain name to different domains (use a
44    /// distinct `AS <alias>` to tell them apart).
45    #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
46    DomainAliasClash {
47        /// The clashing local domain name.
48        alias: String,
49    },
50    /// An `IMPORT` target could not be loaded by the [`Resolver`].
51    #[error("import not found: {0}")]
52    ImportNotFound(String),
53    /// Imports form a cycle (a source transitively imports itself).
54    #[error("circular import: {0}")]
55    CircularImport(String),
56    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
57    /// disjunction (it would not know which literal to assert). Model it as a
58    /// `PREMISE` constraint instead.
59    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
60    RuleDisjunctiveConsequent {
61        /// The offending rule name.
62        name: String,
63    },
64    /// A reference used a value outside the closed set an `ONEOF` declared for that
65    /// variable. Almost always a typo: the misspelling would otherwise mint a new
66    /// atom that hangs in the air as UNKNOWN. Closed-world is opt-in — it only
67    /// applies to a `(subject, predicate)` whose values an `ONEOF` enumerated.
68    /// Boxed so this comparatively large payload does not bloat every `Result`.
69    #[error(transparent)]
70    UnknownValue(Box<UnknownValue>),
71    /// A `FOR EACH … IN <set>` named a set that was never declared with `SET`.
72    /// Usually a typo in the set name; the suggestion offers the nearest declared
73    /// set when one is close.
74    #[error("{file}:{line}: FOR EACH ranges over '{set}', which is not a declared SET{suggestion}")]
75    UnknownSet {
76        /// The source the offending `FOR EACH` is in.
77        file: String,
78        /// 1-based line of the `FOR EACH`.
79        line: u32,
80        /// The undeclared set name that was referenced.
81        set: String,
82        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
83        suggestion: String,
84    },
85    /// `CLOSE <relation> TRANSITIVE` found a cycle: a node transitively reaches
86    /// itself. Transitive closure requires a DAG (e.g. a dependency graph).
87    #[error(
88        "{file}:{line}: relation '{relation}' has a cycle (`{node}` reaches itself) \
89         — CLOSE … TRANSITIVE requires a DAG"
90    )]
91    CyclicRelation {
92        /// The source the `CLOSE` is in.
93        file: String,
94        /// 1-based line of the `CLOSE`.
95        line: u32,
96        /// The relation predicate being closed.
97        relation: String,
98        /// A node on the cycle (reaches itself).
99        node: String,
100    },
101    /// A bare proposition (a single-word atom like `db_ready`) was used in the
102    /// program but never declared with `VAR`. Almost always a typo or a forgotten
103    /// declaration; the suggestion offers the nearest declared port when close.
104    #[error(
105        "{file}:{line}: '{name}' is a bare proposition but no VAR declares it \
106         — add `VAR {name}`{suggestion}"
107    )]
108    UndeclaredPort {
109        /// The source the offending reference is in.
110        file: String,
111        /// 1-based line of the offending reference.
112        line: u32,
113        /// The undeclared bare-proposition name.
114        name: String,
115        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
116        suggestion: String,
117    },
118    /// An external value (`--set`, API, or `PROVIDE`) named a port that no `VAR`
119    /// declares. Strict by design: silently ignoring an unknown key would hide a
120    /// mistake.
121    #[error("no VAR declares the port '{name}' that an external value sets{suggestion}")]
122    UnknownPort {
123        /// The supplied key that matches no declared port.
124        name: String,
125        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
126        suggestion: String,
127    },
128    /// An external value named a bare target declared in more than one domain, so
129    /// which one it sets is ambiguous. Resolve it by qualifying the key with a
130    /// `domain.` prefix.
131    #[error(
132        "'{name}' is declared in multiple domains ({domains}); qualify which one as `<domain>.{name}`"
133    )]
134    AmbiguousPort {
135        /// The ambiguous bare name (port or atom subject).
136        name: String,
137        /// The domains that declare this name, comma-joined (sorted).
138        domains: String,
139    },
140    /// A multi-word external value (`PROVIDE engine has_fuel: true` or a `--set`/API
141    /// key with a predicate) named an atom that no statement in the program uses.
142    /// Strict by design — like [`CompileError::UnknownPort`], a typo must not be
143    /// silently injected as a new fact.
144    #[error(
145        "no atom '{name}' is used in the program, so an external value cannot set it \
146         (use it in a FACT/PREMISE/RULE, or fix a typo)"
147    )]
148    UnknownExternalAtom {
149        /// The atom reference (e.g. `engine has_fuel`) that matched nothing.
150        name: String,
151    },
152    /// Two sources supplied different values for the same port. Ambiguity is a hard
153    /// error: the engine is about determinism, so it never silently picks one.
154    #[error(
155        "the port '{name}' is set to two different values: {a_value} (from {a_origin}) \
156         and {b_value} (from {b_origin})"
157    )]
158    PortConflict {
159        /// The port set inconsistently.
160        name: String,
161        /// The first binding's value.
162        a_value: bool,
163        /// The first binding's origin tag.
164        a_origin: String,
165        /// The second (conflicting) binding's value.
166        b_value: bool,
167        /// The second binding's origin tag.
168        b_origin: String,
169    },
170    /// A data file (loaded via `--data`) contained a statement other than `PROVIDE`
171    /// (or `DOMAIN`). Data files carry only values, never logic.
172    #[error("{file}:{line}: a data file may only contain PROVIDE (and DOMAIN), not this statement")]
173    DataFileStatement {
174        /// The data file with the offending statement.
175        file: String,
176        /// 1-based line of the offending statement.
177        line: u32,
178    },
179}
180
181/// Details of a closed-world violation (see [`CompileError::UnknownValue`]). Kept
182/// in its own (boxed) struct so the common error path stays small.
183#[derive(Debug, Error, PartialEq, Eq)]
184#[error(
185    "{file}:{line}: '{value}' is not a declared value of '{subject} {predicate}' \
186     — ONEOF declares {{ {declared} }}{suggestion}"
187)]
188pub struct UnknownValue {
189    /// The source the offending reference is in.
190    pub file: String,
191    /// 1-based line of the offending reference.
192    pub line: u32,
193    /// The variable's subject.
194    pub subject: String,
195    /// The variable's predicate.
196    pub predicate: String,
197    /// The out-of-set value that was used.
198    pub value: String,
199    /// The declared legal values, comma-joined (sorted).
200    pub declared: String,
201    /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
202    pub suggestion: String,
203}
204
205// --- raw (key-based) intermediate, before interning ------------------------
206// While accumulating we key everything by `AtomKey` (the owned triple) rather
207// than by `AtomId`, because ids only become stable once *all* sources are merged
208// and the atom set is sorted in `finalize`. These mirror the public IR types but
209// hold keys instead of ids.
210
211/// `" — did you mean \`x\`?"` for an undeclared set name, or empty when no
212/// declared set name is close enough.
213pub(crate) fn nearest_set_suggestion(set: &str, sets: &BTreeMap<String, Vec<String>>) -> String {
214    let names: Vec<&str> = sets.keys().map(String::as_str).collect();
215    did_you_mean(set, &names)
216}
217
218// --- helpers ---------------------------------------------------------------
219
220/// Levenshtein edit distance over Unicode scalars (rolling two-row DP). Small
221/// inputs (atom/value names), so the simple DP is plenty. The one edit-distance
222/// implementation in the workspace: the compiler's "did you mean" suggestions
223/// (via [`nearest`]) and the solver's typo-hint lint both build on it.
224pub fn levenshtein(a: &[char], b: &[char]) -> usize {
225    let mut prev: Vec<usize> = (0..=b.len()).collect();
226    let mut cur = vec![0usize; b.len() + 1];
227    for (i, &ca) in a.iter().enumerate() {
228        cur[0] = i + 1;
229        for (j, &cb) in b.iter().enumerate() {
230            let cost = usize::from(ca != cb);
231            cur[j + 1] = (prev[j + 1] + 1).min(cur[j] + 1).min(prev[j] + cost);
232        }
233        core::mem::swap(&mut prev, &mut cur);
234    }
235    prev[b.len()]
236}
237
238/// The closest candidate to `word` within an edit-distance threshold, or `None`
239/// when nothing is close enough to be a useful "did you mean".
240///
241/// The threshold scales with length (`len / 3`, in Unicode scalars) and is **not**
242/// floored at 1: a value of 1–2 characters yields a budget of 0, so no suggestion
243/// is offered. This is deliberate — for very short tokens (a single CJK character,
244/// where one symbol is a whole word, or a two-letter code) every other short value
245/// sits at distance 1, so a "did you mean" there is pure noise, not a typo cue. The
246/// rejection itself is exact (set membership), so suppressing the guess never hides
247/// a real error; it only withholds a meaningless one. Longer names tolerate a slip
248/// or two, mirroring the spirit of the solver's typo-hint lint.
249pub(crate) fn nearest<'a>(word: &str, candidates: &[&'a str]) -> Option<&'a str> {
250    let budget = word.chars().count() / 3;
251    if budget == 0 {
252        return None;
253    }
254    let w: Vec<char> = word.chars().collect();
255    candidates
256        .iter()
257        .map(|&c| (levenshtein(&w, &c.chars().collect::<Vec<char>>()), c))
258        .filter(|&(d, _)| d <= budget)
259        .min_by_key(|&(d, _)| d)
260        .map(|(_, c)| c)
261}
262
263/// `" — did you mean `x`?"` for the nearest candidate to `word`, or empty when
264/// none is close enough. The single spelling of the suggestion suffix, shared by
265/// every "unknown name" diagnostic (values, sets, …).
266pub(crate) fn did_you_mean(word: &str, candidates: &[&str]) -> String {
267    match nearest(word, candidates) {
268        Some(s) => alloc::format!(" — did you mean `{s}`?"),
269        None => String::new(),
270    }
271}
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276    use alloc::vec::Vec;
277
278    #[test]
279    fn levenshtein_basics() {
280        // The canonical distance works on char slices; spell the string cases
281        // through a tiny adapter so the table below reads as before.
282        fn lev(a: &str, b: &str) -> usize {
283            levenshtein(
284                &a.chars().collect::<Vec<char>>(),
285                &b.chars().collect::<Vec<char>>(),
286            )
287        }
288        assert_eq!(lev("", ""), 0);
289        assert_eq!(lev("abc", "abc"), 0);
290        assert_eq!(lev("censoredmtp", "censored_mtp"), 1);
291        assert_eq!(lev("norml", "normal"), 1);
292        assert_eq!(lev("kitten", "sitting"), 3);
293    }
294
295    #[test]
296    fn nearest_respects_the_length_budget() {
297        let cands = ["censored", "censored_mtp", "uncensored"];
298        assert_eq!(nearest("censoredmtp", &cands), Some("censored_mtp"));
299        // "zzz" is far from all; no suggestion.
300        assert_eq!(nearest("zzz", &cands), None);
301    }
302
303    #[test]
304    fn nearest_offers_nothing_for_very_short_values() {
305        // 1–2 character values get a budget of 0: every other short token is at
306        // distance 1, so a "did you mean" carries no signal. True for single CJK
307        // characters (one symbol = a whole word) and for two-letter codes alike.
308        assert_eq!(nearest("七", &["一", "二", "三"]), None);
309        assert_eq!(nearest("us", &["uk", "eu", "jp"]), None);
310        // A multi-character CJK word still gets a sensible nearest (one wrong
311        // character = distance 1, budget = 3/3 = 1).
312        assert_eq!(nearest("中文字", &["中文学", "日本語"]), Some("中文学"));
313    }
314}