// uni_common/api/error.rs

// SPDX-License-Identifier: Apache-2.0
// Copyright 2024-2026 Dragonscale Team

use std::path::PathBuf;
use thiserror::Error;

/// Error type shared by the `uni_common` API surface.
///
/// The `Display` text of every variant comes from its `#[error(...)]`
/// attribute. Several variants carry extra fields (source positions, hints,
/// the offending query) that are *not* rendered by `Display`; they exist for
/// callers that want structured diagnostics.
///
/// The enum is `#[non_exhaustive]`: matches outside the defining crate need a
/// wildcard arm. Use [`UniError::is_retriable`] to tell transient contention
/// failures apart from deterministic ones.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum UniError {
    /// No database was found at `path`.
    #[error("Database not found: {path}")]
    NotFound { path: PathBuf },

    /// Schema-related failure described by `message`.
    #[error("Schema error: {message}")]
    Schema { message: String },

    /// Input could not be parsed.
    ///
    /// Only `message` appears in the `Display` output; the location fields are
    /// carried alongside it for richer diagnostics.
    #[error("Parse error: {message}")]
    Parse {
        /// Human-readable description of the failure.
        message: String,
        /// Offset of the failure in the input, when known (unit is not
        /// established in this file — confirm against the parser).
        position: Option<usize>,
        /// Line of the failure, when known.
        line: Option<usize>,
        /// Column of the failure, when known.
        column: Option<usize>,
        /// Additional context for the failure, when available (presumably a
        /// snippet of the surrounding input).
        context: Option<String>,
    },

    /// Query failure described by `message`.
    #[error("Query error: {message}")]
    Query {
        /// Human-readable description of the failure.
        message: String,
        /// Presumably the text of the offending query, when available; not
        /// rendered by `Display`.
        query: Option<String>,
    },

    /// Transaction failure described by `message`.
    #[error("Transaction error: {message}")]
    Transaction { message: String },

    /// Transaction conflict described by `message`. Classified retriable by
    /// [`UniError::is_retriable`].
    #[error("Transaction conflict: {message}")]
    TransactionConflict { message: String },

    /// The transaction had already completed when the operation was attempted.
    #[error("Transaction already completed")]
    TransactionAlreadyCompleted,

    /// Operation not supported on read-only database
    #[error("Operation '{operation}' not supported on read-only database")]
    ReadOnly { operation: String },

    /// Label not found in schema
    #[error("Label '{label}' not found in schema")]
    LabelNotFound { label: String },

    /// Edge type not found in schema
    #[error("Edge type '{edge_type}' not found in schema")]
    EdgeTypeNotFound { edge_type: String },

    /// Property not found on node/edge
    #[error("Property '{property}' not found on {entity_type} with label '{label}'")]
    PropertyNotFound {
        /// Name of the missing property.
        property: String,
        /// `"node"` or `"edge"`.
        entity_type: String,
        /// Label of the entity that was inspected.
        label: String,
    },

    /// Index not found
    #[error("Index '{index}' not found")]
    IndexNotFound { index: String },

    /// Snapshot not found
    #[error("Snapshot '{snapshot_id}' not found")]
    SnapshotNotFound { snapshot_id: String },

    /// Query memory limit exceeded
    #[error("Query exceeded memory limit of {limit_bytes} bytes")]
    MemoryLimitExceeded { limit_bytes: usize },

    /// The database is locked by another process.
    #[error("Database is locked by another process")]
    DatabaseLocked,

    /// An operation did not finish within `timeout_ms` milliseconds.
    ///
    /// Not retriable: see [`UniError::is_retriable`].
    #[error("Operation timed out after {timeout_ms}ms")]
    Timeout { timeout_ms: u64 },

    /// A Locy program stopped before reaching its least fixed point because it
    /// exceeded its wall-clock `timeout` or its `max_iterations` cap.
    ///
    /// This is the default outcome of an over-budget evaluation: partial results
    /// are *not* returned silently. The boxed [`LocyIncomplete`] carries the
    /// diagnostics (which rules were skipped, which complement rules are now
    /// unsound, how far evaluation got). The partial facts themselves are not
    /// embedded here — to recover them, re-run with `allow_partial` set, which
    /// returns `Ok` with the partial result instead of this error.
    #[error("Locy evaluation incomplete: {detail}")]
    LocyIncomplete { detail: Box<LocyIncomplete> },

    /// A value had type `actual` where `expected` was required.
    #[error("Type error: expected {expected}, got {actual}")]
    Type { expected: String, actual: String },

    /// Constraint violation described by `message`.
    #[error("Constraint violation: {message}")]
    Constraint { message: String },

    /// A transaction was aborted at commit because a concurrent transaction
    /// committed a conflicting write since this transaction began (optimistic
    /// concurrency control). The transaction may be safely retried.
    #[error("Serialization conflict: {message}")]
    SerializationConflict { message: String },

    /// A transaction was aborted at commit because a concurrent transaction
    /// committed a row with the same unique key (serializable MERGE). The
    /// transaction may be safely retried, which will observe the existing row.
    #[error("Constraint conflict: {message}")]
    ConstraintConflict { message: String },

    /// Storage failure described by `message`, optionally carrying the
    /// underlying cause.
    #[error("Storage error: {message}")]
    Storage {
        /// Human-readable description of the failure.
        message: String,
        /// Underlying cause, exposed through `Error::source` when present.
        #[source]
        source: Option<Box<dyn std::error::Error + Send + Sync>>,
    },

    /// Wraps a [`std::io::Error`]; `#[from]` provides the `From` conversion so
    /// `?` can be used directly on I/O results.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps an `anyhow::Error`; `#[from]` provides the `From` conversion.
    #[error("Internal error: {0}")]
    Internal(#[from] anyhow::Error),

    /// The identifier `name` was rejected for the stated `reason`.
    #[error("Invalid identifier '{name}': {reason}")]
    InvalidIdentifier { name: String, reason: String },

    /// A label named `label` already exists.
    #[error("Label '{label}' already exists")]
    LabelAlreadyExists { label: String },

    /// An edge type named `edge_type` already exists.
    #[error("Edge type '{edge_type}' already exists")]
    EdgeTypeAlreadyExists { edge_type: String },

    /// Permission was denied for `action`.
    #[error("Permission denied: {action}")]
    PermissionDenied { action: String },

    /// The argument named `arg` is invalid; `message` explains why.
    #[error("Argument '{arg}' is invalid: {message}")]
    InvalidArgument { arg: String, message: String },

    /// Write context (transaction, bulk writer, or appender) is already active on session.
    #[error("A write context is already active on session '{session_id}'")]
    WriteContextAlreadyActive {
        /// Session that already holds a write context.
        session_id: String,
        /// Static hint text; not part of the `Display` output.
        hint: &'static str,
    },

    /// Transaction commit timed out waiting for the global writer lock.
    #[error("Transaction '{tx_id}' commit timed out")]
    CommitTimeout {
        /// Transaction whose commit timed out.
        tx_id: String,
        /// Static hint text; not part of the `Display` output.
        hint: &'static str,
    },

    /// A `FOR UPDATE` pessimistic row lock could not be acquired within the
    /// deadline — the holder is another live transaction (contention or a
    /// lock-ordering deadlock). Unlike a plain [`UniError::Timeout`] (a slow
    /// operation that would just time out again), this is transient: a fresh
    /// transaction can retry and win the lock once the holder releases it, so
    /// it is classified retriable. See `is_retriable`.
    #[error("FOR UPDATE lock acquisition timed out after {timeout_ms}ms")]
    LockTimeout { timeout_ms: u64 },

    /// Transaction exceeded its deadline.
    #[error("Transaction '{tx_id}' expired")]
    TransactionExpired {
        /// Transaction that expired.
        tx_id: String,
        /// Static hint text; not part of the `Display` output.
        hint: &'static str,
    },

    /// Operation was cancelled via a cancellation token.
    #[error("Operation cancelled")]
    Cancelled,

    /// Derived facts are stale relative to the current database version.
    #[error("Derived facts are stale: version gap is {version_gap}")]
    StaleDerivedFacts { version_gap: u64 },

    /// A Locy rule conflict was detected during transaction commit rule promotion.
    #[error("Rule conflict: rule '{rule_name}' conflicts during promotion")]
    RuleConflict { rule_name: String },

    /// A session hook rejected the operation.
    #[error("Hook rejected: {message}")]
    HookRejected { message: String },

    /// A synchronous trigger returned `TriggerOutcome::Reject` (or `Err`)
    /// during a `BeforeMutation` / `BeforeCommit` phase, aborting commit.
    #[error("Trigger '{trigger}' rejected commit: {reason}")]
    TriggerRejected { trigger: String, reason: String },

    /// Authentication failed (M5i). Raised when
    /// `Uni::session_with_credentials` cannot find a matching
    /// `AuthProvider` or the matched provider rejects the credentials.
    #[error("Authentication failed: {reason}")]
    AuthenticationFailed {
        /// Human-readable failure reason.
        reason: String,
    },

    /// An `AuthzPolicy::check` returned `Decision::Deny` for the
    /// current principal (M5i).
    #[error("Authorization denied: {reason}")]
    AuthorizationDenied {
        /// Reason from the deciding policy.
        reason: String,
    },

    /// A write was attempted against an ephemeral (transient, in-query)
    /// node or edge — i.e. one whose `Vid` / `Eid` has the
    /// `EPHEMERAL_BIT` set. Ephemeral entities are return-only
    /// projections; SET / DELETE / MERGE against them must fail before
    /// they reach storage (M5g / proposal §4.13.1).
    #[error("Cannot mutate ephemeral {kind} {id}: ephemeral entities are return-only")]
    EphemeralWriteAttempt {
        /// `"node"` or `"edge"`.
        kind: &'static str,
        /// Transient id (bottom 63 bits) for diagnostic output.
        id: u64,
    },

    /// Fork with the given name does not exist in the registry.
    #[error("Fork '{name}' not found")]
    ForkNotFound { name: String },

    /// `session.fork(name).new_()` was called against an existing fork.
    #[error("Fork '{name}' already exists")]
    ForkAlreadyExists { name: String },

    /// Phase-1 gate: writes through `forked_session.tx()` are blocked
    /// until Phase 2 lands. Reads, `locy()`, and admin paths work.
    #[error(
        "Writes on a forked session are not yet supported (Phase 2); reads, locy, and admin paths work"
    )]
    ForkWritesNotYetSupported,

    /// Drop refused because forked sessions are still alive on the fork.
    #[error("Fork '{name}' is held by {holder_count} live session(s); drop refused")]
    ForkInUse { name: String, holder_count: usize },

    /// Drop refused because a transaction has uncommitted mutations on the
    /// fork. Commit or roll back the transaction first, then retry drop.
    #[error("Fork '{name}' has uncommitted transaction state; commit or rollback first")]
    ForkInflightTx { name: String },

    /// Drop refused because the fork has pending async flushes that did
    /// not drain within `UniConfig::drop_fork_drain_timeout`. Either retry
    /// later (the streams will eventually complete) or raise the timeout.
    #[error("Fork '{name}' has pending flushes that did not drain within timeout")]
    PendingFlushTimeout { name: String },

    /// Registry on disk is malformed (corrupt JSON, missing required field, etc.).
    #[error("Fork registry is corrupt: {message}")]
    ForkCorruptRegistry { message: String },

    /// Drop refused because this fork has nested children. Use
    /// `drop_fork_cascade` to remove the whole subtree, or drop the
    /// children individually first.
    #[error(
        "Fork '{name}' has nested children {children:?}; use drop_fork_cascade or drop them first"
    )]
    ForkHasChildren { name: String, children: Vec<String> },

    /// `drop_fork_cascade` refused because at least one fork in the
    /// subtree has live sessions or in-flight transactions. No branch
    /// has been deleted yet — the cascade is atomic at the validation
    /// step. Resolve the blockers and retry.
    #[error("Fork subtree cannot be dropped: {blockers:?}")]
    ForkSubtreeInUse { blockers: Vec<String> },

    /// `Session::fork(name)` refused because the configured `max_forks`
    /// budget is at capacity. Drop existing forks (or wait for the
    /// sweeper to reap expired ones) and retry. Counts include Active,
    /// Pending, and Tombstoned entries.
    #[error("Fork budget exceeded: {current}/{max} forks; drop one or raise UniConfig::max_forks")]
    ForkBudgetExceeded { current: usize, max: usize },

    /// 2PC step on a fork lifecycle operation failed.
    ///
    /// `stage` names the step (`registry_pending`, `create_branch`,
    /// `registry_active`, `tombstone`, `delete_branch`, `registry_clear`,
    /// `backend_unsupported`, `recovery`) so recovery and humans can
    /// triage without parsing prose.
    ///
    /// The cause is both embedded in the `Display` text and exposed through
    /// `Error::source`.
    #[error("Fork '{name}' lifecycle failed at stage '{stage}': {source}")]
    ForkLifecycle {
        /// Fork the lifecycle operation was acting on.
        name: String,
        /// Machine-readable name of the failed step.
        stage: &'static str,
        /// Underlying cause of the failure.
        #[source]
        source: Box<dyn std::error::Error + Send + Sync>,
    },
}

282impl UniError {
283    /// Returns `true` when retrying the failed operation from scratch may succeed.
284    ///
285    /// Distinguishes transient contention failures — optimistic-concurrency
286    /// aborts and lock/commit timeouts, which a fresh transaction can win — from
287    /// deterministic failures (bad query, schema or type violation) that would
288    /// fail identically on retry. This is the signal
289    /// [`Session::transact_with_retry`](../../../uni_db/api/session/struct.Session.html)
290    /// uses to decide whether to re-run a transaction closure.
291    ///
292    /// `TransactionExpired` is deliberately *not* retriable here: a fresh
293    /// transaction gets a new deadline, but the helper treats deadline expiry as
294    /// a caller-set budget, not a contention signal. A plain `Timeout` is
295    /// likewise *not* retriable — re-running the same slow operation would just
296    /// time out again; only `CommitTimeout` (lock contention at the commit point)
297    /// and `LockTimeout` (a contended `FOR UPDATE` row lock / deadlock) signal
298    /// retriable contention.
299    ///
300    /// # Examples
301    /// ```
302    /// use uni_common::UniError;
303    ///
304    /// assert!(UniError::SerializationConflict { message: "lost update".into() }.is_retriable());
305    /// assert!(!UniError::Schema { message: "no such label".into() }.is_retriable());
306    /// ```
307    #[must_use]
308    pub fn is_retriable(&self) -> bool {
309        matches!(
310            self,
311            UniError::SerializationConflict { .. }
312                | UniError::ConstraintConflict { .. }
313                | UniError::TransactionConflict { .. }
314                | UniError::CommitTimeout { .. }
315                | UniError::LockTimeout { .. }
316        )
317    }
318}
319
320pub type Result<T> = std::result::Result<T, UniError>;
321
/// Why a Locy evaluation stopped before reaching its least fixed point.
///
/// A wall-clock timeout and a non-convergence failure are both *incomplete*
/// outcomes, but they call for different remedies (raise the timeout / fix a
/// slow rule vs. raise `max_iterations` / fix a non-monotone rule), so they are
/// reported distinctly rather than collapsed into one flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocyIncompleteReason {
    /// The wall-clock `timeout` budget was exhausted mid-evaluation.
    Timeout,
    /// A recursive stratum hit `max_iterations` without converging.
    IterationLimit,
}

336impl LocyIncompleteReason {
337    /// Returns a stable machine-readable tag (`"timeout"` / `"iteration_limit"`).
338    ///
339    /// Used as the discriminator surfaced to non-Rust callers (e.g. the Python
340    /// bindings), where matching on a Rust enum is not available.
341    #[must_use]
342    pub fn as_str(self) -> &'static str {
343        match self {
344            LocyIncompleteReason::Timeout => "timeout",
345            LocyIncompleteReason::IterationLimit => "iteration_limit",
346        }
347    }
348}
349
350impl std::fmt::Display for LocyIncompleteReason {
351    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
352        f.write_str(self.as_str())
353    }
354}
355
356/// Diagnostics describing a Locy evaluation that stopped before completing.
357///
358/// Returned (boxed) inside [`UniError::LocyIncomplete`] when a program exceeds
359/// its time or iteration budget, and also attached to a `LocyResult` when the
360/// caller opts into partial results. The rule lists exist so a caller can tell
361/// "not evaluated" apart from "genuinely empty": any rule named in
362/// `incomplete_rules` or `skipped_rules` may be missing facts purely because
363/// evaluation was cut short, so a zero-row count for it is not authoritative.
364///
365/// # Examples
366/// ```
367/// use uni_common::{LocyIncomplete, LocyIncompleteReason};
368///
369/// let detail = LocyIncomplete {
370///     reason: LocyIncompleteReason::Timeout,
371///     elapsed_ms: 305_000,
372///     limit_ms: 300_000,
373///     max_iterations: 1000,
374///     completed_strata: 2,
375///     total_strata: 4,
376///     incomplete_rules: vec!["upstream_reaches".into()],
377///     skipped_rules: vec!["healthy_assets".into()],
378///     complement_rules_affected: vec!["healthy_assets".into()],
379/// };
380/// assert!(detail.to_string().contains("timeout"));
381/// assert!(detail.to_string().contains("UNSOUND"));
382/// ```
383#[derive(Debug, Clone, PartialEq, Eq)]
384pub struct LocyIncomplete {
385    /// Why evaluation stopped.
386    pub reason: LocyIncompleteReason,
387    /// Wall-clock time elapsed when evaluation was cut short, in milliseconds.
388    pub elapsed_ms: u64,
389    /// The configured wall-clock `timeout`, in milliseconds.
390    pub limit_ms: u64,
391    /// The configured `max_iterations` cap for recursive strata.
392    pub max_iterations: usize,
393    /// Number of strata fully evaluated before the cutoff.
394    pub completed_strata: usize,
395    /// Total number of strata in the program.
396    pub total_strata: usize,
397    /// Rules in the stratum that was interrupted mid-evaluation. Their facts may
398    /// be a partial fixpoint rather than the least fixed point.
399    pub incomplete_rules: Vec<String>,
400    /// Rules in strata that were never reached. They derived no facts solely
401    /// because evaluation stopped first, not because their result is empty.
402    pub skipped_rules: Vec<String>,
403    /// Subset of the incomplete/skipped rules that use an `IS NOT` complement.
404    /// Stratified negation over a partial relation is unsound, so these results
405    /// must not be trusted at all — surfaced separately for emphasis.
406    pub complement_rules_affected: Vec<String>,
407}
408
409impl std::fmt::Display for LocyIncomplete {
410    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
411        write!(
412            f,
413            "{reason} after {elapsed_ms}ms (limit {limit_ms}ms, max_iterations {max_iters}); \
414             evaluated {done}/{total} strata, {n_incomplete} rule(s) incomplete, \
415             {n_skipped} rule(s) skipped",
416            reason = self.reason,
417            elapsed_ms = self.elapsed_ms,
418            limit_ms = self.limit_ms,
419            max_iters = self.max_iterations,
420            done = self.completed_strata,
421            total = self.total_strata,
422            n_incomplete = self.incomplete_rules.len(),
423            n_skipped = self.skipped_rules.len(),
424        )?;
425        if !self.complement_rules_affected.is_empty() {
426            write!(
427                f,
428                "; UNSOUND complement rule(s) affected: {:?}",
429                self.complement_rules_affected
430            )?;
431        }
432        Ok(())
433    }
434}
435
#[cfg(test)]
mod tests {
    use super::*;

    /// Every contention-style variant must be reported as retriable.
    #[test]
    fn retriable_errors_are_contention_failures() {
        // Shorthand for the empty strings the variants need; the payload is
        // irrelevant to the classification.
        let s = String::new;
        let retriable = [
            UniError::SerializationConflict { message: s() },
            UniError::ConstraintConflict { message: s() },
            UniError::TransactionConflict { message: s() },
            UniError::CommitTimeout {
                tx_id: s(),
                hint: "",
            },
            // A contended FOR UPDATE row lock / deadlock clears when the holder
            // releases; a fresh transaction can retry and win it.
            UniError::LockTimeout { timeout_ms: 10_000 },
        ];
        for e in &retriable {
            assert!(e.is_retriable(), "{e:?} should be retriable");
        }
    }

    /// A sample of deterministic failures must be reported as non-retriable.
    #[test]
    fn deterministic_errors_are_not_retriable() {
        let s = String::new;
        let terminal = [
            UniError::Parse {
                message: s(),
                position: None,
                line: None,
                column: None,
                context: None,
            },
            UniError::Query {
                message: s(),
                query: None,
            },
            UniError::Schema { message: s() },
            UniError::Constraint { message: s() },
            UniError::InvalidArgument {
                arg: s(),
                message: s(),
            },
            // A caller-set deadline is not a contention signal.
            UniError::TransactionExpired {
                tx_id: s(),
                hint: "",
            },
            // Re-running the same slow operation would just time out again.
            UniError::Timeout { timeout_ms: 1 },
        ];
        for e in &terminal {
            assert!(!e.is_retriable(), "{e:?} should not be retriable");
        }
    }
}