uni_common/api/error.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use std::path::PathBuf;
5use thiserror::Error;
6
/// Error type for the fallible operations of this API; it is the error half
/// of the module's `Result<T>` alias.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. Several variants carry extra fields (`hint`, `query`, the parse
/// location fields, ...) that are *not* interpolated into that text and have
/// to be read from the variant itself.
///
/// The enum is `#[non_exhaustive]`, so code outside the defining crate needs a
/// wildcard arm when matching on it.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum UniError {
    /// No database was found at `path`.
    #[error("Database not found: {path}")]
    NotFound { path: PathBuf },

    /// Schema-level failure described by `message`.
    #[error("Schema error: {message}")]
    Schema { message: String },

    /// Parsing failed.
    ///
    /// Only `message` is rendered by `Display`; the optional location fields
    /// are carried for callers to inspect.
    #[error("Parse error: {message}")]
    Parse {
        message: String,
        // NOTE(review): the unit of `position` (byte vs. character offset) and
        // whether `line` / `column` are 0- or 1-based are not visible in this
        // file — confirm against the parser that fills them in.
        position: Option<usize>,
        line: Option<usize>,
        column: Option<usize>,
        // Presumably a snippet of the input around the failure — TODO confirm.
        context: Option<String>,
    },

    /// Query failure described by `message`.
    ///
    /// `query` is optional and is not rendered by `Display`.
    #[error("Query error: {message}")]
    Query {
        message: String,
        // Presumably the text of the offending query — TODO confirm at call sites.
        query: Option<String>,
    },

    /// Transaction failure described by `message`. Unlike
    /// `TransactionConflict`, `is_retriable` does not classify this variant
    /// as retriable.
    #[error("Transaction error: {message}")]
    Transaction { message: String },

    /// Transaction conflict described by `message`. `is_retriable` classifies
    /// this variant as retriable.
    #[error("Transaction conflict: {message}")]
    TransactionConflict { message: String },

    /// The transaction has already completed.
    // NOTE(review): presumably raised when a transaction is used after
    // commit/rollback — confirm at call sites.
    #[error("Transaction already completed")]
    TransactionAlreadyCompleted,

    /// Operation not supported on read-only database
    #[error("Operation '{operation}' not supported on read-only database")]
    ReadOnly { operation: String },

    /// Label not found in schema
    #[error("Label '{label}' not found in schema")]
    LabelNotFound { label: String },

    /// Edge type not found in schema
    #[error("Edge type '{edge_type}' not found in schema")]
    EdgeTypeNotFound { edge_type: String },

    /// Property not found on node/edge
    #[error("Property '{property}' not found on {entity_type} with label '{label}'")]
    PropertyNotFound {
        property: String,
        entity_type: String, // "node" or "edge"
        label: String,
    },

    /// Index not found
    #[error("Index '{index}' not found")]
    IndexNotFound { index: String },

    /// Snapshot not found
    #[error("Snapshot '{snapshot_id}' not found")]
    SnapshotNotFound { snapshot_id: String },

    /// Query memory limit exceeded
    #[error("Query exceeded memory limit of {limit_bytes} bytes")]
    MemoryLimitExceeded { limit_bytes: usize },

    /// The database is locked by another process.
    #[error("Database is locked by another process")]
    DatabaseLocked,

    /// An operation ran past its time budget of `timeout_ms` milliseconds.
    ///
    /// `is_retriable` does not classify this variant as retriable; compare
    /// `CommitTimeout` and `LockTimeout`, which it does.
    #[error("Operation timed out after {timeout_ms}ms")]
    Timeout { timeout_ms: u64 },

    /// A Locy program stopped before reaching its least fixed point because it
    /// exceeded its wall-clock `timeout` or its `max_iterations` cap.
    ///
    /// This is the default outcome of an over-budget evaluation: partial results
    /// are *not* returned silently. The boxed [`LocyIncomplete`] carries the
    /// diagnostics (which rules were skipped, which complement rules are now
    /// unsound, how far evaluation got). The partial facts themselves are not
    /// embedded here — to recover them, re-run with `allow_partial` set, which
    /// returns `Ok` with the partial result instead of this error.
    #[error("Locy evaluation incomplete: {detail}")]
    LocyIncomplete { detail: Box<LocyIncomplete> },

    /// A value of type `actual` was supplied where `expected` was required.
    #[error("Type error: expected {expected}, got {actual}")]
    Type { expected: String, actual: String },

    /// A constraint was violated; `message` carries the details. Unlike
    /// `ConstraintConflict`, `is_retriable` does not classify this variant as
    /// retriable.
    #[error("Constraint violation: {message}")]
    Constraint { message: String },

    /// A transaction was aborted at commit because a concurrent transaction
    /// committed a conflicting write since this transaction began (optimistic
    /// concurrency control). The transaction may be safely retried.
    #[error("Serialization conflict: {message}")]
    SerializationConflict { message: String },

    /// A transaction was aborted at commit because a concurrent transaction
    /// committed a row with the same unique key (serializable MERGE). The
    /// transaction may be safely retried, which will observe the existing row.
    #[error("Constraint conflict: {message}")]
    ConstraintConflict { message: String },

    /// Storage failure described by `message`.
    ///
    /// The optional underlying cause is marked `#[source]`, so it is reachable
    /// through `std::error::Error::source` but is not part of the `Display`
    /// text.
    #[error("Storage error: {message}")]
    Storage {
        message: String,
        #[source]
        source: Option<Box<dyn std::error::Error + Send + Sync>>,
    },

    /// Wraps a `std::io::Error`. `#[from]` generates the `From` conversion, so
    /// `?` converts I/O errors into this variant.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps an `anyhow::Error`. `#[from]` generates the `From` conversion, so
    /// `?` converts `anyhow` errors into this variant.
    #[error("Internal error: {0}")]
    Internal(#[from] anyhow::Error),

    /// `name` is not a valid identifier; `reason` says why.
    #[error("Invalid identifier '{name}': {reason}")]
    InvalidIdentifier { name: String, reason: String },

    /// A label named `label` already exists.
    #[error("Label '{label}' already exists")]
    LabelAlreadyExists { label: String },

    /// An edge type named `edge_type` already exists.
    #[error("Edge type '{edge_type}' already exists")]
    EdgeTypeAlreadyExists { edge_type: String },

    /// `action` was not permitted.
    #[error("Permission denied: {action}")]
    PermissionDenied { action: String },

    /// Argument `arg` was rejected; `message` explains why.
    #[error("Argument '{arg}' is invalid: {message}")]
    InvalidArgument { arg: String, message: String },

    /// Write context (transaction, bulk writer, or appender) is already active on session.
    ///
    /// `hint` is carried on the variant but is not rendered by `Display`.
    #[error("A write context is already active on session '{session_id}'")]
    WriteContextAlreadyActive {
        session_id: String,
        hint: &'static str,
    },

    /// Transaction commit timed out waiting for the global writer lock.
    ///
    /// `hint` is carried on the variant but is not rendered by `Display`.
    #[error("Transaction '{tx_id}' commit timed out")]
    CommitTimeout { tx_id: String, hint: &'static str },

    /// A `FOR UPDATE` pessimistic row lock could not be acquired within the
    /// deadline — the holder is another live transaction (contention or a
    /// lock-ordering deadlock). Unlike a plain [`UniError::Timeout`] (a slow
    /// operation that would just time out again), this is transient: a fresh
    /// transaction can retry and win the lock once the holder releases it, so
    /// it is classified retriable. See `is_retriable`.
    #[error("FOR UPDATE lock acquisition timed out after {timeout_ms}ms")]
    LockTimeout { timeout_ms: u64 },

    /// Transaction exceeded its deadline.
    ///
    /// `hint` is carried on the variant but is not rendered by `Display`.
    #[error("Transaction '{tx_id}' expired")]
    TransactionExpired { tx_id: String, hint: &'static str },

    /// Operation was cancelled via a cancellation token.
    #[error("Operation cancelled")]
    Cancelled,

    /// Derived facts are stale relative to the current database version.
    #[error("Derived facts are stale: version gap is {version_gap}")]
    StaleDerivedFacts { version_gap: u64 },

    /// A Locy rule conflict was detected during transaction commit rule promotion.
    #[error("Rule conflict: rule '{rule_name}' conflicts during promotion")]
    RuleConflict { rule_name: String },

    /// A session hook rejected the operation.
    #[error("Hook rejected: {message}")]
    HookRejected { message: String },

    /// A synchronous trigger returned `TriggerOutcome::Reject` (or `Err`)
    /// during a `BeforeMutation` / `BeforeCommit` phase, aborting commit.
    #[error("Trigger '{trigger}' rejected commit: {reason}")]
    TriggerRejected { trigger: String, reason: String },

    /// Authentication failed (M5i). Raised when
    /// `Uni::session_with_credentials` cannot find a matching
    /// `AuthProvider` or the matched provider rejects the credentials.
    #[error("Authentication failed: {reason}")]
    AuthenticationFailed {
        /// Human-readable failure reason.
        reason: String,
    },

    /// An `AuthzPolicy::check` returned `Decision::Deny` for the
    /// current principal (M5i).
    #[error("Authorization denied: {reason}")]
    AuthorizationDenied {
        /// Reason from the deciding policy.
        reason: String,
    },

    /// A write was attempted against an ephemeral (transient, in-query)
    /// node or edge — i.e. one whose `Vid` / `Eid` has the
    /// `EPHEMERAL_BIT` set. Ephemeral entities are return-only
    /// projections; SET / DELETE / MERGE against them must fail before
    /// they reach storage (M5g / proposal §4.13.1).
    #[error("Cannot mutate ephemeral {kind} {id}: ephemeral entities are return-only")]
    EphemeralWriteAttempt {
        /// `"node"` or `"edge"`.
        kind: &'static str,
        /// Transient id (bottom 63 bits) for diagnostic output.
        id: u64,
    },

    /// Fork with the given name does not exist in the registry.
    #[error("Fork '{name}' not found")]
    ForkNotFound { name: String },

    /// `session.fork(name).new_()` was called against an existing fork.
    #[error("Fork '{name}' already exists")]
    ForkAlreadyExists { name: String },

    /// Phase-1 gate: writes through `forked_session.tx()` are blocked
    /// until Phase 2 lands. Reads, `locy()`, and admin paths work.
    #[error(
        "Writes on a forked session are not yet supported (Phase 2); reads, locy, and admin paths work"
    )]
    ForkWritesNotYetSupported,

    /// Drop refused because forked sessions are still alive on the fork.
    #[error("Fork '{name}' is held by {holder_count} live session(s); drop refused")]
    ForkInUse { name: String, holder_count: usize },

    /// Drop refused because a transaction has uncommitted mutations on the
    /// fork. Commit or roll back the transaction first, then retry drop.
    #[error("Fork '{name}' has uncommitted transaction state; commit or rollback first")]
    ForkInflightTx { name: String },

    /// Drop refused because the fork has pending async flushes that did
    /// not drain within `UniConfig::drop_fork_drain_timeout`. Either retry
    /// later (the streams will eventually complete) or raise the timeout.
    #[error("Fork '{name}' has pending flushes that did not drain within timeout")]
    PendingFlushTimeout { name: String },

    /// Registry on disk is malformed (corrupt JSON, missing required field, etc.).
    #[error("Fork registry is corrupt: {message}")]
    ForkCorruptRegistry { message: String },

    /// Drop refused because this fork has nested children. Use
    /// `drop_fork_cascade` to remove the whole subtree, or drop the
    /// children individually first.
    #[error(
        "Fork '{name}' has nested children {children:?}; use drop_fork_cascade or drop them first"
    )]
    ForkHasChildren { name: String, children: Vec<String> },

    /// `drop_fork_cascade` refused because at least one fork in the
    /// subtree has live sessions or in-flight transactions. No branch
    /// has been deleted yet — the cascade is atomic at the validation
    /// step. Resolve the blockers and retry.
    #[error("Fork subtree cannot be dropped: {blockers:?}")]
    ForkSubtreeInUse { blockers: Vec<String> },

    /// `Session::fork(name)` refused because the configured `max_forks`
    /// budget is at capacity. Drop existing forks (or wait for the
    /// sweeper to reap expired ones) and retry. Counts include Active,
    /// Pending, and Tombstoned entries.
    #[error("Fork budget exceeded: {current}/{max} forks; drop one or raise UniConfig::max_forks")]
    ForkBudgetExceeded { current: usize, max: usize },

    /// 2PC step on a fork lifecycle operation failed.
    ///
    /// `stage` names the step (`registry_pending`, `create_branch`,
    /// `registry_active`, `tombstone`, `delete_branch`, `registry_clear`,
    /// `backend_unsupported`, `recovery`) so recovery and humans can
    /// triage without parsing prose.
    ///
    /// The cause is both interpolated into the `Display` text (`{source}`)
    /// and exposed through `std::error::Error::source`, so reporters that
    /// print the whole source chain will show its message twice.
    #[error("Fork '{name}' lifecycle failed at stage '{stage}': {source}")]
    ForkLifecycle {
        name: String,
        stage: &'static str,
        #[source]
        source: Box<dyn std::error::Error + Send + Sync>,
    },
}
281
282impl UniError {
283 /// Returns `true` when retrying the failed operation from scratch may succeed.
284 ///
285 /// Distinguishes transient contention failures — optimistic-concurrency
286 /// aborts and lock/commit timeouts, which a fresh transaction can win — from
287 /// deterministic failures (bad query, schema or type violation) that would
288 /// fail identically on retry. This is the signal
289 /// [`Session::transact_with_retry`](../../../uni_db/api/session/struct.Session.html)
290 /// uses to decide whether to re-run a transaction closure.
291 ///
292 /// `TransactionExpired` is deliberately *not* retriable here: a fresh
293 /// transaction gets a new deadline, but the helper treats deadline expiry as
294 /// a caller-set budget, not a contention signal. A plain `Timeout` is
295 /// likewise *not* retriable — re-running the same slow operation would just
296 /// time out again; only `CommitTimeout` (lock contention at the commit point)
297 /// and `LockTimeout` (a contended `FOR UPDATE` row lock / deadlock) signal
298 /// retriable contention.
299 ///
300 /// # Examples
301 /// ```
302 /// use uni_common::UniError;
303 ///
304 /// assert!(UniError::SerializationConflict { message: "lost update".into() }.is_retriable());
305 /// assert!(!UniError::Schema { message: "no such label".into() }.is_retriable());
306 /// ```
307 #[must_use]
308 pub fn is_retriable(&self) -> bool {
309 matches!(
310 self,
311 UniError::SerializationConflict { .. }
312 | UniError::ConstraintConflict { .. }
313 | UniError::TransactionConflict { .. }
314 | UniError::CommitTimeout { .. }
315 | UniError::LockTimeout { .. }
316 )
317 }
318}
319
/// Shorthand for `std::result::Result` with [`UniError`] fixed as the error type.
pub type Result<T> = std::result::Result<T, UniError>;
321
/// Why a Locy evaluation stopped before reaching its least fixed point.
///
/// A wall-clock timeout and a non-convergence failure are both *incomplete*
/// outcomes, but they call for different remedies (raise the timeout / fix a
/// slow rule vs. raise `max_iterations` / fix a non-monotone rule), so they are
/// reported distinctly rather than collapsed into one flag.
///
/// The type is a fieldless `Copy` value. It derives `Hash` alongside `Eq` so
/// it can key a `HashMap` / `HashSet`, for example when tallying incomplete
/// runs by reason.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LocyIncompleteReason {
    /// The wall-clock `timeout` budget was exhausted mid-evaluation.
    Timeout,
    /// A recursive stratum hit `max_iterations` without converging.
    IterationLimit,
}
335
336impl LocyIncompleteReason {
337 /// Returns a stable machine-readable tag (`"timeout"` / `"iteration_limit"`).
338 ///
339 /// Used as the discriminator surfaced to non-Rust callers (e.g. the Python
340 /// bindings), where matching on a Rust enum is not available.
341 #[must_use]
342 pub fn as_str(self) -> &'static str {
343 match self {
344 LocyIncompleteReason::Timeout => "timeout",
345 LocyIncompleteReason::IterationLimit => "iteration_limit",
346 }
347 }
348}
349
350impl std::fmt::Display for LocyIncompleteReason {
351 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
352 f.write_str(self.as_str())
353 }
354}
355
/// Diagnostics describing a Locy evaluation that stopped before completing.
///
/// Returned (boxed) inside [`UniError::LocyIncomplete`] when a program exceeds
/// its time or iteration budget, and also attached to a `LocyResult` when the
/// caller opts into partial results. The rule lists exist so a caller can tell
/// "not evaluated" apart from "genuinely empty": any rule named in
/// `incomplete_rules` or `skipped_rules` may be missing facts purely because
/// evaluation was cut short, so a zero-row count for it is not authoritative.
///
/// The `Display` output is a one-line summary: it reports the *counts* of
/// incomplete and skipped rules, and lists rule names only for
/// `complement_rules_affected`. Read the fields directly for the full lists.
///
/// # Examples
/// ```
/// use uni_common::{LocyIncomplete, LocyIncompleteReason};
///
/// let detail = LocyIncomplete {
///     reason: LocyIncompleteReason::Timeout,
///     elapsed_ms: 305_000,
///     limit_ms: 300_000,
///     max_iterations: 1000,
///     completed_strata: 2,
///     total_strata: 4,
///     incomplete_rules: vec!["upstream_reaches".into()],
///     skipped_rules: vec!["healthy_assets".into()],
///     complement_rules_affected: vec!["healthy_assets".into()],
/// };
/// assert!(detail.to_string().contains("timeout"));
/// assert!(detail.to_string().contains("UNSOUND"));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocyIncomplete {
    /// Why evaluation stopped.
    pub reason: LocyIncompleteReason,
    /// Wall-clock time elapsed when evaluation was cut short, in milliseconds.
    /// May be larger than `limit_ms`, as in the example above.
    pub elapsed_ms: u64,
    /// The configured wall-clock `timeout`, in milliseconds. Present whatever
    /// the `reason` is.
    pub limit_ms: u64,
    /// The configured `max_iterations` cap for recursive strata. Present
    /// whatever the `reason` is.
    pub max_iterations: usize,
    /// Number of strata fully evaluated before the cutoff.
    pub completed_strata: usize,
    /// Total number of strata in the program.
    pub total_strata: usize,
    /// Rules in the stratum that was interrupted mid-evaluation. Their facts may
    /// be a partial fixpoint rather than the least fixed point.
    pub incomplete_rules: Vec<String>,
    /// Rules in strata that were never reached. They derived no facts solely
    /// because evaluation stopped first, not because their result is empty.
    pub skipped_rules: Vec<String>,
    /// Subset of the incomplete/skipped rules that use an `IS NOT` complement.
    /// Stratified negation over a partial relation is unsound, so these results
    /// must not be trusted at all — surfaced separately for emphasis.
    pub complement_rules_affected: Vec<String>,
}
408
409impl std::fmt::Display for LocyIncomplete {
410 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
411 write!(
412 f,
413 "{reason} after {elapsed_ms}ms (limit {limit_ms}ms, max_iterations {max_iters}); \
414 evaluated {done}/{total} strata, {n_incomplete} rule(s) incomplete, \
415 {n_skipped} rule(s) skipped",
416 reason = self.reason,
417 elapsed_ms = self.elapsed_ms,
418 limit_ms = self.limit_ms,
419 max_iters = self.max_iterations,
420 done = self.completed_strata,
421 total = self.total_strata,
422 n_incomplete = self.incomplete_rules.len(),
423 n_skipped = self.skipped_rules.len(),
424 )?;
425 if !self.complement_rules_affected.is_empty() {
426 write!(
427 f,
428 "; UNSOUND complement rule(s) affected: {:?}",
429 self.complement_rules_affected
430 )?;
431 }
432 Ok(())
433 }
434}
435
#[cfg(test)]
mod tests {
    use super::*;

    /// Every contention-class variant must report itself as retriable.
    #[test]
    fn retriable_errors_are_contention_failures() {
        let retriable = [
            UniError::SerializationConflict {
                message: String::new(),
            },
            UniError::ConstraintConflict {
                message: String::new(),
            },
            UniError::TransactionConflict {
                message: String::new(),
            },
            UniError::CommitTimeout {
                tx_id: String::new(),
                hint: "",
            },
            // A contended FOR UPDATE row lock / deadlock clears once the holder
            // lets go, so a fresh transaction can retry and win it.
            UniError::LockTimeout { timeout_ms: 10_000 },
        ];

        retriable
            .iter()
            .for_each(|e| assert!(e.is_retriable(), "{e:?} should be retriable"));
    }

    /// Deterministic failures and caller-set budgets must not be retried.
    #[test]
    fn deterministic_errors_are_not_retriable() {
        let terminal = [
            UniError::Parse {
                message: String::new(),
                position: None,
                line: None,
                column: None,
                context: None,
            },
            UniError::Query {
                message: String::new(),
                query: None,
            },
            UniError::Schema {
                message: String::new(),
            },
            UniError::Constraint {
                message: String::new(),
            },
            UniError::InvalidArgument {
                arg: String::new(),
                message: String::new(),
            },
            // Deadline expiry is a caller-set budget, not a contention signal.
            UniError::TransactionExpired {
                tx_id: String::new(),
                hint: "",
            },
            // The same slow operation would simply time out again.
            UniError::Timeout { timeout_ms: 1 },
        ];

        terminal
            .iter()
            .for_each(|e| assert!(!e.is_retriable(), "{e:?} should not be retriable"));
    }
}