gradatum_core/job.rs
1//! Primitive types for the job system.
2//!
3//! Defines the canonical L0 types used across the entire job infrastructure
4//! layer. Lives in `gradatum-core` so that higher-level crates
5//! (`gradatum-queue`, `gradatum-worker`, `gradatum-db-sqlite`) can depend on
6//! it without introducing circular dependencies.
7//!
8//! # Architectural layers
9//!
10//! ```text
11//! gradatum-core (L0) — Job, JobRecord, QueueStore, QueueEvent, DryRunAware
12//! ↑
13//! gradatum-db-sqlite (L2) — SqliteQueueStore impl QueueStore
14//! ↑
15//! gradatum-queue (L3) — GradatumQueue facade (Apalis backend)
16//! ↑
17//! gradatum-worker (L4) — Apalis handlers + orchestration
18//! ```
19//!
20//! # Bincode order — IMMUTABLE
21//!
22//! [`Job`] variants are encoded by position by `bincode`.
23//! **Never reorder existing variants.** New variants must be appended at the
24//! end. Violation causes silent corruption of jobs stored in the database.
25
26#![allow(dead_code)] // types consommés progressivement selon le pipeline
27
28use chrono::{DateTime, Utc};
29use serde::{Deserialize, Serialize};
30use std::time::Duration;
31use tokio::sync::broadcast::Receiver;
32use ulid::Ulid;
33
34// ─────────────────────────────────────────────────────────────────────────────
// VaultScope — canonical alias
36// ─────────────────────────────────────────────────────────────────────────────
37
38/// Scope of a job or vault request.
39///
40/// `VaultScope` is a type alias for [`JobScope`]. Both names coexist to
41/// preserve consistency between existing vault code (`VaultScope`) and the
42/// job system (`JobScope`).
43pub type VaultScope = JobScope;
44
45// ─────────────────────────────────────────────────────────────────────────────
// Job enum — frozen bincode order (v55)
47// ─────────────────────────────────────────────────────────────────────────────
48
49/// Job type submitted to the queue.
50///
51/// # Bincode order — IMMUTABLE
52///
53/// Variants are encoded by position (0-20). Never reorder.
54/// New variants must be appended at the end.
55///
56/// Position comments `(N)` are informational — bincode encodes by Rust
57/// declaration order, not by explicit numeric value.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59#[serde(tag = "type", content = "data")]
60pub enum Job {
61 // System jobs (0-12) — automatic
62 /// ReAct agent loop — bincode position 0.
63 Agent,
64 /// `[[pipelines]]` step — bincode position 1.
65 Pipeline,
66 /// Web crawler — bincode position 2.
67 Collect,
68 /// Semantic distillation — bincode position 3.
69 ///
70 /// The variant changed from unit to tuple; bincode position 3 is
71 /// UNCHANGED (preserves bincode/serde stability of the `kind` column).
72 /// Any `Distill` jobs in the queue must be absent before deployment.
73 ///
74 /// Only `DistillMode::Semantic` is implemented. `Learn`/`Peer`/`Rationale`
75 /// modes require a complete event-log and are deferred (YAGNI).
76 Distill(DistillSource),
77 /// Vault backup — bincode position 4.
78 Backup,
79 /// Lifecycle purge (removes `Garbage` notes) — bincode position 5.
80 ///
81 /// The variant changed from unit to tuple; bincode position 5 is
82 /// UNCHANGED. Any `Purge` jobs in the queue must be absent before deployment.
83 Purge(PurgeSpec),
84 /// Full-text and/or vector re-index — bincode position 6.
85 ReIndex(ReIndexMode),
86 /// Content summarisation — bincode position 7.
87 Summarize,
88 /// Memory validation and healing — bincode position 8.
89 Validate,
90 /// Vault scoring and deduplication — bincode position 9.
91 Audit,
92 /// Mental model consolidation — bincode position 10.
93 Consolidate,
94 /// Inbox classification and curation — bincode position 11.
95 Curate(CurateSpec),
96 /// Semantic forget of notes — bincode position 12.
97 ///
98 /// The variant changed from unit to tuple; bincode position 12 is
99 /// UNCHANGED. Any `Forget` jobs in the queue must be absent before deployment.
100 Forget(ForgetSpec),
101
102 // Human jobs (13-16) — JobClass::Human required
103 /// Validates or rejects a batch of `needs-review` notes — bincode position 13.
104 Review,
105 /// Manually classifies an unresolved `inbox/` note — bincode position 14.
106 Classify,
107 /// Merges two duplicate notes (after `Job::Audit`) — bincode position 15.
108 Merge,
109 /// Enriches metadata for a batch of notes — bincode position 16.
110 Annotate,
111
112 // Added at the end to preserve bincode order
113 /// Predecessor vault import · `JobClass::Human` — bincode position 17.
114 Migrate(MigrateSource),
115 /// CSV/PDF/JSON export from notes · `JobClass::Agent|Human` — bincode position 18.
116 Export(ExportSource),
117 /// Cascade external notification · `JobClass::System` — bincode position 19.
118 Notify(NotifySource),
119 /// Document ingestion via queue · `JobClass::Agent|Human` — bincode position 20.
120 Ingest(IngestSource),
121
122 // Embed appended at the end to preserve bincode order of positions 0-20
123 /// Vector embedding generation — bincode position 21.
124 Embed(EmbedSpec),
125}
126
127// ─────────────────────────────────────────────────────────────────────────────
128// ReIndexMode
129// ─────────────────────────────────────────────────────────────────────────────
130
131/// Re-index mode for [`Job::ReIndex`].
132#[derive(Debug, Clone, Serialize, Deserialize)]
133pub enum ReIndexMode {
134 /// Rebuilds FTS5 index (default).
135 FtsOnly,
136 /// Recomputes all embeddings (e.g. after a model migration).
137 VectorsOnly,
138 /// `FtsOnly` + `VectorsOnly`.
139 Full,
140 /// Embeds only notes that have no vector yet (new notes).
141 ///
142 /// Faster than `VectorsOnly` on a large active vault.
143 MissingOnly,
144}
145
146// ─────────────────────────────────────────────────────────────────────────────
// Source structs — active variants
148// ─────────────────────────────────────────────────────────────────────────────
149
150/// Specification for a curation job (`Job::Curate`).
151///
152/// Handles `inbox/` classification, scoring, and metadata updates.
153///
154/// ## Optional write fields
155///
156/// `title`, `body`, `author`, `tags`, and `section_hint` are carried directly
157/// in `CurateSpec` for the `vault_write → job_store` path. They are optional
158/// (`#[serde(default)]`) and backward-compatible with existing `JobRecord`s
159/// (absent JSON field → `None`/`[]`).
160///
161/// For `Job::Curate` jobs triggered by `vault_write`:
162/// - `title` + `body` are `Some` — carry the content to create in the vault.
163///
164/// For `Job::Curate` jobs triggered by reclassification:
165/// - `title` + `body` are `None` — the note already exists in the vault.
166#[derive(Debug, Clone, Default, Serialize, Deserialize)]
167pub struct CurateSpec {
168 /// ULID identifier of the note to curate.
169 pub note_id: Ulid,
170 /// Owning tenant identifier (default: `"main"`).
171 #[serde(default = "default_tenant_main")]
172 pub tenant_id: String,
173 /// Note title (present for `vault_write`, absent for reclassification).
174 #[serde(default)]
175 pub title: Option<String>,
176 /// Markdown body of the note (present for `vault_write`).
177 #[serde(default)]
178 pub body: Option<String>,
179 /// Note author (optional).
180 #[serde(default)]
181 pub author: Option<String>,
182 /// Initial tags (optional — the curator may add more).
183 #[serde(default)]
184 pub tags: Vec<String>,
185 /// Suggested section (optional — the curator may override).
186 #[serde(default)]
187 pub section_hint: Option<String>,
188 /// Expected SHA-256 hash for optimistic locking (optional).
189 ///
190 /// When `Some`, the worker checks that the note's current hash matches
191 /// before writing. On mismatch: job marked `Conflict`, note not overwritten.
192 /// `None` = unconditional write (backward-compatible behaviour).
193 ///
194 /// Serialised in bincode as `Option<[u8; 32]>` (32 fixed bytes or `None`).
195 /// Payload overhead: +33 bytes (1 bincode discriminant + 32-byte hash) — negligible.
196 #[serde(default, skip_serializing_if = "Option::is_none")]
197 pub expected_sha256: Option<[u8; 32]>,
198}
199
/// Serde fallback for a missing `tenant_id` field: the `"main"` tenant.
fn default_tenant_main() -> String {
    String::from("main")
}
203
204/// Specification for an embedding job (`Job::Embed`).
205///
206/// Generates or regenerates the vector via `gradatum-embed::FallbackEmbedder`.
207#[derive(Debug, Clone, Serialize, Deserialize)]
208pub struct EmbedSpec {
209 /// ULID identifier of the note to embed.
210 pub note_id: Ulid,
211 /// Owning tenant identifier (default: `"main"`).
212 pub tenant_id: String,
213 /// Force regeneration even if a vector already exists.
214 pub force_regenerate: bool,
215}
216
217// ─────────────────────────────────────────────────────────────────────────────
// Source structs — new variants (v59)
219// ─────────────────────────────────────────────────────────────────────────────
220
221/// Source for [`Job::Migrate`] — predecessor vault → Gradatum import.
222///
223/// `JobClass::Human` only — irreversible operation.
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct MigrateSource {
226 /// Path to the source vault.
227 pub from_path: String,
228 /// Migration mode.
229 pub mode: MigrateMode,
230 /// Conflict resolution strategy.
231 pub conflict: ConflictStrategy,
232 /// Simulate without writing — required on the first pass.
233 pub dry_run: bool,
234 /// Destination vault.
235 pub target: VaultScope,
236}
237
238/// Migration mode for [`MigrateSource`].
239#[derive(Debug, Clone, Serialize, Deserialize)]
240pub enum MigrateMode {
241 /// Maps predecessor sections to `CognitiveCategory` + `ContentSection`.
242 PredecessorV1,
243 /// Import from another Gradatum vault.
244 GradatumVault,
245 /// Raw Markdown import without section mapping.
246 RawMarkdown,
247}
248
249/// Conflict resolution strategy for [`MigrateSource`].
250#[derive(Debug, Clone, Serialize, Deserialize)]
251pub enum ConflictStrategy {
252 /// Overwrites existing notes.
253 Overwrite,
254 /// Keeps existing notes, skips incoming duplicates.
255 Skip,
256 /// Appends `-imported` suffix on conflicts.
257 Rename,
258}
259
260/// Source for [`Job::Export`] — generates a file from vault notes.
261///
262/// `JobClass::Agent | Human`.
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct ExportSource {
265 /// Scope of notes to export.
266 pub scope: VaultScope,
267 /// Optional FTS filter (e.g. `"sections:decisions"`).
268 pub filter: Option<String>,
269 /// Export format.
270 pub format: ExportFormat,
271 /// OpenDAL destination path (e.g. `"exports/decisions-2026-05.pdf"`).
272 pub target: String,
273 /// Markdown template for rendering.
274 pub template: Option<String>,
275}
276
277/// Export format for [`ExportSource`].
278#[derive(Debug, Clone, Serialize, Deserialize)]
279pub enum ExportFormat {
280 /// One row per note — title, locus, trust, date, sections.
281 Csv,
282 /// Markdown rendered to PDF (via pandoc or similar).
283 Pdf,
284 /// Full serialisation of `Document` + frontmatter.
285 Json,
286 /// All notes concatenated into a single `.md` file.
287 Markdown,
288 /// Archive of raw `.md` files.
289 Zip,
290}
291
292/// Source for [`Job::Notify`] — cascade external notification.
293///
294/// `JobClass::System` — triggered via `await_jobs` `OnDone`/`OnFailed`.
295#[derive(Debug, Clone, Serialize, Deserialize)]
296pub struct NotifySource {
297 /// Notification channel.
298 pub channel: NotifyChannel,
299 /// Message template with variables (e.g. `"Job {{job_kind}} done: {{notes_created}} notes"`).
300 pub template: String,
301 /// Job whose completion is being notified.
302 pub job_ref: Option<Ulid>,
303}
304
305/// Notification channel for [`NotifySource`].
306#[derive(Debug, Clone, Serialize, Deserialize)]
307pub enum NotifyChannel {
308 /// Telegram notification.
309 Telegram {
310 /// Telegram chat identifier.
311 chat_id: String,
312 },
313 /// Slack notification via webhook.
314 Slack {
315 /// Slack webhook URL.
316 webhook_url: String,
317 },
318 /// Generic HTTP webhook notification.
319 Webhook {
320 /// Webhook URL.
321 url: String,
322 /// HTTP method (`POST` recommended).
323 method: String,
324 },
325 /// NATS publication.
326 Nats {
327 /// Target NATS subject.
328 subject: String,
329 },
330 /// Email notification.
331 Email {
332 /// Recipient email address.
333 to: String,
334 },
335}
336
337/// Source for [`Job::Ingest`] — document ingestion via queue.
338///
339/// `JobClass::Agent | Human`. Replaces `gradatum-admin vault import --file`
340/// for large corpora (with progress tracking and retry support).
341#[derive(Debug, Clone, Serialize, Deserialize)]
342pub struct IngestSource {
343 /// Input source to ingest.
344 pub source: IngestInputSource,
345 /// Destination vault.
346 pub vault: String,
347 /// Destination locus (e.g. `"rag/"`).
348 pub locus: String,
349 /// Ingestion strategy.
350 pub strategy: IngestStrategy,
351 /// Simulate without writing — legitimate exception (potentially large
352 /// operation; human validation recommended first).
353 pub dry_run: bool,
354}
355
356/// Input source for [`IngestSource`].
357#[derive(Debug, Clone, Serialize, Deserialize)]
358pub enum IngestInputSource {
359 /// OpenDAL file path.
360 File {
361 /// File path.
362 path: String,
363 },
364 /// URL to fetch.
365 Url {
366 /// URL to ingest.
367 url: String,
368 },
369 /// Batch of URLs.
370 Urls {
371 /// List of URLs to ingest.
372 urls: Vec<String>,
373 },
374 /// Directory of files already on disk.
375 Locus {
376 /// Directory path.
377 path: String,
378 },
379}
380
381/// Ingestion strategy for [`IngestSource`].
382#[derive(Debug, Clone, Serialize, Deserialize)]
383pub enum IngestStrategy {
384 /// Auto-detects between `StructureGuided` and `SlidingWindow`.
385 Auto,
386 /// Skeleton tree + structure-guided chunking.
387 ForceStructured,
388 /// Sliding window even when headings are present.
389 ForceSlidingWindow,
390}
391
392// ─────────────────────────────────────────────────────────────────────────────
393// PurgeSpec + PurgeMode — Job::Purge (F-32C)
394// ─────────────────────────────────────────────────────────────────────────────
395
396/// Purge mode for [`PurgeSpec`].
397///
398/// Only `Lifecycle` is implemented: removes `Garbage` notes whose age in the
399/// `Garbage` state (via `status_changed`) exceeds `grace_days`.
400#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
401#[non_exhaustive]
402pub enum PurgeMode {
403 /// Removes notes with `status=Garbage` that have exceeded the grace period.
404 ///
405 /// Age is measured via the `status_changed` column (updated by `update_status`).
406 Lifecycle,
407}
408
409/// Specification for a purge job (`Job::Purge`).
410///
411/// ## Dry-run first
412///
413/// `dry_run = true` is the safe default. In dry-run mode the handler lists
414/// eligible notes and returns [`JobOutput::dry_run`] **without deleting anything**.
415///
416/// The real purge (`dry_run = false`) must be triggered explicitly.
417///
418/// ## Grace period
419///
420/// `grace_days`: minimum time a note must have been in the `Garbage` state before
421/// deletion. Measured via the `status_changed` column of the SQLite index.
422/// Default: `Some(30)`. `None` = deletes all `Garbage` notes immediately
423/// (dangerous — expert CLI use only).
424///
425/// ## Cron schedule
426///
427/// The nightly purge cron schedule is INTENTIONALLY disabled (see config-full.toml).
428/// Enabling it is a separate operator decision (prerequisite: a nightly backup
429/// strategy must be configured).
430#[derive(Debug, Clone, Serialize, Deserialize)]
431pub struct PurgeSpec {
432 /// Purge mode — only `Lifecycle` is implemented.
433 pub mode: PurgeMode,
434 /// Simulate without writing — default `true` (safe).
435 ///
436 /// This is a legitimate exception to the single `JobMode::DryRun` rule:
437 /// purge is an irreversible, high-impact operation (permanent deletion).
438 /// The double mechanism (`spec.dry_run` + `JobMode::DryRun`) ensures no
439 /// write occurs if either flag is active.
440 #[serde(default = "PurgeSpec::default_dry_run")]
441 pub dry_run: bool,
442 /// Minimum time in the `Garbage` state before deletion (days).
443 ///
444 /// Default: `Some(30)`. `None` = no grace period (expert CLI use only).
445 #[serde(default = "PurgeSpec::default_grace_days")]
446 pub grace_days: Option<u32>,
447}
448
449impl PurgeSpec {
450 fn default_dry_run() -> bool {
451 true
452 }
453
454 fn default_grace_days() -> Option<u32> {
455 Some(30)
456 }
457}
458
459impl Default for PurgeSpec {
460 fn default() -> Self {
461 Self {
462 mode: PurgeMode::Lifecycle,
463 dry_run: true,
464 grace_days: Some(30),
465 }
466 }
467}
468
469// ─────────────────────────────────────────────────────────────────────────────
470// ForgetSpec + ForgetScope — Job::Forget (F-44)
471// ─────────────────────────────────────────────────────────────────────────────
472
473/// Target resolution scope for a forget job (`Job::Forget`).
474///
475/// Three targeting modes:
476/// - `Topic`: FTS resolution (full-text search) within an optional vault.
477/// - `Locus`: locus prefix (vault directory) — LIKE-escaped by the worker.
478/// - `Agent`: all notes from a given agent in the specified vaults.
479#[derive(Debug, Clone, Serialize, Deserialize)]
480#[non_exhaustive]
481pub enum ForgetScope {
482 /// FTS-based resolution — notes matching `query` are targeted.
483 ///
484 /// `vault`: optional tenant (`None` → default vault `"main"`).
485 /// `limit`: safe cap on the number of targeted notes (default 50, max 200).
486 Topic {
487 /// FTS query (e.g. `"secrets api-key"`).
488 query: String,
489 /// Target tenant (optional — `None` = `"main"`).
490 #[serde(default, skip_serializing_if = "Option::is_none")]
491 vault: Option<String>,
492 /// Cap on the number of targeted results (default 50).
493 #[serde(default, skip_serializing_if = "Option::is_none")]
494 limit: Option<usize>,
495 },
496 /// Locus prefix resolution.
497 ///
498 /// `locus` is LIKE-escaped by the worker. Examples: `"inbox/"`, `"rag/corpus-x/"`.
499 Locus {
500 /// Target vault.
501 vault: String,
502 /// Locus prefix (e.g. `"inbox/old/"`) — matches all notes whose locus
503 /// starts with this value.
504 locus: String,
505 },
506 /// Agent-based resolution — all notes from `agent_id` in the specified vaults.
507 ///
508 /// Empty `vaults` → `"main"` vault only.
509 Agent {
510 /// Agent identifier (`author_id` column).
511 agent_id: String,
512 /// Target vault list (`[]` → `["main"]`).
513 #[serde(default)]
514 vaults: Vec<String>,
515 },
516}
517
518/// Specification for a forget job (`Job::Forget`).
519///
520/// ## Dry-run first
521///
522/// `dry_run = true` is the default. In dry-run mode the handler lists target
523/// ULIDs + exclusions (protected sections) and returns [`JobOutput::dry_run`]
524/// **without any mutation**.
525///
526/// The real mutation (`dry_run = false`) additionally requires `confirm_ulids`
527/// matching exactly the ULIDs from the preview (double confirmation).
528///
529/// ## Protected sections
530///
531/// Notes in the `AgentIssues` and `Council` sections are **always excluded**
532/// from the batch, regardless of scope. They are reported in the preview without
533/// blocking the job.
534///
535/// ## Non-destructive
536///
537/// Forget updates the YAML frontmatter (`forgotten=true`, `forgotten_at`,
538/// `forgotten_by`) via the normal write path (traced CoW). No physical deletion.
539/// Physical purge is reserved for `Job::Purge`.
540///
541/// ## Idempotence
542///
543/// A double forget is idempotent: `mark_forgotten` updates the timestamp if
544/// the note is already forgotten — no error.
545#[derive(Debug, Clone, Serialize, Deserialize)]
546pub struct ForgetSpec {
547 /// Resolution scope — determines the target notes.
548 pub scope: ForgetScope,
549 /// Simulate without mutation — default `true` (safe).
550 ///
551 /// Legitimate exception to the single `JobMode::DryRun` rule: forget has a
552 /// persistent effect on scoring. The double mechanism (`spec.dry_run` +
553 /// `JobMode::DryRun`) ensures no mutation if either flag is active.
554 #[serde(default = "ForgetSpec::default_dry_run")]
555 pub dry_run: bool,
556 /// Actor who triggered the forget (for auditability — `forgotten_by`).
557 #[serde(default, skip_serializing_if = "Option::is_none")]
558 pub forgotten_by: Option<String>,
559 /// ULIDs confirmed from a prior preview (double confirmation).
560 ///
561 /// In real mode (`dry_run = false`): must match exactly the ULIDs returned by
562 /// the preview. Mismatch → job error, no mutation. `None` is allowed only in
563 /// dry-run mode.
564 #[serde(default, skip_serializing_if = "Vec::is_empty")]
565 pub confirm_ulids: Vec<String>,
566}
567
568impl ForgetSpec {
569 fn default_dry_run() -> bool {
570 true
571 }
572}
573
574impl Default for ForgetSpec {
575 fn default() -> Self {
576 Self {
577 scope: ForgetScope::Topic {
578 query: String::new(),
579 vault: None,
580 limit: Some(50),
581 },
582 dry_run: true,
583 forgotten_by: None,
584 confirm_ulids: vec![],
585 }
586 }
587}
588
589// ─────────────────────────────────────────────────────────────────────────────
590// DistillSource + DistillMode — Job::Distill (F-22)
591// ─────────────────────────────────────────────────────────────────────────────
592
593/// Distillation mode for [`DistillSource`].
594///
595/// Only `Semantic` is implemented: cosine clustering of non-`processed` notes
596/// in the scope, followed by per-cluster synthesis into a `PendingReview` note.
597///
598/// `Learn`/`Peer`/`Rationale` modes require a complete event-log (threshold
599/// ≥ 100 events) and are intentionally absent here (YAGNI). The
600/// `#[non_exhaustive]` marker allows their future addition without breaking
601/// exhaustive matching in external consumers.
602#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
603#[non_exhaustive]
604pub enum DistillMode {
605 /// Semantic distillation: cosine clustering → per-cluster synthesis.
606 Semantic,
607}
608
609/// Specification for a distillation job (`Job::Distill`).
610///
611/// ## Dry-run first
612///
613/// Distillation writes LLM-generated content to the vault. Like `Purge`/`Forget`,
614/// `JobMode::DryRun` (in `JobSpec`) lists candidate clusters **without any
615/// mutation**. The real synthesis requires `JobMode::Batch`.
616///
617/// ## Scope required in real mode
618///
619/// Clustering is O(n²) bounded by `batch_limit`. In real mode the scope must
620/// be `Locus` or `Notes` — `JobScope::VaultWide` is **rejected outside dry-run**
621/// by the handler (combinatorial explosion mitigation).
622///
623/// ## Cron schedule
624///
625/// The distillation cron schedule is INTENTIONALLY disabled (see config-full.toml).
626/// Enabling it is a separate operator decision (automated LLM writes to the vault).
627#[derive(Debug, Clone, Serialize, Deserialize)]
628pub struct DistillSource {
629 /// Distillation mode — only `Semantic` is implemented.
630 #[serde(default = "DistillSource::default_mode")]
631 pub mode: DistillMode,
632 /// Scope of candidate notes to distill.
633 ///
634 /// In real mode: must target a `Locus` or a set of `Notes`
635 /// (`VaultWide` is rejected outside dry-run).
636 pub scope: JobScope,
637 /// Optional time window — consider only notes created/modified within this
638 /// duration. `None` = no time filter.
639 #[serde(default, skip_serializing_if = "Option::is_none")]
640 pub window: Option<Duration>,
641 /// Maximum number of notes considered per run — bounds the O(n²) clustering.
642 ///
643 /// Default: `500`. Above this limit the scope should be narrowed.
644 #[serde(default = "DistillSource::default_batch_limit")]
645 pub batch_limit: usize,
646 /// Cosine similarity threshold for grouping two notes into a cluster.
647 ///
648 /// Default: `0.75`. A note pair with cosine ≥ this threshold is connected
649 /// (connected components → clusters).
650 #[serde(default = "DistillSource::default_confidence_threshold")]
651 pub confidence_threshold: f32,
652 /// Minimum QA events required before distillation (future `Learn`+ modes).
653 ///
654 /// `None` in `Semantic` mode (unused). Reserved for future use without
655 /// breaking serialisation (`skip_serializing_if`).
656 #[serde(default, skip_serializing_if = "Option::is_none")]
657 pub min_qa_events: Option<u32>,
658}
659
660impl DistillSource {
661 fn default_mode() -> DistillMode {
662 DistillMode::Semantic
663 }
664
665 fn default_batch_limit() -> usize {
666 500
667 }
668
669 fn default_confidence_threshold() -> f32 {
670 0.75
671 }
672}
673
674impl Default for DistillSource {
675 fn default() -> Self {
676 Self {
677 mode: DistillMode::Semantic,
678 scope: JobScope::VaultWide,
679 window: None,
680 batch_limit: 500,
681 confidence_threshold: 0.75,
682 min_qa_events: None,
683 }
684 }
685}
686
687// ─────────────────────────────────────────────────────────────────────────────
// job_kind_str — routing helper
689// ─────────────────────────────────────────────────────────────────────────────
690
691/// Returns the name of the [`Job`] variant as a static string.
692///
693/// Used to denormalise the `kind` column in `gradatum_jobs` at enqueue time
694/// and to filter by `kind` in [`QueueStore::dequeue_by_kind`].
695///
696/// # Exhaustiveness
697///
698/// The match has no `_ =>` arm so that adding a new [`Job`] variant produces a
699/// compile error rather than a silently incorrect routing.
700///
701/// # JSON correspondence
702///
703/// The returned value matches the `"type"` key of the payload serialised via
704/// `#[serde(tag = "type", content = "data")]` (e.g. `{"spec":{"kind":{"type":"Curate",...}}}`).
705#[must_use]
706pub fn job_kind_str(job: &Job) -> &'static str {
707 match job {
708 Job::Agent => "Agent",
709 Job::Pipeline => "Pipeline",
710 Job::Collect => "Collect",
711 Job::Distill(_) => "Distill",
712 Job::Backup => "Backup",
713 Job::Purge(_) => "Purge",
714 Job::ReIndex(_) => "ReIndex",
715 Job::Summarize => "Summarize",
716 Job::Validate => "Validate",
717 Job::Audit => "Audit",
718 Job::Consolidate => "Consolidate",
719 Job::Curate(_) => "Curate",
720 Job::Forget(_) => "Forget",
721 Job::Review => "Review",
722 Job::Classify => "Classify",
723 Job::Merge => "Merge",
724 Job::Annotate => "Annotate",
725 Job::Migrate(_) => "Migrate",
726 Job::Export(_) => "Export",
727 Job::Notify(_) => "Notify",
728 Job::Ingest(_) => "Ingest",
729 Job::Embed(_) => "Embed",
730 }
731}
732
733// ─────────────────────────────────────────────────────────────────────────────
// JobSpec — what the job does
735// ─────────────────────────────────────────────────────────────────────────────
736
737/// Functional specification of a job.
738///
739/// Contains the work type, trigger class, execution mode, scope, and priority.
740#[derive(Debug, Clone, Serialize, Deserialize)]
741pub struct JobSpec {
742 /// Job type and payload.
743 pub kind: Job,
744 /// Who triggers the job.
745 pub class: JobClass,
746 /// How the job executes.
747 pub mode: JobMode,
748 /// What the job operates on.
749 pub scope: JobScope,
750 /// Priority in the queue.
751 pub priority: JobPriority,
752}
753
754// ─────────────────────────────────────────────────────────────────────────────
755// JobClass
756// ─────────────────────────────────────────────────────────────────────────────
757
758/// Trigger class of a job.
759///
760/// Determines the default priority and queue routing.
761#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
762pub enum JobClass {
763 /// Autonomous cron — no human actor.
764 System,
765 /// Triggered/executed by an LLM agent.
766 Agent,
767 /// Explicit CLI/studio action.
768 Human,
769 /// External machine call (MCP, third-party).
770 Api,
771}
772
773// ─────────────────────────────────────────────────────────────────────────────
774// JobMode
775// ─────────────────────────────────────────────────────────────────────────────
776
777/// Execution mode of a job.
778#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
779pub enum JobMode {
780 /// Processes N items then stops (default).
781 #[default]
782 Batch,
783 /// Processes continuously until the queue is empty.
784 Streaming,
785 /// Requires back-and-forth with an actor.
786 Interactive,
787 /// Simulates without writing.
788 DryRun,
789}
790
791// ─────────────────────────────────────────────────────────────────────────────
792// JobScope
793// ─────────────────────────────────────────────────────────────────────────────
794
795/// Scope of a job — what the work operates on.
796#[derive(Debug, Clone, Serialize, Deserialize)]
797pub enum JobScope {
798 /// The entire vault.
799 VaultWide,
800 /// A specific locus (vault directory).
801 Locus(String),
802 /// A targeted set of notes.
803 Notes(Vec<Ulid>),
804 /// An agent session (isolated context).
805 Session(Ulid),
806}
807
808// ─────────────────────────────────────────────────────────────────────────────
809// JobPriority
810// ─────────────────────────────────────────────────────────────────────────────
811
812/// Job priority in the queue.
813///
814/// Default mapping:
815/// - `Agent` → `High` (active agent in conversation — response expected)
816/// - `Human` → `High` (explicit human action — response expected)
817/// - `Api` → `Normal` (machine call — acceptable latency)
818/// - `System` → `Low` (background cron task — must not block agents)
819///
820/// Wired in `GradatumQueue.dequeue()` via `ORDER BY priority DESC`.
821#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
822pub enum JobPriority {
823 /// Active agent and human jobs — response expected.
824 High,
825 /// API calls — acceptable latency (default).
826 #[default]
827 Normal,
828 /// System cron — background task, must not block agents.
829 Low,
830 /// Scheduled in the future (e.g. quarterly `Consolidate`).
831 Deferred,
832}
833
834impl JobPriority {
835 /// SQL value for `ORDER BY priority DESC` (High=3 sorts before Low=0).
836 #[must_use]
837 pub fn as_u8(&self) -> u8 {
838 match self {
839 Self::High => 3,
840 Self::Normal => 2,
841 Self::Low => 1,
842 Self::Deferred => 0,
843 }
844 }
845
846 /// Default priority for the given job class.
847 #[must_use]
848 pub fn default_for(class: &JobClass) -> Self {
849 match class {
850 JobClass::Agent => Self::High,
851 JobClass::Human => Self::High,
852 JobClass::Api => Self::Normal,
853 JobClass::System => Self::Low,
854 }
855 }
856}
857
858// ─────────────────────────────────────────────────────────────────────────────
// JobScheduling — when the job runs
860// ─────────────────────────────────────────────────────────────────────────────
861
862/// Scheduling constraints for a job.
863#[derive(Debug, Clone, Serialize, Deserialize)]
864pub struct JobScheduling {
865 /// Trigger source.
866 pub trigger: TriggerSource,
867 /// Scheduled date/time (UTC).
868 pub scheduled_at: DateTime<Utc>,
869 /// Declarative chaining — `[]` = immediate · `[x]` = chain · `[x,y]` = DAG.
870 ///
871 /// Semantics: "trigger me when these jobs complete".
872 /// More robust than a `not_before: DateTime` (which depends on wall-clock durations).
873 pub await_jobs: Vec<JobTrigger>,
874 /// Deadline for `Interactive` jobs — acts as a timeout.
875 pub deadline: Option<DateTime<Utc>>,
876 /// Cron expression (e.g. `"0 2 * * *"`).
877 pub cron_expr: Option<String>,
878}
879
880/// Cascade trigger condition.
881#[derive(Debug, Clone, Serialize, Deserialize)]
882pub struct JobTrigger {
883 /// Identifier of the awaited job.
884 pub job_id: Ulid,
885 /// Trigger condition.
886 pub condition: TriggerCondition,
887}
888
/// Condition on the terminal state of an awaited job.
///
/// Used by [`JobTrigger::condition`] to decide whether a dependency counts as
/// satisfied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TriggerCondition {
    /// Only if `Done` (success).
    OnDone,
    /// `Done | Failed | DLQ` — regardless of outcome.
    ///
    /// NOTE(review): this list differs from [`JobStatus::is_terminal`], which
    /// excludes `Failed` and includes `Cancelled` and `Conflict` — confirm
    /// which set the cascade engine actually applies.
    OnAnyTerminal,
    /// Only if `Failed` (alerting).
    OnFailed,
}
899
/// Trigger source of a job — which mechanism caused it to be created.
///
/// Stored in [`JobScheduling::trigger`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TriggerSource {
    /// `[[worker.schedules]]` — tokio-cron-scheduler.
    Cron,
    /// `[[pipelines]]` step — pipeline_executor.
    Pipeline,
    /// `await_jobs` → `on_job_complete()` → `set_pending()`.
    Cascade,
    /// `WriteHook` or `QaEvent` interceptor.
    OnEvent,
    /// `POST /api/v1/jobs/trigger` · admin CLI · `invoke_agent()`.
    Demand,
}
914
915// ─────────────────────────────────────────────────────────────────────────────
// JobLifecycle — where the job stands
917// ─────────────────────────────────────────────────────────────────────────────
918
/// Current lifecycle state of a job.
///
/// This is block 3 ("WHERE it stands") of a [`JobRecord`]: the status plus the
/// timestamps and result that accompany it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobLifecycle {
    /// Current status.
    pub status: JobStatus,
    /// Creation timestamp (UTC).
    pub created_at: DateTime<Utc>,
    /// Start timestamp (UTC) — `None` if not yet started.
    pub started_at: Option<DateTime<Utc>>,
    /// Completion timestamp (UTC) — `None` if not yet finished.
    pub completed_at: Option<DateTime<Utc>>,
    /// SQLite lease expiry — prevents duplicate execution.
    pub lease_until: Option<DateTime<Utc>>,
    /// Job result — `None` if not yet finished.
    pub result: Option<JobResult>,
}
935
/// Status of a job in its lifecycle.
///
/// See [`JobStatus::is_terminal`] for which of these are final.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum JobStatus {
    /// In the queue, ready to start.
    Pending,
    /// Lease active — currently executing.
    Running,
    /// Awaiting unsatisfied `await_jobs`.
    Waiting,
    /// Completed successfully.
    Done,
    /// Failed, retry possible.
    Failed,
    /// Dead-letter — `max_retries` reached.
    DLQ,
    /// Deadline exceeded or orphaned.
    Cancelled,
    /// Optimistic-lock conflict: write rejected because the provided
    /// `expected_sha256` does not match the note's current hash.
    /// Terminal state without retry — the note was NOT modified.
    /// Read `lifecycle.result.conflict_payload` (see
    /// [`JobResult::conflict_payload`]) to retrieve the `current_sha256`,
    /// encoded as JSON (`{ "current_sha256": "hex...", "attempted_sha256": "hex..." }`).
    Conflict,
}
960
961impl JobStatus {
962 /// Returns `true` if this status is a terminal state — no further transitions
963 /// or retries are possible.
964 ///
965 /// Terminal states: `Done`, `DLQ`, `Cancelled`, `Conflict`.
966 /// Non-terminal states: `Pending`, `Running`, `Waiting`, `Failed`.
967 ///
968 /// This is the single source of truth for terminal state membership,
969 /// used by SQL guards (`status NOT IN (...)`) and Rust logic alike.
970 ///
971 /// # Examples
972 ///
973 /// ```
974 /// use gradatum_core::JobStatus;
975 /// assert!(JobStatus::Done.is_terminal());
976 /// assert!(!JobStatus::Pending.is_terminal());
977 /// ```
978 #[inline]
979 pub fn is_terminal(&self) -> bool {
980 matches!(
981 self,
982 Self::Done | Self::DLQ | Self::Cancelled | Self::Conflict
983 )
984 }
985
986 /// SQL fragment `'Done','DLQ','Cancelled','Conflict'` for use in
987 /// `WHERE status NOT IN (...)` guards.
988 ///
989 /// **Single source of truth** for the terminal set at the SQL level.
990 /// Derived mechanically from `is_terminal` variants — any future change
991 /// to `is_terminal` must be reflected here (test `job_status_sql_fragment_matches_is_terminal`
992 /// guards against divergence).
993 ///
994 /// # Returns
995 ///
996 /// A static `&str` ready for interpolation into a SQL query. The fragment
997 /// does not include the surrounding parentheses.
998 pub const TERMINAL_SQL: &'static str = "'Done','DLQ','Cancelled','Conflict'";
999}
1000
/// Result of a completed job — stored in [`JobLifecycle::result`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobResult {
    /// `true` if the job completed successfully.
    pub success: bool,
    /// Execution duration in milliseconds.
    pub duration_ms: u32,
    /// LLM cost in USD — `None` if no LLM was involved.
    pub cost_usd: Option<f32>,
    /// Result Gradatum note — single entry point for the agent.
    ///
    /// `vault_read(result_note)` → frontmatter + paths + wikilinks to produced notes.
    /// Present when `success=true`. Error note when `DLQ`.
    pub result_note: Option<Ulid>,
    /// Optimistic-lock conflict JSON payload (present when `JobStatus::Conflict`).
    ///
    /// Contains `current_sha256` (current hash) and `attempted_sha256` (attempted hash).
    /// Allows a polling client to retrieve the current hash to resolve the conflict.
    ///
    /// Example payload:
    /// ```json
    /// { "current_sha256": "a3f1...", "attempted_sha256": "b2e0...", "timestamp_ms": 1234567890 }
    /// ```
    ///
    /// `None` for all other statuses.
    ///
    /// Serde: the field is omitted from the output when `None` and defaults
    /// to `None` when absent from the input, so payloads written before this
    /// field existed still deserialise.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub conflict_payload: Option<serde_json::Value>,
}
1029
1030// ─────────────────────────────────────────────────────────────────────────────
// JobWorkspace — physical OpenDAL workspace
1032// ─────────────────────────────────────────────────────────────────────────────
1033
/// Physical job workspace — OpenDAL-backed structure.
///
/// All I/O goes through OpenDAL — same API regardless of the backend (fs/s3/gcs).
///
/// The three fields are plain path strings, each ending with `/`; see
/// [`JobWorkspace::from_job`] for how they are derived.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobWorkspace {
    /// Input path — e.g. `"worker/2026-05-20/01J-XYZ/input/"`.
    pub input: String,
    /// Output path — e.g. `"worker/2026-05-20/01J-XYZ/output/"`.
    pub output: String,
    /// Metadata path — e.g. `"worker/2026-05-20/01J-XYZ/meta/"`.
    pub meta: String,
}
1046
1047impl JobWorkspace {
1048 /// Builds the workspace from a [`JobRecord`].
1049 ///
1050 /// Format: `worker/{YYYY-MM-DD}/{job_id}/{input|output|meta}/`
1051 #[must_use]
1052 pub fn from_job(job: &JobRecord) -> Self {
1053 let date = job.lifecycle.created_at.format("%Y-%m-%d").to_string();
1054 let base = format!("worker/{}/{}", date, job.id);
1055 Self {
1056 input: format!("{}/input/", base),
1057 output: format!("{}/output/", base),
1058 meta: format!("{}/meta/", base),
1059 }
1060 }
1061}
1062
1063// ─────────────────────────────────────────────────────────────────────────────
// JobProgress — progress of a running job
1065// ─────────────────────────────────────────────────────────────────────────────
1066
/// Progress of a running job.
///
/// Persisted to SQLite periodically.
/// `GET /api/v1/jobs/:id/status` → `{ status: "running", progress: { current: 47, total: 200 } }`
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobProgress {
    /// Items processed so far.
    pub current: u32,
    /// Total items (if known).
    ///
    /// NOTE(review): the field is a plain `u32`, so "unknown" has no dedicated
    /// representation here — presumably `0`; confirm against producers.
    pub total: u32,
    /// Description of the current step.
    pub step: String,
    /// Estimated time remaining in seconds — `None` when no estimate is available.
    pub eta_secs: Option<u32>,
}
1082
1083// ─────────────────────────────────────────────────────────────────────────────
1084// JobOutputFile + JobOutput
1085// ─────────────────────────────────────────────────────────────────────────────
1086
/// File produced by a job — stored via OpenDAL in `output/`.
///
/// Describes one entry of [`JobOutput::files`]; only metadata is held here,
/// not the file contents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobOutputFile {
    /// File name (e.g. `"export.csv"` | `"report.pdf"` | `"chart.png"`).
    pub name: String,
    /// MIME type (e.g. `"text/csv"` | `"application/pdf"` | `"image/png"`).
    pub mime_type: String,
    /// Size in bytes.
    pub size: u64,
    /// TTL in days — `None` = locus default (`worker/`=30d, `exports/`=90d).
    pub ttl_days: Option<u32>,
}
1099
/// Complete outputs produced by a job: vault notes touched, files written,
/// and the Markdown body of the result note.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobOutput {
    /// Markdown notes created in the vault.
    pub notes_created: Vec<Ulid>,
    /// Notes modified (Validate/Heal).
    pub notes_modified: Vec<Ulid>,
    /// Binaries/CSV/images in `output/`.
    pub files: Vec<JobOutputFile>,
    /// Markdown content of the result note.
    ///
    /// Written to `output/result.md` and copied to `vault work/jobs/` for `vault_read()`.
    pub result_note_md: String,
}
1114
1115impl JobOutput {
1116 /// Returns a dry-run output for `JobMode::DryRun` — no writes performed.
1117 #[must_use]
1118 pub fn dry_run(would_affect: usize, description: &str) -> Self {
1119 Self {
1120 notes_created: vec![],
1121 notes_modified: vec![],
1122 files: vec![],
1123 result_note_md: format!(
1124 "## DRY-RUN — {description}\n\n\
1125 **Simulation uniquement — aucune écriture effectuée.**\n\n\
1126 Notes qui auraient été affectées : {would_affect}\n",
1127 ),
1128 }
1129 }
1130}
1131
1132// ─────────────────────────────────────────────────────────────────────────────
// JobRetry — how the job recovers
1134// ─────────────────────────────────────────────────────────────────────────────
1135
/// Retry policy for a job, together with the record of attempts made so far.
///
/// This is block 4 ("HOW it recovers") of a [`JobRecord`]. The `Default`
/// value is: no attempts yet, `max = 3`, exponential backoff from 5 s capped
/// at 120 s, and no recorded errors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobRetry {
    /// Attempts made so far.
    pub count: u32,
    /// Maximum number of attempts — `0` = no retry.
    pub max: u32,
    /// Backoff strategy.
    pub backoff: RetryBackoff,
    /// Last recorded error.
    pub last_error: Option<String>,
    /// Full error history.
    pub errors: Vec<JobError>,
}
1150
1151impl Default for JobRetry {
1152 fn default() -> Self {
1153 Self {
1154 count: 0,
1155 max: 3,
1156 backoff: RetryBackoff::Exponential { base: 5, max: 120 },
1157 last_error: None,
1158 errors: vec![],
1159 }
1160 }
1161}
1162
/// Individual error recorded during an attempt — one entry of
/// [`JobRetry::errors`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobError {
    /// Error timestamp (UTC).
    pub at: DateTime<Utc>,
    /// Error message.
    pub message: String,
    /// Attempt number.
    pub attempt: u32,
}
1173
/// Backoff strategy between retry attempts.
///
/// Use [`RetryBackoff::duration_for`] to turn a strategy and an attempt
/// number into a concrete wait.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RetryBackoff {
    /// Fixed N seconds between each attempt.
    Fixed(u64),
    /// Exponential backoff: `base → 2×base → ... → max` seconds.
    Exponential {
        /// Base delay in seconds.
        base: u64,
        /// Maximum delay in seconds.
        max: u64,
    },
}
1187
1188impl RetryBackoff {
1189 /// Computes the wait duration for attempt `attempt` (0-indexed).
1190 ///
1191 /// For `Fixed(n)`: always `n` seconds.
1192 /// For `Exponential { base, max }`: `min(base * 2^attempt, max)` seconds.
1193 #[must_use]
1194 pub fn duration_for(&self, attempt: u32) -> Duration {
1195 match self {
1196 Self::Fixed(secs) => Duration::from_secs(*secs),
1197 Self::Exponential { base, max } => {
1198 let secs = base.saturating_mul(1_u64 << attempt.min(62));
1199 Duration::from_secs(secs.min(*max))
1200 }
1201 }
1202 }
1203}
1204
1205// ─────────────────────────────────────────────────────────────────────────────
// JobLineage — where the job comes from
1207// ─────────────────────────────────────────────────────────────────────────────
1208
/// Traceability of the job's emitter and trigger context.
///
/// This is block 5 ("WHERE it comes from / links") of a [`JobRecord`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobLineage {
    /// `agent_id` | `user_id` | `cron_id` — `None` if not traced.
    pub triggered_by: Option<String>,
    /// Parent job if this is a child job (cascade, agent spawn).
    pub parent_job: Option<Ulid>,
    /// Pipeline identifier if this is a `[[pipelines]]` step.
    pub pipeline_id: Option<Ulid>,
    /// Step name within the pipeline.
    pub pipeline_step: Option<String>,
    /// Jobs created by this job (outgoing cascade).
    pub children: Vec<Ulid>,
    /// Cumulative LLM cost in USD.
    pub cost_usd: Option<f32>,
}
1225
1226// ─────────────────────────────────────────────────────────────────────────────
// JobRecord — complete 5-block envelope
1228// ─────────────────────────────────────────────────────────────────────────────
1229
/// Complete job envelope structured as 5 orthogonal blocks.
///
/// `JobRecord` is the canonical L0 type circulating across the entire job layer.
/// Serialised as JSON by the `QueueStore` for persistence in SQLite.
///
/// # The 5 blocks
///
/// 1. [`JobSpec`] — WHAT the job does
/// 2. [`JobScheduling`] — WHEN it executes
/// 3. [`JobLifecycle`] — WHERE it stands
/// 4. [`JobRetry`] — HOW it recovers
/// 5. [`JobLineage`] — WHERE it comes from / links
///
/// The `id` sits alongside the blocks and identifies the record as a whole.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobRecord {
    /// Unique job identifier (monotonic ULID, implicit FIFO order).
    pub id: Ulid,
    /// Block 1 — WHAT the job does.
    pub spec: JobSpec,
    /// Block 2 — WHEN it executes.
    pub scheduling: JobScheduling,
    /// Block 3 — WHERE it stands.
    pub lifecycle: JobLifecycle,
    /// Block 4 — HOW it recovers.
    pub retry: JobRetry,
    /// Block 5 — WHERE it comes from / links.
    pub lineage: JobLineage,
}
1257
1258// ─────────────────────────────────────────────────────────────────────────────
1259// JobFilter — introspection F-16
1260// ─────────────────────────────────────────────────────────────────────────────
1261
/// Sort order for [`QueueStore::list`].
///
/// Sorting is performed on the ULID `id`, which is monotonic and therefore
/// equivalent to creation order.
///
/// `CreatedAsc` is the **default** (marked `#[default]`): it strictly
/// preserves the historical `list()` behaviour (`id ASC`). The studio jobs
/// page passes `CreatedDesc` explicitly to display the most recent jobs first.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum JobOrder {
    /// `id ASC` — oldest first (default, historical behaviour).
    #[default]
    CreatedAsc,
    /// `id DESC` — most recent first.
    CreatedDesc,
}
1278
/// Filter for [`QueueStore::list`].
///
/// The `cursor` field enables cursor-based pagination. `cursor` is the last
/// returned ULID `id`; the comparison direction depends on [`JobFilter::order`]:
/// - [`JobOrder::CreatedAsc`] (default): `id > cursor ORDER BY id ASC` (historical behaviour).
/// - [`JobOrder::CreatedDesc`]: `id < cursor ORDER BY id DESC` (most recent page first).
///
/// Since ULID is monotonic, comparing `id` is equivalent to temporal order.
///
/// # Serialisation stability
///
/// `order` and `created_before` are additive fields with `#[serde(default)]`:
/// a pre-existing `JobFilter` JSON (without these fields) deserialises with
/// `order = CreatedAsc` and `created_before = None` — identical to historical behaviour.
///
/// `limit` carries no `#[serde(default)]`, so it must be present in a
/// serialised filter; the value `50` comes only from the `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobFilter {
    /// Filter by job class.
    pub class: Option<JobClass>,
    /// Filter by status.
    pub status: Option<JobStatus>,
    /// Filter by kind (name of the `Job` variant).
    pub kind: Option<String>,
    /// Filter jobs created strictly after this date (exclusive lower bound).
    pub created_after: Option<DateTime<Utc>>,
    /// Filter jobs created strictly before this date (exclusive upper bound).
    ///
    /// Combined with `created_after`, isolates a time range (e.g. a single day).
    #[serde(default)]
    pub created_before: Option<DateTime<Utc>>,
    /// Sort order (default: [`JobOrder::CreatedAsc`] — historical behaviour).
    #[serde(default)]
    pub order: JobOrder,
    /// Maximum number of results (default: 50, max: 500).
    ///
    /// NOTE(review): the 500 cap is not enforced by this type — presumably
    /// applied by the store or the API layer; verify against implementations.
    pub limit: usize,
    /// Pagination cursor — last returned ULID `id` (exclusive).
    ///
    /// `None` = start of list. `Some(ulid)` = jobs after (ASC) or before (DESC) this ULID.
    /// Use `next_cursor` from the previous API response.
    pub cursor: Option<Ulid>,
}
1319
1320impl Default for JobFilter {
1321 fn default() -> Self {
1322 Self {
1323 class: None,
1324 status: None,
1325 kind: None,
1326 created_after: None,
1327 created_before: None,
1328 order: JobOrder::CreatedAsc,
1329 limit: 50,
1330 cursor: None,
1331 }
1332 }
1333}
1334
1335// ─────────────────────────────────────────────────────────────────────────────
// QueueEvent — events published by the backend
1337// ─────────────────────────────────────────────────────────────────────────────
1338
/// Events published by the `QueueStore` via broadcast.
///
/// Consumed by:
/// - SSE endpoint `GET /api/v1/jobs/events`
/// - Cascade engine (`find_awaiting` + `set_pending`)
/// - Real-time monitoring dashboard
///
/// Every variant carries the `Ulid` of the job it concerns as its first field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QueueEvent {
    /// New job inserted — `Pending` or `Waiting`.
    JobInserted(Ulid),
    /// Job completed — `Done` or `DLQ`. Carries the job id, its final status,
    /// and its result.
    JobCompleted(Ulid, JobStatus, JobResult),
    /// Job failed — `Failed` + attempt number.
    JobFailed(Ulid, u32),
    /// Job transitioned `Waiting → Pending` (cascade satisfied).
    JobReady(Ulid),
    /// Job cancelled — deadline or orphaned.
    JobCancelled(Ulid),
}
1358
1359// ─────────────────────────────────────────────────────────────────────────────
// GradatumJob — Apalis payload
1361// ─────────────────────────────────────────────────────────────────────────────
1362
/// Apalis payload wrapping a [`JobRecord`].
///
/// Serialised as JSON in the `job` column of the Apalis table. The `priority`
/// field duplicates `spec.priority.as_u8()` to allow `ORDER BY priority DESC`
/// without deserialising the payload.
///
/// The type itself does not keep the two in sync: whoever builds a
/// `GradatumJob` is responsible for setting `priority` consistently.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GradatumJob {
    /// Complete job envelope.
    pub record: JobRecord,
    /// Denormalised priority value for SQL sorting (0-3).
    pub priority: u8,
}
1375
1376// ─────────────────────────────────────────────────────────────────────────────
1377// DryRunAware trait
1378// ─────────────────────────────────────────────────────────────────────────────
1379
/// Trait for jobs and handlers that support `DryRun` mode.
///
/// The single-mechanism rule: only [`JobMode::DryRun`] in [`JobSpec`] controls
/// dry-run. `Source` structs do NOT carry a `dry_run` field, except for
/// legitimate exceptions (`MigrateSource.dry_run`, `IngestSource.dry_run`) for
/// irreversible operations requiring human validation first.
///
/// In ALL handlers, the check is the first instruction:
///
/// ```rust,ignore
/// if job.spec.mode == JobMode::DryRun {
///     let count = ctx.vault.count(&src.scopes).await?;
///     return Ok(JobOutput::dry_run(count, "description"));
/// }
/// ```
pub trait DryRunAware {
    /// Returns `true` if this job is in dry-run mode.
    fn is_dry_run(&self) -> bool;

    /// Number of notes that would be affected (estimate).
    ///
    /// Returns `0` by default — overridden by implementations that can compute
    /// the value without side effects. A `0` from the default therefore means
    /// "not estimated", not necessarily "nothing affected".
    fn notes_would_affect(&self) -> usize {
        0
    }
}
1407
1408impl DryRunAware for JobRecord {
1409 fn is_dry_run(&self) -> bool {
1410 self.spec.mode == JobMode::DryRun
1411 }
1412}
1413
1414// ─────────────────────────────────────────────────────────────────────────────
// JobSource trait — shared-field factoring (v63)
1416// ─────────────────────────────────────────────────────────────────────────────
1417
/// Common trait for `Job` variant source structs.
///
/// Factorises fields common to multiple `Source` structs. Rust does not support
/// struct inheritance — a trait is preferred over embedding.
pub trait JobSource {
    /// Vault scopes the job operates on.
    fn scopes(&self) -> &[VaultScope];

    /// `true` if the job is in simulation mode (no writes).
    fn dry_run(&self) -> bool;

    /// Optional time window (notes created/modified within this duration).
    ///
    /// Defaults to `None`; sources that support a window override this.
    fn window(&self) -> Option<Duration> {
        None
    }
}
1434
1435// ─────────────────────────────────────────────────────────────────────────────
1436// QueueStore trait — L0
1437// ─────────────────────────────────────────────────────────────────────────────
1438
/// Job queue storage contract — L0 `gradatum-core`.
///
/// Implementations:
/// - `SqliteQueueStore` in `gradatum-db-sqlite` (default, embedded)
/// - `LibsqlQueueStore` in `gradatum-db-sqlite` (remote, opt-in)
///
/// # Errors
///
/// All fallible methods return `Result<_, QueueError>` (only `subscribe` is
/// infallible). Implementations must not panic — propagate errors via `?`.
///
/// # Default implementations
///
/// Several methods ship with a default body that is a no-op or a simplified
/// fallback (`dequeue_by_kind`, `count_jobs_by_status`, `delete_dlq_jobs`,
/// `count_dlq_jobs`, `promote_stranded_waiting_jobs`, `latest_job`,
/// `mark_conflict`). Each one documents what the default does.
#[async_trait::async_trait]
pub trait QueueStore: Send + Sync {
    // ── Basic operations ──────────────────────────────────────────────────

    /// Inserts a new job into the queue — returns its `Ulid`.
    async fn enqueue(&self, job: JobRecord) -> Result<Ulid, QueueError>;

    /// Dequeues the next ready job (atomic lease).
    ///
    /// Returns `None` if the queue is empty or no job is ready.
    async fn dequeue(&self) -> Result<Option<JobRecord>, QueueError>;

    /// Dequeues the next ready job filtered by `kind` (atomic lease).
    ///
    /// Guarantees that a `curate` worker never receives an `Embed` or `ReIndex`
    /// job, eliminating the routing race condition (DLQ `UnexpectedVariant` bug).
    ///
    /// # Default implementation
    ///
    /// Unfiltered fallback: the default ignores `kind` and simply calls
    /// [`QueueStore::dequeue`], so the routing guarantee above does **not**
    /// hold for stores that rely on it. Implementations with native SQL
    /// filtering (e.g. `SqliteQueueStore`) should override with
    /// `WHERE kind = ?` to exploit the `idx_jobs_status_kind` index.
    ///
    /// # Parameter
    ///
    /// `kind`: name of the `Job` variant as returned by [`job_kind_str`] —
    /// e.g. `"Curate"`, `"Embed"`, `"ReIndex"`.
    async fn dequeue_by_kind(&self, _kind: &str) -> Result<Option<JobRecord>, QueueError> {
        self.dequeue().await
    }

    /// Retrieves a job by identifier — `None` if not found.
    async fn get(&self, id: Ulid) -> Result<Option<JobRecord>, QueueError>;

    /// Marks a job as `Done` with its result.
    async fn complete(&self, id: Ulid, result: JobResult) -> Result<(), QueueError>;

    /// Marks a job as `Failed` (retry possible per policy).
    async fn fail(&self, id: Ulid, err: &str, attempt: u32) -> Result<(), QueueError>;

    /// Cancels a job (`Cancelled`).
    async fn cancel(&self, id: Ulid) -> Result<(), QueueError>;

    /// Sends a job to the dead-letter queue (`DLQ`) — max retries reached.
    async fn fail_dlq(&self, id: Ulid, err: &str) -> Result<(), QueueError>;

    // ── Cascade — await_jobs chaining ────────────────────────────────────

    /// Finds jobs in `Waiting` whose `await_jobs` list contains `job_id`.
    async fn find_awaiting(&self, job_id: Ulid) -> Result<Vec<JobRecord>, QueueError>;

    /// Transitions a job from `Waiting` to `Pending`.
    async fn set_pending(&self, id: Ulid) -> Result<(), QueueError>;

    // ── Periodic sweep ────────────────────────────────────────────────────

    /// Recovers jobs with expired leases → resets them to `Pending`.
    async fn recover_stale_leases(&self, ttl: Duration) -> Result<Vec<Ulid>, QueueError>;

    /// Cancels jobs whose deadline has passed.
    async fn cancel_expired_deadlines(&self, now: DateTime<Utc>) -> Result<Vec<Ulid>, QueueError>;

    /// Promotes retry-scheduled jobs with `scheduled_at <= now` → `Pending`.
    ///
    /// Guard: if `retry.count >= retry.max` → `fail_dlq` instead of re-`Pending`.
    /// Prevents infinite loops (`Failed → schedule → Failed → ...`).
    async fn promote_retries(&self, now: DateTime<Utc>) -> Result<Vec<Ulid>, QueueError>;

    /// Schedules a job for retry at `at` (transition `Failed → Waiting`).
    async fn schedule_retry(&self, id: Ulid, at: DateTime<Utc>) -> Result<(), QueueError>;

    // ── Introspection ────────────────────────────────────────────────────

    /// Lists jobs matching a filter.
    async fn list(&self, filter: JobFilter) -> Result<Vec<JobRecord>, QueueError>;

    /// Counts jobs grouped by status (`GROUP BY status`).
    ///
    /// Includes all present statuses (including DLQ). Statuses with zero count
    /// may be absent from the map (callers treat absence as `0`).
    ///
    /// # Default implementation
    ///
    /// Returns an empty map. Database-backed stores (e.g. `SqliteQueueStore`)
    /// override with a native `GROUP BY status` (single query).
    async fn count_jobs_by_status(
        &self,
    ) -> Result<std::collections::HashMap<JobStatus, u64>, QueueError> {
        Ok(std::collections::HashMap::new())
    }

    /// Permanently deletes jobs in the Dead Letter Queue.
    ///
    /// Destructive operation: deleted DLQ entries are NOT recoverable (unlike
    /// `--replay`, which moves them back to `Pending`). Reserved for
    /// `gradatum-admin jobs dlq --prune`.
    ///
    /// # Parameters
    ///
    /// - `older_than`: if `Some(cutoff)`, only deletes DLQ jobs created before
    ///   `cutoff`. If `None`, deletes all DLQ jobs.
    ///
    /// # Returns
    ///
    /// The number of jobs actually deleted.
    ///
    /// # Default implementation
    ///
    /// Returns `Ok(0)` (no-op). Database-backed stores (e.g. `SqliteQueueStore`)
    /// override with a native `DELETE FROM ... WHERE status = 'DLQ'`. Mocks and
    /// in-memory stores inherit the no-op.
    #[must_use = "le nombre de jobs DLQ supprimés doit être consommé (compte rendu prune)"]
    async fn delete_dlq_jobs(&self, _older_than: Option<DateTime<Utc>>) -> Result<u64, QueueError> {
        Ok(0)
    }

    /// Counts DLQ jobs that would be deleted by `delete_dlq_jobs` with the
    /// same `older_than` — faithful dry-run of the prune operation.
    ///
    /// # Parameters
    ///
    /// - `older_than`: must be **identical** to the value passed to
    ///   `delete_dlq_jobs` so that the count matches the DELETE exactly
    ///   (same WHERE clause).
    ///
    /// # Returns
    ///
    /// The exact number of targeted DLQ jobs (dedicated `COUNT(*)`, no `LIMIT`).
    ///
    /// # Motivation
    ///
    /// The original dry-run counted via `list(limit: 200)` — producing a false
    /// count when DLQ > 200, and an erroneous early-return "nothing to prune" when
    /// the matching jobs fell outside the first 200. A `COUNT(*)` with the **same**
    /// WHERE clause as the DELETE eliminates both bugs.
    ///
    /// # Default implementation
    ///
    /// Returns `Ok(0)`. Database-backed stores override.
    #[must_use = "le compte DLQ ciblé doit être consommé (dry-run prune)"]
    async fn count_dlq_jobs(&self, _older_than: Option<DateTime<Utc>>) -> Result<u64, QueueError> {
        Ok(0)
    }

    /// DAG catch-up: promotes `Waiting` jobs whose dependencies are all `Done`
    /// but whose post-commit cascade was never executed.
    ///
    /// Called by the periodic sweep (`run_sweep_once`) as a safety net. This is
    /// NOT the nominal path — the nominal path is `cascade_check_and_promote`,
    /// called immediately after `complete()`.
    ///
    /// # Returns
    ///
    /// The number of `Waiting` jobs promoted to `Pending`.
    ///
    /// # Errors
    ///
    /// Returns `QueueError::Storage` when the database cannot be accessed.
    ///
    /// # Default implementation
    ///
    /// Returns `Ok(0)` (no-op). `SqliteQueueStore` overrides with a real scan
    /// of `gradatum_jobs WHERE status = 'Waiting' AND await_jobs IS NOT NULL AND await_jobs != '[]'`.
    /// Mocks and in-memory stores inherit the no-op — the sweep becomes inert for them,
    /// which is the correct behavior in test environments.
    async fn promote_stranded_waiting_jobs(&self) -> Result<u32, QueueError> {
        Ok(0)
    }

    /// Returns the **most recent** job (last inserted), or `None` if the queue
    /// is empty.
    ///
    /// The dashboard shows a "last job": the expected semantics is the most
    /// recently inserted job. `list()` orders by `id ASC` (cursor pagination)
    /// and would return the *oldest* — hence this dedicated method with
    /// `ORDER BY id DESC`. Since ULID `id` is monotonic, lexicographic order
    /// on `id` is equivalent to creation order.
    ///
    /// The `tenant` parameter allows future multi-tenant filtering. Single-tenant
    /// stores (e.g. `SqliteQueueStore`, whose `gradatum_jobs` table has no tenant
    /// column) ignore it and document that fact.
    ///
    /// # Default implementation
    ///
    /// Returns `None` (queue considered empty). Database-backed stores override
    /// with a native `ORDER BY id DESC LIMIT 1`.
    #[must_use = "le dernier job retourné doit être consommé ou explicitement ignoré"]
    async fn latest_job(&self, _tenant: &str) -> Result<Option<JobRecord>, QueueError> {
        Ok(None)
    }

    // ── Events ────────────────────────────────────────────────────────────

    /// Subscribes to the [`QueueEvent`] broadcast.
    ///
    /// Each call returns a new independent `Receiver`. Events are emitted
    /// without delivery guarantees if the consumer is too slow (fixed-capacity
    /// broadcast channel).
    fn subscribe(&self) -> Receiver<QueueEvent>;

    /// Marks a job in the terminal `Conflict` state (optimistic-lock).
    ///
    /// The note was NOT written. `result_note_md` is expected to contain the
    /// conflict JSON (`WriteConflictDto`) so the client can retrieve the
    /// `current_sha256`.
    ///
    /// Distinct from `fail()` (which may trigger retries) and `complete()`
    /// (which indicates success). `Conflict` is terminal without retry.
    ///
    /// # Default implementation
    ///
    /// Builds a [`JobResult`] with `success: false`, `duration_ms` as given,
    /// no cost, no result note, and `conflict_payload` set to `result_note_md`
    /// parsed as JSON — `None` if it does not parse (the parse error is
    /// discarded). It then delegates to [`QueueStore::complete`].
    ///
    /// The default does **not** patch the status afterwards: the stored status
    /// is whatever `complete()` writes (`Done`, per the notes in the body), not
    /// `Conflict`. Stores that need a real `Conflict` status must override this
    /// method and write `status = 'Conflict'` themselves, as `SqliteQueueStore`
    /// is said to do.
    async fn mark_conflict(
        &self,
        id: Ulid,
        result_note_md: String,
        duration_ms: u32,
    ) -> Result<(), QueueError> {
        // Default implementation: record the conflict as a failed result.
        // Concrete implementations (SqliteQueueStore) override this method to
        // write the correct SQL status directly.
        let result = JobResult {
            success: false,
            duration_ms,
            cost_usd: None,
            result_note: None,
            // Best effort: a payload that is not valid JSON becomes `None`.
            conflict_payload: serde_json::from_str(&result_note_md).ok(),
        };
        // Delegate to complete() with that result — lifecycle.status will be
        // Done. The default cannot fix the SQL status without database access;
        // overriding implementations are responsible for correcting it.
        self.complete(id, result).await
    }
}
1689
1690// ─────────────────────────────────────────────────────────────────────────────
// QueueError — L0 errors (no external dependencies)
1692// ─────────────────────────────────────────────────────────────────────────────
1693
/// Errors from the `QueueStore` — no dependency on `sqlx` or any other driver.
///
/// Implementations (`SqliteQueueStore`, etc.) map their internal errors to these
/// variants via `map_err()`.
///
/// The `Display` messages (the `#[error(...)]` strings) are in French and are
/// part of the runtime output.
#[derive(Debug, thiserror::Error)]
pub enum QueueError {
    /// Storage error (SQLite driver, libsql, etc.). Carries the driver's
    /// message as text.
    #[error("erreur de stockage : {0}")]
    Storage(String),

    /// Job not found by identifier.
    #[error("job introuvable : {0}")]
    NotFound(Ulid),

    /// JSON payload serialisation/deserialisation error.
    #[error("erreur de sérialisation : {0}")]
    Serialization(String),

    /// Invalid state transition (e.g. `Done → Running`).
    #[error("transition d'état invalide : {0}")]
    InvalidTransition(String),

    /// Operation cancelled (timeout, shutdown).
    #[error("opération annulée : {0}")]
    Cancelled(String),

    /// Operation not implemented in this version.
    ///
    /// Used instead of `todo!()` to cleanly signal a deferred trait method
    /// implementation. Must never panic in production.
    #[error("opération non implémentée : {method}")]
    NotImplemented {
        /// Name of the unimplemented method.
        method: &'static str,
    },
}
1730
1731// ─────────────────────────────────────────────────────────────────────────────
// Unit tests
1733// ─────────────────────────────────────────────────────────────────────────────
1734
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a pending [`JobRecord`] wrapping `job`.
    ///
    /// The record uses batch mode, a vault-wide scope, an on-demand trigger
    /// scheduled "now", the default retry policy, an empty lineage, and the
    /// default priority for `class`.
    fn make_job_record(job: Job, class: JobClass) -> JobRecord {
        let now = Utc::now();
        JobRecord {
            id: Ulid::new(),
            spec: JobSpec {
                kind: job,
                class,
                mode: JobMode::Batch,
                scope: JobScope::VaultWide,
                priority: JobPriority::default_for(&class),
            },
            scheduling: JobScheduling {
                trigger: TriggerSource::Demand,
                scheduled_at: now,
                await_jobs: vec![],
                deadline: None,
                cron_expr: None,
            },
            lifecycle: JobLifecycle {
                status: JobStatus::Pending,
                created_at: now,
                started_at: None,
                completed_at: None,
                lease_until: None,
                result: None,
            },
            retry: JobRetry::default(),
            lineage: JobLineage {
                triggered_by: None,
                parent_job: None,
                pipeline_id: None,
                pipeline_step: None,
                children: vec![],
                cost_usd: None,
            },
        }
    }

    /// The numeric priority must be strictly ordered High > Normal > Low > Deferred.
    #[test]
    fn job_priority_as_u8_ordering() {
        assert!(JobPriority::High.as_u8() > JobPriority::Normal.as_u8());
        assert!(JobPriority::Normal.as_u8() > JobPriority::Low.as_u8());
        assert!(JobPriority::Low.as_u8() > JobPriority::Deferred.as_u8());
    }

    /// Each job class maps to its expected default priority.
    #[test]
    fn job_priority_default_for_class() {
        assert_eq!(
            JobPriority::default_for(&JobClass::Agent),
            JobPriority::High
        );
        assert_eq!(
            JobPriority::default_for(&JobClass::Human),
            JobPriority::High
        );
        assert_eq!(
            JobPriority::default_for(&JobClass::Api),
            JobPriority::Normal
        );
        assert_eq!(
            JobPriority::default_for(&JobClass::System),
            JobPriority::Low
        );
    }

    // ── E2: JobStatus::is_terminal() — exhaustive coverage all variants ──────

    /// Terminal variants must return `true` — anti-regression guard for the
    /// `status NOT IN (...)` SQL guards in `queue_store_sqlite.rs`.
    #[test]
    fn job_status_is_terminal_returns_true_for_terminal_variants() {
        assert!(JobStatus::Done.is_terminal(), "Done must be terminal");
        assert!(JobStatus::DLQ.is_terminal(), "DLQ must be terminal");
        assert!(
            JobStatus::Cancelled.is_terminal(),
            "Cancelled must be terminal"
        );
        assert!(
            JobStatus::Conflict.is_terminal(),
            "Conflict must be terminal"
        );
    }

    /// Non-terminal variants must return `false` — prevents silent inclusion
    /// of active job states in the terminal set.
    #[test]
    fn job_status_is_terminal_returns_false_for_non_terminal_variants() {
        assert!(
            !JobStatus::Pending.is_terminal(),
            "Pending must NOT be terminal"
        );
        assert!(
            !JobStatus::Running.is_terminal(),
            "Running must NOT be terminal"
        );
        assert!(
            !JobStatus::Waiting.is_terminal(),
            "Waiting must NOT be terminal"
        );
        assert!(
            !JobStatus::Failed.is_terminal(),
            "Failed must NOT be terminal"
        );
    }

    /// `TERMINAL_SQL` must exactly match the string used in SQL guards, and
    /// must agree with `is_terminal()` variant by variant.
    ///
    /// This is the anti-divergence lock: the first assertion pins the literal
    /// fragment, the loop then checks that a status name is quoted in the
    /// fragment if and only if `is_terminal()` returns `true` for it. Changing
    /// one side without the other makes this test fail.
    #[test]
    fn job_status_sql_fragment_matches_is_terminal() {
        let expected = "'Done','DLQ','Cancelled','Conflict'";
        assert_eq!(
            JobStatus::TERMINAL_SQL,
            expected,
            "TERMINAL_SQL diverged from is_terminal() terminal set — update both together"
        );

        // Same variant set as the two `is_terminal` tests above, paired with
        // the name each variant has inside the SQL fragment.
        let variants = [
            ("Pending", JobStatus::Pending),
            ("Running", JobStatus::Running),
            ("Waiting", JobStatus::Waiting),
            ("Failed", JobStatus::Failed),
            ("Done", JobStatus::Done),
            ("DLQ", JobStatus::DLQ),
            ("Cancelled", JobStatus::Cancelled),
            ("Conflict", JobStatus::Conflict),
        ];
        for (name, status) in variants {
            // Match on the quoted form so that one name cannot match inside another.
            let quoted = format!("'{name}'");
            assert_eq!(
                JobStatus::TERMINAL_SQL.contains(quoted.as_str()),
                status.is_terminal(),
                "TERMINAL_SQL and is_terminal() disagree on {name}"
            );
        }
    }

    #[test]
    fn job_mode_default_is_batch() {
        assert_eq!(JobMode::default(), JobMode::Batch);
    }

    /// A default retry policy starts at zero attempts, allows three, and has no errors.
    #[test]
    fn job_retry_default_values() {
        let r = JobRetry::default();
        assert_eq!(r.count, 0);
        assert_eq!(r.max, 3);
        assert!(r.errors.is_empty());
    }

    /// A fixed backoff yields the same delay whatever the attempt number.
    #[test]
    fn retry_backoff_fixed_is_constant() {
        let b = RetryBackoff::Fixed(10);
        assert_eq!(b.duration_for(0), Duration::from_secs(10));
        assert_eq!(b.duration_for(5), Duration::from_secs(10));
    }

    /// An exponential backoff doubles per attempt and never exceeds `max`.
    #[test]
    fn retry_backoff_exponential_caps_at_max() {
        let b = RetryBackoff::Exponential { base: 5, max: 120 };
        assert_eq!(b.duration_for(0), Duration::from_secs(5));
        assert_eq!(b.duration_for(1), Duration::from_secs(10));
        assert_eq!(b.duration_for(10), Duration::from_secs(120)); // capped
    }

    /// A `JobRecord` survives a JSON roundtrip with its id and priority intact.
    #[test]
    fn job_record_serialize_roundtrip() {
        let record = make_job_record(
            Job::Embed(EmbedSpec {
                note_id: Ulid::new(),
                tenant_id: "main".to_string(),
                force_regenerate: false,
            }),
            JobClass::Agent,
        );

        let json =
            serde_json::to_string(&record).expect("JobRecord doit être sérialisable en JSON");
        let back: JobRecord =
            serde_json::from_str(&json).expect("JobRecord doit être désérialisable depuis JSON");
        assert_eq!(record.id, back.id);
        assert_eq!(record.spec.priority.as_u8(), back.spec.priority.as_u8());
    }

    /// Workspace paths end with their respective `input/`, `output/`, `meta/` segment.
    #[test]
    fn job_workspace_paths_format() {
        let record = make_job_record(Job::Consolidate, JobClass::System);
        let ws = JobWorkspace::from_job(&record);
        assert!(ws.input.ends_with("/input/"));
        assert!(ws.output.ends_with("/output/"));
        assert!(ws.meta.ends_with("/meta/"));
    }

    /// A record whose spec mode is `DryRun` reports itself as a dry run.
    #[test]
    fn dry_run_job_record() {
        // Reuse the shared fixture and override only the mode. The helper's
        // priority for `JobClass::Agent` is `High` (see
        // `job_priority_default_for_class`), as in the previous hand-written literal.
        let mut record = make_job_record(
            Job::Curate(CurateSpec {
                note_id: Ulid::new(),
                tenant_id: "main".to_string(),
                ..Default::default()
            }),
            JobClass::Agent,
        );
        record.spec.mode = JobMode::DryRun;
        assert!(record.is_dry_run());
    }

    /// A dry-run output creates no notes and mentions both the marker and the duration.
    #[test]
    fn job_output_dry_run_format() {
        let out = JobOutput::dry_run(42, "test curate");
        assert!(out.notes_created.is_empty());
        assert!(out.result_note_md.contains("DRY-RUN"));
        assert!(out.result_note_md.contains("42"));
    }

    /// The default filter is unrestricted apart from a limit of 50.
    #[test]
    fn job_filter_default_limit() {
        let f = JobFilter::default();
        assert_eq!(f.limit, 50);
        assert!(f.class.is_none());
        assert!(f.status.is_none());
    }

    /// The priority stored on a `GradatumJob` matches the one in its record's spec.
    #[test]
    fn gradatum_job_priority_matches_spec() {
        let record = make_job_record(Job::Agent, JobClass::Human);
        let expected_priority = record.spec.priority.as_u8();
        let job = GradatumJob {
            priority: expected_priority,
            record,
        };
        assert_eq!(job.priority, 3); // Human → High → 3
        // Compare against the spec carried by the job itself, as the test name promises.
        assert_eq!(job.priority, job.record.spec.priority.as_u8());
    }

    #[test]
    fn vault_scope_is_alias_of_job_scope() {
        // VaultScope = JobScope — checks that the type alias compiles.
        let vs: VaultScope = JobScope::VaultWide;
        let js: JobScope = vs;
        assert!(matches!(js, JobScope::VaultWide));
    }

    /// A `QueueEvent` serialises to JSON containing its variant name.
    #[test]
    fn queue_event_variants_serialize() {
        let id = Ulid::new();
        let ev = QueueEvent::JobInserted(id);
        let json = serde_json::to_string(&ev).expect("QueueEvent doit être sérialisable");
        assert!(json.contains("JobInserted"));
    }

    // ── PurgeSpec tests + serde stability, position 5 ────────────────────────

    /// Default `PurgeSpec`: dry_run=true, grace_days=Some(30), mode=Lifecycle.
    ///
    /// Checks the conservative default values.
    #[test]
    fn purge_spec_default_values() {
        let spec = PurgeSpec::default();
        assert!(spec.dry_run, "dry_run doit être true par défaut");
        assert_eq!(spec.grace_days, Some(30));
        assert_eq!(spec.mode, PurgeMode::Lifecycle);
    }

    /// `Job::Purge(PurgeSpec)` serialises to JSON with the discriminant type "Purge".
    ///
    /// Serde stability: `#[serde(tag = "type", content = "data")]` encodes the variant
    /// by its name ("Purge") — an invariant for the queue's `kind` column.
    #[test]
    fn purge_job_serializes_with_correct_type_tag() {
        let job = Job::Purge(PurgeSpec::default());
        let json = serde_json::to_string(&job).expect("Job::Purge doit être sérialisable");
        assert!(
            json.contains("\"type\":\"Purge\""),
            "le tag serde doit être 'Purge', obtenu : {json}"
        );
    }

    /// JSON roundtrip of `Job::Purge` — deserialisation from JSON is correct.
    #[test]
    fn purge_job_json_roundtrip() {
        let original = Job::Purge(PurgeSpec {
            mode: PurgeMode::Lifecycle,
            dry_run: false,
            grace_days: Some(7),
        });
        let json = serde_json::to_string(&original).expect("sérialisation Job::Purge");
        let back: Job = serde_json::from_str(&json).expect("désérialisation Job::Purge");
        assert!(
            matches!(back, Job::Purge(ref s) if !s.dry_run && s.grace_days == Some(7)),
            "roundtrip JSON incorrect : {json}"
        );
    }

    /// `job_kind_str` returns "Purge" for `Job::Purge(_)`.
    #[test]
    fn job_kind_str_purge() {
        let job = Job::Purge(PurgeSpec::default());
        assert_eq!(job_kind_str(&job), "Purge");
    }

    /// `PurgeSpec` with `grace_days = None` (no grace period) and `dry_run = true`
    /// survives a JSON roundtrip.
    #[test]
    fn purge_spec_no_grace_serializes() {
        let spec = PurgeSpec {
            mode: PurgeMode::Lifecycle,
            dry_run: true,
            grace_days: None,
        };
        let json = serde_json::to_string(&spec).expect("sérialisation PurgeSpec sans grace");
        let back: PurgeSpec = serde_json::from_str(&json).expect("désérialisation PurgeSpec");
        assert!(back.grace_days.is_none());
        assert!(back.dry_run);
    }

    // ── ForgetSpec tests + serde stability, position 12 ──────────────────────

    /// Default `ForgetSpec`: dry_run=true, empty confirm_ulids, no `forgotten_by`.
    #[test]
    fn forget_spec_default_values() {
        let spec = ForgetSpec::default();
        assert!(spec.dry_run, "dry_run doit être true par défaut");
        assert!(spec.confirm_ulids.is_empty());
        assert!(spec.forgotten_by.is_none());
    }

    /// `Job::Forget(ForgetSpec)` serialises to JSON with the discriminant type "Forget".
    ///
    /// Serde stability: `#[serde(tag = "type", content = "data")]` encodes the variant
    /// by its name ("Forget") — an invariant for the queue's `kind` column.
    #[test]
    fn forget_job_serializes_with_correct_type_tag() {
        let job = Job::Forget(ForgetSpec::default());
        let json = serde_json::to_string(&job).expect("Job::Forget doit être sérialisable");
        assert!(
            json.contains("\"type\":\"Forget\""),
            "le tag serde doit être 'Forget', obtenu : {json}"
        );
    }

    /// JSON roundtrip of `Job::Forget` — deserialisation from JSON is correct.
    #[test]
    fn forget_job_json_roundtrip() {
        let original = Job::Forget(ForgetSpec {
            scope: ForgetScope::Topic {
                query: "secret api-key".to_string(),
                vault: None,
                limit: Some(10),
            },
            dry_run: false,
            forgotten_by: Some("operator-1".to_string()),
            confirm_ulids: vec!["01HTEST00000000000000000AB".to_string()],
        });
        let json = serde_json::to_string(&original).expect("sérialisation Job::Forget");
        let back: Job = serde_json::from_str(&json).expect("désérialisation Job::Forget");
        assert!(
            matches!(
                back,
                Job::Forget(ref s)
                    if !s.dry_run && s.forgotten_by.as_deref() == Some("operator-1")
            ),
            "roundtrip JSON incorrect : {json}"
        );
    }

    /// `job_kind_str` returns "Forget" for `Job::Forget(_)`.
    #[test]
    fn job_kind_str_forget() {
        let job = Job::Forget(ForgetSpec::default());
        assert_eq!(job_kind_str(&job), "Forget");
    }

    /// `ForgetScope::Locus` survives a JSON roundtrip.
    #[test]
    fn forget_scope_locus_roundtrip() {
        let spec = ForgetSpec {
            scope: ForgetScope::Locus {
                vault: "main".to_string(),
                locus: "inbox/old/".to_string(),
            },
            dry_run: true,
            forgotten_by: None,
            confirm_ulids: vec![],
        };
        let json = serde_json::to_string(&spec).expect("sérialisation ForgetScope::Locus");
        let back: ForgetSpec =
            serde_json::from_str(&json).expect("désérialisation ForgetScope::Locus");
        assert!(
            matches!(back.scope, ForgetScope::Locus { ref locus, .. } if locus == "inbox/old/")
        );
    }

    /// `ForgetScope::Agent` survives a JSON roundtrip with an empty `vaults` list
    /// (the original note says the handler then defaults to `["main"]`; that
    /// defaulting is not exercised here).
    #[test]
    fn forget_scope_agent_empty_vaults() {
        let spec = ForgetSpec {
            scope: ForgetScope::Agent {
                agent_id: "claude-agent".to_string(),
                vaults: vec![],
            },
            dry_run: true,
            forgotten_by: None,
            confirm_ulids: vec![],
        };
        let json = serde_json::to_string(&spec).expect("sérialisation ForgetScope::Agent");
        let back: ForgetSpec =
            serde_json::from_str(&json).expect("désérialisation ForgetScope::Agent");
        assert!(matches!(back.scope, ForgetScope::Agent { ref vaults, .. } if vaults.is_empty()));
    }

    // ── DistillSource tests + serde stability, position 3 (F-22 T1) ──────────

    /// `DistillSource::default`: Semantic mode, batch_limit 500, threshold 0.75.
    #[test]
    fn distill_source_default_values() {
        let src = DistillSource::default();
        assert_eq!(src.mode, DistillMode::Semantic);
        assert_eq!(src.batch_limit, 500);
        assert!((src.confidence_threshold - 0.75).abs() < f32::EPSILON);
        assert!(src.window.is_none());
        assert!(src.min_qa_events.is_none());
        assert!(matches!(src.scope, JobScope::VaultWide));
    }

    /// `Job::Distill(DistillSource)` serialises with the discriminant tag "Distill".
    ///
    /// Serde stability: `#[serde(tag = "type", content = "data")]` encodes the variant
    /// by its name ("Distill") — an invariant for the queue's `kind` column, unchanged
    /// by the unit→tuple conversion (position 3 preserved).
    #[test]
    fn distill_job_serializes_with_correct_type_tag() {
        let job = Job::Distill(DistillSource::default());
        let json = serde_json::to_string(&job).expect("Job::Distill doit être sérialisable");
        assert!(
            json.contains("\"type\":\"Distill\""),
            "le tag serde doit être 'Distill', obtenu : {json}"
        );
    }

    /// JSON roundtrip of `Job::Distill` — deserialisation from JSON is correct.
    #[test]
    fn distill_job_json_roundtrip() {
        let original = Job::Distill(DistillSource {
            mode: DistillMode::Semantic,
            scope: JobScope::Locus("rag/corpus-x/".to_string()),
            window: Some(Duration::from_secs(86_400)),
            batch_limit: 200,
            confidence_threshold: 0.80,
            min_qa_events: None,
        });
        let json = serde_json::to_string(&original).expect("sérialisation Job::Distill");
        let back: Job = serde_json::from_str(&json).expect("désérialisation Job::Distill");
        assert!(
            matches!(
                back,
                Job::Distill(ref s)
                    if s.batch_limit == 200
                        && (s.confidence_threshold - 0.80).abs() < f32::EPSILON
                        && matches!(s.scope, JobScope::Locus(ref l) if l == "rag/corpus-x/")
            ),
            "roundtrip JSON incorrect : {json}"
        );
    }

    /// `job_kind_str` returns "Distill" for `Job::Distill(_)`.
    #[test]
    fn job_kind_str_distill() {
        let job = Job::Distill(DistillSource::default());
        assert_eq!(job_kind_str(&job), "Distill");
    }

    /// Serde backward compatibility: a payload that omits `window` deserialises
    /// it to `None`.
    ///
    /// Fields with default values (`mode`, `batch_limit`, `confidence_threshold`)
    /// are restored from their defaults when absent from the JSON — forward-compat
    /// robustness.
    #[test]
    fn distill_source_deserializes_with_defaults_when_minimal() {
        // Minimal payload: only `scope` is required (no #[serde(default)] on it).
        let json = r#"{"scope":"VaultWide"}"#;
        let src: DistillSource =
            serde_json::from_str(json).expect("DistillSource minimal doit désérialiser");
        assert_eq!(src.mode, DistillMode::Semantic);
        assert_eq!(src.batch_limit, 500);
        assert!((src.confidence_threshold - 0.75).abs() < f32::EPSILON);
        // The documented behaviour for the omitted `window` field.
        assert!(src.window.is_none());
    }
}
2232}