cartulary 0.3.0-alpha.1

The knowledge layer of your project — decisions, issues, docs, all in one place.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
//! Schema migration use case.
//!
//! ## Architecture
//!
//! The migration is a static, ordered list of [`MigrationStep`]s, one per
//! version transition. Each step lives in its own file under [`steps/`] and
//! owns three things: the pure frontmatter transform, the `MigrationStep`
//! impl that orchestrates I/O via [`MigrationCorpus`], and the unit tests
//! that pin both. Open one file, read one migration.
//!
//! [`run`] threads a [`MigrationCtx`] (paths, id remapping, dry-run flag)
//! through every step in order, collects per-step [`StepOutcome`]s, and
//! returns the aggregate [`MigrationReport`].
//!
//! ## Why the corpus port
//!
//! The domain stays free of `std::fs`. Tests build a `FakeMigrationCorpus`
//! and exercise each step in milliseconds without a tempfile; the CLI
//! injects an `FsMigrationCorpus` (production) or wraps it in
//! [`DryRunCorpus`](corpus::DryRunCorpus) for `--dry-run`. The two
//! mechanisms coexist because they cover different concerns: the corpus
//! decides whether writes hit disk, `ctx.dry_run` is consulted by exactly
//! one step (the v3→v4 id rewrite) to decide whether to advance `paths`
//! after a rename that did not actually happen.
//!
//! ## Auditability
//!
//! Steps emit [`Detail`]s — structured `(verb, subject)` records — into
//! their `StepOutcome`. The orchestrator conjugates each `Detail` based on
//! `dry_run` (`"would migrate"` vs `"migrated"`) so the step doesn't have
//! to. The final summary table lists every step with its file count.
//!
//! ## `cartulary.toml` is special
//!
//! The TOML version bump is not a `MigrationStep` — it edits config, not
//! records. It runs after every step in [`run`] using the same corpus, and
//! is reported on its own line in the summary.

pub mod corpus;
pub mod legacy;
mod steps;

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use anyhow::Context as _;

pub use corpus::{DryRunCorpus, FakeMigrationCorpus, MigrationCorpus};

/// The schema version this binary understands — i.e. the target every
/// migration brings older corpora to. The history of shape changes is
/// summarised here so a reader can map a frontmatter version to its
/// expected layout.
///
/// [`bump_schema_version`] writes this value into the `version = N` line
/// of `cartulary.toml`.
///
/// | Version | What changed                                                  |
/// |---------|---------------------------------------------------------------|
/// | 1       | Initial format — `action` is a plain string in the event log  |
/// | 2       | ADR-0015/0016 — `action` is a tagged map `{name, ...}`        |
/// | 3       | ADR-0021 — `type:`/`priority:`/`size:` move into `tags:` as `flow:`/`priority:`/`size:` |
/// | 4       | ADR-0022 — sequential `KIND-NNNN` IDs are rewritten to `KIND-<TSID>` form; old IDs preserved in `aliases:`; cross-record `links:` rewritten to the new IDs |
/// | 5       | DDR-018QWJVHRH35B / ISSUE-018P03NSC7VNQ — `relationship: relates` entries are extracted from `links:` into a top-level `relates:` field of bare entity refs |
/// | 6       | DDR-018QWJVHRH35B / ISSUE-017GTGB3FMPV2 — DR workflow is hardcoded; `[decisions.<kind>.statuses]` and `preset` keys are stripped from `cartulary.toml`; non-canonical DR statuses (`draft`, `under-review`) are rewritten to `proposed` |
/// | 7       | ADR-01HMR0G23XH0N — the `events:` frontmatter block is lifted into an `events.jsonl` companion sibling to `index.md`; one JSON object per line, append-only |
/// | 8       | ADR-01JAKPGBYT2PF — `KIND-<TSID>` ids (13 chars, custom epoch, 22 random bits) are rewritten to `KIND-<ULID>` (26 chars, Unix epoch, 80 random bits); old TSIDs preserved in `aliases:`; `links:` and `relates:` rewritten via the rename table |
pub const CURRENT_SCHEMA_VERSION: u32 = 8;

/// Shared state threaded through every [`MigrationStep`]. Steps mutate
/// `paths` (the rename pass replaces entries) and `id_map` (the v3→v4 id
/// pass fills, the v3→v4 links pass reads). The corpus is the only handle
/// on the filesystem.
pub struct MigrationCtx<'a> {
    /// Project root handed to [`run`]; `cartulary.toml` is resolved
    /// against it.
    pub root_dir: &'a Path,
    /// Port through which steps read and write record files.
    pub corpus: &'a dyn MigrationCorpus,
    /// Record paths under migration. Seeded in [`run`] from
    /// `corpus.collect_index_paths(record_dirs)`.
    pub paths: Vec<PathBuf>,
    /// Decision-record directories; [`dr_paths`](Self::dr_paths) keeps
    /// only the entries of `paths` that start with one of them.
    pub dr_dirs: Vec<PathBuf>,
    /// Id rename table shared between steps. Starts empty in [`run`].
    pub id_map: HashMap<String, String>,
    /// Factory for fresh TSIDs from a Unix-millis timestamp. Sourced by
    /// the infra caller so the domain stays clock/RNG-free. Used by the
    /// v3→v4 id rewrite to seed TSIDs from the first `created` event.
    pub tsid_factory: &'a dyn Fn(i64) -> crate::domain::usecases::migrate::legacy::tsid::Tsid,
    /// Factory for fresh ULIDs from a Unix-millis timestamp. Used by the
    /// v7→v8 id rewrite — the input ms comes from the TSID's embedded
    /// timestamp when reliable, otherwise from the entry's `created`
    /// event or `date:` field.
    pub ulid_factory: &'a dyn Fn(i64) -> crate::domain::model::ulid::Ulid,
    /// Consulted only by id-rewriting steps to decide whether to advance
    /// `paths` after a rename that the [`DryRunCorpus`] discarded. Every
    /// other step is dry-run-agnostic.
    pub dry_run: bool,
}

impl<'a> MigrationCtx<'a> {
    /// Returns owned copies of every entry in `paths` that sits under one
    /// of the `dr_dirs`, in their original order. DR-only steps (e.g.
    /// status canonicalisation) iterate this instead of `paths` so the
    /// configurable issue side is left untouched.
    pub fn dr_paths(&self) -> Vec<PathBuf> {
        let mut selected = Vec::new();
        for path in &self.paths {
            let under_dr_dir = self.dr_dirs.iter().any(|dir| path.starts_with(dir));
            if under_dr_dir {
                selected.push(path.clone());
            }
        }
        selected
    }
}

/// What a single migration step did: a count of changed files (or files
/// that would change under dry-run) together with one [`Detail`] per
/// change, consumed by the per-step audit output.
#[derive(Default)]
pub struct StepOutcome {
    /// Number of changes recorded so far.
    pub files_changed: usize,
    /// Audit entries, in the order they were recorded.
    pub details: Vec<Detail>,
}

impl StepOutcome {
    /// Append `detail` to the audit trail and count it as one changed file.
    pub fn record(&mut self, detail: Detail) {
        self.details.push(detail);
        self.files_changed += 1;
    }
}

/// One audited action. A `Detail` carries both forms of its verb
/// (`"migrate"`/`"migrated"`, `"rename"`/`"renamed"`, …) so the
/// orchestrator can pick the past tense or the `"would …"` future form
/// based on dry-run; the step never picks the tense itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Detail {
    /// Past-tense form ("migrated", "rewrote links in", "renamed", "backfilled").
    pub past: &'static str,
    /// Bare infinitive ("migrate", "rewrite links in", "rename", "backfill").
    pub infinitive: &'static str,
    /// What the verb was applied to: a path, an `"old → new"` rename, a
    /// `"<rel> → <id> into <path>"` backfill, etc. Pre-formatted by the step.
    pub subject: String,
}

impl Detail {
    /// A record was migrated; `subject` is used verbatim.
    pub fn migrate(subject: impl Into<String>) -> Self {
        Self {
            past: "migrated",
            infinitive: "migrate",
            subject: subject.into(),
        }
    }

    /// Links inside a record were rewritten; `subject` is used verbatim.
    pub fn rewrite_links_in(subject: impl Into<String>) -> Self {
        Self {
            past: "rewrote links in",
            infinitive: "rewrite links in",
            subject: subject.into(),
        }
    }

    /// A path was renamed. The subject reads `"<from> → <to>"`.
    pub fn rename(from: &Path, to: &Path) -> Self {
        Self {
            past: "renamed",
            infinitive: "rename",
            // The arrow separator keeps the two paths from running
            // together in the audit output.
            subject: format!("{} → {}", from.display(), to.display()),
        }
    }

    /// A relationship was backfilled into `target`. The subject reads
    /// `"<relationship> → <source_id> into <target>"`.
    pub fn backfill(relationship: &str, source_id: &str, target: &Path) -> Self {
        Self {
            past: "backfilled",
            infinitive: "backfill",
            subject: format!("{relationship} → {source_id} into {}", target.display()),
        }
    }
}

/// A single version-bump step. Steps are ordered by [`source_version`] and
/// run in sequence; they may share state via [`MigrationCtx`].
///
/// [`source_version`]: MigrationStep::source_version
pub trait MigrationStep {
    /// Stable identifier surfaced in the audit output (e.g.
    /// `"v02-to-v03/flow-tags"`).
    fn id(&self) -> &'static str;

    /// Source schema version this step migrates from. Shown in the
    /// per-step header so the audit output traces the version path.
    fn source_version(&self) -> u32;

    /// One-line description shown in the per-step header.
    fn description(&self) -> &'static str;

    /// Apply this step through `ctx` and report what changed.
    ///
    /// # Errors
    ///
    /// An `Err` aborts the whole migration: the module-level `run`
    /// propagates it with `step <id>` added as context.
    fn run(&self, ctx: &mut MigrationCtx) -> anyhow::Result<StepOutcome>;
}

/// Per-step summary row returned by [`run`] alongside the audit details.
/// The first three fields are copied from the step's [`MigrationStep`]
/// accessors, the last two from its [`StepOutcome`].
pub struct StepSummary {
    /// Value of [`MigrationStep::id`].
    pub id: &'static str,
    /// Value of [`MigrationStep::source_version`].
    pub source_version: u32,
    /// Value of [`MigrationStep::description`].
    pub description: &'static str,
    /// Value of [`StepOutcome::files_changed`] for this step.
    pub files_changed: usize,
    /// The step's [`StepOutcome::details`], moved in unchanged.
    pub details: Vec<Detail>,
}

/// Everything [`run`] has to report once all steps have executed.
pub struct MigrationReport {
    /// Number of record paths collected before the first step ran.
    pub visited: usize,
    /// One summary per executed step, in execution order.
    pub steps: Vec<StepSummary>,
    /// Whether [`bump_schema_version`] reported a rewrite of `cartulary.toml`.
    pub toml_bumped: bool,
    /// Negation of `toml_bumped`, as set by [`run`].
    pub toml_already_current: bool,
}

impl MigrationReport {
    /// Total of `files_changed` across every step summary.
    pub fn total_changed(&self) -> usize {
        self.steps
            .iter()
            .fold(0, |total, step| total + step.files_changed)
    }
}

/// Run every migration step against the corpus rooted at `root_dir`.
///
/// `record_dirs` is the full set of directories that contain `index.md`
/// records (issues + every configured decision kind); `dr_dirs` is the
/// subset that holds decision records (used to scope DR-only steps).
pub fn run(
    corpus: &dyn MigrationCorpus,
    root_dir: &Path,
    record_dirs: &[PathBuf],
    dr_dirs: Vec<PathBuf>,
    tsid_factory: &dyn Fn(i64) -> crate::domain::usecases::migrate::legacy::tsid::Tsid,
    ulid_factory: &dyn Fn(i64) -> crate::domain::model::ulid::Ulid,
    dry_run: bool,
) -> anyhow::Result<MigrationReport> {
    let initial_paths = corpus.collect_index_paths(record_dirs);
    let visited = initial_paths.len();

    let mut ctx = MigrationCtx {
        root_dir,
        corpus,
        paths: initial_paths,
        dr_dirs,
        id_map: HashMap::new(),
        tsid_factory,
        ulid_factory,
        dry_run,
    };

    let mut summaries = Vec::new();
    for step in steps::all() {
        let outcome = step
            .run(&mut ctx)
            .with_context(|| format!("step {}", step.id()))?;
        summaries.push(StepSummary {
            id: step.id(),
            source_version: step.source_version(),
            description: step.description(),
            files_changed: outcome.files_changed,
            details: outcome.details,
        });
    }

    let toml_path = root_dir.join("cartulary.toml");
    let _ = strip_toml_dr_workflow_keys(corpus, &toml_path)?;
    let toml_bumped = bump_schema_version(corpus, &toml_path)?;
    let toml_already_current = !toml_bumped;

    Ok(MigrationReport {
        visited,
        steps: summaries,
        toml_bumped,
        toml_already_current,
    })
}

// ── `cartulary.toml` helpers ─────────────────────────────────────────────

/// Remove `[decisions.<kind>.statuses…]` sub-tables and `preset = "…"`
/// lines from `cartulary.toml`. The DR workflow is hardcoded per
/// DDR-018QWJVHRH35B, so these keys are dead config. Idempotent.
///
/// All I/O goes through the corpus, so a [`DryRunCorpus`] turns the write
/// into a no-op. A file that cannot be read is treated as "nothing to
/// do". Returns whether the content needed rewriting.
pub fn strip_toml_dr_workflow_keys(
    corpus: &dyn MigrationCorpus,
    toml_path: &Path,
) -> anyhow::Result<bool> {
    let original = match corpus.read(toml_path) {
        Ok(text) => text,
        Err(_) => return Ok(false),
    };
    let cleaned = strip_dr_workflow_keys_v5_to_v6(&original);
    let needs_rewrite = cleaned != original;
    if needs_rewrite {
        corpus.write(toml_path, &cleaned)?;
    }
    Ok(needs_rewrite)
}

/// Set the `version = N` line of `cartulary.toml` to
/// [`CURRENT_SCHEMA_VERSION`].
///
/// Only the first line whose key is exactly `version` is considered. When
/// there is no such line, one is inserted at the top of the file. A file
/// that cannot be read is left alone. A trailing newline in the input is
/// preserved. Returns whether the content needed rewriting.
pub fn bump_schema_version(corpus: &dyn MigrationCorpus, toml_path: &Path) -> anyhow::Result<bool> {
    let content = match corpus.read(toml_path) {
        Ok(text) => text,
        Err(_) => return Ok(false),
    };

    let target = format!("version = {CURRENT_SCHEMA_VERSION}");
    let mut lines: Vec<String> = content.lines().map(String::from).collect();

    // First line of the form `version<ws>=…`; keys such as `versions`
    // or `version_x` do not match because `=` must follow the key.
    let version_idx = lines.iter().position(|line| {
        match line.trim_start().strip_prefix("version") {
            Some(rest) => rest.trim_start().starts_with('='),
            None => false,
        }
    });

    let changed = match version_idx {
        Some(idx) if lines[idx].trim() == target => false,
        Some(idx) => {
            lines[idx] = target;
            true
        }
        None => {
            lines.insert(0, target);
            true
        }
    };

    if changed {
        let mut out = lines.join("\n");
        if content.ends_with('\n') {
            out.push('\n');
        }
        corpus.write(toml_path, &out)?;
    }
    Ok(changed)
}

/// v5→v6 `cartulary.toml` strip transform. Pure: input string → output
/// string. The I/O wrapper ([`strip_toml_dr_workflow_keys`]) reads the
/// file, applies this, and writes the result via the corpus.
///
/// Two things are removed:
///
/// * every table whose header starts with `decisions.` and contains
///   `.statuses`, together with all lines up to the next table header;
/// * `preset = …` assignments directly inside a `[decisions.<kind>]`
///   table. Only the exact key `preset` is matched, so neighbouring keys
///   such as `preset_notes` or `presets` are kept.
///
/// When nothing matches, the input is returned unchanged. Otherwise the
/// surviving lines are re-joined with `\n`, keeping a trailing newline
/// if the input had one.
pub fn strip_dr_workflow_keys_v5_to_v6(toml: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();
    let mut in_decision_kind = false;
    let mut in_statuses_table = false;
    let mut changed = false;

    for line in toml.lines() {
        let trimmed = line.trim();

        // A table header switches both section flags; they stay in force
        // until the next header.
        if trimmed.starts_with('[') && trimmed.ends_with(']') {
            let header = trimmed.trim_matches(|c| c == '[' || c == ']');
            in_decision_kind = is_decision_kind_header(header);
            in_statuses_table = header.starts_with("decisions.") && header.contains(".statuses");
        }

        if in_statuses_table || (in_decision_kind && is_preset_assignment(trimmed)) {
            changed = true;
            continue;
        }

        kept.push(line);
    }

    if !changed {
        return toml.to_string();
    }
    let mut joined = kept.join("\n");
    if toml.ends_with('\n') {
        joined.push('\n');
    }
    joined
}

/// True for a header of the exact form `decisions.<kind>`, i.e. one
/// non-empty segment after the `decisions.` prefix.
fn is_decision_kind_header(header: &str) -> bool {
    match header.strip_prefix("decisions.") {
        Some(kind) => !kind.is_empty() && !kind.contains('.'),
        None => false,
    }
}

/// True when `trimmed` assigns the key `preset`, i.e. `preset` followed
/// by optional whitespace and `=`.
fn is_preset_assignment(trimmed: &str) -> bool {
    match trimmed.strip_prefix("preset") {
        Some(rest) => rest.trim_start().starts_with('='),
        None => false,
    }
}

// ── shared internals used by step files ──────────────────────────────────

/// Break a record file into its `(frontmatter, body)` halves.
///
/// The input must open with `---\n` and contain a later `\n---`; the
/// frontmatter is the text between the two, and the body is whatever
/// follows the closing marker with leading newlines removed. Returns
/// `None` when either marker is missing.
pub(crate) fn split_frontmatter(source: &str) -> Option<(&str, &str)> {
    const OPENING: &str = "---\n";
    const CLOSING: &str = "\n---";

    let rest = source.strip_prefix(OPENING)?;
    let (frontmatter, tail) = rest.split_once(CLOSING)?;
    Some((frontmatter, tail.trim_start_matches('\n')))
}

// Unit tests for the pure v5→v6 TOML strip transform and for the
// `Detail` constructors.
#[cfg(test)]
mod tests {
    use super::*;

    // A `[decisions.<kind>.statuses]` sub-table disappears together with
    // its keys; neighbouring tables survive.
    #[test]
    fn v5_to_v6_strips_decisions_kind_statuses_subtable() {
        let toml = indoc::indoc! {r#"
            [decisions]
            types = ["adr"]

            [decisions.adr]
            dir = "docs/adr"

            [decisions.adr.statuses]
            draft = { next = ["proposed"], active = true }
            proposed = { next = ["accepted"], active = true }

            [issues]
            dir = "docs/issues"
        "#};
        let stripped = strip_dr_workflow_keys_v5_to_v6(toml);
        assert!(!stripped.contains("[decisions.adr.statuses]"));
        assert!(!stripped.contains("draft = "));
        assert!(stripped.contains("[issues]"));
        assert!(stripped.contains("dir = \"docs/adr\""));
    }

    // `preset` inside a `[decisions.<kind>]` table is removed; sibling
    // keys stay.
    #[test]
    fn v5_to_v6_strips_decisions_kind_preset_line() {
        let toml = indoc::indoc! {r#"
            [decisions]
            types = ["adr"]

            [decisions.adr]
            dir = "docs/adr"
            preset = "extended"
        "#};
        let stripped = strip_dr_workflow_keys_v5_to_v6(toml);
        assert!(!stripped.contains("preset"));
        assert!(stripped.contains("dir = \"docs/adr\""));
    }

    // `preset` outside a decision-kind table is not touched.
    #[test]
    fn v5_to_v6_leaves_issue_preset_alone() {
        let toml = indoc::indoc! {r#"
            [issues]
            preset = "scrum"
        "#};
        assert_eq!(strip_dr_workflow_keys_v5_to_v6(toml), toml);
    }

    // Input with nothing to strip comes back byte-identical.
    #[test]
    fn v5_to_v6_toml_strip_is_idempotent_on_clean_input() {
        let toml = indoc::indoc! {r#"
            [decisions]
            types = ["adr"]

            [decisions.adr]
            dir = "docs/adr"

            [issues]
            dir = "docs/issues"
        "#};
        assert_eq!(strip_dr_workflow_keys_v5_to_v6(toml), toml);
    }

    // Pins both verb forms and the verbatim subject of `Detail::migrate`.
    #[test]
    fn detail_migrate_uses_migrated_past_form() {
        let d = Detail::migrate("foo");
        assert_eq!(d.past, "migrated");
        assert_eq!(d.infinitive, "migrate");
        assert_eq!(d.subject, "foo");
    }

    // Pins the `"old → new"` subject format documented on
    // `Detail::subject`.
    #[test]
    fn detail_rename_formats_arrow_subject() {
        let d = Detail::rename(Path::new("a"), Path::new("b"));
        assert_eq!(d.subject, "a → b");
    }
}