heddle-cli 0.3.1

An AI-native version control system
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
// SPDX-License-Identifier: Apache-2.0
//! Crash-mid-write integration tests (R7 + R9 from the OSS-launch
//! plan).
//!
//! W2b landed the rollback machinery — atomic mapping persistence,
//! mirror Drop guard, HEAD/index restore on failure, snapshot
//! state-then-ref ordering. These tests crash the heddle process at
//! the load-bearing transition points (via `HEDDLE_FAULT_INJECT`)
//! and verify the next clean process recovers without corruption.
//!
//! These tests spawn child processes — the parent test sets
//! `HEDDLE_FAULT_INJECT`, runs the child, observes the intentional
//! failure, then re-reads the repository without the env var
//! (typically via a fresh child) and asserts the recovery contract.

use std::process::Command;

use oplog::{OpLogBackend, OpRecord};

use super::*;

/// R9: bridge mapping persistence.
///
/// `bridge export` writes the served heddle↔git mapping to disk via a
/// write-tmp-then-rename pattern (`bridge-mapping.json.tmp` →
/// `bridge-mapping.json`). The fault checkpoint
/// `mapping_after_tmp_before_commit` panics in the gap between those
/// two operations, leaving the sidecar in a state where the .tmp
/// file exists but the canonical file does not (or is stale).
///
/// `recover_mapping_tmp` (in `load_mapping_from_disk`) is the recovery
/// path: on the next load, if a .tmp exists, it gets atomically
/// renamed into place. This test verifies that contract end-to-end:
/// crash, observe the on-disk shape, run a clean export, observe the
/// recovered shape.
#[test]
#[ignore = "fault-injection: spawns child processes with HEDDLE_FAULT_INJECT"]
fn bridge_recovers_from_crash_after_tmp_before_commit() {
    // Single source of truth for the checkpoint name: it is both armed in
    // the child's environment and searched for in the child's stderr.
    const CHECKPOINT: &str = "mapping_after_tmp_before_commit";

    let temp = TempDir::new().unwrap();
    let origin = temp.path().join("origin.git");
    let work = temp.path().join("work");
    let export = temp.path().join("export.git");

    // Build a small synthetic upstream so the mapping has real
    // entries to write (not just empty tables, which would
    // short-circuit the fault checkpoint). Reuses the helpers from
    // `cli_integration.rs` so the fixture shape matches the other
    // bridge tests.
    let origin_repo = SleyRepository::init_bare(&origin).expect("init origin");
    let blob = origin_repo.write_blob(b"fn a() {}\n").unwrap();
    let empty = git_empty_tree_oid(&origin_repo);
    let mut tree_editor = origin_repo.edit_tree(&empty).expect("tree editor");
    tree_editor.upsert("core.rs", EntryKind::Blob, blob);
    let tree_oid = origin_repo.write_tree(tree_editor).unwrap();
    let _commit =
        git_commit_with_tree(&origin_repo, Some("refs/heads/main"), tree_oid, "seed", &[]);

    // `heddle_output_with_env` hands back the child's `Output` even when the
    // child exits non-zero, so `expect` alone only proves the spawn worked.
    // Check the exit status too; otherwise a broken clone would surface
    // later as a confusing "missing intentional panic" failure in Phase 1.
    let cloned = heddle_output_with_env(
        &["clone", origin.to_str().unwrap(), work.to_str().unwrap()],
        Some(temp.path()),
        &[],
    )
    .expect("spawn initial clone");
    assert!(
        cloned.status.success(),
        "initial clone must succeed before the crash can be staged: stdout={} stderr={}",
        String::from_utf8_lossy(&cloned.stdout),
        String::from_utf8_lossy(&cloned.stderr)
    );

    // ── Phase 1: spawn the export with fault injection armed ──
    //
    // The process should panic with our intentional message rather
    // than completing the bridge export. We explicitly assert the
    // panic message so a regression that silently no-ops the
    // checkpoint surfaces here, not three commits downstream.
    //
    // NOTE(review): this child is built by hand instead of going through
    // `heddle_output_with_env`, presumably so `HEDDLE_CONFIG` can be pinned
    // explicitly — confirm before unifying the two spawn paths.
    let crashed = Command::new(env!("CARGO_BIN_EXE_heddle"))
        .args([
            "bridge",
            "git",
            "export",
            "--destination",
            export.to_str().unwrap(),
        ])
        .current_dir(&work)
        .env("HEDDLE_FAULT_INJECT", CHECKPOINT)
        .env("HEDDLE_CONFIG", work.join(".heddle-user/config.toml"))
        .output()
        .expect("spawn child");
    assert!(
        !crashed.status.success(),
        "child should panic, got success: stdout={} stderr={}",
        String::from_utf8_lossy(&crashed.stdout),
        String::from_utf8_lossy(&crashed.stderr)
    );
    let stderr = String::from_utf8_lossy(&crashed.stderr);
    assert!(
        stderr.contains("HEDDLE_FAULT_INJECT") && stderr.contains(CHECKPOINT),
        "child should report the intentional panic: stderr={stderr}"
    );

    // ── Phase 2: observe the intermediate on-disk shape ──
    //
    // After the crash we expect either the .tmp to exist, or a
    // canonical file left over from an earlier write. Both shapes are
    // valid pre-recovery states; what matters is the recovery primitive
    // accepts both. On failure the directory listing is included in the
    // message (empty if the directory cannot be read).
    let mapping_dir = work.join(".heddle").join("git-bridge");
    let canonical = mapping_dir.join("bridge-mapping.json");
    let tmp = mapping_dir.join("bridge-mapping.json.tmp");
    assert!(
        tmp.exists() || canonical.exists(),
        "after crash, at least one of the mapping files must exist; \
         dir contents: {:?}",
        std::fs::read_dir(&mapping_dir)
            .map(|d| d.flatten().map(|e| e.file_name()).collect::<Vec<_>>())
            .unwrap_or_default()
    );

    // ── Phase 3: clean re-run recovers ──
    //
    // No fault injection this time. The bridge load path runs
    // `recover_mapping_tmp`, atomically renames any leftover .tmp
    // into the canonical position, and proceeds with a normal
    // export. Final assertions: the canonical mapping file exists,
    // parses as JSON with an `entries` field, and the .tmp is gone.
    let recovered = heddle_output_with_env(
        &[
            "bridge",
            "git",
            "export",
            "--destination",
            export.to_str().unwrap(),
        ],
        Some(&work),
        &[],
    )
    .expect("recovery export succeeds");
    assert!(
        recovered.status.success(),
        "post-crash export should succeed cleanly: stderr={}",
        String::from_utf8_lossy(&recovered.stderr)
    );
    assert!(
        canonical.exists(),
        "canonical mapping file must exist after recovery"
    );
    let body = std::fs::read_to_string(&canonical).expect("read mapping");
    let parsed: serde_json::Value =
        serde_json::from_str(&body).expect("recovered mapping must parse as JSON");
    assert!(
        parsed.get("entries").is_some(),
        "recovered mapping must have entries field: {body}"
    );
    assert!(
        !tmp.exists(),
        "post-recovery, the .tmp must be gone (renamed into canonical)"
    );
}

/// Returns the `change_id` of the first state listed by
/// `heddle --output json log main -n 5`, run inside `repo`.
///
/// Panics if the command fails, its output is not valid JSON, or
/// `states[0].change_id` is absent or not a string.
fn current_main_tip(repo: &std::path::Path) -> String {
    let raw = heddle(&["--output", "json", "log", "main", "-n", "5"], Some(repo)).expect("log");
    let parsed: Value = serde_json::from_str(&raw).unwrap();
    // Only the first listed state is consulted; it is treated as the tip.
    let tip = parsed["states"][0]["change_id"]
        .as_str()
        .expect("tip change_id");
    tip.to_owned()
}

/// Returns the `change_id` of the first state listed by
/// `heddle --output json log --limit 1`, run inside `repo` (no thread
/// argument is passed, unlike `current_main_tip`).
///
/// Panics if the command fails, its output is not valid JSON, or
/// `states[0].change_id` is absent or not a string.
fn current_head_tip(repo: &std::path::Path) -> String {
    let raw = heddle(&["--output", "json", "log", "--limit", "1"], Some(repo)).expect("log");
    let parsed: Value = serde_json::from_str(&raw).unwrap();
    let tip = parsed["states"][0]["change_id"]
        .as_str()
        .expect("tip change_id");
    tip.to_owned()
}

/// Counts the `OpRecord::ThreadUpdate` entries among the 512 most recent
/// oplog entries of the repository at `repo`.
///
/// Panics if the repository cannot be opened or the oplog cannot be read.
fn thread_update_count(repo: &std::path::Path) -> usize {
    let repository = Repository::open(repo).expect("open repo");
    repository
        .oplog()
        .recent(512)
        .expect("read oplog")
        .iter()
        // Each matching entry contributes 1, everything else 0.
        .map(|entry| usize::from(matches!(entry.operation, OpRecord::ThreadUpdate { .. })))
        .sum()
}

/// Counts the `OpRecord::ThreadUpdate` entries reachable through the scoped
/// undo and redo queues of the repository at `repo`.
///
/// Up to 16 batches are requested from each queue, both filtered by the
/// repository's own `op_scope()`. Panics if the repository cannot be opened
/// or either queue cannot be read.
fn scoped_undo_redo_thread_update_count(repo: &std::path::Path) -> usize {
    let repository = Repository::open(repo).expect("open repo");
    let scope = repository.op_scope();
    let undo_batches = repository
        .oplog()
        .undo_batches_scoped(16, Some(&scope))
        .expect("read undo queue");
    let redo_batches = repository
        .oplog()
        .redo_batches_scoped(16, Some(&scope))
        .expect("read redo queue");

    let mut thread_updates = 0;
    for batch in undo_batches.iter().chain(redo_batches.iter()) {
        for entry in batch.entries.iter() {
            if matches!(entry.operation, OpRecord::ThreadUpdate { .. }) {
                thread_updates += 1;
            }
        }
    }
    thread_updates
}

/// Asserts that a child process died at the expected fault-injection point.
///
/// Panics (failing the calling test) when the child exited successfully, or
/// when its stderr does not mention both `HEDDLE_FAULT_INJECT` and
/// `checkpoint`.
fn assert_intentional_snapshot_crash(crashed: std::process::Output, checkpoint: &str) {
    let stdout = String::from_utf8_lossy(&crashed.stdout);
    let stderr = String::from_utf8_lossy(&crashed.stderr);

    let exited_cleanly = crashed.status.success();
    assert!(
        !exited_cleanly,
        "child should panic, got success: stdout={} stderr={}",
        stdout,
        stderr
    );

    // Both the env-var name and the checkpoint name must appear, so an
    // unrelated failure is not mistaken for the injected one.
    let reports_injected_fault = ["HEDDLE_FAULT_INJECT", checkpoint]
        .iter()
        .all(|needle| stderr.contains(*needle));
    assert!(
        reports_injected_fault,
        "child should report the intentional panic at {checkpoint}: stderr={stderr}"
    );
}

/// Runs `heddle capture -m <message>` in `repo` with `HEDDLE_FAULT_INJECT`
/// set to `checkpoint`, then asserts the child failed at that checkpoint.
fn crash_capture_at(repo: &std::path::Path, checkpoint: &str, message: &str) {
    let fault_env = [("HEDDLE_FAULT_INJECT", checkpoint)];
    let child_output = heddle_output_with_env(&["capture", "-m", message], Some(repo), &fault_env)
        .expect("spawn child");
    assert_intentional_snapshot_crash(child_output, checkpoint);
}

/// Runs `heddle switch <target>` in `repo` with `HEDDLE_FAULT_INJECT` set to
/// `checkpoint`, then asserts the child failed at that checkpoint.
fn crash_goto_at(repo: &std::path::Path, checkpoint: &str, target: &str) {
    let fault_env = [("HEDDLE_FAULT_INJECT", checkpoint)];
    let child_output =
        heddle_output_with_env(&["switch", target], Some(repo), &fault_env).expect("spawn child");
    assert_intentional_snapshot_crash(child_output, checkpoint);
}

/// Creates a fresh repository in a temporary directory and captures one
/// baseline state.
///
/// Returns the `TempDir` (which the caller must keep alive for the
/// repository to remain on disk) together with the tip reported by
/// `current_main_tip` right after the baseline capture.
fn init_repo_with_baseline() -> (TempDir, String) {
    let temp = TempDir::new().unwrap();
    let root = temp.path();

    heddle(&["init"], Some(root)).expect("init");

    // A clean baseline snapshot gives every test a known prior tip to
    // compare against after the injected crash.
    std::fs::write(root.join("base.txt"), "baseline").unwrap();
    heddle(&["capture", "-m", "baseline"], Some(root)).expect("baseline snapshot");
    let baseline_tip = current_main_tip(root);

    (temp, baseline_tip)
}

/// R7/O1: a snapshot that crashes before its commit marker stays invisible.
///
/// `repo::Repository::snapshot_with_attribution` stages snapshot objects
/// first and only then appends the atomic `TransactionCommit` marker. When the
/// process dies at `snapshot_after_stage_before_atomic_commit`, staged content
/// is left on disk, but with no commit marker it carries no authority. Every
/// subsequent Heddle read must therefore report the prior baseline as the
/// thread tip and must never resurrect the staged-but-uncommitted content.
#[test]
#[ignore = "fault-injection: spawns child processes with HEDDLE_FAULT_INJECT"]
fn snapshot_atomicity_before_commit_crash_stays_on_baseline() {
    let (temp, baseline_tip) = init_repo_with_baseline();
    let repo_root = temp.path();

    // ── Phase 1: modify the tree, then capture with the pre-commit fault armed ──
    std::fs::write(repo_root.join("base.txt"), "would-be-captured").unwrap();
    crash_capture_at(
        repo_root,
        "snapshot_after_stage_before_atomic_commit",
        "the capture that crashes before commit",
    );

    // ── Phase 2: no TransactionCommit marker landed, so the baseline must
    //              still be the tip on the first read after the crash …
    let first_read = current_main_tip(repo_root);
    assert_eq!(
        first_read, baseline_tip,
        "thread tip must still point at the baseline state — anything else \
         is a half-written advance and a real atomicity bug",
    );

    // … and on a repeated read as well.
    let second_read = current_main_tip(repo_root);
    assert_eq!(
        second_read, baseline_tip,
        "a second read must not resurrect the staged-but-uncommitted snapshot",
    );
}

/// R7/O1: a snapshot that crashes after its commit marker is recovered once.
///
/// Once `TransactionCommit` is durable the oplog is the commit point; the
/// thread ref is merely a materialized view of it. Dying at
/// `snapshot_after_atomic_commit_before_ref_publish` leaves that ref stale,
/// and the next Heddle read is expected to reconcile from the committed oplog
/// tail and republish the capture. Reads after that must be idempotent: the
/// logical snapshot is never applied a second time.
#[test]
#[ignore = "fault-injection: spawns child processes with HEDDLE_FAULT_INJECT"]
fn snapshot_atomicity_after_commit_crash_recovers_once() {
    let (temp, baseline_tip) = init_repo_with_baseline();
    let repo_root = temp.path();

    // ── Phase 1: modify the tree, then capture with the post-commit fault armed ──
    std::fs::write(repo_root.join("base.txt"), "committed-before-ref-publish").unwrap();
    crash_capture_at(
        repo_root,
        "snapshot_after_atomic_commit_before_ref_publish",
        "the capture that commits before crashing",
    );

    // ── Phase 2: the first read after the crash must already see a tip
    //              different from the baseline.
    let recovered_tip = current_main_tip(repo_root);
    assert_ne!(
        recovered_tip, baseline_tip,
        "post-commit crash recovery must advance the tip from the baseline",
    );

    // ── Phase 3: two further reads must keep reporting that same tip.
    let second_read = current_main_tip(repo_root);
    assert_eq!(
        second_read, recovered_tip,
        "a second read must not apply the committed snapshot a second time",
    );

    let third_read = current_main_tip(repo_root);
    assert_eq!(
        third_read, recovered_tip,
        "retrying reconcile-on-read must not advance the tip again",
    );
}

/// Goto is record-first: when the process dies after `OpRecord::Goto` has
/// been committed but before HEAD is published, the next read must rebuild the
/// detached HEAD from that record. Repeated reads must be idempotent, and the
/// source thread — which stays attached — must not move.
#[test]
#[ignore = "fault-injection: spawns child processes with HEDDLE_FAULT_INJECT"]
fn goto_after_commit_crash_recovers_detached_head_once() {
    let (temp, baseline_tip) = init_repo_with_baseline();
    let repo_root = temp.path();

    // Fixture: a second capture on top of the baseline, so there is an
    // older state to switch back to.
    std::fs::write(repo_root.join("base.txt"), "second").unwrap();
    heddle(&["capture", "-m", "second"], Some(repo_root)).expect("second snapshot");
    let second_tip = current_main_tip(repo_root);
    assert_ne!(
        second_tip, baseline_tip,
        "fixture must have a distinct second tip"
    );

    // Crash while switching back to the baseline state.
    crash_goto_at(
        repo_root,
        "goto_after_oplog_commit_before_ref_publish",
        &baseline_tip,
    );

    // First read after the crash: HEAD must report the switch target.
    let recovered_head = current_head_tip(repo_root);
    assert_eq!(
        recovered_head, baseline_tip,
        "post-commit goto crash recovery must detach HEAD to the committed target",
    );

    // Second read: same answer.
    let head_on_reread = current_head_tip(repo_root);
    assert_eq!(
        head_on_reread, recovered_head,
        "a second HEAD read must not apply the committed goto a second time",
    );

    // The `main` thread itself must still sit on the second capture.
    let main_after_recovery = current_main_tip(repo_root);
    assert_eq!(
        main_after_recovery, second_tip,
        "goto recovery must not move the source thread ref",
    );
}

/// A `thread resolve` that fails at the injected
/// `thread_manager_save_in_thread_update` fault point must not leave a
/// `ThreadUpdate` record behind.
///
/// The test records the number of `OpRecord::ThreadUpdate` entries before the
/// injected failure, runs `thread resolve` with the fault armed, and then
/// requires that (a) the recent-oplog count is unchanged and (b) the scoped
/// undo/redo queues, read from the feature thread's checkout, contain no
/// `ThreadUpdate` at all.
#[test]
#[ignore = "fault-injection: spawns child processes with HEDDLE_FAULT_INJECT"]
fn thread_update_save_failure_does_not_commit_undoable_record() {
    let temp = TempDir::new().unwrap();
    let root = temp.path();

    // Fixture: a repository with one base capture …
    heddle(&["init"], Some(root)).expect("init");
    std::fs::write(root.join("base.txt"), "base\n").unwrap();
    heddle(&["capture", "-m", "base"], Some(root)).expect("base capture");

    // … plus a feature thread checked out at its own path with one capture.
    let thread_path = root.join("feature-checkout");
    let start_args = [
        "start",
        "feature/atomic-save",
        "--path",
        thread_path.to_str().unwrap(),
    ];
    heddle(&start_args, Some(root)).expect("start feature thread");
    std::fs::write(thread_path.join("feature.txt"), "feature\n").unwrap();
    heddle(&["capture", "-m", "feature work"], Some(&thread_path)).expect("feature capture");

    // Resolve the thread with the manager-save fault armed.
    let before_count = thread_update_count(root);
    let fault_env = [(
        "HEDDLE_FAULT_INJECT",
        "thread_manager_save_in_thread_update",
    )];
    let failed = heddle_output_with_env(
        &["thread", "resolve", "feature/atomic-save"],
        Some(root),
        &fault_env,
    )
    .expect("spawn injected thread resolve");

    let stdout = String::from_utf8_lossy(&failed.stdout);
    let stderr = String::from_utf8_lossy(&failed.stderr);
    assert!(
        !failed.status.success(),
        "thread resolve should fail at the ThreadUpdate manager.save fault point: stdout={} stderr={}",
        stdout,
        stderr
    );
    let names_env_var = stderr.contains("HEDDLE_FAULT_INJECT");
    let names_checkpoint = stderr.contains("thread_manager_save_in_thread_update");
    assert!(
        names_env_var && names_checkpoint,
        "thread resolve should report the intentional manager.save failure: stderr={stderr}"
    );

    // Neither the raw oplog nor the undo/redo queues may show the update.
    let after_count = thread_update_count(root);
    assert_eq!(
        after_count, before_count,
        "failed manager.save must not leave a committed ThreadUpdate record"
    );
    let queued_updates = scoped_undo_redo_thread_update_count(&thread_path);
    assert_eq!(
        queued_updates, 0,
        "undo/redo queues must not expose a ThreadUpdate whose manager body failed to save"
    );
}