Skip to main content

maw/oplog/
write.rs

1//! Op log write operations: store operations as git blobs and update head refs.
2//!
3//! This module implements the write path of the git-native per-workspace
4//! operation log (§5.3):
5//!
6//! 1. **Serialize** an [`Operation`] to canonical JSON.
7//! 2. **Write as a git blob** via `git hash-object -w --stdin` — the blob OID
8//!    becomes the operation's identity.
9//! 3. **Update the head ref** atomically with `git update-ref` using
10//!    compare-and-swap (CAS) to guard against concurrent writes.
11//!
12//! # Single-writer invariant
13//!
14//! Each workspace has exactly one writer at a time (§5.3 §5.1). The CAS
15//! step is therefore a safety net, not a retry loop: if CAS fails, something
16//! has gone wrong with the single-writer invariant and the error bubbles up.
17//!
18//! # Ref layout
19//!
20//! ```text
21//! refs/manifold/head/<workspace>  ← latest operation blob OID
22//! ```
23//!
24//! # Example flow
25//! ```text
26//! write_operation_blob(root, &op)  → blob_oid
27//! append_operation(root, ws, &op, old_head) → blob_oid
28//!   ├── hash-object → blob_oid
29//!   └── update-ref refs/manifold/head/<ws> <blob_oid> [<old_head>]
30//! ```
31
32use std::fmt;
33use std::io::Write as IoWrite;
34use std::path::Path;
35use std::process::{Command, Stdio};
36
37use crate::model::types::{GitOid, WorkspaceId};
38use crate::refs as manifold_refs;
39
40use super::types::Operation;
41
42// ---------------------------------------------------------------------------
43// Error type
44// ---------------------------------------------------------------------------
45
46/// Errors that can occur during an op log write.
47#[derive(Debug)]
48pub enum OpLogWriteError {
49    /// Serializing the operation to canonical JSON failed.
50    Serialize(serde_json::Error),
51
52    /// `git hash-object -w --stdin` failed or returned unexpected output.
53    HashObject {
54        /// Stderr from git.
55        stderr: String,
56        /// Process exit code, if available.
57        exit_code: Option<i32>,
58    },
59
60    /// The OID returned by `git hash-object` was malformed.
61    InvalidOid {
62        /// The raw bytes git printed.
63        raw: String,
64    },
65
66    /// I/O error (e.g. spawning git, writing to its stdin).
67    Io(std::io::Error),
68
69    /// CAS failed: the head ref was modified between read and write.
70    ///
71    /// With the single-writer invariant this should never happen. If it does,
72    /// it indicates a bug or a broken invariant upstream.
73    CasMismatch {
74        /// The workspace whose head ref could not be updated.
75        workspace_id: WorkspaceId,
76    },
77
78    /// A lower-level ref operation (other than CAS mismatch) failed.
79    RefError(manifold_refs::RefError),
80}
81
82impl fmt::Display for OpLogWriteError {
83    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
84        match self {
85            Self::Serialize(e) => write!(f, "failed to serialize operation to JSON: {e}"),
86            Self::HashObject { stderr, exit_code } => {
87                write!(f, "`git hash-object` failed")?;
88                if let Some(code) = exit_code {
89                    write!(f, " (exit code {code})")?;
90                }
91                if !stderr.is_empty() {
92                    write!(f, ": {stderr}")?;
93                }
94                write!(
95                    f,
96                    "\n  To fix: check that the repository is not bare and that git is available."
97                )
98            }
99            Self::InvalidOid { raw } => {
100                write!(
101                    f,
102                    "`git hash-object` returned an invalid OID: {raw:?} \
103                     (expected 40 lowercase hex characters)"
104                )
105            }
106            Self::Io(e) => write!(f, "I/O error during op log write: {e}"),
107            Self::CasMismatch { workspace_id } => {
108                write!(
109                    f,
110                    "CAS mismatch on workspace '{workspace_id}' head ref — \
111                     the single-writer invariant was violated.\n  \
112                     To fix: check that no other process is writing to this workspace."
113                )
114            }
115            Self::RefError(e) => write!(f, "ref update failed: {e}"),
116        }
117    }
118}
119
120impl std::error::Error for OpLogWriteError {
121    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
122        match self {
123            Self::Serialize(e) => Some(e),
124            Self::Io(e) => Some(e),
125            Self::RefError(e) => Some(e),
126            _ => None,
127        }
128    }
129}
130
131impl From<std::io::Error> for OpLogWriteError {
132    fn from(e: std::io::Error) -> Self {
133        Self::Io(e)
134    }
135}
136
137// ---------------------------------------------------------------------------
138// Core helpers
139// ---------------------------------------------------------------------------
140
141/// Write an [`Operation`] as a git blob and return its OID.
142///
143/// Serializes the operation to canonical JSON and pipes it to
144/// `git hash-object -w --stdin`. The returned OID is the operation's
145/// content-addressed identity.
146///
147/// # Arguments
148/// * `root` — absolute path to the git repository root.
149/// * `op` — the operation to store.
150///
151/// # Errors
152/// Returns an error if serialization fails, if git cannot be spawned,
153/// or if git fails to write the blob.
154pub fn write_operation_blob(root: &Path, op: &Operation) -> Result<GitOid, OpLogWriteError> {
155    // 1. Serialize to canonical JSON.
156    let json = op.to_canonical_json().map_err(OpLogWriteError::Serialize)?;
157
158    // 2. Spawn `git hash-object -w --stdin` and pipe JSON in.
159    let mut child = Command::new("git")
160        .args(["hash-object", "-w", "--stdin"])
161        .current_dir(root)
162        .stdin(Stdio::piped())
163        .stdout(Stdio::piped())
164        .stderr(Stdio::piped())
165        .spawn()?;
166
167    // Write JSON to stdin then close it so git sees EOF.
168    {
169        let stdin = child.stdin.as_mut().ok_or_else(|| {
170            std::io::Error::new(std::io::ErrorKind::BrokenPipe, "failed to open git stdin")
171        })?;
172        stdin.write_all(&json)?;
173    } // stdin is dropped here, signalling EOF
174
175    let output = child.wait_with_output()?;
176
177    if !output.status.success() {
178        return Err(OpLogWriteError::HashObject {
179            stderr: String::from_utf8_lossy(&output.stderr).trim().to_owned(),
180            exit_code: output.status.code(),
181        });
182    }
183
184    // 3. Parse the OID from stdout.
185    let raw = String::from_utf8_lossy(&output.stdout);
186    let oid_str = raw.trim();
187
188    GitOid::new(oid_str).map_err(|_| OpLogWriteError::InvalidOid {
189        raw: oid_str.to_owned(),
190    })
191}
192
193// ---------------------------------------------------------------------------
194// High-level append
195// ---------------------------------------------------------------------------
196
197/// Write an operation blob and atomically advance the workspace head ref.
198///
199/// This is the primary entry point for recording an operation to the log.
200/// It performs two steps:
201///
202/// 1. Call [`write_operation_blob`] to store the operation as a git blob.
203/// 2. Update `refs/manifold/head/<workspace>` via compare-and-swap:
204///    - If `old_head` is `None` (first operation ever), use the zero OID as
205///      the expected old value so git only succeeds if the ref doesn't exist.
206///    - If `old_head` is `Some(oid)`, use that OID as the CAS guard.
207///
208/// Returns the new operation blob OID on success.
209///
210/// # CAS and the single-writer invariant
211///
212/// Each workspace is written to by exactly one agent at a time (§5.3).
213/// The CAS is therefore a safety net: it should always succeed. A
214/// [`OpLogWriteError::CasMismatch`] indicates a broken invariant.
215///
216/// # Arguments
217/// * `root` — absolute path to the git repository root.
218/// * `workspace_id` — the workspace whose log is being extended.
219/// * `op` — the operation to append.
220/// * `old_head` — the current head ref value (`None` for the first operation).
221///
222/// # Errors
223/// Returns an error if the blob write fails, if the ref update fails,
224/// or if the CAS guard is violated.
225#[allow(clippy::missing_panics_doc)]
226pub fn append_operation(
227    root: &Path,
228    workspace_id: &WorkspaceId,
229    op: &Operation,
230    old_head: Option<&GitOid>,
231) -> Result<GitOid, OpLogWriteError> {
232    // Step 1: write the blob.
233    let new_oid = write_operation_blob(root, op)?;
234
235    // Step 2: update the head ref atomically.
236    let ref_name = manifold_refs::workspace_head_ref(workspace_id.as_str());
237
238    let result = old_head.map_or_else(
239        || {
240            // First operation: the ref must not yet exist.
241            // Use the zero OID as the expected old value.
242            let zero = GitOid::new(&"0".repeat(40)).expect("zero OID is valid");
243            manifold_refs::write_ref_cas(root, &ref_name, &zero, &new_oid)
244        },
245        |old_oid| {
246            // Subsequent operations: CAS from old → new.
247            manifold_refs::write_ref_cas(root, &ref_name, old_oid, &new_oid)
248        },
249    );
250
251    result.map_err(|e| match e {
252        manifold_refs::RefError::CasMismatch { .. } => OpLogWriteError::CasMismatch {
253            workspace_id: workspace_id.clone(),
254        },
255        other => OpLogWriteError::RefError(other),
256    })?;
257
258    Ok(new_oid)
259}
260
261// ---------------------------------------------------------------------------
262// Tests
263// ---------------------------------------------------------------------------
264
265#[cfg(test)]
266#[allow(clippy::all, clippy::pedantic, clippy::nursery)]
267mod tests {
268    use super::*;
269    use crate::model::types::{EpochId, WorkspaceId};
270    use crate::oplog::types::{OpPayload, Operation};
271    use crate::refs::{read_ref, workspace_head_ref};
272    use std::fs;
273    use std::process::Command;
274    use tempfile::TempDir;
275
276    // -----------------------------------------------------------------------
277    // Test helpers
278    // -----------------------------------------------------------------------
279
280    /// Create a fresh git repo with one commit.
281    fn setup_repo() -> (TempDir, GitOid) {
282        let dir = TempDir::new().unwrap();
283        let root = dir.path();
284
285        Command::new("git")
286            .args(["init"])
287            .current_dir(root)
288            .output()
289            .unwrap();
290        Command::new("git")
291            .args(["config", "user.name", "Test"])
292            .current_dir(root)
293            .output()
294            .unwrap();
295        Command::new("git")
296            .args(["config", "user.email", "test@test.com"])
297            .current_dir(root)
298            .output()
299            .unwrap();
300        Command::new("git")
301            .args(["config", "commit.gpgsign", "false"])
302            .current_dir(root)
303            .output()
304            .unwrap();
305
306        fs::write(root.join("README.md"), "# Test\n").unwrap();
307        Command::new("git")
308            .args(["add", "README.md"])
309            .current_dir(root)
310            .output()
311            .unwrap();
312        Command::new("git")
313            .args(["commit", "-m", "initial"])
314            .current_dir(root)
315            .output()
316            .unwrap();
317
318        let out = Command::new("git")
319            .args(["rev-parse", "HEAD"])
320            .current_dir(root)
321            .output()
322            .unwrap();
323        let oid_str = String::from_utf8_lossy(&out.stdout).trim().to_owned();
324        let oid = GitOid::new(&oid_str).unwrap();
325
326        (dir, oid)
327    }
328
329    fn epoch(c: char) -> EpochId {
330        EpochId::new(&c.to_string().repeat(40)).unwrap()
331    }
332
333    fn ws(name: &str) -> WorkspaceId {
334        WorkspaceId::new(name).unwrap()
335    }
336
337    fn make_create_op(ws_id: &WorkspaceId) -> Operation {
338        Operation {
339            parent_ids: vec![],
340            workspace_id: ws_id.clone(),
341            timestamp: "2026-02-19T12:00:00Z".to_owned(),
342            payload: OpPayload::Create { epoch: epoch('a') },
343        }
344    }
345
346    fn make_describe_op(ws_id: &WorkspaceId, parent: GitOid, message: &str) -> Operation {
347        Operation {
348            parent_ids: vec![parent],
349            workspace_id: ws_id.clone(),
350            timestamp: "2026-02-19T13:00:00Z".to_owned(),
351            payload: OpPayload::Describe {
352                message: message.to_owned(),
353            },
354        }
355    }
356
357    // -----------------------------------------------------------------------
358    // write_operation_blob
359    // -----------------------------------------------------------------------
360
361    #[test]
362    fn write_blob_returns_valid_oid() {
363        let (dir, _) = setup_repo();
364        let root = dir.path();
365        let ws_id = ws("agent-1");
366        let op = make_create_op(&ws_id);
367
368        let oid = write_operation_blob(root, &op).unwrap();
369        // OID should be a valid 40-char hex string
370        assert_eq!(oid.as_str().len(), 40);
371        assert!(
372            oid.as_str()
373                .chars()
374                .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
375        );
376    }
377
378    #[test]
379    fn write_blob_is_readable_with_cat_file() {
380        let (dir, _) = setup_repo();
381        let root = dir.path();
382        let ws_id = ws("agent-1");
383        let op = make_create_op(&ws_id);
384
385        let oid = write_operation_blob(root, &op).unwrap();
386
387        // `git cat-file -p <oid>` should succeed and return valid JSON
388        let out = Command::new("git")
389            .args(["cat-file", "-p", oid.as_str()])
390            .current_dir(root)
391            .output()
392            .unwrap();
393        assert!(
394            out.status.success(),
395            "git cat-file should succeed: {}",
396            String::from_utf8_lossy(&out.stderr)
397        );
398
399        let json_str = String::from_utf8_lossy(&out.stdout);
400        // Should parse back to the original operation
401        let parsed = Operation::from_json(json_str.as_bytes()).unwrap();
402        assert_eq!(parsed, op);
403    }
404
405    #[test]
406    fn write_blob_is_deterministic() {
407        let (dir, _) = setup_repo();
408        let root = dir.path();
409        let ws_id = ws("agent-1");
410        let op = make_create_op(&ws_id);
411
412        let oid1 = write_operation_blob(root, &op).unwrap();
413        let oid2 = write_operation_blob(root, &op).unwrap();
414        assert_eq!(
415            oid1, oid2,
416            "same operation must produce the same blob OID (content-addressed)"
417        );
418    }
419
420    #[test]
421    fn write_blob_different_ops_have_different_oids() {
422        let (dir, _) = setup_repo();
423        let root = dir.path();
424        let ws_id = ws("agent-1");
425
426        let op1 = make_create_op(&ws_id);
427        let op2 = Operation {
428            parent_ids: vec![],
429            workspace_id: ws_id,
430            timestamp: "2026-02-19T12:00:00Z".to_owned(),
431            payload: OpPayload::Create { epoch: epoch('b') },
432        };
433
434        let oid1 = write_operation_blob(root, &op1).unwrap();
435        let oid2 = write_operation_blob(root, &op2).unwrap();
436        assert_ne!(
437            oid1, oid2,
438            "different operations must produce different OIDs"
439        );
440    }
441
442    // -----------------------------------------------------------------------
443    // append_operation — first operation (no prior head)
444    // -----------------------------------------------------------------------
445
446    #[test]
447    fn append_first_op_creates_head_ref() {
448        let (dir, _) = setup_repo();
449        let root = dir.path();
450        let ws_id = ws("agent-1");
451        let op = make_create_op(&ws_id);
452
453        let oid = append_operation(root, &ws_id, &op, None).unwrap();
454
455        // The head ref should now point to the blob OID
456        let ref_name = workspace_head_ref(ws_id.as_str());
457        let head = read_ref(root, &ref_name).unwrap();
458        assert_eq!(head, Some(oid));
459    }
460
461    #[test]
462    fn append_first_op_ref_name_is_correct() {
463        let (dir, _) = setup_repo();
464        let root = dir.path();
465        let ws_id = ws("feature-x");
466        let op = make_create_op(&ws_id);
467
468        let oid = append_operation(root, &ws_id, &op, None).unwrap();
469
470        // Verify via git show-ref
471        let out = Command::new("git")
472            .args(["show-ref", "refs/manifold/head/feature-x"])
473            .current_dir(root)
474            .output()
475            .unwrap();
476        assert!(
477            out.status.success(),
478            "show-ref should find the ref: {}",
479            String::from_utf8_lossy(&out.stderr)
480        );
481        let output_str = String::from_utf8_lossy(&out.stdout);
482        assert!(output_str.contains(oid.as_str()));
483    }
484
485    #[test]
486    fn append_first_op_fails_if_ref_exists() {
487        let (dir, _) = setup_repo();
488        let root = dir.path();
489        let ws_id = ws("agent-1");
490        let op = make_create_op(&ws_id);
491
492        // Create the ref manually to simulate "already exists"
493        let oid1 = append_operation(root, &ws_id, &op, None).unwrap();
494
495        // Try to append again with old_head=None (should fail: ref now exists)
496        let result = append_operation(root, &ws_id, &op, None);
497        assert!(
498            matches!(result, Err(OpLogWriteError::CasMismatch { .. })),
499            "appending with old_head=None when ref exists should fail with CasMismatch: {result:?}"
500        );
501
502        // Ref should still point to the original OID
503        let ref_name = workspace_head_ref(ws_id.as_str());
504        let head = read_ref(root, &ref_name).unwrap();
505        assert_eq!(head, Some(oid1));
506    }
507
508    // -----------------------------------------------------------------------
509    // append_operation — subsequent operations (with prior head)
510    // -----------------------------------------------------------------------
511
512    #[test]
513    fn append_second_op_advances_head() {
514        let (dir, _) = setup_repo();
515        let root = dir.path();
516        let ws_id = ws("agent-1");
517
518        // First operation
519        let op1 = make_create_op(&ws_id);
520        let oid1 = append_operation(root, &ws_id, &op1, None).unwrap();
521
522        // Second operation (parent = oid1)
523        let op2 = make_describe_op(&ws_id, oid1.clone(), "implementing feature");
524        let oid2 = append_operation(root, &ws_id, &op2, Some(&oid1)).unwrap();
525
526        // Head should now point to oid2
527        let ref_name = workspace_head_ref(ws_id.as_str());
528        let head = read_ref(root, &ref_name).unwrap();
529        assert_eq!(head, Some(oid2));
530    }
531
532    #[test]
533    fn append_chain_of_three_ops() {
534        let (dir, _) = setup_repo();
535        let root = dir.path();
536        let ws_id = ws("agent-1");
537
538        let op1 = make_create_op(&ws_id);
539        let oid1 = append_operation(root, &ws_id, &op1, None).unwrap();
540
541        let op2 = make_describe_op(&ws_id, oid1.clone(), "step 2");
542        let oid2 = append_operation(root, &ws_id, &op2, Some(&oid1)).unwrap();
543
544        let op3 = make_describe_op(&ws_id, oid2.clone(), "step 3");
545        let oid3 = append_operation(root, &ws_id, &op3, Some(&oid2)).unwrap();
546
547        // Head should now be oid3
548        let ref_name = workspace_head_ref(ws_id.as_str());
549        let head = read_ref(root, &ref_name).unwrap();
550        assert_eq!(head, Some(oid3));
551
552        // Previous blobs are still accessible
553        let out = Command::new("git")
554            .args(["cat-file", "-t", oid1.as_str()])
555            .current_dir(root)
556            .output()
557            .unwrap();
558        assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), "blob");
559
560        let out = Command::new("git")
561            .args(["cat-file", "-t", oid2.as_str()])
562            .current_dir(root)
563            .output()
564            .unwrap();
565        assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), "blob");
566    }
567
568    #[test]
569    fn cas_mismatch_on_wrong_old_head() {
570        let (dir, _) = setup_repo();
571        let root = dir.path();
572        let ws_id = ws("agent-1");
573
574        // First op
575        let op1 = make_create_op(&ws_id);
576        let oid1 = append_operation(root, &ws_id, &op1, None).unwrap();
577
578        // Second op (advances head to oid2)
579        let op2 = make_describe_op(&ws_id, oid1.clone(), "step 2");
580        let oid2 = append_operation(root, &ws_id, &op2, Some(&oid1)).unwrap();
581
582        // Try to append a third op using stale old_head (oid1 instead of oid2)
583        let op3 = make_describe_op(&ws_id, oid2.clone(), "step 3");
584        let result = append_operation(root, &ws_id, &op3, Some(&oid1));
585        assert!(
586            matches!(result, Err(OpLogWriteError::CasMismatch { .. })),
587            "stale old_head should produce CasMismatch: {result:?}"
588        );
589
590        // Head should still be oid2
591        let ref_name = workspace_head_ref(ws_id.as_str());
592        let head = read_ref(root, &ref_name).unwrap();
593        assert_eq!(head, Some(oid2));
594    }
595
596    // -----------------------------------------------------------------------
597    // blob content verification
598    // -----------------------------------------------------------------------
599
600    #[test]
601    fn blob_content_is_valid_json() {
602        let (dir, _) = setup_repo();
603        let root = dir.path();
604        let ws_id = ws("default");
605        let op = make_create_op(&ws_id);
606
607        let oid = write_operation_blob(root, &op).unwrap();
608
609        let out = Command::new("git")
610            .args(["cat-file", "-p", oid.as_str()])
611            .current_dir(root)
612            .output()
613            .unwrap();
614        assert!(out.status.success());
615
616        // Content should be valid JSON
617        let json_bytes = out.stdout;
618        let value: serde_json::Value =
619            serde_json::from_slice(&json_bytes).expect("blob content should be valid JSON");
620
621        // Should have the expected top-level keys
622        assert!(value.get("workspace_id").is_some());
623        assert!(value.get("parent_ids").is_some());
624        assert!(value.get("timestamp").is_some());
625        assert!(value.get("payload").is_some());
626    }
627
628    #[test]
629    fn blob_content_round_trips_through_json() {
630        let (dir, _) = setup_repo();
631        let root = dir.path();
632        let ws_id = ws("default");
633
634        // Use a complex operation with multiple parent IDs
635        let op = Operation {
636            parent_ids: vec![
637                GitOid::new(&"a".repeat(40)).unwrap(),
638                GitOid::new(&"b".repeat(40)).unwrap(),
639            ],
640            workspace_id: ws_id,
641            timestamp: "2026-02-19T15:30:00Z".to_owned(),
642            payload: OpPayload::Describe {
643                message: "implementing the feature\nwith a multiline description".to_owned(),
644            },
645        };
646
647        let oid = write_operation_blob(root, &op).unwrap();
648
649        let out = Command::new("git")
650            .args(["cat-file", "-p", oid.as_str()])
651            .current_dir(root)
652            .output()
653            .unwrap();
654        assert!(out.status.success());
655
656        let parsed = Operation::from_json(&out.stdout).unwrap();
657        assert_eq!(parsed, op);
658    }
659
660    // -----------------------------------------------------------------------
661    // Error display
662    // -----------------------------------------------------------------------
663
664    #[test]
665    fn error_display_cas_mismatch() {
666        let err = OpLogWriteError::CasMismatch {
667            workspace_id: ws("agent-1"),
668        };
669        let msg = format!("{err}");
670        assert!(msg.contains("agent-1"));
671        assert!(msg.contains("single-writer invariant"));
672        assert!(msg.contains("CAS mismatch"));
673    }
674
675    #[test]
676    fn error_display_hash_object() {
677        let err = OpLogWriteError::HashObject {
678            stderr: "fatal: not a git repo".to_owned(),
679            exit_code: Some(128),
680        };
681        let msg = format!("{err}");
682        assert!(msg.contains("hash-object"));
683        assert!(msg.contains("128"));
684        assert!(msg.contains("fatal: not a git repo"));
685    }
686
687    #[test]
688    fn error_display_invalid_oid() {
689        let err = OpLogWriteError::InvalidOid {
690            raw: "not-a-sha".to_owned(),
691        };
692        let msg = format!("{err}");
693        assert!(msg.contains("invalid OID"));
694        assert!(msg.contains("not-a-sha"));
695    }
696
697    #[test]
698    fn error_display_io() {
699        let err = OpLogWriteError::Io(std::io::Error::new(
700            std::io::ErrorKind::NotFound,
701            "git not found",
702        ));
703        let msg = format!("{err}");
704        assert!(msg.contains("I/O error"));
705        assert!(msg.contains("git not found"));
706    }
707}