Skip to main content

solid_pod_rs_git/
mark.rs

1//! Native `GitMarker` implementation — the cheap, always-on provenance tier.
2//!
3//! Implements [`solid_pod_rs::provenance::GitMarker`] by shelling out to the
4//! system `git` binary via [`tokio::process::Command`], mirroring the style of
5//! [`crate::api`] (`git_commit`/`git_add`) and [`crate::service`]. This is the
6//! single canonical write-provenance path: an LDP `PUT`/`POST`/`PATCH` that
7//! lands in a git-backed pod is followed by a [`ShellGitMarker::mark_write`]
8//! call that stages the written file and commits it, so every pod write becomes
9//! a git commit whose SHA is captured and surfaced as a
10//! [`GitMark`](solid_pod_rs::provenance::GitMark).
11//!
12//! ## Why native-only
13//!
14//! The pure provenance *surface* (types + `prov_ttl`) lives in
15//! `solid-pod-rs::provenance` and compiles for `wasm32`. This implementor
16//! spawns a subprocess, so it is native-only and lives here in
17//! `solid-pod-rs-git` (which already depends on `tokio::process`). Wasm
18//! consumers compile against a no-op marker, never this one
19//! (ADR-059 D4).
20//!
21//! ## Commit identity
22//!
23//! Commits are authored as `solid-pod-rs <agent_did>` — the writer's
24//! `did:nostr` (NIP-98 principal) becomes the commit author *email*, binding
25//! the on-disk git history to the authenticated agent. Identity is injected
26//! per-invocation via `git -c user.name=… -c user.email=…` so no global git
27//! config is mutated and the repo needs no pre-seeded `user.*`.
28
29use std::path::Path;
30
31use async_trait::async_trait;
32use solid_pod_rs::provenance::{GitMark, GitMarker, ProvenanceError};
33use tokio::process::Command;
34
/// Branch name reported in every [`GitMark`] produced by this module.
///
/// The auto-init pins this branch (`init.rs` runs `git init -b main`), so the
/// marker records the constant instead of paying for a second
/// `rev-parse --abbrev-ref` round-trip per write. The value is reported as-is;
/// it is not checked against the repository's actual current branch.
const PINNED_BRANCH: &str = "main";
39
/// A [`GitMarker`] that commits pod writes by shelling to `git`.
///
/// Carries no repository state — the repo directory is supplied on every
/// call — so cloning only copies the committer-name string used in the
/// `git -c user.name=…` override.
#[derive(Debug, Clone)]
pub struct ShellGitMarker {
    /// Value passed to `git -c user.name=…`. [`ShellGitMarker::new`] sets it
    /// to `solid-pod-rs`.
    committer_name: String,
}
49
50impl ShellGitMarker {
51    /// Construct with the default committer name (`solid-pod-rs`).
52    #[must_use]
53    pub fn new() -> Self {
54        Self {
55            committer_name: "solid-pod-rs".to_string(),
56        }
57    }
58
59    /// Override the committer name written into `git -c user.name=…`.
60    #[must_use]
61    pub fn with_committer_name(name: impl Into<String>) -> Self {
62        Self {
63            committer_name: name.into(),
64        }
65    }
66}
67
68impl Default for ShellGitMarker {
69    fn default() -> Self {
70        Self::new()
71    }
72}
73
74/// Run a git command in `repo`, returning trimmed stdout on success.
75///
76/// Maps a missing `git` binary and any non-zero exit into
77/// [`ProvenanceError::Git`] so the server hook can log-and-swallow uniformly.
78async fn git(repo: &Path, args: &[&str]) -> Result<String, ProvenanceError> {
79    let output = Command::new("git")
80        .args(args)
81        .current_dir(repo)
82        .output()
83        .await
84        .map_err(|e| {
85            if e.kind() == std::io::ErrorKind::NotFound {
86                ProvenanceError::Git("git binary not found in PATH".into())
87            } else {
88                ProvenanceError::Git(format!("spawn git {args:?}: {e}"))
89            }
90        })?;
91
92    if output.status.success() {
93        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
94    } else {
95        // `git commit` writes its human status ("nothing to commit, working
96        // tree clean") to STDOUT, not stderr, yet still exits non-zero. Fold
97        // both streams into the error message so the no-op detection in
98        // `mark_write` can match on it.
99        let stderr = String::from_utf8_lossy(&output.stderr);
100        let stdout = String::from_utf8_lossy(&output.stdout);
101        let detail = match (stderr.trim().is_empty(), stdout.trim().is_empty()) {
102            (false, false) => format!("{}; {}", stderr.trim(), stdout.trim()),
103            (false, true) => stderr.trim().to_string(),
104            (true, false) => stdout.trim().to_string(),
105            (true, true) => String::new(),
106        };
107        Err(ProvenanceError::Git(format!(
108            "git {args:?} exited {:?}: {detail}",
109            output.status.code(),
110        )))
111    }
112}
113
/// Compute the slug that identifies a pod repo in a [`GitMark`].
///
/// The slug is the last component of the repo directory (the pod name /
/// pubkey segment), matching the server's `data_root/{pod}` layout. When the
/// path has no final component (e.g. a root path, or one ending in `..`) the
/// whole path, lossily converted, is used instead.
fn repo_slug(repo: &Path) -> String {
    let slug = match repo.file_name() {
        Some(last_component) => last_component.to_string_lossy(),
        None => repo.to_string_lossy(),
    };
    slug.into_owned()
}
123
124/// Resolve `HEAD` to a SHA, mapping the unborn-branch case (a fresh
125/// `git init` with no commits) to `Ok(None)` rather than an error.
126async fn head_sha(repo: &Path) -> Result<Option<String>, ProvenanceError> {
127    match git(repo, &["rev-parse", "HEAD"]).await {
128        Ok(sha) if !sha.is_empty() => Ok(Some(sha)),
129        Ok(_) => Ok(None),
130        // `rev-parse HEAD` fails on an unborn branch (`fatal: ambiguous
131        // argument 'HEAD'` / `unknown revision`). That is "no commits yet",
132        // not a hard failure.
133        Err(ProvenanceError::Git(msg))
134            if msg.contains("unknown revision")
135                || msg.contains("ambiguous argument")
136                || msg.contains("bad revision")
137                || msg.contains("does not have any commits") =>
138        {
139            Ok(None)
140        }
141        Err(e) => Err(e),
142    }
143}
144
145#[async_trait(?Send)]
146impl GitMarker for ShellGitMarker {
147    async fn mark_write(
148        &self,
149        repo: &Path,
150        path: &str,
151        agent_did: &str,
152        message: &str,
153    ) -> Result<GitMark, ProvenanceError> {
154        // Reject path escapes defensively — the server already constrains the
155        // path, but a marker must never `git add` outside the repo.
156        if path.starts_with('/') || path.contains("..") {
157            return Err(ProvenanceError::InvalidPath(path.to_string()));
158        }
159
160        // 1. Record the pre-write HEAD — this is the parent of whatever commit
161        //    we are about to create (the append-only chain link).
162        let parent = head_sha(repo).await?;
163
164        // 2. Stage the written file. `git add` materialises the index entry the
165        //    commit will snapshot.
166        git(repo, &["add", "--", path]).await?;
167
168        // 3. Commit with the agent's did:nostr as the author email. Identity is
169        //    injected per-invocation via `-c` overrides so no global git config
170        //    is touched and the repo needs no pre-seeded `user.*`.
171        //    `--allow-empty` is deliberately NOT passed: an idempotent re-write
172        //    of identical bytes yields "nothing to commit", which we treat as a
173        //    no-op below rather than fabricating an empty commit.
174        let name_cfg = format!("user.name={}", self.committer_name);
175        let email_cfg = format!("user.email={agent_did}");
176        let commit_res = git(
177            repo,
178            &[
179                "-c",
180                &name_cfg,
181                "-c",
182                &email_cfg,
183                "commit",
184                "-m",
185                message,
186            ],
187        )
188        .await;
189
190        // 4. Resolve the resulting HEAD. On success it is the new commit; on
191        //    "nothing to commit" it is unchanged from `parent`.
192        match commit_res {
193            Ok(_) => {
194                let commit_sha = head_sha(repo)
195                    .await?
196                    .ok_or_else(|| ProvenanceError::Git("HEAD unresolved after commit".into()))?;
197                Ok(GitMark {
198                    commit_sha,
199                    repo: repo_slug(repo),
200                    branch: PINNED_BRANCH.to_string(),
201                    parent,
202                })
203            }
204            Err(ProvenanceError::Git(msg))
205                if msg.contains("nothing to commit")
206                    || msg.contains("no changes added")
207                    || msg.contains("working tree clean")
208                    || msg.contains("nothing added to commit") =>
209            {
210                // Idempotent re-write: nothing changed. Surface the current
211                // HEAD as the mark without erroring (the resource is already at
212                // this commit). Its "parent" is HEAD's own parent.
213                match &parent {
214                    Some(head) => {
215                        let head_parent = git(repo, &["rev-parse", &format!("{head}^")])
216                            .await
217                            .ok()
218                            .filter(|s| !s.is_empty());
219                        Ok(GitMark {
220                            commit_sha: head.clone(),
221                            repo: repo_slug(repo),
222                            branch: PINNED_BRANCH.to_string(),
223                            parent: head_parent,
224                        })
225                    }
226                    // No prior commit AND nothing to commit: an empty add of an
227                    // already-clean tree. Nothing to surface — propagate as a
228                    // soft git error the caller swallows.
229                    None => Err(ProvenanceError::Git(
230                        "nothing to commit and no prior HEAD".into(),
231                    )),
232                }
233            }
234            Err(e) => Err(e),
235        }
236    }
237
238    async fn head(&self, repo: &Path) -> Result<Option<String>, ProvenanceError> {
239        head_sha(repo).await
240    }
241}
242
243// ---------------------------------------------------------------------------
244// Tests
245// ---------------------------------------------------------------------------
246
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Stdio;
    use tempfile::TempDir;

    /// Whether a runnable `git` binary is on `PATH`; tests return early
    /// (effectively skipping) when it is not.
    fn git_available() -> bool {
        std::process::Command::new("git")
            .arg("--version")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .map(|s| s.success())
            .unwrap_or(false)
    }

    /// Initialise a non-bare repo on branch `main` in a fresh temp dir.
    ///
    /// No `user.*` identity is configured here: `mark_write` injects it
    /// per-invocation. Panics if `git init` does not succeed, so a failed
    /// init cannot let a test continue in a directory that is not a repo.
    async fn init_repo() -> TempDir {
        let td = TempDir::new().unwrap();
        let status = std::process::Command::new("git")
            .args(["init", "-b", "main"])
            .current_dir(td.path())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .unwrap();
        assert!(status.success(), "`git init -b main` failed: {status}");
        td
    }

    /// Write `contents` to `repo/rel`, creating parent directories as needed.
    async fn write_file(repo: &Path, rel: &str, contents: &str) {
        let abs = repo.join(rel);
        if let Some(parent) = abs.parent() {
            tokio::fs::create_dir_all(parent).await.unwrap();
        }
        tokio::fs::write(abs, contents).await.unwrap();
    }

    #[tokio::test]
    async fn head_is_none_on_unborn_branch() {
        if !git_available() {
            return;
        }
        let td = init_repo().await;
        let marker = ShellGitMarker::new();
        assert_eq!(marker.head(td.path()).await.unwrap(), None);
    }

    #[tokio::test]
    async fn mark_write_creates_commit_and_captures_sha() {
        if !git_available() {
            return;
        }
        let td = init_repo().await;
        let marker = ShellGitMarker::new();

        write_file(td.path(), "notes/hello.ttl", "<a> <b> <c> .").await;
        let mark = marker
            .mark_write(td.path(), "notes/hello.ttl", "did:nostr:abcd", "PUT /notes/hello.ttl")
            .await
            .unwrap();

        // SHA is a 40-hex commit id and equals the new HEAD.
        assert_eq!(mark.commit_sha.len(), 40);
        assert!(mark.commit_sha.bytes().all(|b| b.is_ascii_hexdigit()));
        assert_eq!(mark.branch, "main");
        assert_eq!(mark.repo, td.path().file_name().unwrap().to_string_lossy());
        // First commit of a fresh repo has no parent.
        assert_eq!(mark.parent, None);

        let head = marker.head(td.path()).await.unwrap().unwrap();
        assert_eq!(head, mark.commit_sha);

        // The agent did:nostr is the author email on the commit.
        let email = git(td.path(), &["log", "-1", "--format=%ae"]).await.unwrap();
        assert_eq!(email, "did:nostr:abcd");
        let name = git(td.path(), &["log", "-1", "--format=%an"]).await.unwrap();
        assert_eq!(name, "solid-pod-rs");
    }

    #[tokio::test]
    async fn parent_chain_links_two_writes() {
        if !git_available() {
            return;
        }
        let td = init_repo().await;
        let marker = ShellGitMarker::new();

        write_file(td.path(), "a.ttl", "first").await;
        let m1 = marker
            .mark_write(td.path(), "a.ttl", "did:nostr:a", "write a")
            .await
            .unwrap();

        write_file(td.path(), "b.ttl", "second").await;
        let m2 = marker
            .mark_write(td.path(), "b.ttl", "did:nostr:b", "write b")
            .await
            .unwrap();

        // The second mark's parent is the first mark's commit — the append-only
        // chain. SHAs differ.
        assert_ne!(m1.commit_sha, m2.commit_sha);
        assert_eq!(m1.parent, None);
        assert_eq!(m2.parent.as_deref(), Some(m1.commit_sha.as_str()));
    }

    #[tokio::test]
    async fn nothing_to_commit_returns_head_without_error() {
        if !git_available() {
            return;
        }
        let td = init_repo().await;
        let marker = ShellGitMarker::new();

        write_file(td.path(), "a.ttl", "content").await;
        let m1 = marker
            .mark_write(td.path(), "a.ttl", "did:nostr:a", "write a")
            .await
            .unwrap();

        // Re-mark the SAME path with identical content already committed: there
        // is nothing to commit. We must get a mark referencing the current HEAD,
        // not an error, and HEAD must not have advanced.
        let m2 = marker
            .mark_write(td.path(), "a.ttl", "did:nostr:a", "re-write a")
            .await
            .unwrap();
        assert_eq!(m2.commit_sha, m1.commit_sha, "HEAD must not advance");
        assert_eq!(
            marker.head(td.path()).await.unwrap().as_deref(),
            Some(m1.commit_sha.as_str())
        );
    }

    #[tokio::test]
    async fn rejects_path_traversal() {
        if !git_available() {
            return;
        }
        let td = init_repo().await;
        let marker = ShellGitMarker::new();
        assert!(matches!(
            marker
                .mark_write(td.path(), "../escape.ttl", "did:nostr:a", "x")
                .await,
            Err(ProvenanceError::InvalidPath(_))
        ));
        assert!(matches!(
            marker
                .mark_write(td.path(), "/abs.ttl", "did:nostr:a", "x")
                .await,
            Err(ProvenanceError::InvalidPath(_))
        ));
    }
}