Skip to main content

grit_lib/
filter_process.rs

//! Long-running Git filter protocol (`filter.<name>.process`), matching `git-filter` v2.
//!
//! See Git's `convert.c` (`apply_multi_file_filter`) and `sub-process.c` (handshake).
4
5use std::collections::HashMap;
6use std::io::{Read, Write};
7use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
8use std::sync::{Mutex, OnceLock};
9
10use crate::objects::ObjectId;
11use crate::refs;
12use crate::repo::Repository;
13
/// Largest payload a single pkt-line may carry (Git `LARGE_PACKET_DATA_MAX`): the 65520-byte
/// packet limit minus the four-byte hex length header.
const LARGE_PACKET_DATA_MAX: usize = 65_520 - 4;

/// Capability bit recorded when the filter advertises `capability=clean`.
const CAP_CLEAN: u32 = 0b001;
/// Capability bit recorded when the filter advertises `capability=smudge`.
const CAP_SMUDGE: u32 = 0b010;
/// Capability bit recorded when the filter advertises `capability=delay`.
const CAP_DELAY: u32 = 0b100;
20
/// Optional key/value metadata attached to a smudge request.
///
/// Every field is independent; a `None` field is simply not sent to the filter.
#[derive(Clone, Debug, Default)]
pub struct FilterSmudgeMeta {
    /// Ref name, sent as a `ref=` line.
    pub ref_name: Option<String>,
    /// Hex object id of the tree-ish being checked out, sent as a `treeish=` line.
    pub treeish_hex: Option<String>,
    /// Hex object id of the blob being smudged, sent as a `blob=` line.
    pub blob_hex: Option<String>,
}
28
29/// Smudge metadata for path-only checkouts (`git checkout -- <paths>`): `blob=` only.
30#[must_use]
31pub fn smudge_meta_blob_only(blob_hex: &str) -> FilterSmudgeMeta {
32    FilterSmudgeMeta {
33        blob_hex: Some(blob_hex.to_string()),
34        ..Default::default()
35    }
36}
37
38/// Smudge metadata with `treeish=` only (e.g. `git reset --hard <commit>` / `git merge` checkout).
39#[must_use]
40pub fn smudge_meta_treeish_only(treeish_hex: &str, blob_hex: &str) -> FilterSmudgeMeta {
41    FilterSmudgeMeta {
42        treeish_hex: Some(treeish_hex.to_string()),
43        blob_hex: Some(blob_hex.to_string()),
44        ..Default::default()
45    }
46}
47
48/// Process-smudge metadata for `git reset --hard <ref>` (t0021): `ref=` when the spec names a ref.
49#[must_use]
50pub fn smudge_meta_for_reset(
51    repo: &Repository,
52    commit_spec: &str,
53    resolved_commit: &ObjectId,
54    blob_hex: &str,
55) -> FilterSmudgeMeta {
56    let tip_hex = resolved_commit.to_string();
57    let mut meta = FilterSmudgeMeta {
58        treeish_hex: Some(tip_hex.clone()),
59        blob_hex: Some(blob_hex.to_string()),
60        ..Default::default()
61    };
62    let arg_lower = commit_spec.to_ascii_lowercase();
63    let is_full_hex = arg_lower.len() == 40 && arg_lower.chars().all(|c| c.is_ascii_hexdigit());
64    if is_full_hex && arg_lower == tip_hex.to_ascii_lowercase() {
65        meta.ref_name = None;
66        return meta;
67    }
68    let mut candidates: Vec<String> = Vec::new();
69    if commit_spec == "HEAD" || commit_spec.starts_with("refs/") {
70        candidates.push(commit_spec.to_string());
71    } else {
72        candidates.push(format!("refs/heads/{commit_spec}"));
73        candidates.push(format!("refs/tags/{commit_spec}"));
74        candidates.push(commit_spec.to_string());
75    }
76    for name in candidates {
77        if let Ok(oid) = refs::resolve_ref(&repo.git_dir, &name) {
78            if oid == *resolved_commit {
79                meta.ref_name = Some(name);
80                break;
81            }
82        }
83    }
84    meta
85}
86
87/// Process-smudge metadata for `git archive` (matches Git / t0021).
88///
89/// `tree_ish_arg` is the user's argument (`main`, full commit hex, or tree hex).
90/// `resolved_tip` is the OID `archive` resolved; `tip_is_commit` is true when that object is a commit.
91#[must_use]
92pub fn smudge_meta_for_archive(
93    repo: &Repository,
94    tree_ish_arg: &str,
95    resolved_tip: &ObjectId,
96    tip_is_commit: bool,
97    blob_hex: &str,
98) -> FilterSmudgeMeta {
99    let mut meta = FilterSmudgeMeta {
100        blob_hex: Some(blob_hex.to_string()),
101        ..Default::default()
102    };
103    if !tip_is_commit {
104        meta.treeish_hex = Some(resolved_tip.to_string());
105        return meta;
106    }
107    let tip_hex = resolved_tip.to_string();
108    meta.treeish_hex = Some(tip_hex.clone());
109    let arg_lower = tree_ish_arg.to_ascii_lowercase();
110    let is_full_hex = arg_lower.len() == 40 && arg_lower.chars().all(|c| c.is_ascii_hexdigit());
111    if is_full_hex && arg_lower == tip_hex.to_ascii_lowercase() {
112        meta.ref_name = None;
113        return meta;
114    }
115    if let Ok(oid) = refs::resolve_ref(&repo.git_dir, tree_ish_arg) {
116        if oid == *resolved_tip {
117            meta.ref_name = Some(tree_ish_arg.to_string());
118            return meta;
119        }
120    }
121    let heads = format!("refs/heads/{tree_ish_arg}");
122    if let Ok(oid) = refs::resolve_ref(&repo.git_dir, &heads) {
123        if oid == *resolved_tip {
124            meta.ref_name = Some(heads);
125        }
126    }
127    meta
128}
129
130pub fn smudge_meta_for_checkout(repo: &Repository, blob_hex: &str) -> FilterSmudgeMeta {
131    let mut meta = FilterSmudgeMeta {
132        blob_hex: Some(blob_hex.to_string()),
133        ..Default::default()
134    };
135    let Ok(content) = std::fs::read_to_string(repo.git_dir.join("HEAD")) else {
136        return meta;
137    };
138    let content = content.trim();
139    if let Some(sym) = content.strip_prefix("ref: ") {
140        let sym = sym.trim();
141        meta.ref_name = Some(sym.to_string());
142        if let Ok(oid) = refs::resolve_ref(&repo.git_dir, sym) {
143            meta.treeish_hex = Some(oid.to_string());
144        }
145    } else if content.len() == 40 {
146        if let Ok(oid) = ObjectId::from_hex(content) {
147            meta.treeish_hex = Some(oid.to_string());
148        }
149    }
150    meta
151}
152
/// A spawned long-running filter process together with its pipes and negotiated capabilities.
struct RunningFilter {
    /// Handle of the spawned process; stored but not read by the code in this file, hence the
    /// `dead_code` allowance.
    #[allow(dead_code)]
    child: Child,
    /// Pipe to the filter's stdin; temporarily taken out while a request is in flight.
    stdin: Option<ChildStdin>,
    /// Pipe from the filter's stdout; temporarily taken out while a request is in flight.
    stdout: Option<ChildStdout>,
    /// Bitwise OR of the `CAP_*` flags the filter advertised during the handshake.
    caps: u32,
}
160
161fn process_registry() -> &'static Mutex<HashMap<String, Mutex<RunningFilter>>> {
162    static REG: OnceLock<Mutex<HashMap<String, Mutex<RunningFilter>>>> = OnceLock::new();
163    REG.get_or_init(|| Mutex::new(HashMap::new()))
164}
165
/// Write the low 16 bits of `len` into `out` as four lowercase hex digits, most significant
/// nibble first (the pkt-line length header).
fn set_packet_header(len: usize, out: &mut [u8; 4]) {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    for (slot, shift) in out.iter_mut().zip([12u32, 8, 4, 0]) {
        *slot = DIGITS[(len >> shift) & 0xf];
    }
}
173
174fn write_packet(stdin: &mut ChildStdin, payload: &[u8]) -> std::io::Result<()> {
175    if payload.len() > LARGE_PACKET_DATA_MAX {
176        return Err(std::io::Error::other("filter packet payload too large"));
177    }
178    let total = payload.len() + 4;
179    let mut hdr = [0u8; 4];
180    set_packet_header(total, &mut hdr);
181    stdin.write_all(&hdr)?;
182    stdin.write_all(payload)?;
183    stdin.flush()?;
184    Ok(())
185}
186
187fn write_packet_line(stdin: &mut ChildStdin, line: &str) -> std::io::Result<()> {
188    let mut s = line.to_string();
189    if !s.ends_with('\n') {
190        s.push('\n');
191    }
192    write_packet(stdin, s.as_bytes())
193}
194
/// Send a flush packet (`0000`) to the filter and flush the pipe.
fn write_flush(stdin: &mut ChildStdin) -> std::io::Result<()> {
    const FLUSH_PKT: &[u8; 4] = b"0000";
    stdin.write_all(FLUSH_PKT).and_then(|()| stdin.flush())
}
199
/// Fill `buf` completely from `r`.
///
/// Short reads are continued until the buffer is full, and reads that fail with
/// `ErrorKind::Interrupted` are retried rather than reported, so a signal arriving during a
/// blocking read does not abort the exchange.
///
/// # Errors
/// Returns `UnexpectedEof` ("unexpected EOF reading pkt-line") if the stream ends before `buf`
/// is full, or the first non-`Interrupted` I/O error from `r`.
fn read_exact<R: Read>(r: &mut R, buf: &mut [u8]) -> std::io::Result<()> {
    let mut filled = 0;
    while filled < buf.len() {
        match r.read(&mut buf[filled..]) {
            Ok(0) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "unexpected EOF reading pkt-line",
                ));
            }
            Ok(n) => filled += n,
            // A signal interrupted the call before any data was transferred; try again.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
214
/// Read the four-byte pkt-line length header from the filter's stdout.
///
/// Returns `Ok(None)` when the stream ends before the first header byte arrives, and
/// `Ok(Some(header))` once all four bytes have been read. Reads that fail with
/// `ErrorKind::Interrupted` are retried rather than reported.
///
/// # Errors
/// Returns `UnexpectedEof` if the stream ends after one to three header bytes, or the first
/// non-`Interrupted` I/O error from the pipe.
fn read_packet_header(stdout: &mut ChildStdout) -> std::io::Result<Option<[u8; 4]>> {
    let mut hdr = [0u8; 4];
    let mut filled = 0usize;
    while filled < hdr.len() {
        match stdout.read(&mut hdr[filled..]) {
            // End of stream on a packet boundary: let the caller decide what that means.
            Ok(0) if filled == 0 => return Ok(None),
            Ok(0) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "unexpected EOF reading pkt-line",
                ));
            }
            Ok(n) => filled += n,
            // A signal interrupted the call before any data was transferred; try again.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(Some(hdr))
}
233
234fn read_packet_payload(stdout: &mut ChildStdout) -> std::io::Result<Option<Vec<u8>>> {
235    let Some(hdr) = read_packet_header(stdout)? else {
236        return Ok(None);
237    };
238    let hex = std::str::from_utf8(&hdr)
239        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
240    let total = usize::from_str_radix(hex, 16).map_err(|_| {
241        std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid pkt-line header")
242    })?;
243    if total == 0 {
244        return Ok(None);
245    }
246    if total < 4 {
247        return Err(std::io::Error::new(
248            std::io::ErrorKind::InvalidData,
249            "invalid pkt-line length",
250        ));
251    }
252    let len = total - 4;
253    let mut payload = vec![0u8; len];
254    read_exact(stdout, &mut payload)?;
255    Ok(Some(payload))
256}
257
258fn read_packet_line(stdout: &mut ChildStdout) -> std::io::Result<Option<String>> {
259    let Some(payload) = read_packet_payload(stdout)? else {
260        return Ok(None);
261    };
262    let s = String::from_utf8_lossy(&payload).into_owned();
263    Ok(Some(s.trim_end_matches('\n').to_string()))
264}
265
266/// Read pkt-lines until flush; updates `acc` only when a `status=` line appears (matches Git
267/// `subprocess_read_status` — if the segment is empty, `acc` is left unchanged).
268fn read_status(stdout: &mut ChildStdout, acc: &mut String) -> std::io::Result<()> {
269    loop {
270        let Some(line) = read_packet_line(stdout)? else {
271            break;
272        };
273        if let Some(rest) = line.strip_prefix("status=") {
274            *acc = rest.to_string();
275        }
276    }
277    Ok(())
278}
279
280fn read_packetized(stdout: &mut ChildStdout) -> std::io::Result<Vec<u8>> {
281    let mut out = Vec::new();
282    loop {
283        let Some(chunk) = read_packet_payload(stdout)? else {
284            break;
285        };
286        out.extend_from_slice(&chunk);
287    }
288    Ok(out)
289}
290
291fn handshake(stdout: &mut ChildStdout, stdin: &mut ChildStdin) -> std::io::Result<u32> {
292    // Match Git's test-tool rot13-filter: client sends only `version=2` before the first flush.
293    write_packet_line(stdin, "git-filter-client")?;
294    write_packet_line(stdin, "version=2")?;
295    write_flush(stdin)?;
296
297    let Some(server) = read_packet_line(stdout)? else {
298        return Err(std::io::Error::new(
299            std::io::ErrorKind::UnexpectedEof,
300            "expected git-filter-server",
301        ));
302    };
303    if server != "git-filter-server" {
304        return Err(std::io::Error::new(
305            std::io::ErrorKind::InvalidData,
306            format!("unexpected filter server line: {server}"),
307        ));
308    }
309    let Some(ver_line) = read_packet_line(stdout)? else {
310        return Err(std::io::Error::new(
311            std::io::ErrorKind::UnexpectedEof,
312            "expected version line",
313        ));
314    };
315    let ver = ver_line
316        .strip_prefix("version=")
317        .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidData, "expected version="))?;
318    if ver != "2" {
319        return Err(std::io::Error::new(
320            std::io::ErrorKind::InvalidData,
321            format!("unsupported filter protocol version {ver}"),
322        ));
323    }
324    if read_packet_line(stdout)?.is_some() {
325        return Err(std::io::Error::new(
326            std::io::ErrorKind::InvalidData,
327            "expected flush after version",
328        ));
329    }
330
331    write_packet_line(stdin, "capability=clean")?;
332    write_packet_line(stdin, "capability=smudge")?;
333    write_packet_line(stdin, "capability=delay")?;
334    write_flush(stdin)?;
335
336    let mut caps = 0u32;
337    loop {
338        let Some(line) = read_packet_line(stdout)? else {
339            break;
340        };
341        if let Some(name) = line.strip_prefix("capability=") {
342            match name {
343                "clean" => caps |= CAP_CLEAN,
344                "smudge" => caps |= CAP_SMUDGE,
345                "delay" => caps |= CAP_DELAY,
346                _ => {}
347            }
348        }
349    }
350
351    Ok(caps)
352}
353
354fn spawn_running(cmd: &str) -> std::io::Result<RunningFilter> {
355    let mut child = Command::new("sh")
356        .arg("-c")
357        .arg(cmd)
358        .stdin(Stdio::piped())
359        .stdout(Stdio::piped())
360        .stderr(Stdio::inherit())
361        .spawn()?;
362
363    let mut stdin = child
364        .stdin
365        .take()
366        .ok_or_else(|| std::io::Error::other("filter process missing stdin"))?;
367    let mut stdout = child
368        .stdout
369        .take()
370        .ok_or_else(|| std::io::Error::other("filter process missing stdout"))?;
371
372    let caps = handshake(&mut stdout, &mut stdin)?;
373
374    Ok(RunningFilter {
375        child,
376        stdin: Some(stdin),
377        stdout: Some(stdout),
378        caps,
379    })
380}
381
382fn ensure_started(cmd: &str) -> Result<(), String> {
383    let mut reg = process_registry()
384        .lock()
385        .map_err(|_| "filter registry poisoned".to_string())?;
386    if reg.contains_key(cmd) {
387        return Ok(());
388    }
389    let rf = spawn_running(cmd).map_err(|e| e.to_string())?;
390    reg.insert(cmd.to_string(), Mutex::new(rf));
391    Ok(())
392}
393
394fn write_packetized(stdin: &mut ChildStdin, data: &[u8]) -> std::io::Result<()> {
395    let mut off = 0usize;
396    while off < data.len() {
397        let end = (off + LARGE_PACKET_DATA_MAX).min(data.len());
398        write_packet(stdin, &data[off..end])?;
399        off = end;
400    }
401    Ok(())
402}
403
404/// Run clean via long-running filter `cmd` for `path` and `input`.
405pub fn apply_process_clean(cmd: &str, path: &str, input: &[u8]) -> Result<Vec<u8>, String> {
406    ensure_started(cmd)?;
407    let reg = process_registry()
408        .lock()
409        .map_err(|_| "filter registry poisoned".to_string())?;
410    let proc_mutex = reg
411        .get(cmd)
412        .ok_or_else(|| "filter process not registered".to_string())?;
413    let mut rf = proc_mutex
414        .lock()
415        .map_err(|_| "filter process mutex poisoned".to_string())?;
416    if rf.caps & CAP_CLEAN == 0 {
417        return Err("filter process does not support clean".to_string());
418    }
419
420    let mut stdin = rf
421        .stdin
422        .take()
423        .ok_or_else(|| "filter stdin missing".to_string())?;
424    let mut stdout = rf
425        .stdout
426        .take()
427        .ok_or_else(|| "filter stdout missing".to_string())?;
428
429    let result = (|| {
430        write_packet_line(&mut stdin, "command=clean").map_err(|e| e.to_string())?;
431        write_packet_line(&mut stdin, &format!("pathname={path}")).map_err(|e| e.to_string())?;
432        write_flush(&mut stdin).map_err(|e| e.to_string())?;
433        write_packetized(&mut stdin, input).map_err(|e| e.to_string())?;
434        write_flush(&mut stdin).map_err(|e| e.to_string())?;
435
436        let mut st = String::new();
437        read_status(&mut stdout, &mut st).map_err(|e| e.to_string())?;
438        if st != "success" {
439            return Err(format!("filter status: {st}"));
440        }
441        let out = read_packetized(&mut stdout).map_err(|e| e.to_string())?;
442        read_status(&mut stdout, &mut st).map_err(|e| e.to_string())?;
443        if st != "success" {
444            return Err(format!("filter tail status: {st}"));
445        }
446        Ok(out)
447    })();
448
449    rf.stdin = Some(stdin);
450    rf.stdout = Some(stdout);
451    result
452}
453
454/// Run smudge via long-running filter.
455pub fn apply_process_smudge(
456    cmd: &str,
457    path: &str,
458    input: &[u8],
459    meta: Option<&FilterSmudgeMeta>,
460) -> Result<Vec<u8>, String> {
461    ensure_started(cmd)?;
462    let reg = process_registry()
463        .lock()
464        .map_err(|_| "filter registry poisoned".to_string())?;
465    let proc_mutex = reg
466        .get(cmd)
467        .ok_or_else(|| "filter process not registered".to_string())?;
468    let mut rf = proc_mutex
469        .lock()
470        .map_err(|_| "filter process mutex poisoned".to_string())?;
471    let mut stdin = rf
472        .stdin
473        .take()
474        .ok_or_else(|| "filter stdin missing".to_string())?;
475    let mut stdout = rf
476        .stdout
477        .take()
478        .ok_or_else(|| "filter stdout missing".to_string())?;
479
480    let result = (|| {
481        if rf.caps & CAP_SMUDGE == 0 {
482            return Ok(input.to_vec());
483        }
484        write_packet_line(&mut stdin, "command=smudge").map_err(|e| e.to_string())?;
485        write_packet_line(&mut stdin, &format!("pathname={path}")).map_err(|e| e.to_string())?;
486        if let Some(m) = meta {
487            if let Some(r) = &m.ref_name {
488                write_packet_line(&mut stdin, &format!("ref={r}")).map_err(|e| e.to_string())?;
489            }
490            if let Some(t) = &m.treeish_hex {
491                write_packet_line(&mut stdin, &format!("treeish={t}"))
492                    .map_err(|e| e.to_string())?;
493            }
494            if let Some(b) = &m.blob_hex {
495                write_packet_line(&mut stdin, &format!("blob={b}")).map_err(|e| e.to_string())?;
496            }
497        }
498        write_flush(&mut stdin).map_err(|e| e.to_string())?;
499        write_packetized(&mut stdin, input).map_err(|e| e.to_string())?;
500        write_flush(&mut stdin).map_err(|e| e.to_string())?;
501
502        let mut st = String::new();
503        read_status(&mut stdout, &mut st).map_err(|e| e.to_string())?;
504        if st == "delayed" {
505            return Err("delayed checkout not supported by grit process filter".to_string());
506        }
507        if st != "success" {
508            return Err(format!("filter status: {st}"));
509        }
510        let out = read_packetized(&mut stdout).map_err(|e| e.to_string())?;
511        read_status(&mut stdout, &mut st).map_err(|e| e.to_string())?;
512        if st != "success" {
513            return Err(format!("filter tail status: {st}"));
514        }
515        Ok(out)
516    })();
517
518    rf.stdin = Some(stdin);
519    rf.stdout = Some(stdout);
520    result
521}