Skip to main content

tess/
preprocess.rs

1//! Input preprocessor: pipe the source file through a user-defined command
2//! before tess reads it. Supports `--preprocess CMD` (CLI) and `$LESSOPEN`
3//! (env var). Pipe-mode only — the command must start with `|`, and `%s`
4//! is substituted with the source file path (shell-quoted).
5
6use std::path::Path;
7use std::process::{Command, Stdio};
8
/// A validated pipe-mode preprocessor command.
#[derive(Debug, Clone)]
pub struct Preprocessor {
    /// Command text that followed the leading `|`, stored verbatim. Any
    /// `%s` placeholder is left unexpanded here.
    pub command: String,
}

impl Preprocessor {
    /// Build a `Preprocessor` from a raw CLI-flag or env-var value.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when `raw` does not begin with `|`,
    /// or when only whitespace follows the `|`.
    pub fn parse(raw: &str) -> Result<Self, String> {
        match raw.strip_prefix('|') {
            None => Err(format!(
                "preprocess: '{}' must start with '|' (tempfile mode is not supported)",
                raw
            )),
            Some(command) if command.trim().is_empty() => {
                Err("preprocess: command after '|' is empty".to_string())
            }
            // Only the `|` is removed; surrounding whitespace is kept as given.
            Some(command) => Ok(Self {
                command: command.to_owned(),
            }),
        }
    }
}
32
/// Outcome of running the preprocessor command.
#[derive(Debug)]
pub enum PreprocessResult {
    /// The command succeeded; these bytes are to be used as the source.
    Bytes(Vec<u8>),
    /// The command could not be used: it exited non-zero, printed nothing,
    /// or could not be spawned. `stderr` carries the text to show the user;
    /// the caller is expected to fall back to the raw file.
    Failed { stderr: String },
}
41
42/// Run the preprocessor against `file_path`. Substitutes `%s` with the
43/// path (shell-quoted via single quotes with internal quote escaping).
44/// Spawns via `sh -c`.
45pub fn run(p: &Preprocessor, file_path: &Path) -> PreprocessResult {
46    let path_str = file_path.to_string_lossy();
47    let quoted = shell_quote(&path_str);
48    let cmd_line = p.command.replace("%s", &quoted);
49
50    let output_result = Command::new("/bin/sh")
51        .arg("-c")
52        .arg(&cmd_line)
53        .stdin(Stdio::null())
54        .stdout(Stdio::piped())
55        .stderr(Stdio::piped())
56        .output();
57
58    match output_result {
59        Ok(output) => {
60            if !output.status.success() {
61                let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
62                let msg = if stderr.is_empty() {
63                    format!("exited with status {}", output.status)
64                } else {
65                    stderr
66                };
67                return PreprocessResult::Failed { stderr: msg };
68            }
69            if output.stdout.is_empty() {
70                return PreprocessResult::Failed {
71                    stderr: "preprocessor produced no output".to_string(),
72                };
73            }
74            PreprocessResult::Bytes(output.stdout)
75        }
76        Err(e) => PreprocessResult::Failed {
77            stderr: format!("spawn failed: {e}"),
78        },
79    }
80}
81
/// Wrap `s` in single quotes so it can be embedded in a `sh -c` command
/// line as one literal word. Each single quote inside `s` is rewritten as
/// `'\''` (close the quoted run, emit an escaped quote, reopen), so `it's`
/// turns into `'it'\''s'`.
fn shell_quote(s: &str) -> String {
    let escaped = s.replace('\'', "'\\''");
    format!("'{escaped}'")
}
98
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Create a named temp file holding `contents`. The file exists for as
    /// long as the returned handle is kept alive.
    fn fixture(contents: &[u8]) -> tempfile::NamedTempFile {
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        tmp.write_all(contents).unwrap();
        tmp
    }

    /// Parse `raw` as a preprocessor spec and run it against `path`.
    fn run_raw(raw: &str, path: &std::path::Path) -> PreprocessResult {
        let p = Preprocessor::parse(raw).unwrap();
        run(&p, path)
    }

    // ---- parse -----------------------------------------------------------

    #[test]
    fn parse_strips_pipe_prefix() {
        let parsed = Preprocessor::parse("|cat %s").unwrap();
        assert_eq!(parsed.command, "cat %s");
    }

    #[test]
    fn parse_rejects_value_without_pipe() {
        let msg = Preprocessor::parse("cat %s").unwrap_err();
        assert!(msg.contains("must start with '|'"));
    }

    #[test]
    fn parse_rejects_pipe_with_empty_command() {
        // A bare pipe and a pipe followed only by spaces are both rejected.
        for raw in ["|", "|   "] {
            let msg = Preprocessor::parse(raw).unwrap_err();
            assert!(msg.contains("empty"));
        }
    }

    // ---- run -------------------------------------------------------------

    #[test]
    fn run_cat_on_fixture_returns_bytes() {
        let tmp = fixture(b"hello world\n");
        match run_raw("|cat %s", tmp.path()) {
            PreprocessResult::Failed { stderr } => panic!("expected Bytes, got Failed: {stderr}"),
            PreprocessResult::Bytes(b) => assert_eq!(b, b"hello world\n"),
        }
    }

    #[test]
    fn run_false_returns_failed() {
        let tmp = fixture(b"x");
        if let PreprocessResult::Bytes(_) = run_raw("|false", tmp.path()) {
            panic!("expected Failed");
        }
    }

    #[test]
    fn run_missing_command_returns_failed() {
        let tmp = fixture(b"x");
        let outcome = run_raw("|definitely-not-a-real-command-x9z %s", tmp.path());
        let PreprocessResult::Failed { stderr } = outcome else {
            panic!("expected Failed");
        };
        assert!(!stderr.is_empty(), "stderr should describe the error");
    }

    #[test]
    fn run_empty_stdout_returns_failed() {
        let tmp = fixture(b"x");
        // `true` exits 0 but prints nothing.
        let PreprocessResult::Failed { stderr } = run_raw("|true", tmp.path()) else {
            panic!("expected Failed");
        };
        assert!(stderr.contains("no output"));
    }

    #[test]
    fn run_substitutes_path_with_spaces_safely() {
        let dir = tempfile::tempdir().unwrap();
        let spaced = dir.path().join("name with spaces.txt");
        std::fs::write(&spaced, b"content\n").unwrap();
        match run_raw("|cat %s", &spaced) {
            PreprocessResult::Failed { stderr } => panic!("expected Bytes, got: {stderr}"),
            PreprocessResult::Bytes(b) => assert_eq!(b, b"content\n"),
        }
    }

    // ---- shell_quote -----------------------------------------------------

    #[test]
    fn shell_quote_handles_single_quote() {
        assert_eq!(shell_quote("it's"), "'it'\\''s'");
    }
}
184    #[test]
185    fn shell_quote_handles_single_quote() {
186        assert_eq!(shell_quote("it's"), "'it'\\''s'");
187    }
188}