Skip to main content

drft/parsers/
script.rs

1use super::{Parser, RawLink};
2use std::io::Write;
3use std::process::{Command, Stdio};
4use std::time::Duration;
5
6/// Script-based parser. Runs an external command that receives the file path
7/// on stdin and emits NDJSON links on stdout.
8pub struct ScriptParser {
9    pub parser_name: String,
10    pub glob: globset::GlobMatcher,
11    pub type_filter: Option<Vec<String>>,
12    pub command: String,
13    pub timeout_ms: u64,
14}
15
16impl Parser for ScriptParser {
17    fn name(&self) -> &str {
18        &self.parser_name
19    }
20
21    fn matches(&self, path: &str) -> bool {
22        let filename = path.rsplit('/').next().unwrap_or(path);
23        self.glob.is_match(filename)
24    }
25
26    fn parse(&self, path: &str, _content: &str) -> Vec<RawLink> {
27        match self.run_script(path) {
28            Ok(links) => links,
29            Err(e) => {
30                eprintln!("warn: parser {}: {path}: {e}", self.parser_name);
31                Vec::new()
32            }
33        }
34    }
35}
36
37impl ScriptParser {
38    fn run_script(&self, path: &str) -> anyhow::Result<Vec<RawLink>> {
39        let mut child = Command::new("sh")
40            .arg("-c")
41            .arg(&self.command)
42            .stdin(Stdio::piped())
43            .stdout(Stdio::piped())
44            .stderr(Stdio::piped())
45            .spawn()?;
46
47        // Send file path on stdin
48        if let Some(mut stdin) = child.stdin.take() {
49            let _ = stdin.write_all(path.as_bytes());
50        }
51
52        // Wait with timeout
53        let output = match wait_with_timeout(&mut child, Duration::from_millis(self.timeout_ms)) {
54            Ok(output) => output,
55            Err(_) => {
56                let _ = child.kill();
57                anyhow::bail!("timed out after {}ms", self.timeout_ms);
58            }
59        };
60
61        if !output.status.success() {
62            let code = output.status.code().unwrap_or(-1);
63            anyhow::bail!("exited with code {code}");
64        }
65
66        let stdout = String::from_utf8_lossy(&output.stdout);
67        let mut links = Vec::new();
68
69        for line in stdout.lines() {
70            let line = line.trim();
71            if line.is_empty() {
72                continue;
73            }
74            match serde_json::from_str::<ScriptLink>(line) {
75                Ok(sl) => {
76                    links.push(RawLink {
77                        target: sl.target,
78                        link_type: sl.link_type,
79                        is_external: false,
80                    });
81                }
82                Err(e) => {
83                    eprintln!(
84                        "warn: parser {}: malformed JSON line: {e}",
85                        self.parser_name
86                    );
87                }
88            }
89        }
90
91        // Apply type filter
92        if let Some(ref types) = self.type_filter {
93            links.retain(|l| types.iter().any(|t| t == &l.link_type));
94        }
95
96        Ok(links)
97    }
98}
99
100#[derive(serde::Deserialize)]
101struct ScriptLink {
102    target: String,
103    #[serde(rename = "type")]
104    link_type: String,
105}
106
/// Waits for `child` to exit, giving up once `timeout` has elapsed.
///
/// The child's stdout and stderr pipes (when still attached) are drained on
/// background threads while waiting. Reading them only after the child exits
/// would deadlock with any script that writes more than the OS pipe buffer
/// holds: the script blocks in `write`, never exits, and ends up reported as
/// a timeout.
///
/// Returns the exit status together with everything read from both pipes.
/// Returns `Err(())` when the timeout elapses or when polling the child
/// fails. The child is *not* killed here; that is left to the caller. After a
/// timeout the drain threads finish on their own once the pipes are closed.
fn wait_with_timeout(
    child: &mut std::process::Child,
    timeout: Duration,
) -> Result<std::process::Output, ()> {
    /// Upper bound on the delay between two exit-status polls.
    const MAX_POLL_INTERVAL: Duration = Duration::from_millis(50);

    /// Starts a thread that reads `pipe` to EOF and hands back the bytes.
    fn drain<R>(pipe: Option<R>) -> Option<std::thread::JoinHandle<Vec<u8>>>
    where
        R: std::io::Read + Send + 'static,
    {
        pipe.map(|mut reader| {
            std::thread::spawn(move || {
                let mut buf = Vec::new();
                // Best effort: on a read error keep whatever arrived before it.
                let _ = std::io::Read::read_to_end(&mut reader, &mut buf);
                buf
            })
        })
    }

    /// Joins a drain thread; a missing pipe or a panicked thread yields no bytes.
    fn collect(reader: Option<std::thread::JoinHandle<Vec<u8>>>) -> Vec<u8> {
        reader
            .and_then(|handle| handle.join().ok())
            .unwrap_or_default()
    }

    let stdout_reader = drain(child.stdout.take());
    let stderr_reader = drain(child.stderr.take());

    // Simple polling approach — fine for file-level parsing timeouts. The
    // interval starts short so fast scripts are not penalised by a full
    // 50 ms sleep, then backs off to keep the polling cheap.
    let start = std::time::Instant::now();
    let mut poll_interval = Duration::from_millis(1);
    loop {
        match child.try_wait() {
            Ok(Some(status)) => {
                return Ok(std::process::Output {
                    status,
                    stdout: collect(stdout_reader),
                    stderr: collect(stderr_reader),
                });
            }
            Ok(None) => {
                if start.elapsed() > timeout {
                    return Err(());
                }
                std::thread::sleep(poll_interval);
                poll_interval = (poll_interval * 2).min(MAX_POLL_INTERVAL);
            }
            Err(_) => return Err(()),
        }
    }
}