Skip to main content

gatel_core/proxy/
cgi.rs

1//! CGI (Common Gateway Interface) handler.
2//!
3//! Executes a CGI script as a subprocess, setting the standard CGI environment
4//! variables, piping the request body to stdin, and parsing the script's stdout
5//! as a CGI-style response (headers followed by body).
6
7use std::collections::HashMap;
8use std::path::PathBuf;
9use std::process::Stdio;
10
11use http::{Response, StatusCode};
12use http_body_util::BodyExt;
13use tokio::io::AsyncWriteExt;
14use tokio::process::Command;
15use tracing::warn;
16
17use crate::{Body, ProxyError, full_body, goals};
18
/// CGI handler: executes scripts rooted at a given directory.
///
/// The handler is cheap to clone and can be printed with `{:?}` for
/// diagnostics; both capabilities are derived from its plain-data fields.
#[derive(Debug, Clone)]
pub struct CgiHandler {
    /// Directory that request paths are joined onto to locate the script.
    root: PathBuf,
    /// Extra environment variables injected into every CGI invocation.
    env: HashMap<String, String>,
}
25
26impl CgiHandler {
27    pub fn new(root: String, env: HashMap<String, String>) -> Self {
28        Self {
29            root: PathBuf::from(root),
30            env,
31        }
32    }
33}
34
35#[salvo::async_trait]
36impl salvo::Handler for CgiHandler {
37    async fn handle(
38        &self,
39        req: &mut salvo::Request,
40        _depot: &mut salvo::Depot,
41        res: &mut salvo::Response,
42        ctrl: &mut salvo::FlowCtrl,
43    ) {
44        let client_addr = crate::hoops::client_addr(req);
45        let request = match goals::strip_request(req) {
46            Ok(r) => r,
47            Err(e) => {
48                goals::merge_response(res, e.into_response());
49                ctrl.skip_rest();
50                return;
51            }
52        };
53        let response = self
54            .run(request, client_addr)
55            .await
56            .unwrap_or_else(|e| e.into_response());
57        goals::merge_response(res, response);
58        ctrl.skip_rest();
59    }
60}
61
62impl CgiHandler {
63    async fn run(
64        &self,
65        request: http::Request<crate::Body>,
66        client_addr: std::net::SocketAddr,
67    ) -> Result<Response<crate::Body>, ProxyError> {
68        let path = request.uri().path().to_string();
69        let script_path = self.root.join(path.trim_start_matches('/'));
70
71        if !script_path.exists() {
72            return Ok(Response::builder()
73                .status(StatusCode::NOT_FOUND)
74                .body(full_body("Not Found"))?);
75        }
76
77        // Collect the request body before decomposing the request.
78        let (parts, body) = request.into_parts();
79        let body_bytes = body
80            .collect()
81            .await
82            .map_err(|e| ProxyError::Internal(format!("body collect: {e}")))?
83            .to_bytes();
84
85        // Build the child process with CGI environment variables.
86        let mut cmd = Command::new(&script_path);
87        cmd.stdin(Stdio::piped())
88            .stdout(Stdio::piped())
89            .stderr(Stdio::piped());
90
91        // Standard CGI environment variables.
92        cmd.env("REQUEST_METHOD", parts.method.as_str());
93        cmd.env("QUERY_STRING", parts.uri.query().unwrap_or(""));
94        cmd.env("CONTENT_LENGTH", body_bytes.len().to_string());
95        cmd.env(
96            "CONTENT_TYPE",
97            parts
98                .headers
99                .get("content-type")
100                .and_then(|v| v.to_str().ok())
101                .unwrap_or(""),
102        );
103        cmd.env("SERVER_PROTOCOL", format!("{:?}", parts.version));
104        cmd.env("SERVER_SOFTWARE", "gatel");
105        cmd.env("GATEWAY_INTERFACE", "CGI/1.1");
106        cmd.env("SCRIPT_NAME", &path);
107        cmd.env("SCRIPT_FILENAME", script_path.to_string_lossy().to_string());
108        cmd.env("REQUEST_URI", parts.uri.to_string());
109        cmd.env("PATH_INFO", &path);
110        cmd.env("REMOTE_ADDR", client_addr.ip().to_string());
111        cmd.env("REMOTE_PORT", client_addr.port().to_string());
112
113        if let Some(host) = parts.headers.get("host").and_then(|v| v.to_str().ok()) {
114            cmd.env("SERVER_NAME", host.split(':').next().unwrap_or(host));
115            if let Some(port) = host.split(':').nth(1) {
116                cmd.env("SERVER_PORT", port);
117            }
118        }
119
120        // Translate HTTP headers to HTTP_* environment variables.
121        for (name, value) in &parts.headers {
122            if let Ok(v) = value.to_str() {
123                let env_name = format!("HTTP_{}", name.as_str().to_uppercase().replace('-', "_"));
124                cmd.env(&env_name, v);
125            }
126        }
127
128        // Inject custom environment variables from configuration.
129        for (k, v) in &self.env {
130            cmd.env(k, v);
131        }
132
133        let mut child = cmd.spawn().map_err(|e| {
134            ProxyError::Internal(format!(
135                "failed to spawn CGI script {}: {e}",
136                script_path.display()
137            ))
138        })?;
139
140        // Write request body to stdin, then close to signal EOF.
141        if let Some(mut stdin) = child.stdin.take() {
142            stdin.write_all(&body_bytes).await.ok();
143            drop(stdin);
144        }
145
146        let output = child.wait_with_output().await.map_err(|e| {
147            ProxyError::Internal(format!(
148                "failed to read CGI output from {}: {e}",
149                script_path.display()
150            ))
151        })?;
152
153        if !output.stderr.is_empty() {
154            let stderr = String::from_utf8_lossy(&output.stderr);
155            warn!(
156                script = %script_path.display(),
157                stderr = %stderr,
158                "CGI script wrote to stderr"
159            );
160        }
161
162        parse_cgi_response(&output.stdout)
163    }
164}
165
166// ---------------------------------------------------------------------------
167// CGI response parsing (shared with SCGI)
168// ---------------------------------------------------------------------------
169
170/// Parse the raw CGI output (headers `\r\n\r\n` or `\n\n` separated from body)
171/// into an HTTP response.
172///
173/// The output has the form:
174/// ```text
175/// Status: 200 OK\r\n
176/// Content-Type: text/html\r\n
177/// \r\n
178/// <html>...</html>
179/// ```
180///
181/// The `Status` pseudo-header is consumed and used to set the response status.
182/// All other headers are forwarded verbatim.  If `Status` is absent, 200 is assumed.
183pub fn parse_cgi_response(output: &[u8]) -> Result<Response<Body>, ProxyError> {
184    // Prefer \r\n\r\n as the separator; fall back to \n\n for lenient CGI scripts.
185    let (header_bytes, body_bytes) = if let Some(pos) = find_subsequence(output, b"\r\n\r\n") {
186        (&output[..pos], &output[pos + 4..])
187    } else if let Some(pos) = find_subsequence(output, b"\n\n") {
188        (&output[..pos], &output[pos + 2..])
189    } else {
190        // No separator found — treat the whole output as a body with no headers.
191        (&[] as &[u8], output)
192    };
193
194    let header_str = String::from_utf8_lossy(header_bytes);
195    let mut status = StatusCode::OK;
196    let mut builder = Response::builder();
197
198    for line in header_str.lines() {
199        if line.is_empty() {
200            continue;
201        }
202        if let Some(colon_pos) = line.find(':') {
203            let name = line[..colon_pos].trim();
204            let value = line[colon_pos + 1..].trim();
205
206            if name.eq_ignore_ascii_case("status") {
207                // Parse "200 OK" or just "200".
208                let code_str = value.split_whitespace().next().unwrap_or("200");
209                if let Ok(code) = code_str.parse::<u16>() {
210                    status = StatusCode::from_u16(code).unwrap_or(StatusCode::OK);
211                }
212            } else {
213                // Forward any other header to the response.
214                if let (Ok(hn), Ok(hv)) = (
215                    name.parse::<http::header::HeaderName>(),
216                    value.parse::<http::header::HeaderValue>(),
217                ) {
218                    builder = builder.header(hn, hv);
219                }
220            }
221        }
222    }
223
224    builder = builder.status(status);
225    let body = full_body(bytes::Bytes::copy_from_slice(body_bytes));
226    builder
227        .body(body)
228        .map_err(|e| ProxyError::Internal(format!("failed to build CGI response: {e}")))
229}
230
/// Find the first occurrence of `needle` in `haystack`, returning its start index.
///
/// Returns `None` when `needle` does not occur (including when it is longer
/// than `haystack`). An empty `needle` matches at index 0.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `slice::windows` panics on a window size of zero, so the empty needle
    // is answered up front.
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `raw` as CGI output, panicking if parsing fails.
    fn parse(raw: &[u8]) -> Response<Body> {
        parse_cgi_response(raw).unwrap()
    }

    /// A `Status` line sets the response code; other headers are forwarded.
    #[test]
    fn test_parse_cgi_response_with_status() {
        let resp = parse(b"Status: 404 Not Found\r\nContent-Type: text/plain\r\n\r\nNot here");
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);
        let content_type = resp.headers().get("content-type").unwrap();
        assert_eq!(content_type, "text/plain");
    }

    /// Without a `Status` line the response code is 200.
    #[test]
    fn test_parse_cgi_response_default_status() {
        let resp = parse(b"Content-Type: text/html\r\n\r\n<h1>Hello</h1>");
        assert_eq!(resp.status(), StatusCode::OK);
    }

    /// Some CGI scripts use bare `\n\n` instead of `\r\n\r\n`.
    #[test]
    fn test_parse_cgi_response_lf_separator() {
        let resp = parse(b"Content-Type: text/plain\n\nHello");
        assert_eq!(resp.status(), StatusCode::OK);
    }

    /// Output without any header/body separator still parses, with code 200.
    #[test]
    fn test_parse_cgi_response_no_headers() {
        let resp = parse(b"just a body with no headers");
        assert_eq!(resp.status(), StatusCode::OK);
    }
}