riley_cms_core/
git.rs

1//! Git Smart HTTP protocol support via git-http-backend CGI
2//!
3//! This module provides functionality to serve Git repositories over HTTP
4//! using the Git Smart HTTP protocol. It works by invoking the system's
5//! `git http-backend` CGI binary.
6//!
7//! The implementation streams request bodies to the CGI process and streams
8//! CGI output back to the client, avoiding buffering large payloads in memory.
9
10use crate::error::{Error, Result};
11use bytes::Bytes;
12use futures_util::StreamExt;
13use std::collections::HashMap;
14use std::io;
15use std::path::Path;
16use std::pin::Pin;
17use std::process::Stdio;
18use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
19use tokio::process::{Child, Command};
20use tokio::task::JoinHandle;
21use tokio::time::{Duration, Instant, timeout};
22
/// Default ceiling on how long a `git-http-backend` process is given to complete
/// (five minutes).
pub const DEFAULT_GIT_CGI_TIMEOUT: Duration = Duration::from_secs(5 * 60);
25use tokio_util::io::ReaderStream;
26
/// Byte budget for the CGI header section (16 KiB); header parsing fails once it
/// is exceeded.
const MAX_CGI_HEADER_SIZE: usize = 16_384;
29
30/// Stream of response body bytes from a CGI process.
31pub type BodyStream =
32    Pin<Box<dyn futures_util::Stream<Item = std::result::Result<Bytes, io::Error>> + Send>>;
33
/// Header section of a CGI response: the status code plus the response headers.
///
/// The response body is not part of this type.
#[derive(Debug)]
pub struct GitCgiHeaders {
    /// HTTP status code taken from the CGI `Status` header; 200 when the script
    /// sent no parseable `Status` header.
    pub status: u16,
    /// Remaining response headers emitted by the CGI script, keyed by
    /// lower-cased header name.
    pub headers: HashMap<String, String>,
}
42
43/// A streaming CGI response.
44///
45/// The headers have already been parsed from stdout. The `body_stream` yields
46/// the remaining stdout bytes. Call `completion.wait()` after the stream is
47/// consumed to reap the child process and check its exit status.
48pub struct GitCgiStreamResponse {
49    /// Parsed CGI headers
50    pub headers: GitCgiHeaders,
51    /// Stream of body bytes from stdout (after the header separator)
52    pub body_stream: BodyStream,
53    /// Handle to await process completion (for webhook timing)
54    pub completion: GitCgiCompletion,
55}
56
57/// Handle to monitor CGI process completion and collect stderr.
58///
59/// The caller should await `wait()` after the body stream has been consumed
60/// (or dropped) to ensure the process is reaped and stderr is logged.
61pub struct GitCgiCompletion {
62    child: Child,
63    stderr_task: JoinHandle<String>,
64    stdin_task: Option<JoinHandle<std::result::Result<(), Error>>>,
65}
66
67impl GitCgiCompletion {
68    /// Wait for the process to exit. Returns the exit status.
69    ///
70    /// Also joins the stdin streaming task and logs stderr output.
71    /// The `cgi_timeout` parameter bounds the total time for both stdin streaming
72    /// AND child process exit, preventing slow-sending clients from keeping the
73    /// task alive indefinitely.
74    pub async fn wait(mut self, cgi_timeout: Duration) -> Result<std::process::ExitStatus> {
75        let start = Instant::now();
76
77        // Phase 1: Wait for stdin to finish (bounded by cgi_timeout).
78        // A slow-sending client could stall here indefinitely without this timeout.
79        if let Some(stdin_task) = self.stdin_task.take() {
80            match timeout(cgi_timeout, stdin_task).await {
81                Ok(Ok(Err(e))) => {
82                    tracing::warn!("stdin streaming error (non-fatal): {}", e);
83                }
84                Ok(Err(e)) => tracing::warn!("stdin task panicked: {}", e),
85                Ok(Ok(Ok(()))) => {}
86                Err(_elapsed) => {
87                    tracing::error!(
88                        "Git CGI timed out waiting for request body after {}s, killing",
89                        cgi_timeout.as_secs()
90                    );
91                    let _ = self.child.kill().await;
92                    return Err(Error::Git("Git operation timed out".to_string()));
93                }
94            }
95        }
96
97        // Phase 2: Wait for child process to exit with remaining timeout budget.
98        let remaining = cgi_timeout.saturating_sub(start.elapsed());
99        let status = match timeout(remaining, self.child.wait()).await {
100            Ok(result) => result
101                .map_err(|e| Error::Git(format!("Failed to wait on git-http-backend: {}", e)))?,
102            Err(_elapsed) => {
103                tracing::error!(
104                    "Git CGI process timed out after {}s, killing",
105                    cgi_timeout.as_secs()
106                );
107                let _ = self.child.kill().await;
108                return Err(Error::Git("Git operation timed out".to_string()));
109            }
110        };
111
112        let stderr = self
113            .stderr_task
114            .await
115            .unwrap_or_else(|_| String::from("<stderr task panicked>"));
116
117        if !stderr.is_empty() {
118            tracing::warn!("git-http-backend stderr: {}", stderr);
119        }
120
121        Ok(status)
122    }
123}
124
/// Fully buffered CGI response (status, headers and body); only compiled for tests.
#[cfg(test)]
#[derive(Debug)]
pub(crate) struct GitCgiResponse {
    /// HTTP status code of the response
    pub status: u16,
    /// Response headers as a name-to-value map
    pub headers: HashMap<String, String>,
    /// Raw body bytes
    pub body: Vec<u8>,
}
133
/// Wrapper around the `git http-backend` CGI program.
///
/// Serves the Git Smart HTTP protocol by spawning `git http-backend` for each
/// request.
pub struct GitBackend {
    /// Location of the Git repository being served
    repo_path: std::path::PathBuf,
    /// Explicit location of the git-http-backend binary, when one was configured
    configured_backend_path: Option<std::path::PathBuf>,
}
143
144impl GitBackend {
145    /// Create a new Git backend for the given repository path
146    pub fn new(repo_path: impl AsRef<Path>) -> Self {
147        Self {
148            repo_path: repo_path.as_ref().to_path_buf(),
149            configured_backend_path: None,
150        }
151    }
152
153    /// Create a new Git backend with an explicit backend binary path
154    pub fn with_backend_path(
155        repo_path: impl AsRef<Path>,
156        backend_path: Option<std::path::PathBuf>,
157    ) -> Self {
158        Self {
159            repo_path: repo_path.as_ref().to_path_buf(),
160            configured_backend_path: backend_path,
161        }
162    }
163
164    /// Run a Git CGI request with streaming I/O.
165    ///
166    /// The request body is provided as an async stream of byte chunks.
167    /// Returns parsed CGI headers plus a stream of the response body.
168    ///
169    /// # Arguments
170    ///
171    /// * `method` - HTTP method (GET, POST)
172    /// * `path_info` - Path after the Git URL prefix (e.g., "/info/refs")
173    /// * `query_string` - Query string (e.g., "service=git-upload-pack")
174    /// * `content_type` - Content-Type header value (if any)
175    /// * `content_length` - Content-Length from the request (if known)
176    /// * `body_stream` - Stream of request body chunks
177    /// * `max_body_size` - Maximum allowed body size in bytes
178    #[allow(clippy::too_many_arguments)]
179    pub async fn run_cgi(
180        &self,
181        method: &str,
182        path_info: &str,
183        query_string: Option<&str>,
184        content_type: Option<&str>,
185        content_length: Option<u64>,
186        body_stream: impl futures_util::Stream<Item = std::result::Result<Bytes, io::Error>>
187        + Send
188        + Unpin
189        + 'static,
190        max_body_size: u64,
191    ) -> Result<GitCgiStreamResponse> {
192        // Build CGI environment variables
193        let mut env = HashMap::new();
194        env.insert(
195            "GIT_PROJECT_ROOT".to_string(),
196            self.repo_path.to_string_lossy().to_string(),
197        );
198        env.insert("GIT_HTTP_EXPORT_ALL".to_string(), "1".to_string());
199        env.insert("PATH_INFO".to_string(), path_info.to_string());
200        env.insert("REQUEST_METHOD".to_string(), method.to_string());
201
202        if let Some(qs) = query_string {
203            env.insert("QUERY_STRING".to_string(), qs.to_string());
204        }
205
206        if let Some(ct) = content_type {
207            env.insert("CONTENT_TYPE".to_string(), ct.to_string());
208        }
209
210        if let Some(cl) = content_length {
211            env.insert("CONTENT_LENGTH".to_string(), cl.to_string());
212        }
213
214        // Find git-http-backend (use configured path if available)
215        let git_backend = match &self.configured_backend_path {
216            Some(path) => path.to_string_lossy().to_string(),
217            None => find_git_http_backend()?,
218        };
219
220        // Spawn the git-http-backend process
221        let mut child = Command::new(&git_backend)
222            .envs(&env)
223            .stdin(Stdio::piped())
224            .stdout(Stdio::piped())
225            .stderr(Stdio::piped())
226            .spawn()
227            .map_err(|e| Error::Git(format!("Failed to spawn git-http-backend: {}", e)))?;
228
229        // === STDIN STREAMING ===
230        // Spawn a task to stream the request body to stdin with size enforcement.
231        let mut stdin = child
232            .stdin
233            .take()
234            .ok_or_else(|| Error::Git("Failed to open stdin pipe".to_string()))?;
235
236        let stdin_task: JoinHandle<std::result::Result<(), Error>> = tokio::spawn(async move {
237            let mut body_stream = std::pin::pin!(body_stream);
238            let mut total_bytes: u64 = 0;
239
240            while let Some(chunk_result) = body_stream.next().await {
241                let chunk =
242                    chunk_result.map_err(|e| Error::Git(format!("Body stream error: {}", e)))?;
243                total_bytes += chunk.len() as u64;
244                if total_bytes > max_body_size {
245                    return Err(Error::Git(format!(
246                        "Request body too large ({} bytes exceeds max {} bytes)",
247                        total_bytes, max_body_size
248                    )));
249                }
250                if let Err(e) = stdin.write_all(&chunk).await {
251                    // Broken pipe is expected if the child doesn't need all input
252                    // (e.g., GET requests with empty body, or child errored early)
253                    if e.kind() == io::ErrorKind::BrokenPipe {
254                        break;
255                    }
256                    return Err(Error::Git(format!(
257                        "Failed to write to git-http-backend stdin: {}",
258                        e
259                    )));
260                }
261            }
262            // Close stdin to signal EOF to the child
263            let _ = stdin.shutdown().await;
264            Ok(())
265        });
266
267        // === STDERR COLLECTION ===
268        // Spawn a task to buffer stderr (capped at 64KB, typically small).
269        let stderr = child
270            .stderr
271            .take()
272            .ok_or_else(|| Error::Git("Failed to open stderr pipe".to_string()))?;
273
274        let stderr_task: JoinHandle<String> = tokio::spawn(async move {
275            let mut buf = String::new();
276            let mut limited = BufReader::new(stderr).take(64 * 1024);
277            let _ = limited.read_to_string(&mut buf).await;
278            buf
279        });
280
281        // === STDOUT HEADER PARSING ===
282        // Read from stdout until we find the header/body separator.
283        let stdout = child
284            .stdout
285            .take()
286            .ok_or_else(|| Error::Git("Failed to open stdout pipe".to_string()))?;
287
288        let mut stdout_reader = BufReader::new(stdout);
289        let headers = read_cgi_headers(&mut stdout_reader).await?;
290
291        // Check if stdin_task already finished with an error (e.g., body too large).
292        // If so, abort before streaming begins so we can return a proper error response.
293        if stdin_task.is_finished() {
294            match stdin_task.await {
295                Ok(Err(e)) => return Err(e),
296                Err(join_err) => {
297                    return Err(Error::Git(format!("stdin task panicked: {}", join_err)));
298                }
299                Ok(Ok(())) => {
300                    // stdin completed successfully, continue with no stdin_task to track
301                    let body_stream: BodyStream = Box::pin(ReaderStream::new(stdout_reader));
302
303                    return Ok(GitCgiStreamResponse {
304                        headers,
305                        body_stream,
306                        completion: GitCgiCompletion {
307                            child,
308                            stderr_task,
309                            stdin_task: None,
310                        },
311                    });
312                }
313            }
314        }
315
316        // === STDOUT BODY STREAMING ===
317        // The remaining bytes in stdout_reader become the body stream.
318        let body_stream: BodyStream = Box::pin(ReaderStream::new(stdout_reader));
319
320        Ok(GitCgiStreamResponse {
321            headers,
322            body_stream,
323            completion: GitCgiCompletion {
324                child,
325                stderr_task,
326                stdin_task: Some(stdin_task),
327            },
328        })
329    }
330
331    /// Check if the repository exists and is a valid Git repository
332    pub fn is_valid_repo(&self) -> bool {
333        self.repo_path.join(".git").exists() || self.repo_path.join("HEAD").exists()
334    }
335}
336
337/// Read CGI headers from a buffered reader.
338///
339/// Reads line by line until an empty line (the header/body separator) is found.
340/// The reader is left positioned at the start of the body.
341/// Fails if total headers exceed `MAX_CGI_HEADER_SIZE` bytes.
342///
343/// Uses a bounded line reader (`fill_buf`/`consume`) to prevent any single
344/// line from causing unbounded memory allocation.
345async fn read_cgi_headers<R: tokio::io::AsyncBufRead + Unpin>(
346    reader: &mut R,
347) -> Result<GitCgiHeaders> {
348    let mut headers = HashMap::new();
349    let mut status: u16 = 200;
350    let mut total_header_bytes: usize = 0;
351    let mut line_buf = Vec::new();
352
353    loop {
354        line_buf.clear();
355        let found_newline = read_bounded_line(reader, &mut line_buf, MAX_CGI_HEADER_SIZE).await?;
356
357        if line_buf.is_empty() && !found_newline {
358            // EOF before finding separator — treat as headers-only response
359            break;
360        }
361
362        total_header_bytes += line_buf.len() + if found_newline { 1 } else { 0 };
363        if total_header_bytes > MAX_CGI_HEADER_SIZE {
364            return Err(Error::Git(format!(
365                "CGI headers too large (>{} bytes). Possible malformed response.",
366                MAX_CGI_HEADER_SIZE
367            )));
368        }
369
370        // Check for the empty line separator (after trimming \r\n or \n)
371        let trimmed = strip_line_ending(&line_buf);
372        if trimmed.is_empty() {
373            break; // Found the header/body separator
374        }
375
376        // Parse "Key: Value" header line
377        let line_str = String::from_utf8_lossy(trimmed);
378        if let Some((key, value)) = line_str.split_once(':') {
379            let key = key.trim().to_lowercase();
380            let value = value.trim().to_string();
381
382            if key == "status"
383                && let Some(code_str) = value.split_whitespace().next()
384                && let Ok(code) = code_str.parse::<u16>()
385            {
386                status = code;
387            } else {
388                headers.insert(key, value);
389            }
390        }
391    }
392
393    Ok(GitCgiHeaders { status, headers })
394}
395
396/// Read a single line from a buffered reader with a maximum byte limit.
397///
398/// Reads from the reader's internal buffer via `fill_buf`/`consume` to avoid
399/// unbounded memory allocation. Returns `true` if a newline was found, `false`
400/// on EOF. The newline byte itself is NOT included in `buf`.
401async fn read_bounded_line<R: tokio::io::AsyncBufRead + Unpin>(
402    reader: &mut R,
403    buf: &mut Vec<u8>,
404    max_len: usize,
405) -> Result<bool> {
406    loop {
407        let available = reader
408            .fill_buf()
409            .await
410            .map_err(|e| Error::Git(format!("Failed to read CGI headers: {}", e)))?;
411
412        if available.is_empty() {
413            return Ok(false); // EOF
414        }
415
416        // Look for newline in the available data
417        match available.iter().position(|&b| b == b'\n') {
418            Some(pos) => {
419                // Found newline — append everything before it, consume up to and including newline
420                buf.extend_from_slice(&available[..pos]);
421                reader.consume(pos + 1);
422                if buf.len() > max_len {
423                    return Err(Error::Git(format!(
424                        "CGI header line too large (>{} bytes).",
425                        max_len
426                    )));
427                }
428                return Ok(true);
429            }
430            None => {
431                // No newline yet — append all available data and continue
432                buf.extend_from_slice(available);
433                let consumed = available.len();
434                reader.consume(consumed);
435                if buf.len() > max_len {
436                    return Err(Error::Git(format!(
437                        "CGI header line too large (>{} bytes).",
438                        max_len
439                    )));
440                }
441            }
442        }
443    }
444}
445
/// Remove one trailing line ending from a byte slice.
///
/// At most one `\n` is removed from the end, followed by at most one `\r`, so
/// `\r\n`, `\n` and a lone `\r` are all stripped. Input without a line ending is
/// returned unchanged.
fn strip_line_ending(line: &[u8]) -> &[u8] {
    let without_lf = line.strip_suffix(b"\n").unwrap_or(line);
    without_lf.strip_suffix(b"\r").unwrap_or(without_lf)
}
457
458/// Find the git-http-backend binary
459fn find_git_http_backend() -> Result<String> {
460    // Common locations for git-http-backend
461    let candidates = [
462        "/usr/lib/git-core/git-http-backend",
463        "/usr/libexec/git-core/git-http-backend",
464        "/opt/homebrew/libexec/git-core/git-http-backend",
465        "/usr/local/libexec/git-core/git-http-backend",
466    ];
467
468    for path in &candidates {
469        if std::path::Path::new(path).exists() {
470            return Ok(path.to_string());
471        }
472    }
473
474    // Try to find it via `git --exec-path`
475    let output = std::process::Command::new("git")
476        .arg("--exec-path")
477        .output()
478        .map_err(|e| Error::Git(format!("Failed to run git --exec-path: {}", e)))?;
479
480    if output.status.success() {
481        let exec_path = String::from_utf8_lossy(&output.stdout).trim().to_string();
482        let backend_path = format!("{}/git-http-backend", exec_path);
483        if std::path::Path::new(&backend_path).exists() {
484            return Ok(backend_path);
485        }
486    }
487
488    Err(Error::Git(
489        "git-http-backend not found. Ensure Git is installed with HTTP support.".to_string(),
490    ))
491}
492
/// Parse a fully buffered CGI response into status, headers, and body
/// (buffered variant for tests).
///
/// Follows the same rules as the streaming header parser in this file:
///
/// * lines end at `\n`; one trailing line ending is stripped via
///   `strip_line_ending`, so `\r\n` and `\n` may be mixed freely;
/// * the first empty line ends the header section and everything after it is
///   the body;
/// * if the input ends before an empty line is seen, the response is treated as
///   headers-only (parsed headers are kept, the body is empty);
/// * header names are trimmed and lower-cased, values are trimmed;
/// * a `Status` header whose first word parses as a `u16` sets the status
///   (default 200) instead of being stored as a header;
/// * lines without a `:` are ignored.
#[cfg(test)]
fn parse_cgi_response(data: &[u8]) -> Result<GitCgiResponse> {
    let mut headers = HashMap::new();
    let mut status = 200u16;
    let mut rest = data;

    while !rest.is_empty() {
        // Split off the next line; without a newline the remainder is the last line.
        let (raw_line, remainder) = match rest.iter().position(|&b| b == b'\n') {
            Some(newline) => (&rest[..newline], &rest[newline + 1..]),
            None => (rest, &rest[rest.len()..]),
        };
        rest = remainder;

        let line = strip_line_ending(raw_line);
        if line.is_empty() {
            break; // Header/body separator: `rest` now starts at the body.
        }

        let text = String::from_utf8_lossy(line);
        if let Some((key, value)) = text.split_once(':') {
            let key = key.trim().to_lowercase();
            let value = value.trim().to_string();

            let explicit_status = if key == "status" {
                value
                    .split_whitespace()
                    .next()
                    .and_then(|code| code.parse::<u16>().ok())
            } else {
                None
            };

            match explicit_status {
                Some(code) => status = code,
                None => {
                    headers.insert(key, value);
                }
            }
        }
    }

    Ok(GitCgiResponse {
        status,
        headers,
        body: rest.to_vec(),
    })
}
550
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap a byte slice in the buffered async reader the header parsers expect.
    fn reader_over(data: &[u8]) -> tokio::io::BufReader<std::io::Cursor<Vec<u8>>> {
        tokio::io::BufReader::new(std::io::Cursor::new(data.to_vec()))
    }

    #[test]
    fn test_parse_cgi_response_basic() {
        let data = b"Content-Type: application/x-git-upload-pack-advertisement\r\n\r\nHello";
        let response = parse_cgi_response(data).unwrap();

        assert_eq!(response.status, 200);
        assert_eq!(
            response.headers.get("content-type"),
            Some(&"application/x-git-upload-pack-advertisement".to_string())
        );
        assert_eq!(response.body, b"Hello");
    }

    #[test]
    fn test_parse_cgi_response_with_status() {
        let data = b"Status: 404 Not Found\r\nContent-Type: text/plain\r\n\r\nNot found";
        let response = parse_cgi_response(data).unwrap();

        assert_eq!(response.status, 404);
        assert_eq!(response.body, b"Not found");
    }

    #[test]
    fn test_parse_cgi_response_unix_newlines() {
        let data = b"Content-Type: text/plain\n\nBody here";
        let response = parse_cgi_response(data).unwrap();

        assert_eq!(response.status, 200);
        assert_eq!(response.body, b"Body here");
    }

    #[test]
    fn test_strip_line_ending() {
        assert_eq!(strip_line_ending(b"abc\r\n"), &b"abc"[..]);
        assert_eq!(strip_line_ending(b"abc\n"), &b"abc"[..]);
        assert_eq!(strip_line_ending(b"abc\r"), &b"abc"[..]);
        assert_eq!(strip_line_ending(b"abc"), &b"abc"[..]);
        assert_eq!(strip_line_ending(b"\r\n"), &b""[..]);
        assert_eq!(strip_line_ending(b""), &b""[..]);
        // Only one line ending is removed.
        assert_eq!(strip_line_ending(b"abc\n\n"), &b"abc\n"[..]);
    }

    #[test]
    fn test_git_backend_is_valid_repo_bare() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Not a repo yet
        let backend = GitBackend::new(repo_path);
        assert!(!backend.is_valid_repo());

        // Create a bare repo indicator
        std::fs::write(repo_path.join("HEAD"), "ref: refs/heads/main").unwrap();
        assert!(backend.is_valid_repo());
    }

    #[test]
    fn test_git_backend_is_valid_repo_normal() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Create .git directory
        std::fs::create_dir(repo_path.join(".git")).unwrap();

        let backend = GitBackend::new(repo_path);
        assert!(backend.is_valid_repo());
    }

    #[tokio::test]
    async fn test_read_cgi_headers_basic() {
        let data = b"Content-Type: application/x-git-upload-pack-advertisement\r\n\r\n";
        let mut reader = reader_over(data);
        let headers = read_cgi_headers(&mut reader).await.unwrap();

        assert_eq!(headers.status, 200);
        assert_eq!(
            headers.headers.get("content-type"),
            Some(&"application/x-git-upload-pack-advertisement".to_string())
        );
    }

    #[tokio::test]
    async fn test_read_cgi_headers_with_status() {
        let data = b"Status: 403 Forbidden\r\nContent-Type: text/plain\r\n\r\n";
        let mut reader = reader_over(data);
        let headers = read_cgi_headers(&mut reader).await.unwrap();

        assert_eq!(headers.status, 403);
        assert_eq!(
            headers.headers.get("content-type"),
            Some(&"text/plain".to_string())
        );
        // A parsed Status header is not duplicated into the header map.
        assert!(!headers.headers.contains_key("status"));
    }

    #[tokio::test]
    async fn test_read_cgi_headers_unix_newlines() {
        let data = b"Content-Type: text/plain\n\nBody here";
        let mut reader = reader_over(data);
        let headers = read_cgi_headers(&mut reader).await.unwrap();

        assert_eq!(headers.status, 200);
        assert_eq!(
            headers.headers.get("content-type"),
            Some(&"text/plain".to_string())
        );
    }

    #[tokio::test]
    async fn test_read_cgi_headers_leaves_reader_at_body() {
        let data = b"Content-Type: text/plain\r\n\r\nBody here";
        let mut reader = reader_over(data);
        read_cgi_headers(&mut reader).await.unwrap();

        let mut body = Vec::new();
        reader.read_to_end(&mut body).await.unwrap();
        assert_eq!(body, b"Body here");
    }

    #[tokio::test]
    async fn test_read_cgi_headers_eof_without_separator() {
        let data = b"Status: 404 Not Found\r\nCONTENT-TYPE: Text/Plain\r\n";
        let mut reader = reader_over(data);
        let headers = read_cgi_headers(&mut reader).await.unwrap();

        assert_eq!(headers.status, 404);
        // Names are lower-cased, values are kept as sent.
        assert_eq!(
            headers.headers.get("content-type"),
            Some(&"Text/Plain".to_string())
        );
    }

    #[tokio::test]
    async fn test_read_cgi_headers_rejects_oversized_line() {
        let mut data = b"X-Padding: ".to_vec();
        data.resize(data.len() + MAX_CGI_HEADER_SIZE + 1, b'a');
        data.extend_from_slice(b"\r\n\r\n");

        let mut reader = reader_over(&data);
        assert!(read_cgi_headers(&mut reader).await.is_err());
    }

    #[tokio::test]
    async fn test_read_cgi_headers_rejects_oversized_total() {
        // Every line is small, but together they exceed the header budget.
        let data = "X-Filler: 0123456789\r\n".repeat(MAX_CGI_HEADER_SIZE / 16);

        let mut reader = reader_over(data.as_bytes());
        assert!(read_cgi_headers(&mut reader).await.is_err());
    }

    #[tokio::test]
    async fn test_read_bounded_line_splits_on_newline() {
        let mut reader = reader_over(b"abc\ndef");
        let mut buf = Vec::new();

        assert!(read_bounded_line(&mut reader, &mut buf, 16).await.unwrap());
        assert_eq!(buf, b"abc");

        // The final line has no newline: its bytes are returned with `false`.
        buf.clear();
        assert!(!read_bounded_line(&mut reader, &mut buf, 16).await.unwrap());
        assert_eq!(buf, b"def");

        // Nothing left at all.
        buf.clear();
        assert!(!read_bounded_line(&mut reader, &mut buf, 16).await.unwrap());
        assert!(buf.is_empty());
    }

    #[tokio::test]
    async fn test_read_bounded_line_enforces_limit() {
        let mut reader = reader_over(b"abcdefgh\n");
        let mut buf = Vec::new();

        assert!(read_bounded_line(&mut reader, &mut buf, 4).await.is_err());
    }
}