Skip to main content

solid_pod_rs_git/
service.rs

1//! Binder-agnostic Git HTTP service — spawns the system
2//! `git http-backend` CGI and shuttles stdin/stdout between it and
3//! the HTTP layer.
4//!
5//! Mirrors JSS `src/handlers/git.js` lines 95-268 (`handleGit`) end
6//! to end. The key design choices, all pulled straight from JSS:
7//!
8//! * `GIT_PROJECT_ROOT = repo_root`, `PATH_INFO = request path`. The
9//!   CGI walks `GIT_PROJECT_ROOT + PATH_INFO` internally.
10//! * `GIT_HTTP_EXPORT_ALL` set (empty value, just defined) so all
11//!   repos under the root are read-exportable.
12//! * `GIT_HTTP_RECEIVE_PACK=true` so push is enabled (JSS line 157).
13//! * `GIT_CONFIG_PARAMETERS` injects `uploadpack.allowTipSHA1InWant`
14//!   to match JSS line 158.
15//! * For non-bare repos we set `GIT_DIR` to the `.git` child (JSS
16//!   lines 168-170).
17//! * We parse CGI headers from stdout, separate them from body on
18//!   `\r\n\r\n` (fall back to `\n\n`), and convert the first `Status:`
19//!   header into the HTTP response status.
20
21use std::collections::HashMap;
22use std::path::{Path, PathBuf};
23use std::process::Stdio;
24use std::sync::Arc;
25
26use bytes::Bytes;
27use tokio::io::{AsyncReadExt, AsyncWriteExt};
28use tokio::process::Command;
29
30use crate::auth::{AuthError, GitAuth};
31use crate::config::{apply_write_config, find_git_dir};
32use crate::error::GitError;
33use crate::guard::{extract_repo_slug, path_safe};
34
/// Fallback location of the `git-http-backend` CGI executable that
/// ships with git. This is the install path used by Debian/Ubuntu; set
/// the `GIT_HTTP_BACKEND_PATH` environment variable before the service
/// is constructed to point somewhere else.
pub const DEFAULT_GIT_HTTP_BACKEND: &str = "/usr/lib/git-core/git-http-backend";
39
40/// Opaque HTTP request shape consumed by the service.
41///
42/// The crate stays intentionally binder-agnostic — callers (axum,
43/// actix-web, hyper raw, …) translate their native request type into
44/// this struct before calling `handle`.
45#[derive(Debug, Clone)]
46pub struct GitRequest {
47    /// e.g. `"GET"`, `"POST"`, `"OPTIONS"`.
48    pub method: String,
49    /// The URL path (`"/alice/repo/info/refs"`), already
50    /// percent-decoded.
51    pub path: String,
52    /// The raw query string without the leading `?`.
53    pub query: String,
54    /// All request headers as `(name, value)` tuples. Name is
55    /// compared case-insensitively by the service.
56    pub headers: Vec<(String, String)>,
57    /// Request body (empty for GETs).
58    pub body: Bytes,
59    /// Scheme + host (`"https://pod.example.com"`) — used only to
60    /// reconstruct the URL that NIP-98 verification checks. If None,
61    /// we fall back to `http://localhost`.
62    pub host_url: Option<String>,
63}
64
65impl GitRequest {
66    /// Reconstruct the canonical URL that a NIP-98 `u` tag is
67    /// expected to point at.
68    pub fn auth_url(&self) -> String {
69        let base = self
70            .host_url
71            .clone()
72            .unwrap_or_else(|| "http://localhost".to_string());
73        if self.query.is_empty() {
74            format!("{base}{}", self.path)
75        } else {
76            format!("{base}{}?{}", self.path, self.query)
77        }
78    }
79
80    /// `true` if this request requires a successful auth check (push).
81    #[must_use]
82    pub fn is_write(&self) -> bool {
83        self.path.contains("/git-receive-pack")
84            || self.query.contains("service=git-receive-pack")
85    }
86}
87
88/// CGI response to return to the HTTP layer.
89#[derive(Debug, Clone)]
90pub struct GitResponse {
91    /// HTTP status (derived from the CGI `Status:` header, or 200 by
92    /// default).
93    pub status: u16,
94    /// All response headers emitted by the CGI plus CORS headers.
95    pub headers: Vec<(String, String)>,
96    /// Body bytes — already includes the CGI body payload.
97    pub body: Bytes,
98}
99
100impl GitResponse {
101    /// Build a simple error response (no CGI invocation).
102    #[must_use]
103    pub fn error(status: u16, msg: impl Into<String>) -> Self {
104        let msg = msg.into();
105        let body = Bytes::from(format!("{{\"error\":\"{msg}\"}}"));
106        Self {
107            status,
108            headers: vec![
109                ("content-type".into(), "application/json".into()),
110                ("access-control-allow-origin".into(), "*".into()),
111            ],
112            body,
113        }
114    }
115}
116
117/// The Git HTTP service.
118#[derive(Clone)]
119pub struct GitHttpService {
120    repo_root: PathBuf,
121    auth: Option<Arc<dyn GitAuth>>,
122    backend_path: PathBuf,
123}
124
125impl std::fmt::Debug for GitHttpService {
126    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127        f.debug_struct("GitHttpService")
128            .field("repo_root", &self.repo_root)
129            .field("auth", &self.auth.is_some())
130            .field("backend_path", &self.backend_path)
131            .finish()
132    }
133}
134
135impl GitHttpService {
136    /// Build a service rooted at `repo_root`. All repos served must
137    /// live under this directory.
138    #[must_use]
139    pub fn new(repo_root: PathBuf) -> Self {
140        let backend = std::env::var("GIT_HTTP_BACKEND_PATH")
141            .map(PathBuf::from)
142            .unwrap_or_else(|_| PathBuf::from(DEFAULT_GIT_HTTP_BACKEND));
143        Self {
144            repo_root,
145            auth: None,
146            backend_path: backend,
147        }
148    }
149
150    /// Override the default CGI binary path.
151    #[must_use]
152    pub fn with_backend_path(mut self, path: PathBuf) -> Self {
153        self.backend_path = path;
154        self
155    }
156
157    /// Plug in an authoriser. Without one, write requests still
158    /// succeed — the service becomes an anonymous-push setup, which
159    /// is the behaviour JSS uses when no `handleAuth` pre-hook fires.
160    #[must_use]
161    pub fn with_auth<A: GitAuth + 'static>(mut self, auth: A) -> Self {
162        self.auth = Some(Arc::new(auth));
163        self
164    }
165
166    /// Same as [`with_auth`] but takes a pre-boxed Arc.
167    #[must_use]
168    pub fn with_auth_arc(mut self, auth: Arc<dyn GitAuth>) -> Self {
169        self.auth = Some(auth);
170        self
171    }
172
173    /// Handle an incoming Git HTTP request.
174    pub async fn handle(&self, req: GitRequest) -> Result<GitResponse, GitError> {
175        // CORS preflight — JSS lines 97-102.
176        if req.method.eq_ignore_ascii_case("OPTIONS") {
177            return Ok(GitResponse {
178                status: 200,
179                headers: vec![
180                    ("access-control-allow-origin".into(), "*".into()),
181                    (
182                        "access-control-allow-methods".into(),
183                        "GET, POST, OPTIONS".into(),
184                    ),
185                    (
186                        "access-control-allow-headers".into(),
187                        "Content-Type, Authorization".into(),
188                    ),
189                ],
190                body: Bytes::new(),
191            });
192        }
193
194        // 1. Parse + guard the repo path.
195        let slug = extract_repo_slug(&req.path);
196        let repo_abs = if slug == "." {
197            self.repo_root.canonicalize()?
198        } else {
199            path_safe(&self.repo_root, &slug)?
200        };
201
202        // 2. Find the git dir. Missing => 404.
203        let git_dir = match find_git_dir(&repo_abs)? {
204            Some(g) => g,
205            None => {
206                return Err(GitError::NotARepository(slug));
207            }
208        };
209
210        // 3. Auth for writes (JSS: the route-level `preValidation`
211        //    hook on `/git-receive-pack` calls `handleAuth`; we fold
212        //    that into a single check here).
213        let mut remote_user = String::new();
214        if req.is_write() {
215            let auth = self
216                .auth
217                .as_ref()
218                .ok_or_else(|| GitError::Unauthorised("no auth provider configured".into()))?;
219            match auth.authorise(&req).await {
220                Ok(id) => remote_user = id,
221                Err(AuthError::Missing) => {
222                    return Err(GitError::Unauthorised("missing Authorization".into()));
223                }
224                Err(e) => return Err(GitError::Auth(e)),
225            }
226        }
227
228        // 4. Apply the receive-pack config mutators on writes. Errors
229        //    are best-effort (JSS swallows them too).
230        if req.is_write() {
231            let _ = apply_write_config(&git_dir, &repo_abs).await;
232        }
233
234        // 5. Spawn the CGI and shuttle request/response bytes.
235        spawn_cgi(&self.backend_path, &self.repo_root, &git_dir, &remote_user, req).await
236    }
237}
238
239/// Core CGI driver — shared by all routes.
240async fn spawn_cgi(
241    backend: &Path,
242    repo_root: &Path,
243    git_dir: &crate::config::GitDir,
244    remote_user: &str,
245    req: GitRequest,
246) -> Result<GitResponse, GitError> {
247    // Assemble CGI env. We deliberately start from an empty env and
248    // only inherit PATH (to locate git subcommands the backend itself
249    // shells out to) — this matches the spirit of JSS which spreads
250    // `process.env` but we narrow it for defence-in-depth.
251    let mut env: HashMap<String, String> = HashMap::new();
252    if let Ok(path) = std::env::var("PATH") {
253        env.insert("PATH".into(), path);
254    }
255
256    env.insert(
257        "GIT_PROJECT_ROOT".into(),
258        repo_root
259            .canonicalize()
260            .unwrap_or_else(|_| repo_root.to_path_buf())
261            .to_string_lossy()
262            .into_owned(),
263    );
264    env.insert("GIT_HTTP_EXPORT_ALL".into(), String::new());
265    env.insert("GIT_HTTP_RECEIVE_PACK".into(), "true".into());
266    env.insert(
267        "GIT_CONFIG_PARAMETERS".into(),
268        "'uploadpack.allowTipSHA1InWant=true'".into(),
269    );
270    env.insert("PATH_INFO".into(), req.path.clone());
271    env.insert("REQUEST_METHOD".into(), req.method.to_uppercase());
272    env.insert("QUERY_STRING".into(), req.query.clone());
273    env.insert("REMOTE_USER".into(), remote_user.to_string());
274
275    for (k, v) in &req.headers {
276        let kl = k.to_lowercase();
277        if kl == "content-type" {
278            env.insert("CONTENT_TYPE".into(), v.clone());
279        } else if kl == "content-length" {
280            env.insert("CONTENT_LENGTH".into(), v.clone());
281        }
282    }
283    env.entry("CONTENT_LENGTH".into())
284        .or_insert_with(|| req.body.len().to_string());
285    env.entry("CONTENT_TYPE".into()).or_default();
286
287    if git_dir.is_regular {
288        env.insert(
289            "GIT_DIR".into(),
290            git_dir.git_dir.to_string_lossy().into_owned(),
291        );
292    }
293
294    let mut cmd = Command::new(backend);
295    cmd.env_clear()
296        .envs(&env)
297        .stdin(Stdio::piped())
298        .stdout(Stdio::piped())
299        .stderr(Stdio::piped());
300
301    let mut child = match cmd.spawn() {
302        Ok(c) => c,
303        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
304            return Err(GitError::BackendNotAvailable(format!(
305                "spawn {}: {}",
306                backend.display(),
307                e
308            )));
309        }
310        Err(e) => return Err(GitError::Io(e)),
311    };
312
313    // Write body → stdin.
314    if let Some(mut stdin) = child.stdin.take() {
315        if !req.body.is_empty() {
316            stdin.write_all(&req.body).await?;
317        }
318        drop(stdin); // close stdin so git-http-backend can exit.
319    }
320
321    // Collect stdout + stderr concurrently.
322    let mut stdout = child.stdout.take().expect("stdout piped");
323    let mut stderr = child.stderr.take().expect("stderr piped");
324
325    let stdout_task = tokio::spawn(async move {
326        let mut buf = Vec::new();
327        stdout.read_to_end(&mut buf).await.map(|_| buf)
328    });
329    let stderr_task = tokio::spawn(async move {
330        let mut buf = Vec::new();
331        let _ = stderr.read_to_end(&mut buf).await;
332        buf
333    });
334
335    let status = child.wait().await?;
336    let stdout_bytes = stdout_task
337        .await
338        .map_err(|e| GitError::MalformedCgi(format!("stdout task: {e}")))??;
339    let stderr_bytes = stderr_task.await.unwrap_or_default();
340
341    if !status.success() && stdout_bytes.is_empty() {
342        return Err(GitError::BackendFailed {
343            exit_code: status.code(),
344            stderr: String::from_utf8_lossy(&stderr_bytes).into_owned(),
345        });
346    }
347
348    parse_cgi_output(&stdout_bytes)
349}
350
351/// Split CGI headers from body and translate into a `GitResponse`.
352fn parse_cgi_output(stdout: &[u8]) -> Result<GitResponse, GitError> {
353    // Find the CGI header/body separator.
354    let (sep_idx, sep_len) = {
355        if let Some(i) = find_subsequence(stdout, b"\r\n\r\n") {
356            (i, 4)
357        } else if let Some(i) = find_subsequence(stdout, b"\n\n") {
358            (i, 2)
359        } else {
360            return Err(GitError::MalformedCgi(
361                "no header/body separator".into(),
362            ));
363        }
364    };
365
366    let header_section = std::str::from_utf8(&stdout[..sep_idx])
367        .map_err(|e| GitError::MalformedCgi(format!("utf-8 in headers: {e}")))?;
368    let body = Bytes::copy_from_slice(&stdout[sep_idx + sep_len..]);
369
370    let mut status: u16 = 200;
371    let mut headers: Vec<(String, String)> = Vec::new();
372
373    for line in header_section.split(['\n', '\r']) {
374        let line = line.trim();
375        if line.is_empty() {
376            continue;
377        }
378        let Some(colon) = line.find(':') else { continue };
379        let key = line[..colon].trim().to_string();
380        let value = line[colon + 1..].trim().to_string();
381        if key.eq_ignore_ascii_case("status") {
382            status = value
383                .split_whitespace()
384                .next()
385                .and_then(|s| s.parse().ok())
386                .unwrap_or(200);
387        } else {
388            headers.push((key, value));
389        }
390    }
391
392    // CORS headers (JSS lines 218-220).
393    headers.push((
394        "Access-Control-Allow-Origin".into(),
395        "*".into(),
396    ));
397    headers.push((
398        "Access-Control-Allow-Methods".into(),
399        "GET, POST, OPTIONS".into(),
400    ));
401    headers.push((
402        "Access-Control-Allow-Headers".into(),
403        "Content-Type, Authorization".into(),
404    ));
405
406    Ok(GitResponse {
407        status,
408        headers,
409        body,
410    })
411}
412
/// Return the index of the first occurrence of `needle` in `haystack`,
/// or `None` when it does not occur.
///
/// An empty `needle` matches at index 0. (`slice::windows` panics on a
/// window size of zero, so that case is handled up front.)
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a header-less, body-less request for the `GitRequest`
    /// helper tests.
    fn request(method: &str, path: &str, query: &str, host_url: Option<&str>) -> GitRequest {
        GitRequest {
            method: method.to_string(),
            path: path.to_string(),
            query: query.to_string(),
            headers: Vec::new(),
            body: Bytes::new(),
            host_url: host_url.map(String::from),
        }
    }

    // --- CGI output parsing -------------------------------------------

    #[test]
    fn parse_cgi_basic() {
        let raw = b"Content-Type: application/x-git-upload-pack-advertisement\r\nStatus: 200 OK\r\n\r\nPKFILE-BODY";
        let parsed = parse_cgi_output(raw).expect("well-formed CGI output");
        assert_eq!(parsed.status, 200);
        assert_eq!(&parsed.body[..], &b"PKFILE-BODY"[..]);
        let has_content_type = parsed
            .headers
            .iter()
            .any(|(name, _)| name.eq_ignore_ascii_case("content-type"));
        assert!(has_content_type);
    }

    #[test]
    fn parse_cgi_lf_only_separator() {
        let parsed =
            parse_cgi_output(b"Content-Type: text/plain\n\nHELLO").expect("LF-only separator");
        assert_eq!(&parsed.body[..], &b"HELLO"[..]);
    }

    #[test]
    fn parse_cgi_status_override() {
        let parsed = parse_cgi_output(b"Status: 403 Forbidden\r\n\r\nNO").expect("status header");
        assert_eq!(parsed.status, 403);
    }

    #[test]
    fn parse_cgi_no_separator_fails() {
        let outcome = parse_cgi_output(b"Content-Type: text/plain\r\nonly-headers");
        assert!(outcome.is_err());
    }

    // --- GitRequest::is_write -----------------------------------------

    #[test]
    fn git_request_is_write_detects_receive_pack_path() {
        assert!(request("POST", "/repo/git-receive-pack", "", None).is_write());
    }

    #[test]
    fn git_request_is_write_detects_receive_pack_query() {
        assert!(request("GET", "/repo/info/refs", "service=git-receive-pack", None).is_write());
    }

    #[test]
    fn git_request_is_write_false_for_read() {
        assert!(!request("GET", "/repo/info/refs", "service=git-upload-pack", None).is_write());
    }

    // --- GitRequest::auth_url -----------------------------------------

    #[test]
    fn git_request_auth_url_without_query() {
        let req = request("GET", "/repo/info/refs", "", Some("https://pod.example.com"));
        assert_eq!(req.auth_url(), "https://pod.example.com/repo/info/refs");
    }

    #[test]
    fn git_request_auth_url_with_query() {
        let req = request(
            "GET",
            "/repo/info/refs",
            "service=git-upload-pack",
            Some("https://pod.example.com"),
        );
        assert_eq!(
            req.auth_url(),
            "https://pod.example.com/repo/info/refs?service=git-upload-pack"
        );
    }

    // --- GitResponse::error -------------------------------------------

    #[test]
    fn git_response_error_helper() {
        let resp = GitResponse::error(404, "not found");
        assert_eq!(resp.status, 404);
        assert!(!resp.body.is_empty());
    }
}