Skip to main content

solid_pod_rs_git/
service.rs

1//! Binder-agnostic Git HTTP service — spawns the system
2//! `git http-backend` CGI and shuttles stdin/stdout between it and
3//! the HTTP layer.
4//!
5//! Mirrors JSS `src/handlers/git.js` lines 95-268 (`handleGit`) end
6//! to end. The key design choices, all pulled straight from JSS:
7//!
8//! * `GIT_PROJECT_ROOT = repo_root`, `PATH_INFO = request path`. The
9//!   CGI walks `GIT_PROJECT_ROOT + PATH_INFO` internally.
10//! * `GIT_HTTP_EXPORT_ALL` set (empty value, just defined) so all
11//!   repos under the root are read-exportable.
12//! * `GIT_HTTP_RECEIVE_PACK=true` so push is enabled (JSS line 157).
13//! * `GIT_CONFIG_PARAMETERS` injects `uploadpack.allowTipSHA1InWant`
14//!   to match JSS line 158.
15//! * For non-bare repos we set `GIT_DIR` to the `.git` child (JSS
16//!   lines 168-170).
//! * We parse CGI headers from stdout, separate them from the body on
//!   `\r\n\r\n` (falling back to `\n\n`), and convert the `Status:`
//!   header into the HTTP response status (if it is repeated the last
//!   occurrence wins; without one the status defaults to 200).
20
21use std::collections::HashMap;
22use std::path::{Path, PathBuf};
23use std::process::Stdio;
24use std::sync::Arc;
25
26use bytes::Bytes;
27use tokio::io::{AsyncReadExt, AsyncWriteExt};
28use tokio::process::Command;
29
30use crate::auth::{AuthError, GitAuth};
31use crate::config::{apply_write_config, find_git_dir};
32use crate::error::GitError;
33use crate::guard::{extract_repo_slug, path_safe};
34
35/// Path to the CGI binary shipped with git. Configurable via
36/// `GIT_HTTP_BACKEND_PATH` env var at service-startup time (the
37/// default matches Debian/Ubuntu).
38pub const DEFAULT_GIT_HTTP_BACKEND: &str = "/usr/lib/git-core/git-http-backend";
39
40/// Opaque HTTP request shape consumed by the service.
41///
42/// The crate stays intentionally binder-agnostic — callers (axum,
43/// actix-web, hyper raw, …) translate their native request type into
44/// this struct before calling `handle`.
45#[derive(Debug, Clone)]
46pub struct GitRequest {
47    /// e.g. `"GET"`, `"POST"`, `"OPTIONS"`.
48    pub method: String,
49    /// The URL path (`"/alice/repo/info/refs"`), already
50    /// percent-decoded.
51    pub path: String,
52    /// The raw query string without the leading `?`.
53    pub query: String,
54    /// All request headers as `(name, value)` tuples. Name is
55    /// compared case-insensitively by the service.
56    pub headers: Vec<(String, String)>,
57    /// Request body (empty for GETs).
58    pub body: Bytes,
59    /// Scheme + host (`"https://pod.example.com"`) — used only to
60    /// reconstruct the URL that NIP-98 verification checks. If None,
61    /// we fall back to `http://localhost`.
62    pub host_url: Option<String>,
63}
64
65impl GitRequest {
66    /// Reconstruct the canonical URL that a NIP-98 `u` tag is
67    /// expected to point at.
68    pub fn auth_url(&self) -> String {
69        let base = self
70            .host_url
71            .clone()
72            .unwrap_or_else(|| "http://localhost".to_string());
73        if self.query.is_empty() {
74            format!("{base}{}", self.path)
75        } else {
76            format!("{base}{}?{}", self.path, self.query)
77        }
78    }
79
80    /// `true` if this request requires a successful auth check (push).
81    #[must_use]
82    pub fn is_write(&self) -> bool {
83        self.path.contains("/git-receive-pack") || self.query.contains("service=git-receive-pack")
84    }
85
86    /// `true` if this request fetches repository data (clone/fetch/ls).
87    ///
88    /// Smart-HTTP read traffic is the `git-upload-pack` service plus the
89    /// `info/refs` capability advertisement that precedes it, and the
90    /// dumb-HTTP object/pack paths under `objects/`. These previously
91    /// bypassed every auth check while `GIT_HTTP_EXPORT_ALL` exported
92    /// each repo verbatim, so a private pod's git history was world-
93    /// clonable (P1-3). The service now gates reads through the same
94    /// auth provider as writes.
95    #[must_use]
96    pub fn is_read(&self) -> bool {
97        if self.is_write() {
98            return false;
99        }
100        self.path.contains("/git-upload-pack")
101            || self.query.contains("service=git-upload-pack")
102            || self.path.contains("/info/refs")
103            || self.path.contains("/objects/")
104            || self.path.ends_with("/HEAD")
105    }
106}
107
108/// CGI response to return to the HTTP layer.
109#[derive(Debug, Clone)]
110pub struct GitResponse {
111    /// HTTP status (derived from the CGI `Status:` header, or 200 by
112    /// default).
113    pub status: u16,
114    /// All response headers emitted by the CGI plus CORS headers.
115    pub headers: Vec<(String, String)>,
116    /// Body bytes — already includes the CGI body payload.
117    pub body: Bytes,
118}
119
120impl GitResponse {
121    /// Build a simple error response (no CGI invocation).
122    #[must_use]
123    pub fn error(status: u16, msg: impl Into<String>) -> Self {
124        let msg = msg.into();
125        let body = Bytes::from(format!("{{\"error\":\"{msg}\"}}"));
126        Self {
127            status,
128            headers: vec![
129                ("content-type".into(), "application/json".into()),
130                ("access-control-allow-origin".into(), "*".into()),
131            ],
132            body,
133        }
134    }
135}
136
137/// The Git HTTP service.
138#[derive(Clone)]
139pub struct GitHttpService {
140    repo_root: PathBuf,
141    auth: Option<Arc<dyn GitAuth>>,
142    backend_path: PathBuf,
143}
144
145impl std::fmt::Debug for GitHttpService {
146    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
147        f.debug_struct("GitHttpService")
148            .field("repo_root", &self.repo_root)
149            .field("auth", &self.auth.is_some())
150            .field("backend_path", &self.backend_path)
151            .finish()
152    }
153}
154
155impl GitHttpService {
156    /// Build a service rooted at `repo_root`. All repos served must
157    /// live under this directory.
158    #[must_use]
159    pub fn new(repo_root: PathBuf) -> Self {
160        let backend = std::env::var("GIT_HTTP_BACKEND_PATH")
161            .map(PathBuf::from)
162            .unwrap_or_else(|_| PathBuf::from(DEFAULT_GIT_HTTP_BACKEND));
163        Self {
164            repo_root,
165            auth: None,
166            backend_path: backend,
167        }
168    }
169
170    /// Override the default CGI binary path.
171    #[must_use]
172    pub fn with_backend_path(mut self, path: PathBuf) -> Self {
173        self.backend_path = path;
174        self
175    }
176
177    /// Plug in an authoriser. Without one, write requests still
178    /// succeed — the service becomes an anonymous-push setup, which
179    /// is the behaviour JSS uses when no `handleAuth` pre-hook fires.
180    #[must_use]
181    pub fn with_auth<A: GitAuth + 'static>(mut self, auth: A) -> Self {
182        self.auth = Some(Arc::new(auth));
183        self
184    }
185
186    /// Same as [`with_auth`] but takes a pre-boxed Arc.
187    #[must_use]
188    pub fn with_auth_arc(mut self, auth: Arc<dyn GitAuth>) -> Self {
189        self.auth = Some(auth);
190        self
191    }
192
193    /// Handle an incoming Git HTTP request.
194    pub async fn handle(&self, req: GitRequest) -> Result<GitResponse, GitError> {
195        // CORS preflight — JSS lines 97-102.
196        if req.method.eq_ignore_ascii_case("OPTIONS") {
197            return Ok(GitResponse {
198                status: 200,
199                headers: vec![
200                    ("access-control-allow-origin".into(), "*".into()),
201                    (
202                        "access-control-allow-methods".into(),
203                        "GET, POST, OPTIONS".into(),
204                    ),
205                    (
206                        "access-control-allow-headers".into(),
207                        "Content-Type, Authorization".into(),
208                    ),
209                ],
210                body: Bytes::new(),
211            });
212        }
213
214        // 1. Parse + guard the repo path.
215        let slug = extract_repo_slug(&req.path);
216        let repo_abs = if slug == "." {
217            self.repo_root.canonicalize()?
218        } else {
219            path_safe(&self.repo_root, &slug)?
220        };
221
222        // 2. Find the git dir. Missing => 404.
223        let git_dir = match find_git_dir(&repo_abs)? {
224            Some(g) => g,
225            None => {
226                return Err(GitError::NotARepository(slug));
227            }
228        };
229
230        // 3. Auth for writes (JSS: the route-level `preValidation`
231        //    hook on `/git-receive-pack` calls `handleAuth`; we fold
232        //    that into a single check here). P1-3: reads (clone/fetch)
233        //    are gated through the SAME provider when one is configured,
234        //    closing the world-readable git hole. When no provider is
235        //    plugged in the service stays anonymous (the documented
236        //    no-auth setup), matching JSS's behaviour with no
237        //    `handleAuth` pre-hook.
238        let mut remote_user = String::new();
239        let needs_auth = req.is_write() || (req.is_read() && self.auth.is_some());
240        if needs_auth {
241            let auth = self
242                .auth
243                .as_ref()
244                .ok_or_else(|| GitError::Unauthorised("no auth provider configured".into()))?;
245            match auth.authorise(&req).await {
246                Ok(id) => remote_user = id,
247                Err(AuthError::Missing) => {
248                    return Err(GitError::Unauthorised("missing Authorization".into()));
249                }
250                Err(e) => return Err(GitError::Auth(e)),
251            }
252        }
253
254        // 4. Apply the receive-pack config mutators on writes. Errors
255        //    are best-effort (JSS swallows them too).
256        if req.is_write() {
257            let _ = apply_write_config(&git_dir, &repo_abs).await;
258        }
259
260        // 5. Spawn the CGI and shuttle request/response bytes.
261        spawn_cgi(
262            &self.backend_path,
263            &self.repo_root,
264            &git_dir,
265            &remote_user,
266            req,
267        )
268        .await
269    }
270}
271
272/// Core CGI driver — shared by all routes.
273async fn spawn_cgi(
274    backend: &Path,
275    repo_root: &Path,
276    git_dir: &crate::config::GitDir,
277    remote_user: &str,
278    req: GitRequest,
279) -> Result<GitResponse, GitError> {
280    // Assemble CGI env. We deliberately start from an empty env and
281    // only inherit PATH (to locate git subcommands the backend itself
282    // shells out to) — this matches the spirit of JSS which spreads
283    // `process.env` but we narrow it for defence-in-depth.
284    let mut env: HashMap<String, String> = HashMap::new();
285    if let Ok(path) = std::env::var("PATH") {
286        env.insert("PATH".into(), path);
287    }
288
289    env.insert(
290        "GIT_PROJECT_ROOT".into(),
291        repo_root
292            .canonicalize()
293            .unwrap_or_else(|_| repo_root.to_path_buf())
294            .to_string_lossy()
295            .into_owned(),
296    );
297    env.insert("GIT_HTTP_EXPORT_ALL".into(), String::new());
298    env.insert("GIT_HTTP_RECEIVE_PACK".into(), "true".into());
299    env.insert(
300        "GIT_CONFIG_PARAMETERS".into(),
301        "'uploadpack.allowTipSHA1InWant=true'".into(),
302    );
303    env.insert("PATH_INFO".into(), req.path.clone());
304    env.insert("REQUEST_METHOD".into(), req.method.to_uppercase());
305    env.insert("QUERY_STRING".into(), req.query.clone());
306    env.insert("REMOTE_USER".into(), remote_user.to_string());
307
308    for (k, v) in &req.headers {
309        let kl = k.to_lowercase();
310        if kl == "content-type" {
311            env.insert("CONTENT_TYPE".into(), v.clone());
312        } else if kl == "content-length" {
313            env.insert("CONTENT_LENGTH".into(), v.clone());
314        }
315    }
316    env.entry("CONTENT_LENGTH".into())
317        .or_insert_with(|| req.body.len().to_string());
318    env.entry("CONTENT_TYPE".into()).or_default();
319
320    if git_dir.is_regular {
321        env.insert(
322            "GIT_DIR".into(),
323            git_dir.git_dir.to_string_lossy().into_owned(),
324        );
325    }
326
327    let mut cmd = Command::new(backend);
328    cmd.env_clear()
329        .envs(&env)
330        .stdin(Stdio::piped())
331        .stdout(Stdio::piped())
332        .stderr(Stdio::piped());
333
334    let mut child = match cmd.spawn() {
335        Ok(c) => c,
336        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
337            return Err(GitError::BackendNotAvailable(format!(
338                "spawn {}: {}",
339                backend.display(),
340                e
341            )));
342        }
343        Err(e) => return Err(GitError::Io(e)),
344    };
345
346    // Write body → stdin.
347    if let Some(mut stdin) = child.stdin.take() {
348        if !req.body.is_empty() {
349            stdin.write_all(&req.body).await?;
350        }
351        drop(stdin); // close stdin so git-http-backend can exit.
352    }
353
354    // Collect stdout + stderr concurrently.
355    let mut stdout = child.stdout.take().expect("stdout piped");
356    let mut stderr = child.stderr.take().expect("stderr piped");
357
358    let stdout_task = tokio::spawn(async move {
359        let mut buf = Vec::new();
360        stdout.read_to_end(&mut buf).await.map(|_| buf)
361    });
362    let stderr_task = tokio::spawn(async move {
363        let mut buf = Vec::new();
364        let _ = stderr.read_to_end(&mut buf).await;
365        buf
366    });
367
368    let status = child.wait().await?;
369    let stdout_bytes = stdout_task
370        .await
371        .map_err(|e| GitError::MalformedCgi(format!("stdout task: {e}")))??;
372    let stderr_bytes = stderr_task.await.unwrap_or_default();
373
374    if !status.success() && stdout_bytes.is_empty() {
375        return Err(GitError::BackendFailed {
376            exit_code: status.code(),
377            stderr: String::from_utf8_lossy(&stderr_bytes).into_owned(),
378        });
379    }
380
381    parse_cgi_output(&stdout_bytes)
382}
383
384/// Split CGI headers from body and translate into a `GitResponse`.
385fn parse_cgi_output(stdout: &[u8]) -> Result<GitResponse, GitError> {
386    // Find the CGI header/body separator.
387    let (sep_idx, sep_len) = {
388        if let Some(i) = find_subsequence(stdout, b"\r\n\r\n") {
389            (i, 4)
390        } else if let Some(i) = find_subsequence(stdout, b"\n\n") {
391            (i, 2)
392        } else {
393            return Err(GitError::MalformedCgi("no header/body separator".into()));
394        }
395    };
396
397    let header_section = std::str::from_utf8(&stdout[..sep_idx])
398        .map_err(|e| GitError::MalformedCgi(format!("utf-8 in headers: {e}")))?;
399    let body = Bytes::copy_from_slice(&stdout[sep_idx + sep_len..]);
400
401    let mut status: u16 = 200;
402    let mut headers: Vec<(String, String)> = Vec::new();
403
404    for line in header_section.split(['\n', '\r']) {
405        let line = line.trim();
406        if line.is_empty() {
407            continue;
408        }
409        let Some(colon) = line.find(':') else {
410            continue;
411        };
412        let key = line[..colon].trim().to_string();
413        let value = line[colon + 1..].trim().to_string();
414        if key.eq_ignore_ascii_case("status") {
415            status = value
416                .split_whitespace()
417                .next()
418                .and_then(|s| s.parse().ok())
419                .unwrap_or(200);
420        } else {
421            headers.push((key, value));
422        }
423    }
424
425    // CORS headers (JSS lines 218-220).
426    headers.push(("Access-Control-Allow-Origin".into(), "*".into()));
427    headers.push((
428        "Access-Control-Allow-Methods".into(),
429        "GET, POST, OPTIONS".into(),
430    ));
431    headers.push((
432        "Access-Control-Allow-Headers".into(),
433        "Content-Type, Authorization".into(),
434    ));
435
436    Ok(GitResponse {
437        status,
438        headers,
439        body,
440    })
441}
442
443fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
444    haystack.windows(needle.len()).position(|w| w == needle)
445}
446
447#[cfg(test)]
448mod tests {
449    use super::*;
450
451    #[test]
452    fn parse_cgi_basic() {
453        let raw = b"Content-Type: application/x-git-upload-pack-advertisement\r\nStatus: 200 OK\r\n\r\nPKFILE-BODY";
454        let r = parse_cgi_output(raw).unwrap();
455        assert_eq!(r.status, 200);
456        assert_eq!(r.body, Bytes::from_static(b"PKFILE-BODY"));
457        assert!(r
458            .headers
459            .iter()
460            .any(|(k, _)| k.eq_ignore_ascii_case("content-type")));
461    }
462
463    #[test]
464    fn parse_cgi_lf_only_separator() {
465        let raw = b"Content-Type: text/plain\n\nHELLO";
466        let r = parse_cgi_output(raw).unwrap();
467        assert_eq!(r.body, Bytes::from_static(b"HELLO"));
468    }
469
470    #[test]
471    fn parse_cgi_status_override() {
472        let raw = b"Status: 403 Forbidden\r\n\r\nNO";
473        let r = parse_cgi_output(raw).unwrap();
474        assert_eq!(r.status, 403);
475    }
476
477    #[test]
478    fn parse_cgi_no_separator_fails() {
479        let raw = b"Content-Type: text/plain\r\nonly-headers";
480        assert!(parse_cgi_output(raw).is_err());
481    }
482
483    #[test]
484    fn git_request_is_write_detects_receive_pack_path() {
485        let req = GitRequest {
486            method: "POST".into(),
487            path: "/repo/git-receive-pack".into(),
488            query: String::new(),
489            headers: vec![],
490            body: Bytes::new(),
491            host_url: None,
492        };
493        assert!(req.is_write());
494    }
495
496    #[test]
497    fn git_request_is_write_detects_receive_pack_query() {
498        let req = GitRequest {
499            method: "GET".into(),
500            path: "/repo/info/refs".into(),
501            query: "service=git-receive-pack".into(),
502            headers: vec![],
503            body: Bytes::new(),
504            host_url: None,
505        };
506        assert!(req.is_write());
507    }
508
509    #[test]
510    fn git_request_is_write_false_for_read() {
511        let req = GitRequest {
512            method: "GET".into(),
513            path: "/repo/info/refs".into(),
514            query: "service=git-upload-pack".into(),
515            headers: vec![],
516            body: Bytes::new(),
517            host_url: None,
518        };
519        assert!(!req.is_write());
520    }
521
522    #[test]
523    fn git_request_is_read_detects_upload_pack_and_info_refs() {
524        // info/refs advertisement for a clone.
525        let advert = GitRequest {
526            method: "GET".into(),
527            path: "/repo/info/refs".into(),
528            query: "service=git-upload-pack".into(),
529            headers: vec![],
530            body: Bytes::new(),
531            host_url: None,
532        };
533        assert!(advert.is_read());
534        assert!(!advert.is_write());
535
536        // The upload-pack POST itself.
537        let pack = GitRequest {
538            method: "POST".into(),
539            path: "/repo/git-upload-pack".into(),
540            query: String::new(),
541            headers: vec![],
542            body: Bytes::new(),
543            host_url: None,
544        };
545        assert!(pack.is_read());
546
547        // Dumb-HTTP object fetch.
548        let object = GitRequest {
549            method: "GET".into(),
550            path: "/repo/objects/info/packs".into(),
551            query: String::new(),
552            headers: vec![],
553            body: Bytes::new(),
554            host_url: None,
555        };
556        assert!(object.is_read());
557    }
558
559    #[test]
560    fn git_request_is_read_false_for_write() {
561        // A receive-pack advertisement is a write, never a read.
562        let req = GitRequest {
563            method: "GET".into(),
564            path: "/repo/info/refs".into(),
565            query: "service=git-receive-pack".into(),
566            headers: vec![],
567            body: Bytes::new(),
568            host_url: None,
569        };
570        assert!(req.is_write());
571        assert!(!req.is_read());
572    }
573
574    #[test]
575    fn git_request_auth_url_without_query() {
576        let req = GitRequest {
577            method: "GET".into(),
578            path: "/repo/info/refs".into(),
579            query: String::new(),
580            headers: vec![],
581            body: Bytes::new(),
582            host_url: Some("https://pod.example.com".into()),
583        };
584        assert_eq!(req.auth_url(), "https://pod.example.com/repo/info/refs");
585    }
586
587    #[test]
588    fn git_request_auth_url_with_query() {
589        let req = GitRequest {
590            method: "GET".into(),
591            path: "/repo/info/refs".into(),
592            query: "service=git-upload-pack".into(),
593            headers: vec![],
594            body: Bytes::new(),
595            host_url: Some("https://pod.example.com".into()),
596        };
597        assert_eq!(
598            req.auth_url(),
599            "https://pod.example.com/repo/info/refs?service=git-upload-pack"
600        );
601    }
602
603    #[test]
604    fn git_response_error_helper() {
605        let r = GitResponse::error(404, "not found");
606        assert_eq!(r.status, 404);
607        assert!(!r.body.is_empty());
608    }
609}