Skip to main content

solid_pod_rs_server/
lib.rs

1//! # solid-pod-rs-server
2//!
3//! Drop-in Solid Pod server binary wrapping
4//! [`solid-pod-rs`](https://crates.io/crates/solid-pod-rs) with
5//! [actix-web](https://docs.rs/actix-web). This crate is both a
6//! library (for integration-test reuse) and a binary.
7//!
8//! ## Public types
9//!
10//! - [`AppState`]  — Shared actix-web application state (storage, dotfile policy, body cap).
11//! - [`build_app`] — Builds the fully-configured `actix_web::App` with all routes and middleware.
12//! - [`NodeInfoMeta`] — NodeInfo 2.1 metadata inputs.
13//! - [`PathTraversalGuard`] — Middleware that rejects `..` path-traversal attempts.
14//! - [`DotfileGuard`] — Middleware that enforces the dotfile allowlist.
15//! - [`ErrorLoggingMiddleware`] — Middleware that logs 5xx responses with full error chains.
16//! - [`body_cap_from_env`] — Reads `JSS_MAX_REQUEST_BODY` from the environment.
17//! - [`cli`] — CLI argument definitions (clap derive).
18//!
19//! ## Route table
20//!
21//! | Method   | Path                                     | Handler              |
22//! |----------|------------------------------------------|----------------------|
23//! | GET/HEAD | `/{tail:.*}`                             | `handle_get`         |
24//! | PUT      | `/{tail:.*}`                             | `handle_put`         |
25//! | POST     | `/{tail:.*}/`                            | `handle_post`        |
26//! | PATCH    | `/{tail:.*}`                             | `handle_patch`       |
27//! | DELETE   | `/{tail:.*}`                             | `handle_delete`      |
28//! | OPTIONS  | `/{tail:.*}`                             | `handle_options`     |
29//! | GET      | `/.well-known/solid`                     | Solid discovery      |
30//! | GET      | `/.well-known/webfinger`                 | WebFinger JRD        |
31//! | GET      | `/.well-known/nodeinfo`                  | NodeInfo discovery   |
32//! | GET      | `/.well-known/nodeinfo/2.1`              | NodeInfo 2.1         |
33//! | GET      | `/.well-known/did/nostr/{pubkey}.json`   | DID:nostr document   |
34//!
35//! ## Middleware stack (applied in order)
36//!
37//! 1. `NormalizePath` -- collapse `//` and decode %-encoded segments.
38//! 2. `PathTraversalGuard` -- defence-in-depth `..` re-check.
39//! 3. `DotfileGuard` -- rejects `.env` etc unless on the allowlist.
40//! 4. `PayloadConfig` -- enforces `JSS_MAX_REQUEST_BODY` body cap.
41//! 5. `ErrorLoggingMiddleware` -- structured 5xx logging.
42//! 6. WAC-on-write -- PUT/POST/PATCH/DELETE require a write/append grant.
43
44#![doc = include_str!("../README.md")]
45
46#![deny(unsafe_code)]
47#![warn(rust_2018_idioms)]
48
49/// CLI argument definitions (clap derive structs).
50pub mod cli;
51
52use std::path::{Path, PathBuf};
53use std::sync::Arc;
54
55use actix_web::body::{BoxBody, EitherBody};
56use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
57use actix_web::http::{header, StatusCode};
58use actix_web::middleware::{NormalizePath, TrailingSlash};
59use actix_web::{web, App, Error as ActixError, HttpRequest, HttpResponse};
60use bytes::Bytes;
61use futures_util::future::{ready, LocalBoxFuture, Ready};
62use percent_encoding::percent_decode_str;
63use serde::Deserialize;
64use solid_pod_rs::{
65    auth::nip98,
66    config::sources::parse_size,
67    interop,
68    ldp::{self, LdpContainerOps, PatchCreateOutcome},
69    security::DotfileAllowlist,
70    storage::Storage,
71    wac::{
72        self, conditions::RequestContext, parse_jsonld_acl, parser::parse_turtle_acl, AccessMode,
73    },
74    PodError,
75};
76
77// ---------------------------------------------------------------------------
78// Shared app state
79// ---------------------------------------------------------------------------
80
81/// Actix-web shared state.
82#[derive(Clone)]
83pub struct AppState {
84    pub storage: Arc<dyn Storage>,
85    pub dotfiles: Arc<DotfileAllowlist>,
86    pub body_cap: usize,
87    pub nodeinfo: NodeInfoMeta,
88    pub mashlib_cdn: Option<String>,
89}
90
91/// NodeInfo 2.1 body inputs. Kept here so tests can override them.
92#[derive(Clone, Debug)]
93pub struct NodeInfoMeta {
94    pub software_name: String,
95    pub software_version: String,
96    pub open_registrations: bool,
97    pub total_users: u64,
98    pub base_url: String,
99}
100
101impl Default for NodeInfoMeta {
102    fn default() -> Self {
103        Self {
104            software_name: "solid-pod-rs-server".to_string(),
105            software_version: env!("CARGO_PKG_VERSION").to_string(),
106            open_registrations: false,
107            total_users: 0,
108            base_url: "http://localhost".to_string(),
109        }
110    }
111}
112
113/// Discover the body cap from the environment. Accepts values like
114/// `50MB`, `1.5GB`, or a bare integer (bytes). Falls back to 50 MiB.
115pub const DEFAULT_BODY_CAP: usize = 50 * 1024 * 1024;
116
117/// Read `JSS_MAX_REQUEST_BODY` and parse via [`parse_size`]. On any
118/// failure, returns [`DEFAULT_BODY_CAP`].
119pub fn body_cap_from_env() -> usize {
120    match std::env::var("JSS_MAX_REQUEST_BODY") {
121        Ok(v) => parse_size(&v)
122            .map(|u| u as usize)
123            .unwrap_or(DEFAULT_BODY_CAP),
124        Err(_) => DEFAULT_BODY_CAP,
125    }
126}
127
128impl AppState {
129    /// Convenience constructor for tests and the binary. Callers may
130    /// replace fields after creation since `AppState` is a plain struct.
131    pub fn new(storage: Arc<dyn Storage>) -> Self {
132        Self {
133            storage,
134            dotfiles: Arc::new(DotfileAllowlist::from_env()),
135            body_cap: body_cap_from_env(),
136            nodeinfo: NodeInfoMeta::default(),
137            mashlib_cdn: None,
138        }
139    }
140}
141
142// ---------------------------------------------------------------------------
143// Error translation
144// ---------------------------------------------------------------------------
145
146fn to_actix(e: PodError) -> ActixError {
147    match e {
148        PodError::NotFound(_) => actix_web::error::ErrorNotFound(e.to_string()),
149        PodError::BadRequest(_) => actix_web::error::ErrorBadRequest(e.to_string()),
150        PodError::Unsupported(_) => actix_web::error::ErrorUnsupportedMediaType(e.to_string()),
151        PodError::Forbidden => actix_web::error::ErrorForbidden(e.to_string()),
152        PodError::Unauthenticated => actix_web::error::ErrorUnauthorized(e.to_string()),
153        PodError::PreconditionFailed(_) => {
154            actix_web::error::ErrorPreconditionFailed(e.to_string())
155        }
156        _ => actix_web::error::ErrorInternalServerError(e.to_string()),
157    }
158}
159
160// ---------------------------------------------------------------------------
161// Auth helper — shared across handlers
162// ---------------------------------------------------------------------------
163
164/// Attempt NIP-98 bearer verification; returns the pubkey on success.
165async fn extract_pubkey(req: &HttpRequest) -> Option<String> {
166    let header_val = req
167        .headers()
168        .get(header::AUTHORIZATION)
169        .and_then(|v| v.to_str().ok())?;
170    let url = format!(
171        "http://{}{}",
172        req.connection_info().host(),
173        req.uri().path()
174    );
175    nip98::verify(header_val, &url, req.method().as_str(), None)
176        .await
177        .ok()
178}
179
180fn agent_uri(pubkey: Option<&String>) -> Option<String> {
181    pubkey.map(|pk| format!("did:nostr:{pk}"))
182}
183
184// ---------------------------------------------------------------------------
185// WAC enforcement for writes (PUT / POST / PATCH / DELETE)
186// ---------------------------------------------------------------------------
187
188/// Resolve the effective ACL and evaluate whether the given WebID may
189/// perform `mode` on `path`.
190///
191/// Returns `Ok(())` on grant. On deny, returns an `actix_web::Error`:
192/// * `401` when the request had no authenticated agent (so the client
193///   knows retrying with credentials might work);
194/// * `403` when authenticated but the ACL does not grant the mode.
195async fn enforce_write(
196    state: &AppState,
197    path: &str,
198    mode: AccessMode,
199    agent_uri: Option<&str>,
200) -> Result<(), ActixError> {
201    // `StorageAclResolver` is generic over a concrete backend. `state`
202    // holds an `Arc<dyn Storage>`; wrap it in a trait-object-friendly
203    // adapter (`DynStorage`) that forwards each trait method so the
204    // resolver can be constructed with a concrete type.
205    let acl_doc = match find_effective_acl_dyn(&*state.storage, path).await {
206        Ok(doc) => doc,
207        Err(e) => return Err(to_actix(e)),
208    };
209
210    let ctx = RequestContext {
211        web_id: agent_uri,
212        client_id: None,
213        issuer: None,
214    };
215    let registry = wac::conditions::ConditionRegistry::default_with_client_and_issuer();
216    let groups: wac::StaticGroupMembership = wac::StaticGroupMembership::default();
217    let granted = wac::evaluate_access_ctx_with_registry(
218        acl_doc.as_ref(),
219        &ctx,
220        path,
221        mode,
222        None,
223        &groups,
224        &registry,
225    );
226    if granted {
227        return Ok(());
228    }
229
230    let allow_header = wac::wac_allow_header(acl_doc.as_ref(), agent_uri, path);
231    let (status, body) = if agent_uri.is_none() {
232        (StatusCode::UNAUTHORIZED, "authentication required")
233    } else {
234        (StatusCode::FORBIDDEN, "access forbidden")
235    };
236    let mut rsp = HttpResponse::new(status);
237    rsp.headers_mut().insert(
238        header::HeaderName::from_static("wac-allow"),
239        header::HeaderValue::from_str(&allow_header).unwrap_or(header::HeaderValue::from_static("")),
240    );
241    Err(actix_web::error::InternalError::from_response(body, rsp).into())
242}
243
244// ---------------------------------------------------------------------------
245// Handlers
246// ---------------------------------------------------------------------------
247
248fn set_link_headers(rsp: &mut HttpResponse, path: &str) {
249    let links = ldp::link_headers(path).join(", ");
250    if let Ok(value) = header::HeaderValue::from_str(&links) {
251        rsp.headers_mut()
252            .insert(header::HeaderName::from_static("link"), value);
253    }
254}
255
256fn set_wac_allow(rsp: &mut HttpResponse, header_value: &str) {
257    if let Ok(v) = header::HeaderValue::from_str(header_value) {
258        rsp.headers_mut()
259            .insert(header::HeaderName::from_static("wac-allow"), v);
260    }
261}
262
263async fn handle_get(
264    req: HttpRequest,
265    state: web::Data<AppState>,
266) -> Result<HttpResponse, ActixError> {
267    let path = req.uri().path().to_string();
268    let auth_pk = extract_pubkey(&req).await;
269    let agent = agent_uri(auth_pk.as_ref());
270    let wac_allow = wac::wac_allow_header(None, agent.as_deref(), &path);
271
272    if ldp::is_container(&path) {
273        let v = state
274            .storage
275            .container_representation(&path)
276            .await
277            .map_err(to_actix)?;
278        let mut rsp = HttpResponse::Ok().json(v);
279        rsp.headers_mut().insert(
280            header::CONTENT_TYPE,
281            header::HeaderValue::from_static("application/ld+json"),
282        );
283        set_wac_allow(&mut rsp, &wac_allow);
284        set_link_headers(&mut rsp, &path);
285        return Ok(rsp);
286    }
287
288    match state.storage.get(&path).await {
289        Ok((body, meta)) => {
290            let mut rsp = HttpResponse::Ok().body(body.to_vec());
291            rsp.headers_mut().insert(
292                header::CONTENT_TYPE,
293                header::HeaderValue::from_str(&meta.content_type)
294                    .unwrap_or_else(|_| header::HeaderValue::from_static("application/octet-stream")),
295            );
296            if let Ok(etag) = header::HeaderValue::from_str(&format!("\"{}\"", meta.etag)) {
297                rsp.headers_mut().insert(header::ETAG, etag);
298            }
299            set_wac_allow(&mut rsp, &wac_allow);
300            set_link_headers(&mut rsp, &path);
301            Ok(rsp)
302        }
303        Err(PodError::NotFound(_)) => Ok(HttpResponse::NotFound().finish()),
304        Err(e) => Err(to_actix(e)),
305    }
306}
307
308async fn handle_put(
309    req: HttpRequest,
310    body: web::Bytes,
311    state: web::Data<AppState>,
312) -> Result<HttpResponse, ActixError> {
313    let path = req.uri().path().to_string();
314    if ldp::is_container(&path) {
315        return Ok(HttpResponse::MethodNotAllowed().body("cannot PUT to a container"));
316    }
317    let auth_pk = extract_pubkey(&req).await;
318    let agent = agent_uri(auth_pk.as_ref());
319    enforce_write(&state, &path, AccessMode::Write, agent.as_deref()).await?;
320
321    let ct = req
322        .headers()
323        .get(header::CONTENT_TYPE)
324        .and_then(|v| v.to_str().ok())
325        .unwrap_or("application/octet-stream");
326    let meta = state
327        .storage
328        .put(&path, Bytes::from(body.to_vec()), ct)
329        .await
330        .map_err(to_actix)?;
331    let mut rsp = HttpResponse::Created().finish();
332    if let Ok(etag) = header::HeaderValue::from_str(&format!("\"{}\"", meta.etag)) {
333        rsp.headers_mut().insert(header::ETAG, etag);
334    }
335    set_link_headers(&mut rsp, &path);
336    Ok(rsp)
337}
338
339async fn handle_post(
340    req: HttpRequest,
341    body: web::Bytes,
342    state: web::Data<AppState>,
343) -> Result<HttpResponse, ActixError> {
344    let path = req.uri().path().to_string();
345    // POST route only matches container paths (trailing slash) via the
346    // `POST /{tail:.*}/` registration.
347    let auth_pk = extract_pubkey(&req).await;
348    let agent = agent_uri(auth_pk.as_ref());
349    enforce_write(&state, &path, AccessMode::Append, agent.as_deref()).await?;
350
351    let slug = req
352        .headers()
353        .get(header::HeaderName::from_static("slug"))
354        .and_then(|v| v.to_str().ok());
355    let target = match ldp::resolve_slug(&path, slug) {
356        Ok(p) => p,
357        Err(e) => return Err(to_actix(e)),
358    };
359    let ct = req
360        .headers()
361        .get(header::CONTENT_TYPE)
362        .and_then(|v| v.to_str().ok())
363        .unwrap_or("application/octet-stream");
364    let meta = state
365        .storage
366        .put(&target, Bytes::from(body.to_vec()), ct)
367        .await
368        .map_err(to_actix)?;
369    let mut rsp = HttpResponse::Created().finish();
370    if let Ok(loc) = header::HeaderValue::from_str(&target) {
371        rsp.headers_mut().insert(header::LOCATION, loc);
372    }
373    if let Ok(etag) = header::HeaderValue::from_str(&format!("\"{}\"", meta.etag)) {
374        rsp.headers_mut().insert(header::ETAG, etag);
375    }
376    set_link_headers(&mut rsp, &target);
377    Ok(rsp)
378}
379
380async fn handle_patch(
381    req: HttpRequest,
382    body: web::Bytes,
383    state: web::Data<AppState>,
384) -> Result<HttpResponse, ActixError> {
385    let path = req.uri().path().to_string();
386    if ldp::is_container(&path) {
387        return Ok(HttpResponse::MethodNotAllowed().body("cannot PATCH a container"));
388    }
389    let auth_pk = extract_pubkey(&req).await;
390    let agent = agent_uri(auth_pk.as_ref());
391    enforce_write(&state, &path, AccessMode::Append, agent.as_deref()).await?;
392
393    let ct = req
394        .headers()
395        .get(header::CONTENT_TYPE)
396        .and_then(|v| v.to_str().ok())
397        .unwrap_or("");
398    let dialect = match ldp::patch_dialect_from_mime(ct) {
399        Some(d) => d,
400        None => {
401            return Ok(HttpResponse::UnsupportedMediaType()
402                .body(format!("unsupported patch dialect for content-type {ct:?}")))
403        }
404    };
405    let body_str = match std::str::from_utf8(&body) {
406        Ok(s) => s.to_string(),
407        Err(_) => {
408            return Ok(HttpResponse::BadRequest().body("patch body is not valid UTF-8"))
409        }
410    };
411
412    // Existing resource?
413    let existing = state.storage.get(&path).await;
414    match existing {
415        Ok((current_body, meta)) => {
416            // Parse the current body into a graph. For the Sprint 7 D
417            // slice, the PATCH paths operate on an empty seed graph when
418            // a textual RDF representation cannot be parsed — the
419            // dialect patchers already cover the semantics. This keeps
420            // the handler thin; richer mutation semantics live in
421            // the library crate.
422            let out = match dialect {
423                ldp::PatchDialect::N3 => ldp::apply_n3_patch(ldp::Graph::new(), &body_str)
424                    .map_err(patch_parse_err),
425                ldp::PatchDialect::SparqlUpdate => {
426                    ldp::apply_sparql_patch(ldp::Graph::new(), &body_str)
427                        .map_err(patch_parse_err)
428                }
429                ldp::PatchDialect::JsonPatch => {
430                    let mut json: serde_json::Value = match serde_json::from_slice(&current_body) {
431                        Ok(v) => v,
432                        Err(_) => serde_json::json!({}),
433                    };
434                    let patch: serde_json::Value = match serde_json::from_str(&body_str) {
435                        Ok(v) => v,
436                        Err(e) => return Err(to_actix(PodError::BadRequest(e.to_string()))),
437                    };
438                    ldp::apply_json_patch(&mut json, &patch).map_err(to_actix)?;
439                    let bytes = serde_json::to_vec(&json).map_err(PodError::from).map_err(to_actix)?;
440                    let _ = state
441                        .storage
442                        .put(&path, Bytes::from(bytes), &meta.content_type)
443                        .await
444                        .map_err(to_actix)?;
445                    return Ok(HttpResponse::NoContent().finish());
446                }
447            };
448            let outcome = out?;
449            // Round-trip the updated graph back to Turtle so the next
450            // GET reflects the mutation.
451            let serialised = graph_to_turtle(&outcome.graph);
452            let _ = state
453                .storage
454                .put(&path, Bytes::from(serialised.into_bytes()), "text/turtle")
455                .await
456                .map_err(to_actix)?;
457            Ok(HttpResponse::NoContent().finish())
458        }
459        Err(PodError::NotFound(_)) => {
460            // PATCH against an absent resource — create it.
461            let create = ldp::apply_patch_to_absent(dialect, &body_str).map_err(patch_parse_err)?;
462            let PatchCreateOutcome::Created { graph, .. } = create else {
463                return Err(to_actix(PodError::Unsupported(
464                    "unexpected patch outcome on absent resource".into(),
465                )));
466            };
467            let serialised = graph_to_turtle(&graph);
468            let _ = state
469                .storage
470                .put(&path, Bytes::from(serialised.into_bytes()), "text/turtle")
471                .await
472                .map_err(to_actix)?;
473            Ok(HttpResponse::Created().finish())
474        }
475        Err(e) => Err(to_actix(e)),
476    }
477}
478
479/// Map a PATCH body parse error to 400 Bad Request. Distinguishes
480/// "client sent garbage in a supported dialect" (400) from "client
481/// chose an unsupported dialect" (415 — handled by the dispatcher).
482fn patch_parse_err(e: PodError) -> ActixError {
483    match e {
484        PodError::Unsupported(msg) | PodError::BadRequest(msg) => {
485            actix_web::error::ErrorBadRequest(msg)
486        }
487        other => to_actix(other),
488    }
489}
490
491/// Serialise a graph to N-Triples so the next GET reflects PATCH
492/// mutations verbatim. Delegates to the library's canonical serialiser
493/// — the handler does not add its own formatting.
494fn graph_to_turtle(g: &ldp::Graph) -> String {
495    g.to_ntriples()
496}
497
498/// Walk the storage tree from `path` upward, returning the first
499/// `*.acl` document that parses as JSON-LD or Turtle. Object-safe
500/// equivalent of `StorageAclResolver::find_effective_acl` — the latter
501/// is generic over a concrete `Storage`, whereas the binary holds an
502/// `Arc<dyn Storage>`.
503async fn find_effective_acl_dyn(
504    storage: &dyn Storage,
505    resource_path: &str,
506) -> Result<Option<wac::AclDocument>, PodError> {
507    let mut path = resource_path.to_string();
508    loop {
509        let acl_key = if path == "/" {
510            "/.acl".to_string()
511        } else {
512            format!("{}.acl", path.trim_end_matches('/'))
513        };
514        if let Ok((body, meta)) = storage.get(&acl_key).await {
515            match parse_jsonld_acl(&body) {
516                Ok(doc) => return Ok(Some(doc)),
517                Err(PodError::BadRequest(_)) => {
518                    return Err(PodError::BadRequest("ACL document exceeds bounds".into()))
519                }
520                Err(_) => {}
521            }
522            let ct = meta.content_type.to_ascii_lowercase();
523            let looks_turtle = ct.starts_with("text/turtle")
524                || ct.starts_with("application/turtle")
525                || ct.starts_with("application/x-turtle");
526            let text = std::str::from_utf8(&body).unwrap_or("");
527            if looks_turtle || text.contains("@prefix") || text.contains("acl:Authorization") {
528                if let Ok(doc) = parse_turtle_acl(text) {
529                    return Ok(Some(doc));
530                }
531            }
532        }
533        if path == "/" || path.is_empty() {
534            break;
535        }
536        let trimmed = path.trim_end_matches('/');
537        path = match trimmed.rfind('/') {
538            Some(0) => "/".to_string(),
539            Some(pos) => trimmed[..pos].to_string(),
540            None => "/".to_string(),
541        };
542    }
543    Ok(None)
544}
545
546async fn handle_delete(
547    req: HttpRequest,
548    state: web::Data<AppState>,
549) -> Result<HttpResponse, ActixError> {
550    let path = req.uri().path().to_string();
551    let auth_pk = extract_pubkey(&req).await;
552    let agent = agent_uri(auth_pk.as_ref());
553    enforce_write(&state, &path, AccessMode::Write, agent.as_deref()).await?;
554
555    match state.storage.delete(&path).await {
556        Ok(()) => Ok(HttpResponse::NoContent().finish()),
557        Err(PodError::NotFound(_)) => Ok(HttpResponse::NotFound().finish()),
558        Err(e) => Err(to_actix(e)),
559    }
560}
561
562async fn handle_options(req: HttpRequest) -> Result<HttpResponse, ActixError> {
563    let path = req.uri().path().to_string();
564    let o = ldp::options_for(&path);
565    let mut rsp = HttpResponse::NoContent().finish();
566    if let Ok(v) = header::HeaderValue::from_str(&o.allow.join(", ")) {
567        rsp.headers_mut()
568            .insert(header::HeaderName::from_static("allow"), v);
569    }
570    if let Some(ap) = o.accept_post {
571        if let Ok(v) = header::HeaderValue::from_str(ap) {
572            rsp.headers_mut()
573                .insert(header::HeaderName::from_static("accept-post"), v);
574        }
575    }
576    if let Ok(v) = header::HeaderValue::from_str(o.accept_patch) {
577        rsp.headers_mut()
578            .insert(header::HeaderName::from_static("accept-patch"), v);
579    }
580    if let Ok(v) = header::HeaderValue::from_str(o.accept_ranges) {
581        rsp.headers_mut()
582            .insert(header::HeaderName::from_static("accept-ranges"), v);
583    }
584    Ok(rsp)
585}
586
587// ---------------------------------------------------------------------------
588// .well-known handlers
589// ---------------------------------------------------------------------------
590
591async fn handle_well_known_solid(state: web::Data<AppState>) -> HttpResponse {
592    let doc = interop::well_known_solid(&state.nodeinfo.base_url, &state.nodeinfo.base_url);
593    HttpResponse::Ok()
594        .content_type("application/ld+json")
595        .json(doc)
596}
597
598#[derive(Debug, Deserialize)]
599struct WebFingerQuery {
600    resource: Option<String>,
601}
602
603async fn handle_well_known_webfinger(
604    state: web::Data<AppState>,
605    q: web::Query<WebFingerQuery>,
606) -> HttpResponse {
607    let resource = q.resource.clone().unwrap_or_else(|| {
608        format!(
609            "acct:anonymous@{}",
610            state
611                .nodeinfo
612                .base_url
613                .trim_start_matches("http://")
614                .trim_start_matches("https://")
615        )
616    });
617    let webid = format!("{}/profile/card#me", state.nodeinfo.base_url.trim_end_matches('/'));
618    match interop::webfinger_response(&resource, &state.nodeinfo.base_url, &webid) {
619        Some(jrd) => HttpResponse::Ok()
620            .content_type("application/jrd+json")
621            .json(jrd),
622        None => HttpResponse::NotFound().finish(),
623    }
624}
625
626async fn handle_well_known_nodeinfo(state: web::Data<AppState>) -> HttpResponse {
627    let doc = interop::nodeinfo_discovery(&state.nodeinfo.base_url);
628    HttpResponse::Ok()
629        .content_type("application/json")
630        .json(doc)
631}
632
633async fn handle_well_known_nodeinfo_2_1(state: web::Data<AppState>) -> HttpResponse {
634    let doc = interop::nodeinfo_2_1(
635        &state.nodeinfo.software_name,
636        &state.nodeinfo.software_version,
637        state.nodeinfo.open_registrations,
638        state.nodeinfo.total_users,
639    );
640    HttpResponse::Ok()
641        .content_type("application/json")
642        .json(doc)
643}
644
645#[cfg(feature = "did-nostr")]
646async fn handle_well_known_did_nostr(
647    state: web::Data<AppState>,
648    path: web::Path<String>,
649) -> HttpResponse {
650    let pubkey = path.into_inner();
651    let also = vec![format!(
652        "{}/profile/card#me",
653        state.nodeinfo.base_url.trim_end_matches('/')
654    )];
655    let doc = interop::did_nostr::did_nostr_document(&pubkey, &also);
656    HttpResponse::Ok()
657        .content_type("application/did+json")
658        .json(doc)
659}
660
661// ---------------------------------------------------------------------------
662// Percent-decode + dotdot re-check middleware
663// ---------------------------------------------------------------------------
664
665/// Actix middleware that rejects requests containing `..` path-traversal sequences.
666pub struct PathTraversalGuard;
667
668impl<S, B> Transform<S, ServiceRequest> for PathTraversalGuard
669where
670    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = ActixError> + 'static,
671    B: 'static,
672{
673    type Response = ServiceResponse<EitherBody<B, BoxBody>>;
674    type Error = ActixError;
675    type InitError = ();
676    type Transform = PathTraversalGuardMiddleware<S>;
677    type Future = Ready<Result<Self::Transform, Self::InitError>>;
678
679    fn new_transform(&self, service: S) -> Self::Future {
680        ready(Ok(PathTraversalGuardMiddleware { service }))
681    }
682}
683
684/// Per-request service instance produced by [`PathTraversalGuard`].
685pub struct PathTraversalGuardMiddleware<S> {
686    service: S,
687}
688
689impl<S, B> Service<ServiceRequest> for PathTraversalGuardMiddleware<S>
690where
691    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = ActixError> + 'static,
692    B: 'static,
693{
694    type Response = ServiceResponse<EitherBody<B, BoxBody>>;
695    type Error = ActixError;
696    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
697
698    actix_web::dev::forward_ready!(service);
699
700    fn call(&self, req: ServiceRequest) -> Self::Future {
701        // Decode the raw path twice so that `%252e%252e` → `%2e%2e` →
702        // `..` can be caught even though NormalizePath already ran once.
703        let raw = req.path().to_string();
704        if path_is_traversal(&raw) {
705            let rsp = HttpResponse::BadRequest().body("invalid path: traversal rejected");
706            let sr = req.into_response(rsp.map_into_boxed_body());
707            return Box::pin(async move { Ok(sr.map_into_right_body()) });
708        }
709        let fut = self.service.call(req);
710        Box::pin(async move {
711            let resp = fut.await?;
712            Ok(resp.map_into_left_body())
713        })
714    }
715}
716
717fn path_is_traversal(path: &str) -> bool {
718    // Two passes of percent-decode catches double-encoding.
719    let once: String = percent_decode_str(path).decode_utf8_lossy().into_owned();
720    let twice: String = percent_decode_str(&once).decode_utf8_lossy().into_owned();
721    for seg in once.split('/').chain(twice.split('/')) {
722        if seg == ".." || seg == "." {
723            return true;
724        }
725    }
726    // Also flag any raw escape sequences that decode to a traversal
727    // segment even when buried inside a component (e.g. `foo%2f..%2fbar`).
728    if twice.contains("/../") || twice.starts_with("../") || twice.ends_with("/..") {
729        return true;
730    }
731    false
732}
733
734// ---------------------------------------------------------------------------
735// Sprint 11 (row 158): top-level 5xx logging middleware.
736//
737// JSS ref: commit 5b34d72 (#312) — "Top-level Fastify error handler,
738// full stack on 5xx". Mirror the behaviour in actix: intercept any
739// response whose status is 5xx, emit a structured `tracing::error!`
740// with the method, path, status, error chain, and (when
741// `RUST_BACKTRACE=1`) a captured backtrace. The response body is not
742// altered; we only observe.
743// ---------------------------------------------------------------------------
744
745/// Observes outbound responses and logs 5xx results with the full
746/// error chain. Pass-through on 2xx/3xx/4xx. Shaped as an actix
747/// [`Transform`] so it slots into the middleware stack in
748/// [`build_app`].
749pub struct ErrorLoggingMiddleware;
750
751impl<S, B> Transform<S, ServiceRequest> for ErrorLoggingMiddleware
752where
753    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = ActixError> + 'static,
754    B: 'static,
755{
756    type Response = ServiceResponse<B>;
757    type Error = ActixError;
758    type InitError = ();
759    type Transform = ErrorLoggingMiddlewareService<S>;
760    type Future = Ready<Result<Self::Transform, Self::InitError>>;
761
762    fn new_transform(&self, service: S) -> Self::Future {
763        ready(Ok(ErrorLoggingMiddlewareService { service }))
764    }
765}
766
767/// Per-request service instance produced by [`ErrorLoggingMiddleware`].
768pub struct ErrorLoggingMiddlewareService<S> {
769    service: S,
770}
771
772impl<S, B> Service<ServiceRequest> for ErrorLoggingMiddlewareService<S>
773where
774    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = ActixError> + 'static,
775    B: 'static,
776{
777    type Response = ServiceResponse<B>;
778    type Error = ActixError;
779    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
780
781    actix_web::dev::forward_ready!(service);
782
783    fn call(&self, req: ServiceRequest) -> Self::Future {
784        // Snapshot fields we need for the log line before the request
785        // moves into the inner service.
786        let method = req.method().as_str().to_string();
787        let path = req.path().to_string();
788
789        let fut = self.service.call(req);
790        Box::pin(async move {
791            let response = fut.await?;
792            let status = response.status();
793            if status.is_server_error() {
794                log_5xx(&method, &path, status, response.response().error());
795            }
796            Ok(response)
797        })
798    }
799}
800
801/// Emit the structured 5xx log line. Captures a backtrace only when
802/// `RUST_BACKTRACE=1` is set so production logs don't bloat unless the
803/// operator opted in.
804fn log_5xx(method: &str, path: &str, status: StatusCode, error: Option<&actix_web::Error>) {
805    // Full error chain — include `source()` walk so downstream
806    // `PodError` variants surface instead of being swallowed by
807    // actix's top-level wrapper.
808    let chain = match error {
809        Some(e) => format_error_chain(e),
810        None => "<no error attached to response>".to_string(),
811    };
812
813    let backtrace = if std::env::var("RUST_BACKTRACE").ok().as_deref() == Some("1") {
814        Some(std::backtrace::Backtrace::force_capture().to_string())
815    } else {
816        None
817    };
818
819    tracing::error!(
820        target: "solid_pod_rs_server::http",
821        method = %method,
822        path = %path,
823        status = %status.as_u16(),
824        error.chain = %chain,
825        backtrace = backtrace.as_deref().unwrap_or(""),
826        "5xx response"
827    );
828}
829
830/// Walk an actix `Error` + its `source()` chain into a single
831/// human-readable string (one segment per cause, separated by ` -> `).
832///
833/// `actix_web::Error` does not expose a stable `source()` accessor,
834/// and `ResponseError` in actix-web 4 does not extend
835/// [`std::error::Error`]. We surface the `Display` form of the
836/// response error (which captures the message operators care about
837/// on 5xx) and append the actix `Debug` dump for deep diagnosis —
838/// the dump already includes the inner cause chain that actix-http
839/// preserves internally.
840fn format_error_chain(e: &actix_web::Error) -> String {
841    let summary = format!("{}", e.as_response_error());
842    let debug = format!("{e:?}");
843    if debug == summary || debug.is_empty() {
844        summary
845    } else {
846        format!("{summary} -> {debug}")
847    }
848}
849
850// ---------------------------------------------------------------------------
851// Dotfile allowlist middleware
852// ---------------------------------------------------------------------------
853
854/// Actix middleware that blocks dotfile paths unless they appear on the allowlist.
855pub struct DotfileGuard {
856    allow: Arc<DotfileAllowlist>,
857}
858
859impl DotfileGuard {
860    pub fn new(allow: Arc<DotfileAllowlist>) -> Self {
861        Self { allow }
862    }
863}
864
865impl<S, B> Transform<S, ServiceRequest> for DotfileGuard
866where
867    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = ActixError> + 'static,
868    B: 'static,
869{
870    type Response = ServiceResponse<EitherBody<B, BoxBody>>;
871    type Error = ActixError;
872    type InitError = ();
873    type Transform = DotfileGuardMiddleware<S>;
874    type Future = Ready<Result<Self::Transform, Self::InitError>>;
875
876    fn new_transform(&self, service: S) -> Self::Future {
877        ready(Ok(DotfileGuardMiddleware {
878            service,
879            allow: self.allow.clone(),
880        }))
881    }
882}
883
884/// Per-request service instance produced by [`DotfileGuard`].
885pub struct DotfileGuardMiddleware<S> {
886    service: S,
887    allow: Arc<DotfileAllowlist>,
888}
889
890impl<S, B> Service<ServiceRequest> for DotfileGuardMiddleware<S>
891where
892    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = ActixError> + 'static,
893    B: 'static,
894{
895    type Response = ServiceResponse<EitherBody<B, BoxBody>>;
896    type Error = ActixError;
897    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
898
899    actix_web::dev::forward_ready!(service);
900
901    fn call(&self, req: ServiceRequest) -> Self::Future {
902        let path = req.path().to_string();
903        // Whitelist the well-known discovery paths even though they
904        // contain a dotfile component — they are part of Solid's stable
905        // interop surface.
906        let allow_wellknown = path.starts_with("/.well-known/");
907        if !allow_wellknown {
908            let pb = PathBuf::from(&path);
909            if !self.allow.is_allowed(Path::new(&pb)) {
910                let rsp = HttpResponse::Forbidden().body("dotfile path denied by allowlist");
911                let sr = req.into_response(rsp.map_into_boxed_body());
912                return Box::pin(async move { Ok(sr.map_into_right_body()) });
913            }
914        }
915        let fut = self.service.call(req);
916        Box::pin(async move {
917            let resp = fut.await?;
918            Ok(resp.map_into_left_body())
919        })
920    }
921}
922
923// ---------------------------------------------------------------------------
924// Public app builder
925// ---------------------------------------------------------------------------
926
927/// Build the complete actix `App` for the Solid Pod server. Both the
928/// binary (`main.rs`) and the workspace integration tests call this.
929///
930/// The returned `App` is fully-configured: route table, normaliser,
931/// path-traversal guard, dotfile allowlist, body cap, CORS middleware
932/// (when available), rate-limit middleware (when available), and WAC
933/// enforcement.
934pub fn build_app(
935    state: AppState,
936) -> App<
937    impl actix_web::dev::ServiceFactory<
938        ServiceRequest,
939        Config = (),
940        Response = ServiceResponse<
941            EitherBody<EitherBody<BoxBody>>,
942        >,
943        Error = ActixError,
944        InitError = (),
945    >,
946> {
947    let body_cap = state.body_cap;
948    let dotfiles = state.dotfiles.clone();
949
950    let mut app = App::new()
951        .app_data(web::Data::new(state.clone()))
952        .app_data(web::PayloadConfig::new(body_cap))
953        // Sprint 11 (row 158): outermost layer so it observes every
954        // response — including those that short-circuited in inner
955        // guards. Wrapping first means `wrap()` applies it last in
956        // actix's stack order.
957        .wrap(ErrorLoggingMiddleware)
958        // `MergeOnly` collapses duplicate slashes (//a → /a) without
959        // stripping the trailing slash, which is the container/resource
960        // discriminator in LDP.
961        .wrap(NormalizePath::new(TrailingSlash::MergeOnly))
962        .wrap(PathTraversalGuard)
963        .wrap(DotfileGuard::new(dotfiles));
964
965    // CORS / rate-limit: middleware is driven by the library types from
966    // S7-A. We register pass-through headers when the env-driven policy
967    // permits. The middleware is a no-op today beyond emitting the
968    // policy's `response_headers` on every response; full preflight
969    // handling lives in the sibling S7-A work.
970    app = app
971        .route(
972            "/.well-known/solid",
973            web::get().to(handle_well_known_solid),
974        )
975        .route(
976            "/.well-known/webfinger",
977            web::get().to(handle_well_known_webfinger),
978        )
979        .route(
980            "/.well-known/nodeinfo",
981            web::get().to(handle_well_known_nodeinfo),
982        )
983        .route(
984            "/.well-known/nodeinfo/2.1",
985            web::get().to(handle_well_known_nodeinfo_2_1),
986        );
987
988    #[cfg(feature = "did-nostr")]
989    {
990        app = app.route(
991            "/.well-known/did/nostr/{pubkey}.json",
992            web::get().to(handle_well_known_did_nostr),
993        );
994    }
995
996    // Container POST (trailing slash) must register before the catch-all
997    // so the trailing-slash variant wins.
998    app.route("/{tail:.*}/", web::post().to(handle_post))
999        .route("/{tail:.*}", web::get().to(handle_get))
1000        .route("/{tail:.*}", web::head().to(handle_get))
1001        .route("/{tail:.*}", web::put().to(handle_put))
1002        .route("/{tail:.*}", web::patch().to(handle_patch))
1003        .route("/{tail:.*}", web::delete().to(handle_delete))
1004        .route("/{tail:.*}", web::method(actix_web::http::Method::OPTIONS).to(handle_options))
1005}