manta-server 2.0.0-beta.61

Manta HTTP server — single API that proxies to CSM / Ochami backends.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
//! Top-level Axum handlers module.
//!
//! `mod.rs` keeps:
//! - request extractors (`BearerToken`, `SiteName`, `RequestCtx`, `SiteHeader`)
//! - the `ErrorResponse` body type + error mappers (`to_handler_error`,
//!   `serialize_or_500`)
//! - guard helpers (`require_vault`, `require_k8s_url`,
//!   `validate_repo_list_lengths`, `parse_iso_datetime`)
//! - the cross-handler `resolve_xnames_from_request` helper
//! - the `health` endpoint
//!
//! Every other handler lives in a per-resource sub-module (mirroring
//! the `service/` layout) and is re-exported here so `routes.rs` and
//! `api_doc.rs` can keep referencing `handlers::X` unchanged.

use std::sync::Arc;

use axum::{
  Json,
  extract::FromRequestParts,
  http::{StatusCode, header, request::Parts},
  response::IntoResponse,
};
use manta_backend_dispatcher::error::Error as BackendError;
use serde::Serialize;
use utoipa::{IntoParams, ToSchema};

use super::ServerState;
use super::common::app_context::InfraContext;

mod analysis;
mod auth;
mod boot_parameters;
mod cluster;
mod configuration;
mod console;
mod ephemeral_env;
mod group;
mod hardware;
mod hw_cluster;
mod image;
mod kernel_parameters;
mod migrate;
mod node;
mod power;
mod redfish_endpoints;
mod sat_file;
mod session;
mod template;

pub use analysis::*;
pub use auth::*;
pub use boot_parameters::*;
pub use cluster::*;
pub use configuration::*;
pub use console::*;
pub use ephemeral_env::*;
pub use group::*;
pub use hardware::*;
pub use hw_cluster::*;
pub use image::*;
pub use kernel_parameters::*;
pub use migrate::*;
pub use node::*;
pub use power::*;
pub use redfish_endpoints::*;
pub use sat_file::*;
pub use session::*;
pub use template::*;

// ---------------------------------------------------------------------------
// Bearer-token extractor — eliminates token-extraction boilerplate
// ---------------------------------------------------------------------------

/// Axum extractor that pulls the token from `Authorization: Bearer <token>`.
pub struct BearerToken(pub String);

impl<S: Send + Sync> FromRequestParts<S> for BearerToken {
  type Rejection = (StatusCode, Json<ErrorResponse>);

  async fn from_request_parts(
    parts: &mut Parts,
    _state: &S,
  ) -> Result<Self, Self::Rejection> {
    let auth_header = parts
      .headers
      .get(header::AUTHORIZATION)
      .and_then(|v| v.to_str().ok())
      .ok_or_else(|| {
        (
          StatusCode::UNAUTHORIZED,
          Json(ErrorResponse {
            error: "Missing Authorization header".to_string(),
          }),
        )
      })?;

    let token = auth_header
      .strip_prefix("Bearer ")
      .or_else(|| auth_header.strip_prefix("bearer "))
      .ok_or_else(|| {
        (
          StatusCode::UNAUTHORIZED,
          Json(ErrorResponse {
            error: "Authorization header must use Bearer scheme".to_string(),
          }),
        )
      })?;

    Ok(BearerToken(token.to_string()))
  }
}

/// Axum extractor that reads the target site name from `X-Manta-Site`.
///
/// Every handler that touches backend APIs requires this header so the server
/// knows which site's CA certificate, base URL, and credentials to use.
pub struct SiteName(pub String);

impl<S: Send + Sync> FromRequestParts<S> for SiteName {
  type Rejection = (StatusCode, Json<ErrorResponse>);

  async fn from_request_parts(
    parts: &mut Parts,
    _state: &S,
  ) -> Result<Self, Self::Rejection> {
    let site = parts
      .headers
      .get("X-Manta-Site")
      .and_then(|v| v.to_str().ok())
      .ok_or_else(|| {
        (
          StatusCode::BAD_REQUEST,
          Json(ErrorResponse {
            error: "Missing X-Manta-Site header".to_string(),
          }),
        )
      })?;
    Ok(SiteName(site.to_string()))
  }
}

/// Required header parameter present on every authenticated endpoint.
///
/// Tells the server which cluster to route the request to.
/// **Not** an authentication mechanism — documented as a plain header parameter.
///
/// The field is consumed by the `utoipa::IntoParams` derive macro at compile
/// time to generate the OpenAPI spec; the runtime extractor is [`SiteName`].
#[derive(IntoParams)]
#[into_params(parameter_in = Header)]
#[allow(dead_code)]
pub struct SiteHeader {
  /// Name of the target cluster (matches a site configured in the server).
  #[param(required = true, rename = "X-Manta-Site")]
  pub x_manta_site: String,
}

// ---------------------------------------------------------------------------
// RequestCtx — bundles the State + BearerToken + SiteName extractors that
// every authenticated handler opens with. Plus `infra()` for the
// `state.infra_context(&site_name).map_err(to_handler_error)?` line that
// follows. Each handler shrinks by 3-4 lines.
// ---------------------------------------------------------------------------

/// Bundled extractor for `State<Arc<ServerState>>` + `BearerToken` +
/// `SiteName`. Use it in handler signatures instead of the three
/// individual extractors when all three are needed (the typical case).
///
/// The unauthenticated `/auth/*` handlers and the health endpoint
/// still use explicit extractors — they don't need a Bearer token.
pub struct RequestCtx {
  /// Shared server state (backend dispatcher, per-site config, TLS
  /// material, optional Vault + k8s URLs).
  pub state: Arc<ServerState>,
  /// Bearer token extracted from the inbound `Authorization` header.
  pub token: String,
  /// Site name extracted from the inbound `X-Manta-Site` header;
  /// used to pick the right `[sites.X]` entry from `state`.
  pub site_name: String,
}

impl FromRequestParts<Arc<ServerState>> for RequestCtx {
  type Rejection = (StatusCode, Json<ErrorResponse>);

  async fn from_request_parts(
    parts: &mut Parts,
    state: &Arc<ServerState>,
  ) -> Result<Self, Self::Rejection> {
    let BearerToken(token) =
      BearerToken::from_request_parts(parts, state).await?;
    let SiteName(site_name) =
      SiteName::from_request_parts(parts, state).await?;
    // Validate the site resolves to a configured backend NOW, so the
    // per-handler `ctx.infra()` call below cannot fail. Returning the
    // 404-mapped error from extraction is the same shape the handler
    // would have produced.
    state.infra_context(&site_name).map_err(to_handler_error)?;
    Ok(Self {
      state: Arc::clone(state),
      token,
      site_name,
    })
  }
}

impl RequestCtx {
  /// Borrow the per-site infrastructure (backend, base URLs, root
  /// cert, optional Vault + k8s URLs). Infallible — the site was
  /// validated during extraction; a missing site would have failed
  /// the request before the handler body ran.
  pub fn infra(&self) -> InfraContext<'_> {
    self
      .state
      .infra_context(&self.site_name)
      .expect("site validated during RequestCtx extraction")
  }
}

/// Render an error and its `source()` chain as a multi-line string.
///
/// `thiserror`'s `Display` only emits the top-level message; nested
/// errors reached via `std::error::Error::source()` are dropped. This
/// walks the chain so the server log carries the full causal context
/// (e.g. the underlying TLS / connect error behind a `reqwest::Error`).
/// Works uniformly for thiserror-derived and `anyhow::Error` chains.
fn format_with_causes(e: &(dyn std::error::Error + 'static)) -> String {
  let mut out = e.to_string();
  let mut src = e.source();
  while let Some(cause) = src {
    out.push_str("\n  caused by: ");
    out.push_str(&cause.to_string());
    src = cause.source();
  }
  out
}

/// Convert a `BackendError` into the best-fitting HTTP error response.
///
/// `pub` (rather than `pub(crate)`) so the integration tests in
/// `crates/manta-server/tests/` can exercise the mapping directly.
//
// `e` is consumed via `e.to_string()` at the end; technically it could
// take `&BackendError`, but the canonical call shape is
// `.map_err(to_handler_error)?` which threads the value through.
// Switching to a reference would force every site to write
// `.map_err(|e| to_handler_error(&e))?` — losing the point-free form
// across hundreds of handler call sites is a worse trade than the
// ineffectual `Drop` here.
#[allow(clippy::needless_pass_by_value)]
pub fn to_handler_error(e: BackendError) -> (StatusCode, Json<ErrorResponse>) {
  let status = match &e {
    BackendError::NotFound(_)
    | BackendError::SessionNotFound
    | BackendError::ConfigurationNotFound => StatusCode::NOT_FOUND,
    BackendError::Conflict(_)
    | BackendError::ConfigurationAlreadyExistsError(_) => StatusCode::CONFLICT,
    BackendError::BadRequest(_)
    | BackendError::InvalidPattern(_)
    | BackendError::UnsupportedBackend(_)
    | BackendError::InvalidNodeId(_) => StatusCode::BAD_REQUEST,
    BackendError::AuthenticationTokenNotFound(_)
    | BackendError::JwtMalformed(_) => StatusCode::UNAUTHORIZED,
    BackendError::InsufficientResources(_) => StatusCode::UNPROCESSABLE_ENTITY,
    // Backend HTTP errors carry the originating status code (CSM or
    // ochami). Propagate it verbatim when it's a valid HTTP status, so
    // a 404 from the backend surfaces as a 404 from manta-server (not a
    // generic 500). Fall back to 502 Bad Gateway if the embedded code
    // is outside the HTTP status range — that's the canonical "upstream
    // returned something nonsensical" signal.
    BackendError::CsmError { status, .. } => {
      StatusCode::from_u16(*status).unwrap_or(StatusCode::BAD_GATEWAY)
    }
    // The CSM-side reqwest client times out via `NetError(reqwest::Error)`;
    // surface that as 504 Gateway Timeout so the CLI sees a distinct
    // status (not a generic 500) and the body explicitly names the hop.
    BackendError::NetError(rqe) if rqe.is_timeout() => {
      StatusCode::GATEWAY_TIMEOUT
    }
    _ => StatusCode::INTERNAL_SERVER_ERROR,
  };
  let chain = format_with_causes(&e);
  if status == StatusCode::INTERNAL_SERVER_ERROR {
    tracing::error!("Internal error: {}", chain);
  } else {
    tracing::debug!("Service error {}: {}", status, chain);
  }
  let error_body = categorise_backend_error_body(&e);
  (status, Json(ErrorResponse { error: error_body }))
}

/// Rewrite the error body when the underlying `BackendError` is a
/// timeout or connect failure on the manta-server -> CSM hop. The
/// rewritten body leads with which hop timed out so the operator
/// (and the CLI's own `categorise_server_error`) can name it.
fn categorise_backend_error_body(e: &BackendError) -> String {
  match e {
    BackendError::NetError(rqe) if rqe.is_timeout() => {
      format!(
        "manta-server -> CSM call timed out (csm-rs reqwest \
         HTTP_REQUEST_TIMEOUT, default 15 min). CSM did not send \
         response headers in time. Original: {rqe}"
      )
    }
    BackendError::NetError(rqe) if rqe.is_connect() => {
      format!(
        "manta-server -> CSM connect failed. Could not establish a \
         TCP/TLS connection to the configured CSM endpoint. Check \
         the site's backend URL and network reachability. Original: {rqe}"
      )
    }
    _ => e.to_string(),
  }
}

pub(super) fn serialize_or_500<T: Serialize>(
  v: &T,
) -> Result<serde_json::Value, (StatusCode, Json<ErrorResponse>)> {
  serde_json::to_value(v).map_err(|e| {
    let chain = format_with_causes(&e);
    tracing::error!("Failed to serialize: {}", chain);
    (
      StatusCode::INTERNAL_SERVER_ERROR,
      Json(ErrorResponse {
        error: format!("Failed to serialize: {e}"),
      }),
    )
  })
}

pub(super) fn require_vault(
  url: Option<&str>,
) -> Result<&str, (StatusCode, Json<ErrorResponse>)> {
  url.ok_or_else(|| {
    (
      StatusCode::NOT_IMPLEMENTED,
      Json(ErrorResponse {
        error: "vault_base_url not configured on this server".into(),
      }),
    )
  })
}

pub(super) fn require_k8s_url(
  url: Option<&str>,
) -> Result<&str, (StatusCode, Json<ErrorResponse>)> {
  url.ok_or_else(|| {
    (
      StatusCode::NOT_IMPLEMENTED,
      Json(ErrorResponse {
        error: "k8s_api_url not configured on this server".into(),
      }),
    )
  })
}

pub(super) fn validate_repo_list_lengths(
  repo_names: &[String],
  repo_last_commit_ids: &[String],
) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
  if repo_names.len() != repo_last_commit_ids.len() {
    return Err((
      StatusCode::BAD_REQUEST,
      Json(ErrorResponse {
        error: format!(
          "repo_names ({}) and repo_last_commit_ids ({}) must have the same length",
          repo_names.len(),
          repo_last_commit_ids.len()
        ),
      }),
    ));
  }
  Ok(())
}

pub(super) fn parse_iso_datetime(
  field: &str,
  value: &str,
) -> Result<chrono::NaiveDateTime, (StatusCode, Json<ErrorResponse>)> {
  chrono::NaiveDateTime::parse_from_str(value, "%Y-%m-%dT%H:%M:%S").map_err(
    |e| {
      (
        StatusCode::BAD_REQUEST,
        Json(ErrorResponse {
          error: format!("Invalid '{field}' datetime '{value}': {e}"),
        }),
      )
    },
  )
}

// ---------------------------------------------------------------------------
// Shared response types
// ---------------------------------------------------------------------------

/// Standard JSON error body returned by all failed endpoints.
#[derive(Serialize, ToSchema)]
pub struct ErrorResponse {
  /// Human-readable explanation of the failure. Never includes
  /// stack traces, credentials, or internal type names.
  pub error: String,
}

// ---------------------------------------------------------------------------
// Health check
// ---------------------------------------------------------------------------

/// GET /health — liveness probe; returns `{"status":"ok"}`.
#[utoipa::path(get, path = "/health", tag = "system",
  responses(
    (status = 200, description = "Server is healthy"),
  )
)]
#[tracing::instrument(skip_all)]
pub async fn health() -> impl IntoResponse {
  Json(serde_json::json!({ "status": "ok" }))
}

// ---------------------------------------------------------------------------
// Shared helpers
// ---------------------------------------------------------------------------

/// Resolve target xnames from an explicit list or an HSM group name.
/// Returns 400 if neither is provided.
async fn resolve_xnames_from_request(
  infra: &crate::server::common::app_context::InfraContext<'_>,
  token: &str,
  xnames_expression: Option<&str>,
  group_name_opt: Option<&str>,
) -> Result<Vec<String>, (StatusCode, Json<ErrorResponse>)> {
  if let Some(expr) = xnames_expression
    && !expr.is_empty()
  {
    return crate::service::node_ops::from_user_hosts_expression_to_xname_vec(
      infra, token, expr, false,
    )
    .await
    .map_err(to_handler_error);
  }
  if let Some(group) = group_name_opt {
    return crate::service::node_ops::resolve_target_nodes(
      infra,
      token,
      None,
      Some(group),
      None,
    )
    .await
    .map_err(to_handler_error);
  }
  Err((
    StatusCode::BAD_REQUEST,
    Json(ErrorResponse {
      error: "At least one of 'xnames' or 'hsm_group' must be provided"
        .to_string(),
    }),
  ))
}

#[cfg(test)]
mod tests {
  //! Pure-logic locks for the helpers in this module that don't need
  //! a live router. Route- and error-mapping coverage lives in
  //! `crates/manta-server/tests/server_routes.rs`.

  use super::format_with_causes;
  use std::error::Error;
  use std::fmt;

  /// Toy error whose `source()` returns the inner error, so we can
  /// build a fixed-depth `Display + Error` chain for the walk test.
  #[derive(Debug)]
  struct Chain {
    msg: &'static str,
    src: Option<Box<Chain>>,
  }
  impl fmt::Display for Chain {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
      f.write_str(self.msg)
    }
  }
  impl Error for Chain {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
      self.src.as_deref().map(|s| s as &(dyn Error + 'static))
    }
  }

  #[test]
  fn format_with_causes_single_error_has_no_caused_by() {
    let e = Chain {
      msg: "boom",
      src: None,
    };
    assert_eq!(format_with_causes(&e), "boom");
  }

  #[test]
  fn format_with_causes_two_level_chain_is_indented() {
    let e = Chain {
      msg: "outer",
      src: Some(Box::new(Chain {
        msg: "inner",
        src: None,
      })),
    };
    assert_eq!(format_with_causes(&e), "outer\n  caused by: inner");
  }

  #[test]
  fn format_with_causes_walks_to_the_root() {
    // Deeply nested chain — emulates anyhow's `with_context()` stack.
    let e = Chain {
      msg: "top",
      src: Some(Box::new(Chain {
        msg: "middle",
        src: Some(Box::new(Chain {
          msg: "root",
          src: None,
        })),
      })),
    };
    assert_eq!(
      format_with_causes(&e),
      "top\n  caused by: middle\n  caused by: root"
    );
  }
}