Skip to main content

mii_http/
check.rs

1//! Semantic validation of a parsed Spec.
2
3use crate::diag::Diag;
4use crate::spec::*;
5use std::collections::HashSet;
6
7pub fn check(spec: &Spec) -> Vec<Diag> {
8    tracing::debug!(endpoints = spec.endpoints.len(), "check::check");
9    let mut diags = Vec::new();
10    check_setup(&spec.setup, &mut diags);
11    let mut seen: HashSet<(Method, String)> = HashSet::new();
12    for ep in &spec.endpoints {
13        check_endpoint(ep, &spec.setup, &mut diags);
14        let key = (ep.method, normalize_path(&ep.path));
15        if !seen.insert(key) {
16            diags.push(Diag::warning(
17                format!("duplicate endpoint {} {}", ep.method.as_str(), ep.path),
18                ep.span.clone(),
19                "this overrides another endpoint with the same method+path",
20            ));
21        }
22    }
23    diags
24}
25
/// Strip parameter type annotations from a route path so that two paths that
/// differ only in a parameter's declared type compare equal.
///
/// Each `/`-separated segment starting with `:` is reduced to `:` plus the
/// text up to the next `:`; all other segments are kept verbatim.
fn normalize_path(path: &str) -> String {
    let mut normalized = String::with_capacity(path.len());
    for (position, segment) in path.split('/').enumerate() {
        if position > 0 {
            normalized.push('/');
        }
        match segment.strip_prefix(':') {
            Some(param) => {
                // Keep only the parameter name, dropping any `:type` suffix.
                let name = param.split_once(':').map_or(param, |(name, _)| name);
                normalized.push(':');
                normalized.push_str(name);
            }
            None => normalized.push_str(segment),
        }
    }
    normalized
}
39
40fn check_setup(setup: &Setup, diags: &mut Vec<Diag>) {
41    if let Some(AuthSpec::BearerHeader { header, span }) = &setup.auth {
42        if header.is_empty() {
43            diags.push(Diag::error(
44                "AUTH header name is empty",
45                span.clone(),
46                "specify a header name",
47            ));
48        }
49        if setup.jwt_verifier.is_none() && setup.token_secret.is_none() {
50            diags.push(
51                Diag::warning(
52                    "AUTH Bearer configured without JWT_VERIFIER or TOKEN_SECRET",
53                    span.clone(),
54                    "tokens cannot be validated; any value will be accepted",
55                )
56                .with_note("add `JWT_VERIFIER [ENV ...]` or `TOKEN_SECRET [ENV ...]`"),
57            );
58        }
59    }
60}
61
62fn check_endpoint(ep: &Endpoint, _setup: &Setup, diags: &mut Vec<Diag>) {
63    // unique names within scope
64    check_unique(&ep.query_params, "query parameter", diags);
65    check_unique(&ep.headers, "header", diags);
66    let var_names: HashSet<&str> = ep.vars.iter().map(|v| v.name.as_str()).collect();
67    if var_names.len() != ep.vars.len() {
68        diags.push(Diag::error(
69            "duplicate VAR name",
70            ep.span.clone(),
71            "VAR names must be unique within an endpoint",
72        ));
73    }
74
75    // path params
76    let path_params: HashSet<&str> = ep
77        .path_segments
78        .iter()
79        .filter_map(|s| match s {
80            PathSegment::Param { name, .. } => Some(name.as_str()),
81            _ => None,
82        })
83        .collect();
84
85    // body schema validation
86    if let Some(body) = &ep.body {
87        match body {
88            BodySpec::Form { fields, .. } => {
89                check_unique(fields, "form field", diags);
90                for f in fields {
91                    forbid_stdin_only_in_field(&f.ty, &f.name, "form field", diags, &f.span);
92                }
93            }
94            BodySpec::Json {
95                schema: Some(schema),
96                ..
97            } => {
98                let mut names = HashSet::new();
99                for f in &schema.fields {
100                    if !names.insert(f.name.clone()) {
101                        diags.push(Diag::error(
102                            format!("duplicate JSON field `{}`", f.name),
103                            f.span.clone(),
104                            "JSON field names must be unique",
105                        ));
106                    }
107                    let inner_ty = match &f.ty {
108                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
109                    };
110                    forbid_stdin_only_in_field(inner_ty, &f.name, "JSON field", diags, &f.span);
111                    security_check_type(inner_ty, &f.span, diags);
112                }
113            }
114            _ => {}
115        }
116    }
117
118    for q in &ep.query_params {
119        forbid_stdin_only_in_field(&q.ty, &q.name, "query parameter", diags, &q.span);
120        security_check_type(&q.ty, &q.span, diags);
121    }
122    for h in &ep.headers {
123        forbid_stdin_only_in_field(&h.ty, &h.name, "header", diags, &h.span);
124        security_check_type(&h.ty, &h.span, diags);
125    }
126    for seg in &ep.path_segments {
127        if let PathSegment::Param { name, ty, span } = seg {
128            forbid_stdin_only_in_field(ty, name, "path parameter", diags, span);
129            security_check_type(ty, span, diags);
130        }
131    }
132
133    // exec references resolve
134    let scope = RefScope {
135        query: ep.query_params.iter().map(|f| f.name.as_str()).collect(),
136        headers: ep.headers.iter().map(|f| f.name.as_str()).collect(),
137        path: path_params,
138        vars: var_names,
139        ep,
140    };
141    for stage in &ep.exec.pipeline {
142        match stage {
143            ExecStage::Source { reference, span } => {
144                check_ref(reference, span, &scope, diags);
145            }
146            ExecStage::Command { tokens, .. } => {
147                if let Some(first) = tokens.first() {
148                    check_executable_token(first, diags);
149                }
150                for t in tokens {
151                    check_token(t, &scope, diags);
152                }
153            }
154        }
155    }
156
157    // GET should not have a BODY
158    if ep.method == Method::Get && ep.body.is_some() {
159        diags.push(Diag::warning(
160            "GET endpoint declares a BODY",
161            ep.span.clone(),
162            "request bodies on GET requests are unusual",
163        ));
164    }
165}
166
167fn check_unique(fields: &[NamedField], kind: &str, diags: &mut Vec<Diag>) {
168    let mut seen = HashSet::new();
169    for f in fields {
170        if !seen.insert(f.name.clone()) {
171            diags.push(Diag::error(
172                format!("duplicate {} `{}`", kind, f.name),
173                f.span.clone(),
174                "names must be unique",
175            ));
176        }
177    }
178}
179
180fn forbid_stdin_only_in_field(
181    ty: &TypeExpr,
182    name: &str,
183    kind: &str,
184    diags: &mut Vec<Diag>,
185    span: &Span,
186) {
187    // `string` and `json` (untyped) are allowed to be *declared* on any
188    // field; they are reserved for stdin use, which is enforced at argv
189    // construction time by `check_argv_safety`. Only `binary`, which is
190    // body-only, is rejected at the declaration site.
191    if matches!(ty, TypeExpr::Binary) {
192        diags.push(Diag::error(
193            format!("`binary` type only allowed as BODY for {} `{}`", kind, name),
194            span.clone(),
195            "binary is allowed only on top-level BODY",
196        ));
197    }
198}
199
200fn security_check_type(ty: &TypeExpr, span: &Span, diags: &mut Vec<Diag>) {
201    if let TypeExpr::Regex { pattern, .. } = ty {
202        let suspicious = matches!(
203            pattern.as_str(),
204            ".*" | ".+" | "(.*)" | "(.+)" | "[\\s\\S]*"
205        );
206        if suspicious {
207            diags.push(Diag::warning(
208                format!("permissive regex `/{}/` accepts almost any input", pattern),
209                span.clone(),
210                "consider restricting the pattern",
211            ));
212        }
213        // also check for unanchored .* style
214        if pattern.contains(".*") || pattern.contains(".+") {
215            diags.push(Diag::warning(
216                "regex contains `.*`/`.+` which can match command-injection payloads",
217                span.clone(),
218                "constrain to expected character class (e.g. /[a-zA-Z0-9_-]+/)",
219            ));
220        }
221    }
222}
223
224/// All names declared on an endpoint that an Exec reference can resolve to,
225/// plus a back-pointer to the endpoint for body-schema lookups.
226struct RefScope<'a> {
227    query: HashSet<&'a str>,
228    headers: HashSet<&'a str>,
229    path: HashSet<&'a str>,
230    vars: HashSet<&'a str>,
231    ep: &'a Endpoint,
232}
233
234/// Verify that a `ValueRef` resolves to something declared on the endpoint.
235/// Argv-context safety (forbidding unconstrained types as command arguments)
236/// is handled separately by [`check_argv_safety`].
237fn check_ref(r: &ValueRef, span: &Span, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
238    let ep = scope.ep;
239    let ok = match r {
240        ValueRef::Query(n) => scope.query.contains(n.as_str()),
241        ValueRef::Header(n) => scope.headers.contains(n.as_str()),
242        ValueRef::Path(n) => scope.path.contains(n.as_str()),
243        ValueRef::Var(n) => scope.vars.contains(n.as_str()),
244        ValueRef::Body { path: p } => match (&ep.body, p.is_empty()) {
245            (
246                Some(BodySpec::Json {
247                    schema: Some(schema),
248                    ..
249                }),
250                false,
251            ) => {
252                let head = &p[0];
253                schema.fields.iter().any(|f| &f.name == head)
254            }
255            (Some(BodySpec::Form { fields, .. }), false) if p.len() == 1 => {
256                fields.iter().any(|f| f.name == p[0])
257            }
258            (Some(_), true) => true,
259            (Some(BodySpec::Json { schema: None, .. }), false) => true,
260            _ => false,
261        },
262    };
263    if !ok {
264        diags.push(Diag::error(
265            format!("unresolved reference: {}", r.describe()),
266            span.clone(),
267            "no such field declared on this endpoint",
268        ));
269    }
270}
271
272/// Reject references that, used as a command argv token, would expose
273/// unconstrained user input directly to the command line.
274fn check_argv_safety(r: &ValueRef, span: &Span, ep: &Endpoint, diags: &mut Vec<Diag>) {
275    match r {
276        ValueRef::Query(name) => {
277            if let Some(f) = ep.query_params.iter().find(|f| &f.name == name) {
278                argv_unsafe_named(&f.ty, "query parameter", &f.name, span, diags);
279            }
280        }
281        ValueRef::Header(name) => {
282            if let Some(f) = ep.headers.iter().find(|f| &f.name == name) {
283                argv_unsafe_named(&f.ty, "header", &f.name, span, diags);
284            }
285        }
286        ValueRef::Path(name) => {
287            for seg in &ep.path_segments {
288                if let PathSegment::Param { name: n, ty, .. } = seg
289                    && n == name
290                {
291                    argv_unsafe_named(ty, "path parameter", n, span, diags);
292                }
293            }
294        }
295        ValueRef::Var(name) => {
296            if let Some(v) = ep.vars.iter().find(|v| &v.name == name)
297                && matches!(v.source, ValueSource::Header { .. })
298            {
299                diags.push(
300                    Diag::error(
301                        format!("VAR `{}` from a request header cannot be passed as argv", name),
302                        span.clone(),
303                        "declare a typed HEADER and reference it directly, or pipe the VAR via stdin",
304                    )
305                    .with_note("header-backed VAR values are request input and have no type declaration"),
306                );
307            }
308        }
309        ValueRef::Body { path: p } => match &ep.body {
310            Some(BodySpec::String { .. }) => diags.push(Diag::error(
311                "string body cannot be passed as argv",
312                span.clone(),
313                "use stdin (e.g. `$ | command`)",
314            )),
315            Some(BodySpec::Binary { .. }) if p.is_empty() => {}
316            Some(BodySpec::Binary { .. }) => diags.push(Diag::error(
317                "binary body fields cannot be passed as argv",
318                span.clone(),
319                "binary bodies do not have named fields",
320            )),
321            Some(BodySpec::Json { schema: None, .. }) => diags.push(Diag::error(
322                "untyped JSON body cannot be passed as argv",
323                span.clone(),
324                "declare a JSON schema with safe types, or use stdin",
325            )),
326            Some(BodySpec::Json {
327                schema: Some(schema),
328                ..
329            }) if !p.is_empty() => {
330                if let Some(field) = schema.fields.iter().find(|f| f.name == p[0]) {
331                    let inner = match &field.ty {
332                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
333                    };
334                    argv_unsafe_named(inner, "JSON field", &field.name, span, diags);
335                }
336            }
337            Some(BodySpec::Form { fields, .. }) if !p.is_empty() => {
338                if let Some(field) = fields.iter().find(|f| f.name == p[0]) {
339                    argv_unsafe_named(&field.ty, "form field", &field.name, span, diags);
340                }
341            }
342            _ => {}
343        },
344    }
345}
346
347/// Emit an argv-context error for a named field whose declared type is
348/// unconstrained (`string` or `json`). Other types are safe.
349fn argv_unsafe_named(ty: &TypeExpr, kind: &str, name: &str, span: &Span, diags: &mut Vec<Diag>) {
350    if matches!(ty, TypeExpr::String | TypeExpr::Json) {
351        diags.push(
352            Diag::error(
353                format!(
354                    "{} `{}` of type `{}` cannot be passed as argv",
355                    kind,
356                    name,
357                    ty.name()
358                ),
359                span.clone(),
360                "use a constrained type (regex, union, int, ...) or pipe via stdin",
361            )
362            .with_note("`string`/`json` are reserved for stdin to avoid command injection"),
363        );
364    }
365}
366
367fn check_executable_token(t: &ExecToken, diags: &mut Vec<Diag>) {
368    if token_contains_interpolation(t) {
369        diags.push(Diag::error(
370            "command executable cannot be interpolated",
371            token_span(t),
372            "make the program name a literal in the spec",
373        ));
374    }
375}
376
377fn token_contains_interpolation(t: &ExecToken) -> bool {
378    match t {
379        ExecToken::Text { parts, .. } => parts.iter().any(|p| matches!(p, TextPart::Interp(_))),
380        ExecToken::Group { pieces, .. } => pieces
381            .iter()
382            .flat_map(|piece| piece.parts.iter())
383            .any(|p| matches!(p, TextPart::Interp(_))),
384    }
385}
386
387fn token_span(t: &ExecToken) -> Span {
388    match t {
389        ExecToken::Text { span, .. } | ExecToken::Group { span, .. } => span.clone(),
390    }
391}
392
393fn check_token(t: &ExecToken, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
394    let parts_iter: Box<dyn Iterator<Item = (&Span, &Vec<TextPart>)>> = match t {
395        ExecToken::Text { parts, span, .. } => Box::new(std::iter::once((span, parts))),
396        ExecToken::Group { pieces, span } => Box::new(pieces.iter().map(move |p| (span, &p.parts))),
397    };
398    for (span, parts) in parts_iter {
399        for p in parts {
400            match p {
401                TextPart::Interp(r) => {
402                    check_ref(r, span, scope, diags);
403                    check_argv_safety(r, span, scope.ep, diags);
404                }
405                TextPart::Literal(s) => check_bare_reference_literals(s, span, scope, diags),
406            }
407        }
408    }
409}
410
411fn check_bare_reference_literals(
412    text: &str,
413    span: &Span,
414    scope: &RefScope<'_>,
415    diags: &mut Vec<Diag>,
416) {
417    for bare in bare_reference_candidates(text, scope) {
418        diags.push(
419            Diag::warning(
420                format!("bare Exec reference `{}` is not interpolated", bare),
421                span.clone(),
422                "wrap shell pieces in `[...]`, or use `{...}` inside a quoted string",
423            )
424            .with_note(format!(
425                "write `[{}]` for a shell piece, or escape it as `\\{}` for literal text",
426                bare, bare
427            )),
428        );
429    }
430}
431
432fn bare_reference_candidates(text: &str, scope: &RefScope<'_>) -> Vec<String> {
433    let mut out = Vec::new();
434    for (idx, ch) in text.char_indices() {
435        if is_escaped(text, idx) {
436            continue;
437        }
438        match ch {
439            '%' | ':' | '^' | '@' => {
440                let rest = &text[idx + ch.len_utf8()..];
441                let Some((name, _)) = take_ident(rest) else {
442                    continue;
443                };
444                if bare_named_ref_exists(ch, name, scope) {
445                    out.push(format!("{}{}", ch, name));
446                }
447            }
448            '$' => {
449                let rest = &text[idx + 1..];
450                if let Some(path) = rest.strip_prefix('.') {
451                    let Some((parts, _)) = take_body_path(path) else {
452                        continue;
453                    };
454                    if body_ref_exists(&parts, scope.ep) {
455                        out.push(format!("$.{}", parts.join(".")));
456                    }
457                } else if rest.is_empty() && scope.ep.body.is_some() {
458                    out.push("$".into());
459                }
460            }
461            _ => {}
462        }
463    }
464    out
465}
466
467fn bare_named_ref_exists(sigil: char, name: &str, scope: &RefScope<'_>) -> bool {
468    match sigil {
469        '%' => scope.query.contains(name),
470        ':' => scope.path.contains(name),
471        '^' => scope.headers.contains(name),
472        '@' => scope.vars.contains(name),
473        _ => false,
474    }
475}
476
477fn body_ref_exists(path: &[String], ep: &Endpoint) -> bool {
478    match &ep.body {
479        Some(BodySpec::Json {
480            schema: Some(schema),
481            ..
482        }) => path
483            .first()
484            .is_some_and(|head| schema.fields.iter().any(|f| &f.name == head)),
485        Some(BodySpec::Form { fields, .. }) if path.len() == 1 => {
486            fields.iter().any(|f| f.name == path[0])
487        }
488        Some(BodySpec::Json { schema: None, .. }) => true,
489        _ => false,
490    }
491}
492
493fn take_body_path(mut rest: &str) -> Option<(Vec<String>, &str)> {
494    let mut parts = Vec::new();
495    loop {
496        let (part, after) = take_ident(rest)?;
497        parts.push(part.to_string());
498        rest = after;
499        let Some(after_dot) = rest.strip_prefix('.') else {
500            break;
501        };
502        rest = after_dot;
503    }
504    Some((parts, rest))
505}
506
/// Split `rest` into its leading identifier and the remainder.
///
/// An identifier is a non-empty run of ASCII letters, digits, `_`, or `-`.
/// Returns `None` when `rest` does not start with such a character.
fn take_ident(rest: &str) -> Option<(&str, &str)> {
    let is_ident_char = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
    // Byte offset of the first character that cannot belong to the identifier.
    let end = rest.find(|c: char| !is_ident_char(c)).unwrap_or(rest.len());
    if end == 0 {
        return None;
    }
    Some(rest.split_at(end))
}
515
/// Return `true` if the character at byte offset `idx` is preceded by an odd
/// number of consecutive backslashes, i.e. it is backslash-escaped.
fn is_escaped(text: &str, idx: usize) -> bool {
    let preceding_backslashes = text[..idx]
        .chars()
        .rev()
        .take_while(|&c| c == '\\')
        .count();
    preceding_backslashes % 2 == 1
}
526}