Skip to main content

mii_http/
check.rs

1//! Semantic validation of a parsed Spec.
2
3use crate::diag::Diag;
4use crate::spec::*;
5use std::collections::HashSet;
6
7pub fn check(spec: &Spec) -> Vec<Diag> {
8    tracing::debug!(endpoints = spec.endpoints.len(), "check::check");
9    let mut diags = Vec::new();
10    check_setup(&spec.setup, &mut diags);
11    let mut seen: HashSet<(Method, String)> = HashSet::new();
12    for ep in &spec.endpoints {
13        check_endpoint(ep, &spec.setup, &mut diags);
14        let key = (ep.method, normalize_path(&ep.path));
15        if !seen.insert(key) {
16            diags.push(Diag::warning(
17                format!("duplicate endpoint {} {}", ep.method.as_str(), ep.path),
18                ep.span.clone(),
19                "this overrides another endpoint with the same method+path",
20            ));
21        }
22    }
23    diags
24}
25
/// Build the key used to detect endpoints whose paths collide.
///
/// Every parameter segment (a segment starting with `:`) is reduced to a bare
/// `:` placeholder, so neither the parameter's name nor any type annotation
/// takes part in the comparison: `/users/:id:int` and `/users/:name` match the
/// same URLs and therefore produce the same key. Literal segments are kept
/// verbatim, which keeps `/users/:id` distinct from `/users/id`.
fn normalize_path(path: &str) -> String {
    path.split('/')
        .map(|segment| if segment.starts_with(':') { ":" } else { segment })
        .collect::<Vec<_>>()
        .join("/")
}
39
40fn check_setup(setup: &Setup, diags: &mut Vec<Diag>) {
41    if let Some(AuthSpec::BearerHeader { header, span }) = &setup.auth {
42        if header.is_empty() {
43            diags.push(Diag::error(
44                "AUTH header name is empty",
45                span.clone(),
46                "specify a header name",
47            ));
48        }
49        if setup.jwt_verifier.is_none() && setup.token_secret.is_none() {
50            diags.push(
51                Diag::warning(
52                    "AUTH Bearer configured without JWT_VERIFIER or TOKEN_SECRET",
53                    span.clone(),
54                    "tokens cannot be validated; any value will be accepted",
55                )
56                .with_note("add `JWT_VERIFIER [ENV ...]` or `TOKEN_SECRET [ENV ...]`"),
57            );
58        }
59    }
60}
61
62fn check_endpoint(ep: &Endpoint, _setup: &Setup, diags: &mut Vec<Diag>) {
63    // unique names within scope
64    check_unique(&ep.query_params, "query parameter", diags);
65    check_unique(&ep.headers, "header", diags);
66    let var_names: HashSet<&str> = ep.vars.iter().map(|v| v.name.as_str()).collect();
67    if var_names.len() != ep.vars.len() {
68        diags.push(Diag::error(
69            "duplicate VAR name",
70            ep.span.clone(),
71            "VAR names must be unique within an endpoint",
72        ));
73    }
74
75    // path params
76    let path_params: HashSet<&str> = ep
77        .path_segments
78        .iter()
79        .filter_map(|s| match s {
80            PathSegment::Param { name, .. } => Some(name.as_str()),
81            _ => None,
82        })
83        .collect();
84
85    // body schema validation
86    if let Some(body) = &ep.body {
87        match body {
88            BodySpec::Form { fields, .. } => {
89                check_unique(fields, "form field", diags);
90                for f in fields {
91                    forbid_stdin_only_in_field(&f.ty, &f.name, "form field", diags, &f.span);
92                }
93            }
94            BodySpec::Json {
95                schema: Some(schema),
96                ..
97            } => {
98                let mut names = HashSet::new();
99                for f in &schema.fields {
100                    if !names.insert(f.name.clone()) {
101                        diags.push(Diag::error(
102                            format!("duplicate JSON field `{}`", f.name),
103                            f.span.clone(),
104                            "JSON field names must be unique",
105                        ));
106                    }
107                    let inner_ty = match &f.ty {
108                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
109                    };
110                    forbid_stdin_only_in_field(inner_ty, &f.name, "JSON field", diags, &f.span);
111                    security_check_type(inner_ty, &f.span, diags);
112                }
113            }
114            _ => {}
115        }
116    }
117
118    for q in &ep.query_params {
119        forbid_stdin_only_in_field(&q.ty, &q.name, "query parameter", diags, &q.span);
120        security_check_type(&q.ty, &q.span, diags);
121    }
122    for h in &ep.headers {
123        forbid_stdin_only_in_field(&h.ty, &h.name, "header", diags, &h.span);
124        security_check_type(&h.ty, &h.span, diags);
125    }
126    for seg in &ep.path_segments {
127        if let PathSegment::Param { name, ty, span } = seg {
128            forbid_stdin_only_in_field(ty, name, "path parameter", diags, span);
129            security_check_type(ty, span, diags);
130        }
131    }
132
133    // exec references resolve
134    let scope = RefScope {
135        query: ep.query_params.iter().map(|f| f.name.as_str()).collect(),
136        headers: ep.headers.iter().map(|f| f.name.as_str()).collect(),
137        path: path_params,
138        vars: var_names,
139        ep,
140    };
141    for stage in ep.exec.all_stages() {
142        match stage {
143            ExecStage::Source { reference, span } => {
144                check_ref(reference, span, &scope, diags);
145            }
146            ExecStage::Command { tokens, .. } => {
147                if let Some(first) = tokens.first() {
148                    check_executable_token(first, diags);
149                }
150                for t in tokens {
151                    check_token(t, &scope, diags);
152                }
153            }
154        }
155    }
156
157    // GET should not have a BODY
158    if ep.method == Method::Get && ep.body.is_some() {
159        diags.push(Diag::warning(
160            "GET endpoint declares a BODY",
161            ep.span.clone(),
162            "request bodies on GET requests are unusual",
163        ));
164    }
165}
166
167fn check_unique(fields: &[NamedField], kind: &str, diags: &mut Vec<Diag>) {
168    let mut seen = HashSet::new();
169    for f in fields {
170        if !seen.insert(f.name.clone()) {
171            diags.push(Diag::error(
172                format!("duplicate {} `{}`", kind, f.name),
173                f.span.clone(),
174                "names must be unique",
175            ));
176        }
177    }
178}
179
180fn forbid_stdin_only_in_field(
181    ty: &TypeExpr,
182    name: &str,
183    kind: &str,
184    diags: &mut Vec<Diag>,
185    span: &Span,
186) {
187    // `string` and `json` (untyped) are allowed to be *declared* on any
188    // field; they are reserved for stdin use, which is enforced at argv
189    // construction time by `check_argv_safety`. `binary` is allowed only on
190    // top-level BODY or as a FORM field (where it is materialized to a
191    // temp file path when used as argv).
192    if matches!(ty, TypeExpr::Binary) && kind != "form field" {
193        diags.push(Diag::error(
194            format!("`binary` type only allowed as BODY or FORM field for {} `{}`", kind, name),
195            span.clone(),
196            "binary is allowed only on top-level BODY or inside `BODY form { ... }`",
197        ));
198    }
199}
200
201fn security_check_type(ty: &TypeExpr, span: &Span, diags: &mut Vec<Diag>) {
202    if let TypeExpr::Regex { pattern, .. } = ty {
203        let suspicious = matches!(
204            pattern.as_str(),
205            ".*" | ".+" | "(.*)" | "(.+)" | "[\\s\\S]*"
206        );
207        if suspicious {
208            diags.push(Diag::warning(
209                format!("permissive regex `/{}/` accepts almost any input", pattern),
210                span.clone(),
211                "consider restricting the pattern",
212            ));
213        }
214        // also check for unanchored .* style
215        if pattern.contains(".*") || pattern.contains(".+") {
216            diags.push(Diag::warning(
217                "regex contains `.*`/`.+` which can match command-injection payloads",
218                span.clone(),
219                "constrain to expected character class (e.g. /[a-zA-Z0-9_-]+/)",
220            ));
221        }
222    }
223}
224
225/// All names declared on an endpoint that an Exec reference can resolve to,
226/// plus a back-pointer to the endpoint for body-schema lookups.
227struct RefScope<'a> {
228    query: HashSet<&'a str>,
229    headers: HashSet<&'a str>,
230    path: HashSet<&'a str>,
231    vars: HashSet<&'a str>,
232    ep: &'a Endpoint,
233}
234
235/// Verify that a `ValueRef` resolves to something declared on the endpoint.
236/// Argv-context safety (forbidding unconstrained types as command arguments)
237/// is handled separately by [`check_argv_safety`].
238fn check_ref(r: &ValueRef, span: &Span, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
239    let ep = scope.ep;
240    let ok = match r {
241        ValueRef::Query(n) => scope.query.contains(n.as_str()),
242        ValueRef::Header(n) => scope.headers.contains(n.as_str()),
243        ValueRef::Path(n) => scope.path.contains(n.as_str()),
244        ValueRef::Var(n) => scope.vars.contains(n.as_str()),
245        ValueRef::Body { path: p } => match (&ep.body, p.is_empty()) {
246            (
247                Some(BodySpec::Json {
248                    schema: Some(schema),
249                    ..
250                }),
251                false,
252            ) => {
253                let head = &p[0];
254                schema.fields.iter().any(|f| &f.name == head)
255            }
256            (Some(BodySpec::Form { fields, .. }), false) if p.len() == 1 => {
257                fields.iter().any(|f| f.name == p[0])
258            }
259            (Some(_), true) => true,
260            (Some(BodySpec::Json { schema: None, .. }), false) => true,
261            _ => false,
262        },
263    };
264    if !ok {
265        diags.push(Diag::error(
266            format!("unresolved reference: {}", r.describe()),
267            span.clone(),
268            "no such field declared on this endpoint",
269        ));
270    }
271}
272
273/// Reject references that, used as a command argv token, would expose
274/// unconstrained user input directly to the command line.
275fn check_argv_safety(r: &ValueRef, span: &Span, ep: &Endpoint, diags: &mut Vec<Diag>) {
276    match r {
277        ValueRef::Query(name) => {
278            if let Some(f) = ep.query_params.iter().find(|f| &f.name == name) {
279                argv_unsafe_named(&f.ty, "query parameter", &f.name, span, diags);
280            }
281        }
282        ValueRef::Header(name) => {
283            if let Some(f) = ep.headers.iter().find(|f| &f.name == name) {
284                argv_unsafe_named(&f.ty, "header", &f.name, span, diags);
285            }
286        }
287        ValueRef::Path(name) => {
288            for seg in &ep.path_segments {
289                if let PathSegment::Param { name: n, ty, .. } = seg
290                    && n == name
291                {
292                    argv_unsafe_named(ty, "path parameter", n, span, diags);
293                }
294            }
295        }
296        ValueRef::Var(name) => {
297            if let Some(v) = ep.vars.iter().find(|v| &v.name == name)
298                && matches!(v.source, ValueSource::Header { .. })
299            {
300                diags.push(
301                    Diag::error(
302                        format!("VAR `{}` from a request header cannot be passed as argv", name),
303                        span.clone(),
304                        "declare a typed HEADER and reference it directly, or pipe the VAR via stdin",
305                    )
306                    .with_note("header-backed VAR values are request input and have no type declaration"),
307                );
308            }
309        }
310        ValueRef::Body { path: p } => match &ep.body {
311            Some(BodySpec::String { .. }) => diags.push(Diag::error(
312                "string body cannot be passed as argv",
313                span.clone(),
314                "use stdin (e.g. `$ | command`)",
315            )),
316            Some(BodySpec::Binary { .. }) if p.is_empty() => {}
317            Some(BodySpec::Binary { .. }) => diags.push(Diag::error(
318                "binary body fields cannot be passed as argv",
319                span.clone(),
320                "binary bodies do not have named fields",
321            )),
322            Some(BodySpec::Json { schema: None, .. }) => diags.push(Diag::error(
323                "untyped JSON body cannot be passed as argv",
324                span.clone(),
325                "declare a JSON schema with safe types, or use stdin",
326            )),
327            Some(BodySpec::Json {
328                schema: Some(schema),
329                ..
330            }) if !p.is_empty() => {
331                if let Some(field) = schema.fields.iter().find(|f| f.name == p[0]) {
332                    let inner = match &field.ty {
333                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
334                    };
335                    argv_unsafe_named(inner, "JSON field", &field.name, span, diags);
336                }
337            }
338            Some(BodySpec::Form { fields, .. }) if !p.is_empty() => {
339                if let Some(field) = fields.iter().find(|f| f.name == p[0]) {
340                    argv_unsafe_named(&field.ty, "form field", &field.name, span, diags);
341                }
342            }
343            _ => {}
344        },
345    }
346}
347
348/// Emit an argv-context error for a named field whose declared type is
349/// unconstrained (`string` or `json`). Other types are safe.
350fn argv_unsafe_named(ty: &TypeExpr, kind: &str, name: &str, span: &Span, diags: &mut Vec<Diag>) {
351    if matches!(ty, TypeExpr::String | TypeExpr::Json) {
352        diags.push(
353            Diag::error(
354                format!(
355                    "{} `{}` of type `{}` cannot be passed as argv",
356                    kind,
357                    name,
358                    ty.name()
359                ),
360                span.clone(),
361                "use a constrained type (regex, union, int, ...) or pipe via stdin",
362            )
363            .with_note("`string`/`json` are reserved for stdin to avoid command injection"),
364        );
365    }
366}
367
368fn check_executable_token(t: &ExecToken, diags: &mut Vec<Diag>) {
369    if token_contains_interpolation(t) {
370        diags.push(Diag::error(
371            "command executable cannot be interpolated",
372            token_span(t),
373            "make the program name a literal in the spec",
374        ));
375    }
376}
377
378fn token_contains_interpolation(t: &ExecToken) -> bool {
379    match t {
380        ExecToken::Text { parts, .. } => parts.iter().any(|p| matches!(p, TextPart::Interp(_))),
381        ExecToken::Group { pieces, .. } => pieces
382            .iter()
383            .flat_map(|piece| piece.parts.iter())
384            .any(|p| matches!(p, TextPart::Interp(_))),
385    }
386}
387
388fn token_span(t: &ExecToken) -> Span {
389    match t {
390        ExecToken::Text { span, .. } | ExecToken::Group { span, .. } => span.clone(),
391    }
392}
393
394fn check_token(t: &ExecToken, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
395    let parts_iter: Box<dyn Iterator<Item = (&Span, &Vec<TextPart>)>> = match t {
396        ExecToken::Text { parts, span, .. } => Box::new(std::iter::once((span, parts))),
397        ExecToken::Group { pieces, span } => Box::new(pieces.iter().map(move |p| (span, &p.parts))),
398    };
399    for (span, parts) in parts_iter {
400        for p in parts {
401            match p {
402                TextPart::Interp(r) => {
403                    check_ref(r, span, scope, diags);
404                    check_argv_safety(r, span, scope.ep, diags);
405                }
406                TextPart::Literal(s) => check_bare_reference_literals(s, span, scope, diags),
407            }
408        }
409    }
410}
411
412fn check_bare_reference_literals(
413    text: &str,
414    span: &Span,
415    scope: &RefScope<'_>,
416    diags: &mut Vec<Diag>,
417) {
418    for bare in bare_reference_candidates(text, scope) {
419        diags.push(
420            Diag::warning(
421                format!("bare Exec reference `{}` is not interpolated", bare),
422                span.clone(),
423                "wrap shell pieces in `[...]`, or use `{...}` inside a quoted string",
424            )
425            .with_note(format!(
426                "write `[{}]` for a shell piece, or escape it as `\\{}` for literal text",
427                bare, bare
428            )),
429        );
430    }
431}
432
433fn bare_reference_candidates(text: &str, scope: &RefScope<'_>) -> Vec<String> {
434    let mut out = Vec::new();
435    for (idx, ch) in text.char_indices() {
436        if is_escaped(text, idx) {
437            continue;
438        }
439        match ch {
440            '%' | ':' | '^' | '@' => {
441                let rest = &text[idx + ch.len_utf8()..];
442                let Some((name, _)) = take_ident(rest) else {
443                    continue;
444                };
445                if bare_named_ref_exists(ch, name, scope) {
446                    out.push(format!("{}{}", ch, name));
447                }
448            }
449            '$' => {
450                let rest = &text[idx + 1..];
451                if let Some(path) = rest.strip_prefix('.') {
452                    let Some((parts, _)) = take_body_path(path) else {
453                        continue;
454                    };
455                    if body_ref_exists(&parts, scope.ep) {
456                        out.push(format!("$.{}", parts.join(".")));
457                    }
458                } else if rest.is_empty() && scope.ep.body.is_some() {
459                    out.push("$".into());
460                }
461            }
462            _ => {}
463        }
464    }
465    out
466}
467
468fn bare_named_ref_exists(sigil: char, name: &str, scope: &RefScope<'_>) -> bool {
469    match sigil {
470        '%' => scope.query.contains(name),
471        ':' => scope.path.contains(name),
472        '^' => scope.headers.contains(name),
473        '@' => scope.vars.contains(name),
474        _ => false,
475    }
476}
477
478fn body_ref_exists(path: &[String], ep: &Endpoint) -> bool {
479    match &ep.body {
480        Some(BodySpec::Json {
481            schema: Some(schema),
482            ..
483        }) => path
484            .first()
485            .is_some_and(|head| schema.fields.iter().any(|f| &f.name == head)),
486        Some(BodySpec::Form { fields, .. }) if path.len() == 1 => {
487            fields.iter().any(|f| f.name == path[0])
488        }
489        Some(BodySpec::Json { schema: None, .. }) => true,
490        _ => false,
491    }
492}
493
494fn take_body_path(mut rest: &str) -> Option<(Vec<String>, &str)> {
495    let mut parts = Vec::new();
496    loop {
497        let (part, after) = take_ident(rest)?;
498        parts.push(part.to_string());
499        rest = after;
500        let Some(after_dot) = rest.strip_prefix('.') else {
501            break;
502        };
503        rest = after_dot;
504    }
505    Some((parts, rest))
506}
507
/// Split `rest` into its leading identifier and whatever follows it.
///
/// An identifier is a non-empty run of ASCII letters, ASCII digits, `_` and
/// `-`. Returns `None` when `rest` does not start with such a character.
fn take_ident(rest: &str) -> Option<(&str, &str)> {
    let is_ident_char = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
    // Byte offset of the first character that does not belong to the identifier.
    let end = rest
        .find(|c: char| !is_ident_char(c))
        .unwrap_or(rest.len());
    if end == 0 {
        None
    } else {
        Some(rest.split_at(end))
    }
}
516
/// Tell whether the character at byte offset `idx` is backslash-escaped.
///
/// It is escaped when it is directly preceded by an odd number of
/// backslashes; an even run means the backslashes escape each other.
/// `idx` must lie on a character boundary of `text`.
fn is_escaped(text: &str, idx: usize) -> bool {
    let before = &text[..idx];
    // A backslash is one byte, so the length difference is the run length.
    let trailing_backslashes = before.len() - before.trim_end_matches('\\').len();
    trailing_backslashes % 2 == 1
}
527}