Skip to main content

mii_http/
check.rs

1//! Semantic validation of a parsed Spec.
2
3use crate::diag::Diag;
4use crate::spec::*;
5use std::collections::HashSet;
6
7pub fn check(spec: &Spec) -> Vec<Diag> {
8    tracing::debug!(endpoints = spec.endpoints.len(), "check::check");
9    let mut diags = Vec::new();
10    check_setup(&spec.setup, &mut diags);
11    let mut seen: HashSet<(Method, String)> = HashSet::new();
12    for ep in &spec.endpoints {
13        check_endpoint(ep, &spec.setup, &mut diags);
14        let key = (ep.method, normalize_path(&ep.path));
15        if !seen.insert(key) {
16            diags.push(Diag::warning(
17                format!("duplicate endpoint {} {}", ep.method.as_str(), ep.path),
18                ep.span.clone(),
19                "this overrides another endpoint with the same method+path",
20            ));
21        }
22    }
23    diags
24}
25
/// Canonicalize a route path for duplicate detection.
///
/// A segment that starts with `:` is a parameter, optionally followed by a
/// `:type` annotation (`:id:int`). Only the `:name` part is kept so that two
/// routes differing solely in the annotation compare equal. All other
/// segments are copied through untouched.
fn normalize_path(path: &str) -> String {
    let mut normalized = String::with_capacity(path.len());
    for (index, segment) in path.split('/').enumerate() {
        if index != 0 {
            normalized.push('/');
        }
        match segment.strip_prefix(':') {
            Some(param) => {
                // Everything after the first `:` inside the parameter is the
                // type annotation; drop it.
                let name = param.split_once(':').map_or(param, |(name, _)| name);
                normalized.push(':');
                normalized.push_str(name);
            }
            None => normalized.push_str(segment),
        }
    }
    normalized
}
39
40fn check_setup(setup: &Setup, diags: &mut Vec<Diag>) {
41    if let Some(AuthSpec::BearerHeader { header, span }) = &setup.auth {
42        if header.is_empty() {
43            diags.push(Diag::error(
44                "AUTH header name is empty",
45                span.clone(),
46                "specify a header name",
47            ));
48        }
49        if setup.jwt_verifier.is_none() && setup.token_secret.is_none() {
50            diags.push(Diag::warning(
51                "AUTH Bearer configured without JWT_VERIFIER or TOKEN_SECRET",
52                span.clone(),
53                "tokens cannot be validated; any value will be accepted",
54            ).with_note("add `JWT_VERIFIER [ENV ...]` or `TOKEN_SECRET [ENV ...]`"));
55        }
56    }
57}
58
59fn check_endpoint(ep: &Endpoint, _setup: &Setup, diags: &mut Vec<Diag>) {
60    // unique names within scope
61    check_unique(&ep.query_params, "query parameter", diags);
62    check_unique(&ep.headers, "header", diags);
63    let var_names: HashSet<&str> = ep.vars.iter().map(|v| v.name.as_str()).collect();
64    if var_names.len() != ep.vars.len() {
65        diags.push(Diag::error(
66            "duplicate VAR name",
67            ep.span.clone(),
68            "VAR names must be unique within an endpoint",
69        ));
70    }
71
72    // path params
73    let path_params: HashSet<&str> = ep
74        .path_segments
75        .iter()
76        .filter_map(|s| match s {
77            PathSegment::Param { name, .. } => Some(name.as_str()),
78            _ => None,
79        })
80        .collect();
81
82    // body schema validation
83    if let Some(body) = &ep.body {
84        match body {
85            BodySpec::Form { fields, .. } => {
86                check_unique(fields, "form field", diags);
87                for f in fields {
88                    forbid_stdin_only_in_field(&f.ty, &f.name, "form field", diags, &f.span);
89                }
90            }
91            BodySpec::Json {
92                schema: Some(schema),
93                ..
94            } => {
95                let mut names = HashSet::new();
96                for f in &schema.fields {
97                    if !names.insert(f.name.clone()) {
98                        diags.push(Diag::error(
99                            format!("duplicate JSON field `{}`", f.name),
100                            f.span.clone(),
101                            "JSON field names must be unique",
102                        ));
103                    }
104                    let inner_ty = match &f.ty {
105                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
106                    };
107                    forbid_stdin_only_in_field(inner_ty, &f.name, "JSON field", diags, &f.span);
108                    security_check_type(inner_ty, &f.span, diags);
109                }
110            }
111            _ => {}
112        }
113    }
114
115    for q in &ep.query_params {
116        forbid_stdin_only_in_field(&q.ty, &q.name, "query parameter", diags, &q.span);
117        security_check_type(&q.ty, &q.span, diags);
118    }
119    for h in &ep.headers {
120        forbid_stdin_only_in_field(&h.ty, &h.name, "header", diags, &h.span);
121        security_check_type(&h.ty, &h.span, diags);
122    }
123    for seg in &ep.path_segments {
124        if let PathSegment::Param { name, ty, span } = seg {
125            forbid_stdin_only_in_field(ty, name, "path parameter", diags, span);
126            security_check_type(ty, span, diags);
127        }
128    }
129
130    // exec references resolve
131    let scope = RefScope {
132        query: ep.query_params.iter().map(|f| f.name.as_str()).collect(),
133        headers: ep.headers.iter().map(|f| f.name.as_str()).collect(),
134        path: path_params,
135        vars: var_names,
136        ep,
137    };
138    for stage in &ep.exec.pipeline {
139        match stage {
140            ExecStage::Source { reference, span } => {
141                check_ref(reference, span, &scope, diags);
142            }
143            ExecStage::Command { tokens, .. } => {
144                for t in tokens {
145                    check_token(t, &scope, diags);
146                }
147            }
148        }
149    }
150
151    // GET should not have a BODY
152    if ep.method == Method::Get && ep.body.is_some() {
153        diags.push(Diag::warning(
154            "GET endpoint declares a BODY",
155            ep.span.clone(),
156            "request bodies on GET requests are unusual",
157        ));
158    }
159}
160
161fn check_unique(fields: &[NamedField], kind: &str, diags: &mut Vec<Diag>) {
162    let mut seen = HashSet::new();
163    for f in fields {
164        if !seen.insert(f.name.clone()) {
165            diags.push(Diag::error(
166                format!("duplicate {} `{}`", kind, f.name),
167                f.span.clone(),
168                "names must be unique",
169            ));
170        }
171    }
172}
173
174fn forbid_stdin_only_in_field(
175    ty: &TypeExpr,
176    name: &str,
177    kind: &str,
178    diags: &mut Vec<Diag>,
179    span: &Span,
180) {
181    match ty {
182        TypeExpr::String => diags.push(Diag::error(
183            format!("`string` type not allowed for {} `{}`", kind, name),
184            span.clone(),
185            "use a regex, union, or another constrained type",
186        ).with_note("`string` is reserved for stdin to avoid command injection")),
187        TypeExpr::Json => diags.push(Diag::error(
188            format!("`json` type not allowed for {} `{}`", kind, name),
189            span.clone(),
190            "use a typed json schema instead",
191        )),
192        TypeExpr::Binary => diags.push(Diag::error(
193            format!("`binary` type only allowed as BODY for {} `{}`", kind, name),
194            span.clone(),
195            "binary is allowed only on top-level BODY",
196        )),
197        _ => {}
198    }
199}
200
201fn security_check_type(ty: &TypeExpr, span: &Span, diags: &mut Vec<Diag>) {
202    if let TypeExpr::Regex { pattern, .. } = ty {
203        let suspicious = matches!(pattern.as_str(), ".*" | ".+" | "(.*)" | "(.+)" | "[\\s\\S]*");
204        if suspicious {
205            diags.push(Diag::warning(
206                format!("permissive regex `/{}/` accepts almost any input", pattern),
207                span.clone(),
208                "consider restricting the pattern",
209            ));
210        }
211        // also check for unanchored .* style
212        if pattern.contains(".*") || pattern.contains(".+") {
213            diags.push(Diag::warning(
214                "regex contains `.*`/`.+` which can match command-injection payloads",
215                span.clone(),
216                "constrain to expected character class (e.g. /[a-zA-Z0-9_-]+/)",
217            ));
218        }
219    }
220}
221
/// All names declared on an endpoint that an Exec reference can resolve to,
/// plus a back-pointer to the endpoint for body-schema lookups.
struct RefScope<'a> {
    /// Names of the endpoint's declared query parameters.
    query: HashSet<&'a str>,
    /// Names of the endpoint's declared headers.
    headers: HashSet<&'a str>,
    /// Names of the parameter segments in the endpoint's path.
    path: HashSet<&'a str>,
    /// Names of the endpoint's declared VARs.
    vars: HashSet<&'a str>,
    /// The endpoint itself; body references are resolved against `ep.body`.
    ep: &'a Endpoint,
}
231
232/// Verify that a `ValueRef` resolves to something declared on the endpoint.
233/// Argv-context safety (forbidding unconstrained types as command arguments)
234/// is handled separately by [`check_argv_safety`].
235fn check_ref(r: &ValueRef, span: &Span, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
236    let ep = scope.ep;
237    let ok = match r {
238        ValueRef::Query(n) => scope.query.contains(n.as_str()),
239        ValueRef::Header(n) => scope.headers.contains(n.as_str()),
240        ValueRef::Path(n) => scope.path.contains(n.as_str()),
241        ValueRef::Var(n) => scope.vars.contains(n.as_str()),
242        ValueRef::Body { path: p } => match (&ep.body, p.is_empty()) {
243            (Some(BodySpec::Json { schema: Some(schema), .. }), false) => {
244                let head = &p[0];
245                schema.fields.iter().any(|f| &f.name == head)
246            }
247            (Some(BodySpec::Form { fields, .. }), false) if p.len() == 1 => {
248                fields.iter().any(|f| &f.name == &p[0])
249            }
250            (Some(_), true) => true,
251            (Some(BodySpec::Json { schema: None, .. }), false) => true,
252            _ => false,
253        },
254    };
255    if !ok {
256        diags.push(Diag::error(
257            format!("unresolved reference: {}", r.describe()),
258            span.clone(),
259            "no such field declared on this endpoint",
260        ));
261    }
262}
263
264/// Reject references that, used as a command argv token, would expose
265/// unconstrained user input directly to the command line.
266fn check_argv_safety(r: &ValueRef, span: &Span, ep: &Endpoint, diags: &mut Vec<Diag>) {
267    let ValueRef::Body { path: p } = r else {
268        return;
269    };
270    match &ep.body {
271        Some(BodySpec::String { .. }) => diags.push(Diag::error(
272            "string body cannot be passed as argv",
273            span.clone(),
274            "use stdin (e.g. `$ | command`)",
275        )),
276        Some(BodySpec::Binary { .. }) => diags.push(Diag::error(
277            "binary body cannot be passed as argv",
278            span.clone(),
279            "use stdin (e.g. `$ | command`)",
280        )),
281        Some(BodySpec::Json { schema: None, .. }) => diags.push(Diag::error(
282            "untyped JSON body cannot be passed as argv",
283            span.clone(),
284            "declare a JSON schema with safe types, or use stdin",
285        )),
286        Some(BodySpec::Json { schema: Some(schema), .. }) if !p.is_empty() => {
287            if let Some(field) = schema.fields.iter().find(|f| f.name == p[0]) {
288                let inner = match &field.ty {
289                    JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
290                };
291                if matches!(inner, TypeExpr::String | TypeExpr::Json) {
292                    diags.push(Diag::error(
293                        format!(
294                            "body field `{}` of type `{}` cannot be passed as argv",
295                            p.join("."),
296                            inner.name()
297                        ),
298                        span.clone(),
299                        "use a constrained type or stdin",
300                    ));
301                }
302            }
303        }
304        _ => {}
305    }
306}
307
308fn check_token(t: &ExecToken, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
309    let parts_iter: Box<dyn Iterator<Item = (&Span, &Vec<TextPart>)>> = match t {
310        ExecToken::Text { parts, span } => Box::new(std::iter::once((span, parts))),
311        ExecToken::Group { pieces, span } => {
312            Box::new(pieces.iter().map(move |p| (span, &p.parts)))
313        }
314    };
315    for (span, parts) in parts_iter {
316        for p in parts {
317            if let TextPart::Interp(r) = p {
318                check_ref(r, span, scope, diags);
319                check_argv_safety(r, span, scope.ep, diags);
320            }
321        }
322    }
323}