Skip to main content

mii_http/
check.rs

1//! Semantic validation of a parsed Spec.
2
3use crate::diag::Diag;
4use crate::spec::*;
5use std::collections::HashSet;
6
7pub fn check(spec: &Spec) -> Vec<Diag> {
8    tracing::debug!(endpoints = spec.endpoints.len(), "check::check");
9    let mut diags = Vec::new();
10    check_setup(&spec.setup, &mut diags);
11    let mut seen: HashSet<(Method, String)> = HashSet::new();
12    for ep in &spec.endpoints {
13        check_endpoint(ep, &spec.setup, &mut diags);
14        let key = (ep.method, normalize_path(&ep.path));
15        if !seen.insert(key) {
16            diags.push(Diag::warning(
17                format!("duplicate endpoint {} {}", ep.method.as_str(), ep.path),
18                ep.span.clone(),
19                "this overrides another endpoint with the same method+path",
20            ));
21        }
22    }
23    diags
24}
25
/// Reduce a route path to the form used for duplicate-endpoint detection.
///
/// The path is split on `/`. A segment that starts with `:` is cut down to
/// `:` plus the text up to (not including) its next `:`, which drops a
/// parameter type annotation such as `:id:int` -> `:id`. Every other segment
/// is kept verbatim, and the segments are re-joined with `/`.
fn normalize_path(path: &str) -> String {
    let mut normalized = String::with_capacity(path.len());
    for (index, segment) in path.split('/').enumerate() {
        if index > 0 {
            normalized.push('/');
        }
        match segment.strip_prefix(':') {
            Some(rest) => {
                // Keep only the parameter name; anything after a second ':'
                // is discarded.
                let name = rest.split_once(':').map_or(rest, |(name, _)| name);
                normalized.push(':');
                normalized.push_str(name);
            }
            None => normalized.push_str(segment),
        }
    }
    normalized
}
39
40fn check_setup(setup: &Setup, diags: &mut Vec<Diag>) {
41    if let Some(AuthSpec::BearerHeader { header, span }) = &setup.auth {
42        if header.is_empty() {
43            diags.push(Diag::error(
44                "AUTH header name is empty",
45                span.clone(),
46                "specify a header name",
47            ));
48        }
49        if setup.jwt_verifier.is_none() && setup.token_secret.is_none() {
50            diags.push(Diag::warning(
51                "AUTH Bearer configured without JWT_VERIFIER or TOKEN_SECRET",
52                span.clone(),
53                "tokens cannot be validated; any value will be accepted",
54            ).with_note("add `JWT_VERIFIER [ENV ...]` or `TOKEN_SECRET [ENV ...]`"));
55        }
56    }
57}
58
59fn check_endpoint(ep: &Endpoint, _setup: &Setup, diags: &mut Vec<Diag>) {
60    // unique names within scope
61    check_unique(&ep.query_params, "query parameter", diags);
62    check_unique(&ep.headers, "header", diags);
63    let var_names: HashSet<&str> = ep.vars.iter().map(|v| v.name.as_str()).collect();
64    if var_names.len() != ep.vars.len() {
65        diags.push(Diag::error(
66            "duplicate VAR name",
67            ep.span.clone(),
68            "VAR names must be unique within an endpoint",
69        ));
70    }
71
72    // path params
73    let path_params: HashSet<&str> = ep
74        .path_segments
75        .iter()
76        .filter_map(|s| match s {
77            PathSegment::Param { name, .. } => Some(name.as_str()),
78            _ => None,
79        })
80        .collect();
81
82    // body schema validation
83    if let Some(body) = &ep.body {
84        match body {
85            BodySpec::Form { fields, .. } => {
86                check_unique(fields, "form field", diags);
87                for f in fields {
88                    forbid_stdin_only_in_field(&f.ty, &f.name, "form field", diags, &f.span);
89                }
90            }
91            BodySpec::Json {
92                schema: Some(schema),
93                ..
94            } => {
95                let mut names = HashSet::new();
96                for f in &schema.fields {
97                    if !names.insert(f.name.clone()) {
98                        diags.push(Diag::error(
99                            format!("duplicate JSON field `{}`", f.name),
100                            f.span.clone(),
101                            "JSON field names must be unique",
102                        ));
103                    }
104                    let inner_ty = match &f.ty {
105                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
106                    };
107                    forbid_stdin_only_in_field(inner_ty, &f.name, "JSON field", diags, &f.span);
108                    security_check_type(inner_ty, &f.span, diags);
109                }
110            }
111            _ => {}
112        }
113    }
114
115    for q in &ep.query_params {
116        forbid_stdin_only_in_field(&q.ty, &q.name, "query parameter", diags, &q.span);
117        security_check_type(&q.ty, &q.span, diags);
118    }
119    for h in &ep.headers {
120        forbid_stdin_only_in_field(&h.ty, &h.name, "header", diags, &h.span);
121        security_check_type(&h.ty, &h.span, diags);
122    }
123    for seg in &ep.path_segments {
124        if let PathSegment::Param { name, ty, span } = seg {
125            forbid_stdin_only_in_field(ty, name, "path parameter", diags, span);
126            security_check_type(ty, span, diags);
127        }
128    }
129
130    // exec references resolve
131    let scope = RefScope {
132        query: ep.query_params.iter().map(|f| f.name.as_str()).collect(),
133        headers: ep.headers.iter().map(|f| f.name.as_str()).collect(),
134        path: path_params,
135        vars: var_names,
136        ep,
137    };
138    for stage in &ep.exec.pipeline {
139        match stage {
140            ExecStage::Source { reference, span } => {
141                check_ref(reference, span, &scope, diags);
142            }
143            ExecStage::Command { tokens, .. } => {
144                for t in tokens {
145                    check_token(t, &scope, diags);
146                }
147            }
148        }
149    }
150
151    // GET should not have a BODY
152    if ep.method == Method::Get && ep.body.is_some() {
153        diags.push(Diag::warning(
154            "GET endpoint declares a BODY",
155            ep.span.clone(),
156            "request bodies on GET requests are unusual",
157        ));
158    }
159}
160
161fn check_unique(fields: &[NamedField], kind: &str, diags: &mut Vec<Diag>) {
162    let mut seen = HashSet::new();
163    for f in fields {
164        if !seen.insert(f.name.clone()) {
165            diags.push(Diag::error(
166                format!("duplicate {} `{}`", kind, f.name),
167                f.span.clone(),
168                "names must be unique",
169            ));
170        }
171    }
172}
173
174fn forbid_stdin_only_in_field(
175    ty: &TypeExpr,
176    name: &str,
177    kind: &str,
178    diags: &mut Vec<Diag>,
179    span: &Span,
180) {
181    // `string` and `json` (untyped) are allowed to be *declared* on any
182    // field; they are reserved for stdin use, which is enforced at argv
183    // construction time by `check_argv_safety`. Only `binary`, which is
184    // body-only, is rejected at the declaration site.
185    if matches!(ty, TypeExpr::Binary) {
186        diags.push(Diag::error(
187            format!("`binary` type only allowed as BODY for {} `{}`", kind, name),
188            span.clone(),
189            "binary is allowed only on top-level BODY",
190        ));
191    }
192}
193
194fn security_check_type(ty: &TypeExpr, span: &Span, diags: &mut Vec<Diag>) {
195    if let TypeExpr::Regex { pattern, .. } = ty {
196        let suspicious = matches!(pattern.as_str(), ".*" | ".+" | "(.*)" | "(.+)" | "[\\s\\S]*");
197        if suspicious {
198            diags.push(Diag::warning(
199                format!("permissive regex `/{}/` accepts almost any input", pattern),
200                span.clone(),
201                "consider restricting the pattern",
202            ));
203        }
204        // also check for unanchored .* style
205        if pattern.contains(".*") || pattern.contains(".+") {
206            diags.push(Diag::warning(
207                "regex contains `.*`/`.+` which can match command-injection payloads",
208                span.clone(),
209                "constrain to expected character class (e.g. /[a-zA-Z0-9_-]+/)",
210            ));
211        }
212    }
213}
214
215/// All names declared on an endpoint that an Exec reference can resolve to,
216/// plus a back-pointer to the endpoint for body-schema lookups.
217struct RefScope<'a> {
218    query: HashSet<&'a str>,
219    headers: HashSet<&'a str>,
220    path: HashSet<&'a str>,
221    vars: HashSet<&'a str>,
222    ep: &'a Endpoint,
223}
224
225/// Verify that a `ValueRef` resolves to something declared on the endpoint.
226/// Argv-context safety (forbidding unconstrained types as command arguments)
227/// is handled separately by [`check_argv_safety`].
228fn check_ref(r: &ValueRef, span: &Span, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
229    let ep = scope.ep;
230    let ok = match r {
231        ValueRef::Query(n) => scope.query.contains(n.as_str()),
232        ValueRef::Header(n) => scope.headers.contains(n.as_str()),
233        ValueRef::Path(n) => scope.path.contains(n.as_str()),
234        ValueRef::Var(n) => scope.vars.contains(n.as_str()),
235        ValueRef::Body { path: p } => match (&ep.body, p.is_empty()) {
236            (Some(BodySpec::Json { schema: Some(schema), .. }), false) => {
237                let head = &p[0];
238                schema.fields.iter().any(|f| &f.name == head)
239            }
240            (Some(BodySpec::Form { fields, .. }), false) if p.len() == 1 => {
241                fields.iter().any(|f| &f.name == &p[0])
242            }
243            (Some(_), true) => true,
244            (Some(BodySpec::Json { schema: None, .. }), false) => true,
245            _ => false,
246        },
247    };
248    if !ok {
249        diags.push(Diag::error(
250            format!("unresolved reference: {}", r.describe()),
251            span.clone(),
252            "no such field declared on this endpoint",
253        ));
254    }
255}
256
257/// Reject references that, used as a command argv token, would expose
258/// unconstrained user input directly to the command line.
259fn check_argv_safety(r: &ValueRef, span: &Span, ep: &Endpoint, diags: &mut Vec<Diag>) {
260    match r {
261        ValueRef::Query(name) => {
262            if let Some(f) = ep.query_params.iter().find(|f| &f.name == name) {
263                argv_unsafe_named(&f.ty, "query parameter", &f.name, span, diags);
264            }
265        }
266        ValueRef::Header(name) => {
267            if let Some(f) = ep.headers.iter().find(|f| &f.name == name) {
268                argv_unsafe_named(&f.ty, "header", &f.name, span, diags);
269            }
270        }
271        ValueRef::Path(name) => {
272            for seg in &ep.path_segments {
273                if let PathSegment::Param { name: n, ty, .. } = seg {
274                    if n == name {
275                        argv_unsafe_named(ty, "path parameter", n, span, diags);
276                    }
277                }
278            }
279        }
280        // VAR values come from the server's environment / static literals;
281        // they are not user input, so they're allowed in argv.
282        ValueRef::Var(_) => {}
283        ValueRef::Body { path: p } => match &ep.body {
284            Some(BodySpec::String { .. }) => diags.push(Diag::error(
285                "string body cannot be passed as argv",
286                span.clone(),
287                "use stdin (e.g. `$ | command`)",
288            )),
289            Some(BodySpec::Binary { .. }) => diags.push(Diag::error(
290                "binary body cannot be passed as argv",
291                span.clone(),
292                "use stdin (e.g. `$ | command`)",
293            )),
294            Some(BodySpec::Json { schema: None, .. }) => diags.push(Diag::error(
295                "untyped JSON body cannot be passed as argv",
296                span.clone(),
297                "declare a JSON schema with safe types, or use stdin",
298            )),
299            Some(BodySpec::Json { schema: Some(schema), .. }) if !p.is_empty() => {
300                if let Some(field) = schema.fields.iter().find(|f| f.name == p[0]) {
301                    let inner = match &field.ty {
302                        JsonFieldType::Scalar(t) | JsonFieldType::Array(t) => t,
303                    };
304                    argv_unsafe_named(inner, "JSON field", &field.name, span, diags);
305                }
306            }
307            Some(BodySpec::Form { fields, .. }) if !p.is_empty() => {
308                if let Some(field) = fields.iter().find(|f| f.name == p[0]) {
309                    argv_unsafe_named(&field.ty, "form field", &field.name, span, diags);
310                }
311            }
312            _ => {}
313        },
314    }
315}
316
317/// Emit an argv-context error for a named field whose declared type is
318/// unconstrained (`string` or `json`). Other types are safe.
319fn argv_unsafe_named(
320    ty: &TypeExpr,
321    kind: &str,
322    name: &str,
323    span: &Span,
324    diags: &mut Vec<Diag>,
325) {
326    if matches!(ty, TypeExpr::String | TypeExpr::Json) {
327        diags.push(Diag::error(
328            format!(
329                "{} `{}` of type `{}` cannot be passed as argv",
330                kind,
331                name,
332                ty.name()
333            ),
334            span.clone(),
335            "use a constrained type (regex, union, int, ...) or pipe via stdin",
336        ).with_note("`string`/`json` are reserved for stdin to avoid command injection"));
337    }
338}
339
340fn check_token(t: &ExecToken, scope: &RefScope<'_>, diags: &mut Vec<Diag>) {
341    let parts_iter: Box<dyn Iterator<Item = (&Span, &Vec<TextPart>)>> = match t {
342        ExecToken::Text { parts, span } => Box::new(std::iter::once((span, parts))),
343        ExecToken::Group { pieces, span } => {
344            Box::new(pieces.iter().map(move |p| (span, &p.parts)))
345        }
346    };
347    for (span, parts) in parts_iter {
348        for p in parts {
349            if let TextPart::Interp(r) = p {
350                check_ref(r, span, scope, diags);
351                check_argv_safety(r, span, scope.ep, diags);
352            }
353        }
354    }
355}