Skip to main content

zlayer_builder/windows/
deps.rs

1//! Static validation of Windows package-manager usage in parsed Dockerfiles.
2//!
3//! The `nanoserver` Windows base image is intentionally minimal: it ships no
4//! `PowerShell`, no `choco`, no `winget`, and only the bare `cmd.exe` shell.
5//! Users unfamiliar with Windows-container constraints routinely write
6//!
7//! ```dockerfile
8//! FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
9//! RUN choco install nginx -y
10//! ```
11//!
12//! which then fails deep inside the backend with an unhelpful
13//! `'choco' is not recognized as an internal or external command` error. This
14//! module catches that case at parse time and emits an actionable error
15//! pointing users at `servercore` (which has `PowerShell`) or a multi-stage
16//! build where the package install happens in a `servercore` stage and the
17//! artifacts are `COPY --from=...`'d into the final nanoserver stage.
18//!
19//! # Scope (first iteration)
20//!
21//! - Detects `choco` and `winget` used as the effective RUN command, handling:
22//!   - Exec form: `RUN ["choco", "install", "nginx"]`
23//!   - Shell form: `RUN choco install nginx`
24//!   - Via `cmd /c`: `RUN cmd /c choco install nginx`
25//!   - Via `PowerShell`: `RUN powershell -Command "choco install nginx"`
26//! - Flags only when the stage's base image is `nanoserver`. `servercore`
27//!   (which bundles `PowerShell`) and non-Windows bases are skipped.
28//! - Multi-stage Dockerfiles are validated per stage; each stage's own base
29//!   image drives its verdict. A `servercore` builder stage that runs `choco`
30//!   and `COPY --from=builder`s into a final `nanoserver` stage is the
31//!   recommended remediation and passes validation.
32//!
33//! Future iterations may auto-inject the multi-stage rewrite; for now the
34//! validator's job is to detect + error clearly.
35
36use thiserror::Error;
37
38use crate::dockerfile::{Dockerfile, DockerfileFromTarget, Instruction, ShellOrExec};
39
/// Errors surfaced by the Windows dependency validator.
///
/// Produced by [`validate_dockerfile`]; the `Display` text is the
/// user-facing remediation message.
#[derive(Debug, Error)]
pub enum DepsError {
    /// The stage's base image is `nanoserver` but a `RUN` instruction tries
    /// to invoke `choco` or `winget`, neither of which exist on nanoserver.
    ///
    /// Despite the variant name, this is raised for `winget` as well as
    /// `choco`; inspect `package_manager` to tell them apart.
    #[error(
        "`{package_manager}` requires a Windows base image with PowerShell \
         (e.g. mcr.microsoft.com/windows/servercore:ltsc2022). The nanoserver \
         base image has no package manager. Change the FROM line to servercore, \
         or install dependencies in a separate `servercore`-based build stage \
         and COPY them into the final nanoserver stage. Offending RUN \
         instruction #{instruction_index}."
    )]
    ChocoOnNanoserver {
        /// Zero-based index of the offending `RUN` instruction within the
        /// stage's instruction list. The index counts every instruction in
        /// the stage (e.g. `COPY`), not only `RUN` instructions.
        instruction_index: usize,
        /// The package manager that was detected (`"choco"` or `"winget"`).
        package_manager: String,
    },
}
61
/// Windows package-manager tokens we care about.
///
/// These are matched case-insensitively against the effective command in a
/// `RUN` instruction (ignoring wrapping shells like `cmd /c` or
/// `powershell -Command`).
///
/// The candidate token is lower-cased and has a trailing `.exe` removed
/// before comparison, so every entry here must be lowercase and must not
/// carry an extension.
const WINDOWS_PACKAGE_MANAGERS: &[&str] = &["choco", "winget"];
68
69/// Walk every stage in `dockerfile` and error if any `RUN` on a
70/// `nanoserver`-based stage uses `choco` or `winget`.
71///
72/// Non-Windows base images (Linux, scratch, other stage refs) and
73/// `servercore` bases are skipped — they either don't apply or are capable of
74/// running the package managers in question.
75///
76/// # Errors
77///
78/// Returns [`DepsError::ChocoOnNanoserver`] on the first offending `RUN`
79/// instruction encountered. First-match-wins: we do not accumulate multiple
80/// errors because fixing the first one usually reveals the rest.
81pub fn validate_dockerfile(dockerfile: &Dockerfile) -> Result<(), DepsError> {
82    for stage in &dockerfile.stages {
83        let base_kind = classify_base_image(&stage.base_image);
84
85        // Only nanoserver needs this guard. Servercore has `PowerShell` +
86        // package managers; non-Windows bases use apt/yum/apk/etc.
87        if base_kind != WindowsBase::Nanoserver {
88            continue;
89        }
90
91        for (idx, instr) in stage.instructions.iter().enumerate() {
92            if let Instruction::Run(run) = instr {
93                if let Some(pm) = detect_package_manager(&run.command) {
94                    return Err(DepsError::ChocoOnNanoserver {
95                        instruction_index: idx,
96                        package_manager: pm.to_string(),
97                    });
98                }
99            }
100        }
101    }
102
103    Ok(())
104}
105
/// Coarse classification of a stage's `FROM` image for this validator.
///
/// Only the `Nanoserver` bucket triggers validation; the other two exist to
/// make the "skip" reasons explicit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WindowsBase {
    /// Image reference mentions `nanoserver` (case-insensitive).
    Nanoserver,
    /// Image reference mentions `servercore` (case-insensitive) or otherwise
    /// looks like a Windows base (contains `windows/`, as in
    /// `mcr.microsoft.com/windows/...`). These have `PowerShell`.
    ServerCoreOrOtherWindows,
    /// Linux image, `scratch`, stage reference, or anything we don't
    /// recognise as Windows. Skip validation — `apt`/`yum`/`apk` are fine.
    NotWindows,
}
119
120/// Inspect the raw base-image string of a stage and decide which bucket it
121/// falls into.
122///
123/// Matches are substring-based + case-insensitive so all of these resolve to
124/// `Nanoserver`:
125/// - `mcr.microsoft.com/windows/nanoserver:ltsc2022`
126/// - `mcr.microsoft.com/windows/nanoserver:ltsc2019`
127/// - `nanoserver` (short form, sometimes used with local registries)
128/// - `my-registry.example.com/MyProject/Nanoserver:latest` (odd-casing)
129fn classify_base_image(base: &DockerfileFromTarget) -> WindowsBase {
130    let image_str = match base {
131        DockerfileFromTarget::Image(r) => r.repository().to_ascii_lowercase(),
132        // Stage refs and `scratch` are not Windows bases in their own right;
133        // if a later stage copies from them, that's still fine.
134        DockerfileFromTarget::Stage(_) | DockerfileFromTarget::Scratch => {
135            return WindowsBase::NotWindows;
136        }
137    };
138
139    if image_str.contains("nanoserver") {
140        WindowsBase::Nanoserver
141    } else if image_str.contains("servercore") || image_str.contains("windows/") {
142        WindowsBase::ServerCoreOrOtherWindows
143    } else {
144        WindowsBase::NotWindows
145    }
146}
147
148/// If the effective command in `cmd` is `choco` or `winget`, return which
149/// one. Otherwise return `None`.
150///
151/// Handles the common shell-wrapping idioms so that
152/// `powershell -Command "choco install nginx"`,
153/// `cmd /c winget install ...`, and plain `choco install ...` all trip.
154fn detect_package_manager(cmd: &ShellOrExec) -> Option<&'static str> {
155    match cmd {
156        ShellOrExec::Exec(args) => {
157            // Exec form: first arg is the executable. Strip an explicit
158            // `cmd`/`powershell` wrapper if present and look at the real
159            // target.
160            detect_in_tokens(args)
161        }
162        ShellOrExec::Shell(s) => {
163            let tokens = tokenize_shell(s);
164            detect_in_tokens(&tokens)
165        }
166    }
167}
168
169/// Given the already-tokenised argv-ish representation of a RUN command,
170/// peel off known wrappers (`cmd /c`, `powershell -Command`, …) and match
171/// the first surviving token against the package-manager allowlist.
172fn detect_in_tokens<S: AsRef<str>>(tokens: &[S]) -> Option<&'static str> {
173    let stripped: Vec<String> = strip_wrapper(tokens);
174    let first: &str = stripped.first()?.as_str();
175    let lower = first.to_ascii_lowercase();
176    // Also strip a trailing `.exe` so `choco.exe install ...` trips.
177    let normalised = lower.strip_suffix(".exe").unwrap_or(&lower);
178
179    WINDOWS_PACKAGE_MANAGERS
180        .iter()
181        .find(|pm| normalised == **pm)
182        .copied()
183}
184
185/// Remove a leading `cmd /c`, `cmd.exe /c`, `powershell -Command`,
186/// `pwsh -Command`, etc. from a tokenised RUN command. Returns the slice
187/// positioned at the effective command.
188///
189/// For `powershell -Command "choco install nginx"` we receive (after
190/// tokenisation) `["powershell", "-Command", "choco install nginx"]` — that
191/// third token is itself a shell-y string, so we re-tokenise and recurse.
192/// The recursion is bounded because each layer strictly shortens the
193/// argv list.
194fn strip_wrapper<S: AsRef<str>>(tokens: &[S]) -> Vec<String> {
195    if tokens.is_empty() {
196        return Vec::new();
197    }
198    let head = tokens[0].as_ref().to_ascii_lowercase();
199    let head = head.strip_suffix(".exe").unwrap_or(&head).to_string();
200
201    // cmd [/s] /c <rest>
202    if head == "cmd" {
203        // Skip any /S, /Q, /V:ON style switches, stop on /c (or /k) which
204        // introduces the payload.
205        let mut i = 1;
206        while i < tokens.len() {
207            let t = tokens[i].as_ref().to_ascii_lowercase();
208            if t == "/c" || t == "/k" {
209                i += 1;
210                break;
211            }
212            if t.starts_with('/') {
213                i += 1;
214                continue;
215            }
216            // Unrecognised — treat as start of payload.
217            break;
218        }
219        if i >= tokens.len() {
220            return Vec::new();
221        }
222        // The payload may be a single quoted string (joined) or multiple
223        // separate argv tokens. If it's exactly one token and it contains a
224        // space, re-tokenise it.
225        let rest: Vec<String> = tokens[i..].iter().map(|s| s.as_ref().to_string()).collect();
226        if rest.len() == 1 && rest[0].contains(char::is_whitespace) {
227            return tokenize_shell(&rest[0]);
228        }
229        return rest;
230    }
231
232    // powershell / pwsh -Command <rest>   (also -c, -EncodedCommand variants
233    // we don't try to decode).
234    if head == "powershell" || head == "pwsh" {
235        let mut i = 1;
236        while i < tokens.len() {
237            let t = tokens[i].as_ref().to_ascii_lowercase();
238            if t == "-command" || t == "-c" {
239                i += 1;
240                break;
241            }
242            if t.starts_with('-') {
243                // Skip flags like -NoProfile, -ExecutionPolicy, etc.
244                // -ExecutionPolicy takes a value; consume it too.
245                if t == "-executionpolicy" || t == "-file" {
246                    i += 2;
247                } else {
248                    i += 1;
249                }
250                continue;
251            }
252            break;
253        }
254        if i >= tokens.len() {
255            return Vec::new();
256        }
257        let rest: Vec<String> = tokens[i..].iter().map(|s| s.as_ref().to_string()).collect();
258        if rest.len() == 1 && rest[0].contains(char::is_whitespace) {
259            return tokenize_shell(&rest[0]);
260        }
261        return rest;
262    }
263
264    // No known wrapper — return tokens as-is.
265    tokens.iter().map(|s| s.as_ref().to_string()).collect()
266}
267
/// Very small shell tokenizer: splits on whitespace and respects single and
/// double quotes. Quote characters themselves are dropped from the output.
///
/// Escape handling is deliberately minimal: inside double quotes `\"` and
/// `\'` produce the quote character. Every other backslash is kept verbatim,
/// because in Windows command lines a backslash is a path separator, not an
/// escape — `"C:\tools\choco.exe"` must tokenise to `C:\tools\choco.exe`.
/// Outside double quotes backslashes are always literal.
///
/// Known limitations (acceptable for the purpose below):
/// - an empty quoted argument (`""`) produces no token;
/// - an unterminated quote swallows the rest of the input into one token;
/// - a quoted path ending in a backslash (`"C:\dir\"`) reads the final `\"`
///   as an escaped quote.
///
/// We don't need a full POSIX shell parser here: the only thing this function
/// has to get right is identifying the first real argv entry. If a
/// pathological RUN line slips through, the worst case is we miss a
/// `choco`/`winget` detection and the build later fails with the same
/// error the user sees today — i.e. we strictly improve on the status quo.
fn tokenize_shell(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '\\' if in_double => {
                // Consume the next char only when it is a quote; otherwise the
                // backslash is ordinary text (e.g. a Windows path separator).
                match chars.next_if(|&next| next == '"' || next == '\'') {
                    Some(quote) => current.push(quote),
                    None => current.push('\\'),
                }
            }
            c if c.is_whitespace() && !in_single && !in_double => {
                // Unquoted whitespace ends the current token (if any).
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
            }
            c => current.push(c),
        }
    }
    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
314
// Unit tests for the Windows dependency validator.
//
// The first group drives `validate_dockerfile` end-to-end through the real
// Dockerfile parser; the second group exercises the tokenizer and the
// wrapper-stripping helper directly.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dockerfile::Dockerfile;

    /// Parse a Dockerfile literal, panicking if the fixture itself is invalid.
    fn parse(s: &str) -> Dockerfile {
        Dockerfile::parse(s).expect("test Dockerfile should parse")
    }

    // Baseline: plain shell-form `choco` on nanoserver is rejected and the
    // error carries both the instruction position and the manager name.
    #[test]
    fn nanoserver_plus_choco_errors() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN choco install nginx -y
",
        );
        let err = validate_dockerfile(&df).expect_err("should flag choco on nanoserver");
        match err {
            DepsError::ChocoOnNanoserver {
                instruction_index,
                package_manager,
            } => {
                assert_eq!(instruction_index, 0);
                assert_eq!(package_manager, "choco");
            }
        }
    }

    // `winget` is reported through the same variant as `choco`.
    #[test]
    fn nanoserver_plus_winget_errors() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN winget install --id Git.Git
",
        );
        let err = validate_dockerfile(&df).expect_err("should flag winget on nanoserver");
        assert!(matches!(
            err,
            DepsError::ChocoOnNanoserver { ref package_manager, .. }
                if package_manager == "winget"
        ));
    }

    // A wrapper shell on its own is not an offence.
    #[test]
    fn nanoserver_without_package_manager_is_ok() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN cmd /c echo hello
COPY . C:\\app
",
        );
        validate_dockerfile(&df).expect("plain cmd /c echo should pass");
    }

    // Non-nanoserver Windows bases are never inspected.
    #[test]
    fn servercore_plus_choco_is_ok() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/servercore:ltsc2022
RUN choco install nginx -y
",
        );
        validate_dockerfile(&df).expect("choco on servercore should pass");
    }

    #[test]
    fn servercore_plus_powershell_choco_is_ok() {
        let df = parse(
            r#"
FROM mcr.microsoft.com/windows/servercore:ltsc2022
RUN powershell -Command "choco install nginx -y"
"#,
        );
        validate_dockerfile(&df).expect("powershell-wrapped choco on servercore should pass");
    }

    #[test]
    fn nanoserver_plus_powershell_choco_errors() {
        // Nanoserver does not actually ship powershell, but the choco token
        // is what this validator is responsible for. We still flag it.
        let df = parse(
            r#"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN powershell -Command "choco install nginx -y"
"#,
        );
        let err = validate_dockerfile(&df)
            .expect_err("powershell-wrapped choco on nanoserver should still be flagged");
        assert!(matches!(
            err,
            DepsError::ChocoOnNanoserver { ref package_manager, .. }
                if package_manager == "choco"
        ));
    }

    // Linux bases fall into the "not Windows" bucket and are skipped.
    #[test]
    fn linux_base_is_skipped() {
        let df = parse(
            r"
FROM alpine:3.19
RUN apk add --no-cache nginx
",
        );
        validate_dockerfile(&df).expect("alpine + apk has nothing to do with this validator");
    }

    #[test]
    fn multi_stage_servercore_then_nanoserver_is_ok() {
        // The canonical remediation: install tooling in a servercore stage,
        // COPY the resulting artifacts into a lean nanoserver runtime stage.
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/servercore:ltsc2022 AS builder
RUN choco install nginx -y

FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
COPY --from=builder C:\\nginx C:\\nginx
",
        );
        validate_dockerfile(&df).expect("multi-stage canonical pattern should pass");
    }

    // `cmd /c` is looked through to find the effective command.
    #[test]
    fn nanoserver_cmd_c_choco_errors() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN cmd /c choco install nginx -y
",
        );
        let err = validate_dockerfile(&df).expect_err("cmd /c wrapping choco should still trip");
        assert!(matches!(
            err,
            DepsError::ChocoOnNanoserver { ref package_manager, .. }
                if package_manager == "choco"
        ));
    }

    #[test]
    fn nanoserver_exec_form_winget_errors() {
        // Exec form: RUN ["winget", "install", ...]
        let df = parse(
            r#"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN ["winget", "install", "--id", "Git.Git"]
"#,
        );
        let err = validate_dockerfile(&df).expect_err("exec-form winget on nanoserver should trip");
        assert!(matches!(
            err,
            DepsError::ChocoOnNanoserver { ref package_manager, .. }
                if package_manager == "winget"
        ));
    }

    // A trailing `.exe` on the executable name is ignored when matching.
    #[test]
    fn nanoserver_choco_exe_errors() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN choco.exe install nginx -y
",
        );
        let err = validate_dockerfile(&df).expect_err("choco.exe should normalise to choco");
        assert!(matches!(
            err,
            DepsError::ChocoOnNanoserver { ref package_manager, .. }
                if package_manager == "choco"
        ));
    }

    // The reported index counts every instruction in the stage, not just RUNs.
    #[test]
    fn nanoserver_reports_correct_instruction_index() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
COPY . C:\\app
RUN cmd /c echo build step
RUN choco install nginx -y
",
        );
        let err = validate_dockerfile(&df).expect_err("should flag third instruction");
        match err {
            DepsError::ChocoOnNanoserver {
                instruction_index, ..
            } => {
                // Instructions in the stage: COPY, RUN echo, RUN choco → idx 2.
                assert_eq!(instruction_index, 2);
            }
        }
    }

    // Guards the user-facing wording: both remediations must be mentioned.
    #[test]
    fn error_message_points_at_servercore() {
        let df = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN choco install nginx -y
",
        );
        let err = validate_dockerfile(&df).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("servercore"),
            "error message should steer users at servercore, got: {msg}"
        );
        assert!(
            msg.contains("COPY them into the final nanoserver stage"),
            "error message should mention the multi-stage remediation, got: {msg}"
        );
    }

    // ------------------------------------------------------------------
    // Tokenizer unit tests
    // ------------------------------------------------------------------

    // Quotes group words into one token and are removed from the output.
    #[test]
    fn tokenize_handles_double_quoted_payload() {
        let toks = tokenize_shell(r#"powershell -Command "choco install nginx -y""#);
        assert_eq!(toks.len(), 3);
        assert_eq!(toks[0], "powershell");
        assert_eq!(toks[1], "-Command");
        assert_eq!(toks[2], "choco install nginx -y");
    }

    #[test]
    fn tokenize_handles_single_quoted_payload() {
        let toks = tokenize_shell(r"cmd /c 'choco install nginx'");
        assert_eq!(toks, vec!["cmd", "/c", "choco install nginx"]);
    }

    // Payload already split into separate argv tokens.
    #[test]
    fn strip_wrapper_peels_cmd_c() {
        let toks = vec!["cmd", "/c", "choco", "install", "nginx"];
        let stripped = strip_wrapper(&toks);
        assert_eq!(stripped, vec!["choco", "install", "nginx"]);
    }

    // Payload delivered as one joined token, which must be re-tokenised.
    #[test]
    fn strip_wrapper_peels_powershell_joined_payload() {
        let toks = vec!["powershell", "-Command", "choco install nginx"];
        let stripped = strip_wrapper(&toks);
        assert_eq!(stripped, vec!["choco", "install", "nginx"]);
    }

    // Commands that are not a known wrapper pass through unchanged.
    #[test]
    fn strip_wrapper_leaves_non_wrappers_alone() {
        let toks = vec!["apt-get", "install", "-y", "nginx"];
        let stripped = strip_wrapper(&toks);
        assert_eq!(stripped, vec!["apt-get", "install", "-y", "nginx"]);
    }
}