Skip to main content

zlayer_builder/windows/
deps.rs

1//! Static validation of Windows package-manager usage in parsed Dockerfiles.
2//!
3//! The `nanoserver` Windows base image is intentionally minimal: it ships no
4//! `PowerShell`, no `choco`, no `winget`, and only the bare `cmd.exe` shell.
5//! Users unfamiliar with Windows-container constraints routinely write
6//!
7//! ```dockerfile
8//! FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
9//! RUN choco install nginx -y
10//! ```
11//!
12//! which then fails deep inside the backend with an unhelpful
13//! `'choco' is not recognized as an internal or external command` error. This
14//! module catches that case at parse time and emits an actionable error
15//! pointing users at `servercore` (which has `PowerShell`) or a multi-stage
16//! build where the package install happens in a `servercore` stage and the
17//! artifacts are `COPY --from=...`'d into the final nanoserver stage.
18//!
19//! # Scope (first iteration)
20//!
21//! - Detects `choco` and `winget` used as the effective RUN command, handling:
22//!   - Exec form: `RUN ["choco", "install", "nginx"]`
23//!   - Shell form: `RUN choco install nginx`
24//!   - Via `cmd /c`: `RUN cmd /c choco install nginx`
25//!   - Via `PowerShell`: `RUN powershell -Command "choco install nginx"`
26//! - Flags only when the stage's base image is `nanoserver`. `servercore`
27//!   (which bundles `PowerShell`) and non-Windows bases are skipped.
28//! - Multi-stage Dockerfiles are validated per stage; each stage's own base
29//!   image drives its verdict. A `servercore` builder stage that runs `choco`
30//!   and `COPY --from=builder`s into a final `nanoserver` stage is the
31//!   recommended remediation and passes validation.
32//!
33//! Future iterations may auto-inject the multi-stage rewrite; for now the
34//! validator's job is to detect + error clearly.
35
36use thiserror::Error;
37
38use crate::dockerfile::{Dockerfile, ImageRef, Instruction, ShellOrExec};
39
40/// Errors surfaced by the Windows dependency validator.
41#[derive(Debug, Error)]
42pub enum DepsError {
43    /// The stage's base image is `nanoserver` but a `RUN` instruction tries
44    /// to invoke `choco` or `winget`, neither of which exist on nanoserver.
45    #[error(
46        "`{package_manager}` requires a Windows base image with PowerShell \
47         (e.g. mcr.microsoft.com/windows/servercore:ltsc2022). The nanoserver \
48         base image has no package manager. Change the FROM line to servercore, \
49         or install dependencies in a separate `servercore`-based build stage \
50         and COPY them into the final nanoserver stage. Offending RUN \
51         instruction #{instruction_index}."
52    )]
53    ChocoOnNanoserver {
54        /// Zero-based index of the offending `RUN` instruction within the
55        /// stage's instruction list.
56        instruction_index: usize,
57        /// The package manager that was detected (`"choco"` or `"winget"`).
58        package_manager: String,
59    },
60}
61
/// Executable names of the Windows package managers this validator flags.
///
/// Entries are lowercase and carry no `.exe` suffix; callers lowercase the
/// effective `RUN` command and strip a trailing `.exe` before comparing, so
/// the match is case-insensitive. Wrapping shells such as `cmd /c` or
/// `powershell -Command` are peeled off before the comparison happens.
const WINDOWS_PACKAGE_MANAGERS: &[&str] = &["choco", "winget"];
68
69/// Walk every stage in `dockerfile` and error if any `RUN` on a
70/// `nanoserver`-based stage uses `choco` or `winget`.
71///
72/// Non-Windows base images (Linux, scratch, other stage refs) and
73/// `servercore` bases are skipped — they either don't apply or are capable of
74/// running the package managers in question.
75///
76/// # Errors
77///
78/// Returns [`DepsError::ChocoOnNanoserver`] on the first offending `RUN`
79/// instruction encountered. First-match-wins: we do not accumulate multiple
80/// errors because fixing the first one usually reveals the rest.
81pub fn validate_dockerfile(dockerfile: &Dockerfile) -> Result<(), DepsError> {
82    for stage in &dockerfile.stages {
83        let base_kind = classify_base_image(&stage.base_image);
84
85        // Only nanoserver needs this guard. Servercore has `PowerShell` +
86        // package managers; non-Windows bases use apt/yum/apk/etc.
87        if base_kind != WindowsBase::Nanoserver {
88            continue;
89        }
90
91        for (idx, instr) in stage.instructions.iter().enumerate() {
92            if let Instruction::Run(run) = instr {
93                if let Some(pm) = detect_package_manager(&run.command) {
94                    return Err(DepsError::ChocoOnNanoserver {
95                        instruction_index: idx,
96                        package_manager: pm.to_string(),
97                    });
98                }
99            }
100        }
101    }
102
103    Ok(())
104}
105
/// Bucket that a stage's `FROM` image falls into, as far as this validator
/// cares.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WindowsBase {
    /// The image name contains `nanoserver` (compared case-insensitively).
    /// This is the only bucket whose `RUN` instructions get inspected.
    Nanoserver,
    /// The image name contains `servercore` or `windows/` (compared
    /// case-insensitively) without containing `nanoserver`. These images
    /// have `PowerShell`, so package-manager usage is left alone.
    ServerCoreOrOtherWindows,
    /// Everything else: Linux images, `scratch`, references to another
    /// stage, or any name not recognised as Windows. Validation is skipped —
    /// `apt`/`yum`/`apk` are fine.
    NotWindows,
}
119
120/// Inspect the raw base-image string of a stage and decide which bucket it
121/// falls into.
122///
123/// Matches are substring-based + case-insensitive so all of these resolve to
124/// `Nanoserver`:
125/// - `mcr.microsoft.com/windows/nanoserver:ltsc2022`
126/// - `mcr.microsoft.com/windows/nanoserver:ltsc2019`
127/// - `nanoserver` (short form, sometimes used with local registries)
128/// - `my-registry.example.com/MyProject/Nanoserver:latest` (odd-casing)
129fn classify_base_image(base: &ImageRef) -> WindowsBase {
130    let image_str = match base {
131        ImageRef::Registry { image, .. } => image.to_ascii_lowercase(),
132        // Stage refs and `scratch` are not Windows bases in their own right;
133        // if a later stage copies from them, that's still fine.
134        ImageRef::Stage(_) | ImageRef::Scratch => return WindowsBase::NotWindows,
135    };
136
137    if image_str.contains("nanoserver") {
138        WindowsBase::Nanoserver
139    } else if image_str.contains("servercore") || image_str.contains("windows/") {
140        WindowsBase::ServerCoreOrOtherWindows
141    } else {
142        WindowsBase::NotWindows
143    }
144}
145
146/// If the effective command in `cmd` is `choco` or `winget`, return which
147/// one. Otherwise return `None`.
148///
149/// Handles the common shell-wrapping idioms so that
150/// `powershell -Command "choco install nginx"`,
151/// `cmd /c winget install ...`, and plain `choco install ...` all trip.
152fn detect_package_manager(cmd: &ShellOrExec) -> Option<&'static str> {
153    match cmd {
154        ShellOrExec::Exec(args) => {
155            // Exec form: first arg is the executable. Strip an explicit
156            // `cmd`/`powershell` wrapper if present and look at the real
157            // target.
158            detect_in_tokens(args)
159        }
160        ShellOrExec::Shell(s) => {
161            let tokens = tokenize_shell(s);
162            detect_in_tokens(&tokens)
163        }
164    }
165}
166
167/// Given the already-tokenised argv-ish representation of a RUN command,
168/// peel off known wrappers (`cmd /c`, `powershell -Command`, …) and match
169/// the first surviving token against the package-manager allowlist.
170fn detect_in_tokens<S: AsRef<str>>(tokens: &[S]) -> Option<&'static str> {
171    let stripped: Vec<String> = strip_wrapper(tokens);
172    let first: &str = stripped.first()?.as_str();
173    let lower = first.to_ascii_lowercase();
174    // Also strip a trailing `.exe` so `choco.exe install ...` trips.
175    let normalised = lower.strip_suffix(".exe").unwrap_or(&lower);
176
177    WINDOWS_PACKAGE_MANAGERS
178        .iter()
179        .find(|pm| normalised == **pm)
180        .copied()
181}
182
183/// Remove a leading `cmd /c`, `cmd.exe /c`, `powershell -Command`,
184/// `pwsh -Command`, etc. from a tokenised RUN command. Returns the slice
185/// positioned at the effective command.
186///
187/// For `powershell -Command "choco install nginx"` we receive (after
188/// tokenisation) `["powershell", "-Command", "choco install nginx"]` — that
189/// third token is itself a shell-y string, so we re-tokenise and recurse.
190/// The recursion is bounded because each layer strictly shortens the
191/// argv list.
192fn strip_wrapper<S: AsRef<str>>(tokens: &[S]) -> Vec<String> {
193    if tokens.is_empty() {
194        return Vec::new();
195    }
196    let head = tokens[0].as_ref().to_ascii_lowercase();
197    let head = head.strip_suffix(".exe").unwrap_or(&head).to_string();
198
199    // cmd [/s] /c <rest>
200    if head == "cmd" {
201        // Skip any /S, /Q, /V:ON style switches, stop on /c (or /k) which
202        // introduces the payload.
203        let mut i = 1;
204        while i < tokens.len() {
205            let t = tokens[i].as_ref().to_ascii_lowercase();
206            if t == "/c" || t == "/k" {
207                i += 1;
208                break;
209            }
210            if t.starts_with('/') {
211                i += 1;
212                continue;
213            }
214            // Unrecognised — treat as start of payload.
215            break;
216        }
217        if i >= tokens.len() {
218            return Vec::new();
219        }
220        // The payload may be a single quoted string (joined) or multiple
221        // separate argv tokens. If it's exactly one token and it contains a
222        // space, re-tokenise it.
223        let rest: Vec<String> = tokens[i..].iter().map(|s| s.as_ref().to_string()).collect();
224        if rest.len() == 1 && rest[0].contains(char::is_whitespace) {
225            return tokenize_shell(&rest[0]);
226        }
227        return rest;
228    }
229
230    // powershell / pwsh -Command <rest>   (also -c, -EncodedCommand variants
231    // we don't try to decode).
232    if head == "powershell" || head == "pwsh" {
233        let mut i = 1;
234        while i < tokens.len() {
235            let t = tokens[i].as_ref().to_ascii_lowercase();
236            if t == "-command" || t == "-c" {
237                i += 1;
238                break;
239            }
240            if t.starts_with('-') {
241                // Skip flags like -NoProfile, -ExecutionPolicy, etc.
242                // -ExecutionPolicy takes a value; consume it too.
243                if t == "-executionpolicy" || t == "-file" {
244                    i += 2;
245                } else {
246                    i += 1;
247                }
248                continue;
249            }
250            break;
251        }
252        if i >= tokens.len() {
253            return Vec::new();
254        }
255        let rest: Vec<String> = tokens[i..].iter().map(|s| s.as_ref().to_string()).collect();
256        if rest.len() == 1 && rest[0].contains(char::is_whitespace) {
257            return tokenize_shell(&rest[0]);
258        }
259        return rest;
260    }
261
262    // No known wrapper — return tokens as-is.
263    tokens.iter().map(|s| s.as_ref().to_string()).collect()
264}
265
/// Very small shell tokenizer: splits on whitespace and respects single and
/// double quotes.
///
/// Quote characters delimit a token and are not part of it. Inside double
/// quotes a backslash escapes only `"`, `'` and `\`; before any other
/// character it is kept literally, so Windows paths such as
/// `"C:\tools\setup.exe"` survive intact. Outside double quotes a backslash
/// is always literal. Empty tokens (e.g. `""`) are dropped, and an
/// unterminated quote simply runs to the end of the input.
///
/// This is deliberately not a full POSIX or `cmd.exe` parser: the only thing
/// it has to get right is identifying the first real argv entry of a `RUN`
/// shell-form string. If a pathological line slips through, the worst case
/// is a missed `choco`/`winget` detection and the build later fails with the
/// same error the user sees today.
fn tokenize_shell(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '\\' if in_double => match chars.peek() {
                // Recognised escape: emit the escaped character and consume it.
                Some(&next) if next == '"' || next == '\'' || next == '\\' => {
                    current.push(next);
                    chars.next();
                }
                // Not an escape (or trailing backslash): the backslash is
                // data; the following character is handled on the next turn.
                _ => current.push('\\'),
            },
            c if c.is_whitespace() && !in_single && !in_double => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
            }
            c => current.push(c),
        }
    }
    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
312
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dockerfile::Dockerfile;

    /// Parse a Dockerfile literal, panicking if the fixture itself is broken.
    fn parse(s: &str) -> Dockerfile {
        Dockerfile::parse(s).expect("test Dockerfile should parse")
    }

    /// Validate `source`, require that it is rejected, and hand back the
    /// reported `(instruction_index, package_manager)` pair. `why` is the
    /// panic message used when validation unexpectedly succeeds.
    fn flagged(source: &str, why: &str) -> (usize, String) {
        match validate_dockerfile(&parse(source)).expect_err(why) {
            DepsError::ChocoOnNanoserver {
                instruction_index,
                package_manager,
            } => (instruction_index, package_manager),
        }
    }

    /// Validate `source` and require that it is accepted. `why` is the panic
    /// message used when validation unexpectedly fails.
    fn accepted(source: &str, why: &str) {
        validate_dockerfile(&parse(source)).expect(why);
    }

    // ------------------------------------------------------------------
    // End-to-end validation tests
    // ------------------------------------------------------------------

    #[test]
    fn nanoserver_plus_choco_errors() {
        let (index, manager) = flagged(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN choco install nginx -y
",
            "should flag choco on nanoserver",
        );
        assert_eq!(index, 0);
        assert_eq!(manager, "choco");
    }

    #[test]
    fn nanoserver_plus_winget_errors() {
        let (_, manager) = flagged(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN winget install --id Git.Git
",
            "should flag winget on nanoserver",
        );
        assert!(manager == "winget");
    }

    #[test]
    fn nanoserver_without_package_manager_is_ok() {
        accepted(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN cmd /c echo hello
COPY . C:\\app
",
            "plain cmd /c echo should pass",
        );
    }

    #[test]
    fn servercore_plus_choco_is_ok() {
        accepted(
            r"
FROM mcr.microsoft.com/windows/servercore:ltsc2022
RUN choco install nginx -y
",
            "choco on servercore should pass",
        );
    }

    #[test]
    fn servercore_plus_powershell_choco_is_ok() {
        accepted(
            r#"
FROM mcr.microsoft.com/windows/servercore:ltsc2022
RUN powershell -Command "choco install nginx -y"
"#,
            "powershell-wrapped choco on servercore should pass",
        );
    }

    #[test]
    fn nanoserver_plus_powershell_choco_errors() {
        // Nanoserver has no powershell either, but spotting the choco token
        // is this validator's job, so the instruction is flagged regardless.
        let (_, manager) = flagged(
            r#"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN powershell -Command "choco install nginx -y"
"#,
            "powershell-wrapped choco on nanoserver should still be flagged",
        );
        assert!(manager == "choco");
    }

    #[test]
    fn linux_base_is_skipped() {
        accepted(
            r"
FROM alpine:3.19
RUN apk add --no-cache nginx
",
            "alpine + apk has nothing to do with this validator",
        );
    }

    #[test]
    fn multi_stage_servercore_then_nanoserver_is_ok() {
        // Recommended fix: do the installs in a servercore stage, then COPY
        // the results into a slim nanoserver runtime stage.
        accepted(
            r"
FROM mcr.microsoft.com/windows/servercore:ltsc2022 AS builder
RUN choco install nginx -y

FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
COPY --from=builder C:\\nginx C:\\nginx
",
            "multi-stage canonical pattern should pass",
        );
    }

    #[test]
    fn nanoserver_cmd_c_choco_errors() {
        let (_, manager) = flagged(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN cmd /c choco install nginx -y
",
            "cmd /c wrapping choco should still trip",
        );
        assert!(manager == "choco");
    }

    #[test]
    fn nanoserver_exec_form_winget_errors() {
        // Exec form: RUN ["winget", "install", ...]
        let (_, manager) = flagged(
            r#"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN ["winget", "install", "--id", "Git.Git"]
"#,
            "exec-form winget on nanoserver should trip",
        );
        assert!(manager == "winget");
    }

    #[test]
    fn nanoserver_choco_exe_errors() {
        let (_, manager) = flagged(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN choco.exe install nginx -y
",
            "choco.exe should normalise to choco",
        );
        assert!(manager == "choco");
    }

    #[test]
    fn nanoserver_reports_correct_instruction_index() {
        let (index, _) = flagged(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
COPY . C:\\app
RUN cmd /c echo build step
RUN choco install nginx -y
",
            "should flag third instruction",
        );
        // Stage instructions are COPY, RUN echo, RUN choco, so choco is idx 2.
        assert_eq!(index, 2);
    }

    #[test]
    fn error_message_points_at_servercore() {
        let dockerfile = parse(
            r"
FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
RUN choco install nginx -y
",
        );
        let msg = validate_dockerfile(&dockerfile).unwrap_err().to_string();
        assert!(
            msg.contains("servercore"),
            "error message should steer users at servercore, got: {msg}"
        );
        assert!(
            msg.contains("COPY them into the final nanoserver stage"),
            "error message should mention the multi-stage remediation, got: {msg}"
        );
    }

    // ------------------------------------------------------------------
    // Tokenizer unit tests
    // ------------------------------------------------------------------

    #[test]
    fn tokenize_handles_double_quoted_payload() {
        let toks = tokenize_shell(r#"powershell -Command "choco install nginx -y""#);
        assert_eq!(
            toks,
            vec!["powershell", "-Command", "choco install nginx -y"]
        );
    }

    #[test]
    fn tokenize_handles_single_quoted_payload() {
        let toks = tokenize_shell(r"cmd /c 'choco install nginx'");
        assert_eq!(toks, vec!["cmd", "/c", "choco install nginx"]);
    }

    // ------------------------------------------------------------------
    // Wrapper-stripping unit tests
    // ------------------------------------------------------------------

    #[test]
    fn strip_wrapper_peels_cmd_c() {
        let stripped = strip_wrapper(&["cmd", "/c", "choco", "install", "nginx"]);
        assert_eq!(stripped, vec!["choco", "install", "nginx"]);
    }

    #[test]
    fn strip_wrapper_peels_powershell_joined_payload() {
        let stripped = strip_wrapper(&["powershell", "-Command", "choco install nginx"]);
        assert_eq!(stripped, vec!["choco", "install", "nginx"]);
    }

    #[test]
    fn strip_wrapper_leaves_non_wrappers_alone() {
        let stripped = strip_wrapper(&["apt-get", "install", "-y", "nginx"]);
        assert_eq!(stripped, vec!["apt-get", "install", "-y", "nginx"]);
    }
}