safe-chains 0.132.0

Auto-allow safe, read-only bash commands in agentic coding tools
Documentation
use crate::verdict::{SafetyLevel, Verdict};
use crate::parse::{Token, WordSet};
use crate::policy::{self, FlagPolicy, FlagStyle};

/// Replaces every awk regex literal (`/.../`) in `s` with a single space so
/// that characters inside a regex (`|`, `>`, ...) are not mistaken for awk
/// operators by later scans.
///
/// A `/` only opens a regex literal when it cannot be a division operator,
/// i.e. when the previous significant character does not end an operand.
/// A `/` that follows an identifier, number, `)`, `]`, `"`, a postfix
/// `++`/`--`, or a just-closed regex literal is copied through as division.
/// Without this distinction, `{x=1/2; print > f; y=3/4}` would have the
/// whole span between the two divisions blanked out, hiding the redirect.
///
/// The heuristic is deliberately conservative: when in doubt the text is
/// kept (and therefore still scanned) rather than stripped. One consequence
/// is that a regex written directly after a keyword (`print /a|b/`) is not
/// stripped.
///
/// Inside a regex a backslash escapes the following character, so `\/` does
/// not terminate the literal. An unterminated regex consumes the rest of
/// the input.
fn strip_regex_literals(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    // True while the most recent significant token can end an operand, in
    // which case a following `/` must be division rather than a regex.
    let mut after_operand = false;
    // Last character emitted, used to recognise postfix `++` / `--`.
    let mut last: Option<char> = None;

    while let Some(c) = chars.next() {
        if c == '/' && !after_operand {
            out.push(' ');
            // Skip the regex body up to and including the closing slash.
            while let Some(r) = chars.next() {
                match r {
                    '\\' => {
                        // Escaped character: never a terminator.
                        chars.next();
                    }
                    '/' => break,
                    _ => {}
                }
            }
            // A regex literal is itself an operand, so `/re/ / 2` divides.
            after_operand = true;
            last = Some(' ');
            continue;
        }

        // Whitespace does not change what the previous token was.
        if !c.is_whitespace() {
            after_operand = c.is_alphanumeric()
                || matches!(c, '_' | ')' | ']' | '"')
                || (matches!(c, '+' | '-') && last == Some(c));
        }
        out.push(c);
        last = Some(c);
    }
    out
}

/// Heuristically reports whether `code` contains an awk output redirection.
///
/// Each `>` is examined in order:
/// * `>` immediately followed by `=` is taken as the `>=` comparison and
///   ignored;
/// * `>` immediately preceded by another `>` is taken as the append
///   operator `>>` and reported;
/// * otherwise the `>` is reported only when the text between the start of
///   the current statement (just after the nearest preceding `;` or `{`, or
///   the start of `code`) and the `>` contains `print` — which also covers
///   `printf`.
fn has_redirect(code: &str) -> bool {
    code.match_indices('>').any(|(pos, _)| {
        // `>=` is a comparison, never a redirect.
        if code[pos + 1..].starts_with('=') {
            return false;
        }
        let head = &code[..pos];
        // Second half of `>>`.
        if head.ends_with('>') {
            return true;
        }
        // Restrict the search to the statement that contains this `>`.
        let statement = head
            .rfind(|c: char| c == ';' || c == '{')
            .map_or(head, |cut| &head[cut + 1..]);
        statement.contains("print")
    })
}

/// Returns `true` when the token's text, as returned by
/// `Token::content_outside_double_quotes`, looks like it can run a command
/// or write output somewhere other than stdout.
///
/// The checks, in order:
/// 1. the text mentions `system` or `getline` anywhere (plain substring
///    match);
/// 2. after regex literals are blanked out, a `|` remains;
/// 3. after regex literals are blanked out, `has_redirect` finds an output
///    redirection.
fn awk_has_dangerous_construct(token: &Token) -> bool {
    let program = token.content_outside_double_quotes();

    let names_forbidden_builtin = ["system", "getline"]
        .iter()
        .any(|word| program.contains(*word));
    if names_forbidden_builtin {
        return true;
    }

    // Regex bodies may legitimately contain `|` and `>`; remove them first.
    let without_regex = strip_regex_literals(&program);
    if without_regex.contains('|') {
        return true;
    }
    has_redirect(&without_regex)
}

/// Flag policy handed to `policy::check` for `awk`, `gawk`, `mawk` and `nawk`.
///
/// `-f` / `--file` are not listed in either set; the test module in this
/// file expects `awk -f script.awk data.txt` to be denied.
// NOTE(review): both lists are in ASCII order; `WordSet::flags` may depend on
// that ordering — confirm before inserting or reordering entries.
static AWK_POLICY: FlagPolicy = FlagPolicy {
    // Flags listed as `standalone` (presumably: take no argument — confirm
    // against `FlagPolicy`).
    standalone: WordSet::flags(&[
        "--characters-as-bytes", "--copyright", "--gen-pot",
        "--lint", "--no-optimize", "--optimize",
        "--posix", "--re-interval", "--sandbox",
        "--traditional", "--use-lc-numeric", "--version",
        "-C", "-N", "-O", "-P", "-S", "-V",
        "-b", "-c", "-g", "-r", "-s", "-t",
    ]),
    // Flags listed as `valued` (presumably: consume a value, e.g. `-F:` or
    // `-v name=value` — confirm against `FlagPolicy`).
    valued: WordSet::flags(&[
        "--assign", "--field-separator",
        "-F", "-v",
    ]),
    // The remaining fields are interpreted by `policy::check`, which is
    // defined outside this file; see `FlagPolicy` for their exact meaning.
    bare: false,
    max_positional: None,
    flag_style: FlagStyle::Strict,
    numeric_dash: false,
};

/// Decides whether an awk invocation is safe to auto-allow.
///
/// `tokens` is the full command line; the first token (the command name) is
/// not inspected here. Every later token that does not start with `-` is
/// scanned with `awk_has_dangerous_construct`; if any of them is flagged the
/// command is rejected. Otherwise the verdict is whatever
/// `policy::check(tokens, &AWK_POLICY)` returns.
///
/// Tokens starting with `-` are exempt from the program scan, so a flag such
/// as `-F'|'` is not rejected for containing `|`.
// NOTE(review): a program text that itself starts with `-` (for instance
// after a `--` separator) skips the scan as well; confirm that
// `policy::check` rejects such invocations.
fn is_safe_awk(tokens: &[Token]) -> bool {
    // `skip(1)` rather than `tokens[1..]`: the slice index panics when
    // `tokens` is empty, whereas this simply scans nothing.
    let program_is_clean = tokens
        .iter()
        .skip(1)
        .all(|token| token.starts_with("-") || !awk_has_dangerous_construct(token));

    program_is_clean && policy::check(tokens, &AWK_POLICY)
}

/// Produces a verdict for the awk family of commands.
///
/// Returns `None` when `cmd` is not one of `awk`, `gawk`, `mawk` or `nawk`.
/// Otherwise returns `Some(Verdict::Allowed(SafetyLevel::Inert))` when
/// `is_safe_awk(tokens)` holds and `Some(Verdict::Denied)` when it does not.
pub(in crate::handlers::coreutils) fn dispatch(cmd: &str, tokens: &[Token]) -> Option<Verdict> {
    if !matches!(cmd, "awk" | "gawk" | "mawk" | "nawk") {
        return None;
    }

    let verdict = if is_safe_awk(tokens) {
        Verdict::Allowed(SafetyLevel::Inert)
    } else {
        Verdict::Denied
    };
    Some(verdict)
}

/// Builds the documentation entry for the awk family.
///
/// The result holds a single `CommandDoc` whose text is a fixed line naming
/// the program-level checks, followed by `AWK_POLICY.describe()`.
pub(in crate::handlers::coreutils) fn command_docs() -> Vec<crate::docs::CommandDoc> {
    let flag_summary = AWK_POLICY.describe();
    let body = format!(
        "- Program validated: system, getline, |, > constructs checked\n{}",
        flag_summary
    );
    let entry = crate::docs::CommandDoc::handler(
        "awk / gawk / mawk / nawk",
        "https://www.gnu.org/software/gawk/manual/gawk.html",
        body,
    );
    vec![entry]
}

/// Test-only table with one `CommandEntry::Custom` per command name handled
/// by `dispatch` (`awk`, `gawk`, `mawk`, `nawk`).
// NOTE(review): `valid_prefix` is presumably a sample invocation that the
// registry tests expect to be accepted — confirm against `CommandEntry`.
#[cfg(test)]
pub(in crate::handlers::coreutils) const REGISTRY: &[crate::handlers::CommandEntry] = &[
    crate::handlers::CommandEntry::Custom { cmd: "awk", valid_prefix: Some("awk '{print}'") },
    crate::handlers::CommandEntry::Custom { cmd: "gawk", valid_prefix: Some("gawk '{print}'") },
    crate::handlers::CommandEntry::Custom { cmd: "mawk", valid_prefix: Some("mawk '{print}'") },
    crate::handlers::CommandEntry::Custom { cmd: "nawk", valid_prefix: Some("nawk '{print}'") },
];

// End-to-end cases for the awk handler, driven through
// `crate::is_safe_command` on complete command lines.
//
// NOTE(review): `safe!` and `denied!` are defined outside this file;
// presumably each `name: "command"` entry becomes a test asserting that
// `check` returns `true` (safe) or `false` (denied) — confirm at the macro
// definitions.
#[cfg(test)]
mod tests {
    use crate::is_safe_command;
    // Local wrapper around `is_safe_command`.
    fn check(cmd: &str) -> bool { is_safe_command(cmd) }

    safe! {
        // Plain field printing, patterns, comparisons and arithmetic.
        awk_print_field: "awk '{print $1}' file.txt",
        awk_print_multiple_fields: "awk '{print $1, $3}' file.txt",
        awk_field_separator: "awk -F: '{print $1}' /etc/passwd",
        awk_pattern: "awk '/error/ {print $0}' log.txt",
        awk_nr: "awk 'NR==5' file.txt",
        awk_begin_end_safe: "awk 'BEGIN{n=0} {n++} END{print n}' file.txt",
        gawk_safe: "gawk '{print $2}' file.txt",
        awk_netstat_pipeline: "awk '{print $6}'",
        awk_comparison_gte: "awk 'NR>=10 {print}' file.txt",
        awk_comparison_gte_complex: "awk '{if(length($0)>=80) print NR\": \"$0}' file.txt",
        awk_multiple_comparisons: "awk 'NR>=5 && NR<=20' file.txt",
        awk_division: "awk '{print $1/100}' file.txt",
        awk_multiple_divisions: "awk '{avg=$1/10; pct=avg/total*100; print pct}' file.txt",
        awk_modulo_and_division: "awk '{print $1%10, $1/10}' file.txt",

        // Dangerous-looking text that only appears inside awk string literals.
        awk_string_literal_system: "awk 'BEGIN{print \"system failed\"}'",
        awk_string_literal_redirect: "awk '{print \">\"}'",
        awk_string_literal_pipe: "awk '{print \"a | b\"}'",
        awk_string_literal_getline: "awk 'BEGIN{print \"getline is a keyword\"}'",

        // `|` and `>` inside regex literals, plus `>` used as a comparison.
        awk_regex_alternation: "awk '/foo|bar/ {print}' file.txt",
        awk_regex_multi_alt: "awk '/^def |^class |^end/ {print}' file.rb",
        awk_regex_redirect_char: "awk '/a>b/ {print}' file.txt",
        awk_regex_complex: "awk '/^  def /{m=$0; l=NR} NR-l>=10 && /^  def |^class |^end/{print}' file.rb",
        awk_regex_single_char: "awk '/^#/ {print}' file.txt",
        awk_regex_escaped_slash: "awk '/path\\/to/ {print}' file.txt",
        awk_regex_pipe_and_gte: "awk '/error|warning/ && NR>=10 {print}' log.txt",
        awk_regex_empty: "awk '/^$/ {print NR}' file.txt",
        awk_regex_pipe_in_match: "awk '$0 ~ /foo|bar/ {print}' file.txt",
        awk_regex_multiple_patterns: "awk '/start/,/end/ {print}' file.txt",
        awk_regex_redirect_in_char_class: "awk '/[><=]/ {print}' file.txt",
        awk_regex_pipe_in_char_class: "awk '/[|&]/ {print}' file.txt",
        awk_regex_mixed_with_math: "awk '/error|warn/ {c++} END{print c>=0 ? c : 0}' log.txt",
        awk_no_program_just_flag: "awk --version",
        awk_comparison_gt: "awk 'length > 80' file.txt",
        awk_comparison_gt_print: "awk 'length > 80 {print}' file.txt",
        awk_comparison_nr_gt: "awk 'NR > 5' file.txt",
        awk_comparison_field_gt: "awk '$1 > 100 {print $0}' file.txt",
        awk_comparison_gt_conditional: "awk '{if(length($0) > 80) print NR}' file.txt",
        awk_comparison_gt_multi: "awk 'NR > 5 && NR < 20' file.txt",
    }

    denied! {
        // `system`, `getline`, pipes, output redirection and `-f` programs.
        awk_system_denied: "awk 'BEGIN{system(\"rm -rf /\")}'",
        awk_getline_denied: "awk '{getline line < \"/etc/shadow\"; print line}'",
        awk_pipe_output_denied: "awk '{print $0 | \"mail user@host\"}'",
        awk_redirect_denied: "awk '{print $0 > \"output.txt\"}'",
        awk_append_denied: "awk '{print $0 >> \"output.txt\"}'",
        awk_file_program_denied: "awk -f script.awk data.txt",
        gawk_system_denied: "gawk 'BEGIN{system(\"rm\")}'",
        awk_system_call_denied: "awk 'BEGIN{system(\"rm\")}'",
        awk_system_space_paren_denied: "awk 'BEGIN{system (\"rm\")}'",
        awk_pipe_outside_string_denied: "awk '{print $0 | \"cmd\"}'",
        awk_redirect_outside_string_denied: "awk '{print $0 > \"file\"}'",
        awk_system_trailing_help_denied: "awk 'BEGIN{system(\"rm\")}' --help",
        awk_system_trailing_version_denied: "awk 'BEGIN{system(\"rm\")}' --version",
        awk_system_between_division_denied: "awk '{x=1/2;system(\"rm\");y=3/4}' file",
        awk_getline_in_regex_context_denied: "awk '/foo/ {getline; print}' file",
        awk_getline_from_cmd_denied: "awk 'BEGIN{cmd=\"date\"; cmd | getline d; print d}'",
        awk_pipe_bare_denied: "awk '{cmd=\"sort\"; print $0 | cmd}' file",
        awk_redirect_bare_var_denied: "awk '{f=\"out.txt\"; print $0 > f}' file",
        awk_system_in_function_denied: "awk 'function run(){system(\"rm\")} BEGIN{run()}'",
        awk_getline_from_pipe_denied: "awk 'BEGIN{\"date\" | getline d; print d}'",
        awk_append_bare_denied: "awk '{print >> \"log.txt\"}' file",
        awk_redirect_no_space_denied: "awk '{print >\"out\"}' file",
        awk_pipe_no_space_denied: "awk '{print|\"cmd\"}' file",
    }
}