use crate::cli::args::DatasetExportFormat;
use crate::models::{Config, Error, Result};
use std::path::{Path, PathBuf};
pub(crate) fn corpus_cwe_mapping(json: bool) -> Result<()> {
use crate::cli::color::*;
use crate::corpus::cwe_mapping;
if json {
let entries: Vec<serde_json::Value> = cwe_mapping::CWE_MAPPINGS
.iter()
.map(|m| {
serde_json::json!({
"rule": m.rule,
"pattern": m.pattern,
"cwe": m.cwe,
"cwe_id": m.cwe_id,
"cvss_score": m.cvss_score,
"cvss_severity": m.cvss_severity.as_str(),
"owasp": m.owasp,
})
})
.collect();
let ood: Vec<serde_json::Value> = cwe_mapping::OOD_CWES
.iter()
.map(|o| {
serde_json::json!({
"cwe": o.cwe,
"cwe_id": o.cwe_id,
"name": o.name,
"description": o.description,
"cvss_score": o.cvss_score,
"cvss_severity": o.cvss_severity.as_str(),
})
})
.collect();
let output = serde_json::json!({
"linter_rules": entries,
"ood_cwes": ood,
"summary": {
"total_rules": cwe_mapping::CWE_MAPPINGS.len(),
"unique_cwes": cwe_mapping::linter_cwe_ids().len(),
"ood_cwes": cwe_mapping::OOD_CWES.len(),
"ood_disjoint": cwe_mapping::verify_ood_disjoint(),
}
});
println!("{}", serde_json::to_string_pretty(&output)?);
} else {
println!("{BOLD}CWE Taxonomy Mapping (SSC v12 \u{00a7}14.2){RESET}");
println!();
let owasp_hdr = "OWASP";
println!(
" {DIM}{:<8} {:<35} {:<10} {:>5} {:<10} {}{RESET}",
"Rule", "Pattern", "CWE", "CVSS", "Severity", owasp_hdr
);
println!(" {}", "-".repeat(90));
for m in cwe_mapping::CWE_MAPPINGS {
let severity_color = match m.cvss_severity {
cwe_mapping::CvssSeverity::Critical => RED,
cwe_mapping::CvssSeverity::High => YELLOW,
cwe_mapping::CvssSeverity::Medium => CYAN,
cwe_mapping::CvssSeverity::Low => DIM,
cwe_mapping::CvssSeverity::None => DIM,
};
println!(
" {:<8} {:<35} {:<10} {severity_color}{:>5.1}{RESET} {severity_color}{:<10}{RESET} {}",
m.rule, m.pattern, m.cwe, m.cvss_score, m.cvss_severity, m.owasp
);
}
println!();
println!("{BOLD}OOD CWEs (eval-only, not in linter){RESET}");
println!();
for o in cwe_mapping::OOD_CWES {
println!(
" {:<10} {:<40} {:>5.1} ({})",
o.cwe, o.name, o.cvss_score, o.cvss_severity
);
}
println!();
println!("{DIM}{}{RESET}", cwe_mapping::summary());
}
Ok(())
}
pub(crate) fn corpus_label(input: PathBuf, output: Option<PathBuf>) -> Result<()> {
use crate::cli::color::*;
use crate::corpus::cwe_mapping;
use crate::linter;
let file = std::fs::File::open(&input)?;
let reader = std::io::BufReader::new(file);
let writer: Box<dyn std::io::Write> = if let Some(ref path) = output {
Box::new(std::fs::File::create(path)?)
} else {
Box::new(std::io::stdout())
};
let mut buf = std::io::BufWriter::new(writer);
let mut total = 0usize;
let mut unsafe_count = 0usize;
for line in std::io::BufRead::lines(reader) {
let line = line?;
if line.trim().is_empty() {
continue;
}
let entry: serde_json::Value = serde_json::from_str(&line)
.map_err(|e| Error::Validation(format!("Invalid JSON on line {}: {e}", total + 1)))?;
let script = entry
.get("script")
.or_else(|| entry.get("text"))
.or_else(|| entry.get("input"))
.or_else(|| entry.get("unsafe_script"))
.and_then(|v| v.as_str())
.ok_or_else(|| {
Error::Validation(format!(
"Line {}: missing 'script', 'text', 'input', or 'unsafe_script' field",
total + 1
))
})?;
let lint_result = linter::lint_shell(script);
let security_diags: Vec<&linter::Diagnostic> = lint_result
.diagnostics
.iter()
.filter(|d| {
d.code.starts_with("SEC") || d.code.starts_with("DET") || d.code.starts_with("IDEM")
})
.collect();
let is_unsafe = !security_diags.is_empty();
let label = if is_unsafe { 1 } else { 0 };
let findings: Vec<serde_json::Value> = security_diags
.iter()
.map(|d| {
let cwe_info = cwe_mapping::lookup_rule(&d.code);
serde_json::json!({
"rule": d.code,
"message": d.message,
"cwe": cwe_info.map(|c| c.cwe).unwrap_or("unknown"),
"cwe_id": cwe_info.map(|c| c.cwe_id).unwrap_or(0),
"cvss_score": cwe_info.map(|c| c.cvss_score).unwrap_or(0.0),
})
})
.collect();
let mut labeled = entry.clone();
if let Some(obj) = labeled.as_object_mut() {
obj.insert("label".to_string(), serde_json::json!(label));
obj.insert(
"classification".to_string(),
serde_json::json!(if is_unsafe { "unsafe" } else { "safe" }),
);
obj.insert("findings".to_string(), serde_json::json!(findings));
obj.insert(
"finding_count".to_string(),
serde_json::json!(security_diags.len()),
);
}
serde_json::to_writer(&mut buf, &labeled)?;
std::io::Write::write_all(&mut buf, b"\n")?;
total += 1;
if is_unsafe {
unsafe_count += 1;
}
}
std::io::Write::flush(&mut buf)?;
if output.is_some() {
eprintln!("{BOLD}Label Summary{RESET}");
eprintln!(" Total: {total}");
eprintln!(
" Safe: {} ({:.1}%)",
total - unsafe_count,
100.0 * (total - unsafe_count) as f64 / total.max(1) as f64
);
eprintln!(
" Unsafe: {unsafe_count} ({:.1}%)",
100.0 * unsafe_count as f64 / total.max(1) as f64
);
}
Ok(())
}
/// Exports the full corpus as benchmark records in JSON Lines form.
///
/// Loads the full corpus registry, asks `benchmark_export::export_benchmark`
/// for the entries (passing `limit` through unchanged), and writes one JSON
/// object per line to `output`, or to stdout when `output` is `None`. When
/// writing to a file, a summary (totals, safe/unsafe split, per-language
/// counts, number of unique CWEs, destination path) is printed to stderr.
///
/// # Errors
///
/// Returns an error if the output file cannot be created, or if serializing or
/// writing an entry fails.
pub(crate) fn corpus_export_benchmark(output: Option<PathBuf>, limit: Option<usize>) -> Result<()> {
    use crate::cli::color::*;
    use crate::corpus::benchmark_export;
    use crate::corpus::registry::CorpusRegistry;

    let registry = CorpusRegistry::load_full();
    let (entries, summary) = benchmark_export::export_benchmark(&registry, limit);

    let writer: Box<dyn std::io::Write> = if let Some(ref path) = output {
        Box::new(std::fs::File::create(path)?)
    } else {
        Box::new(std::io::stdout())
    };
    let mut buf = std::io::BufWriter::new(writer);
    for entry in &entries {
        serde_json::to_writer(&mut buf, entry)?;
        std::io::Write::write_all(&mut buf, b"\n")?;
    }
    // Flush explicitly so write errors surface instead of being lost on drop.
    std::io::Write::flush(&mut buf)?;

    // The summary is only shown when records went to a file.
    if let Some(path) = &output {
        // `max(1)` keeps the percentages finite for an empty export.
        let denominator = summary.total.max(1) as f64;
        eprintln!("{BOLD}ShellSafetyBench Export Summary{RESET}");
        eprintln!(" Total: {}", summary.total);
        eprintln!(
            " Safe: {} ({:.1}%)",
            summary.safe,
            100.0 * summary.safe as f64 / denominator
        );
        eprintln!(
            " Unsafe: {} ({:.1}%)",
            summary.unsafe_count,
            100.0 * summary.unsafe_count as f64 / denominator
        );
        eprintln!(" By lang:");
        for (lang, count) in &summary.by_lang {
            eprintln!(" {lang}: {count}");
        }
        eprintln!(" Unique CWEs: {}", summary.by_cwe.len());
        eprintln!(" Written to: {}", path.display());
    }
    Ok(())
}
/// Prints the domain-specific corpus category report with colored status words.
///
/// Loads the full corpus registry, runs it through a `CorpusRunner` built from
/// the default `Config`, categorizes the results, and prints the text produced
/// by `domain_categories::format_categories_report`. The first two lines of
/// that text are dropped and this function prints its own title instead.
///
/// The visible code never produces an `Err`; the function returns `Ok(())`.
pub(crate) fn corpus_domain_categories() -> Result<()> {
    use crate::cli::color::*;
    use crate::corpus::domain_categories;
    use crate::corpus::registry::CorpusRegistry;
    use crate::corpus::runner::CorpusRunner;

    let registry = CorpusRegistry::load_full();
    let runner = CorpusRunner::new(Config::default());
    let score = runner.run(&registry);
    let stats = domain_categories::categorize_corpus(&registry, &score.results);

    println!("{BOLD}Domain-Specific Corpus Categories (\u{00a7}11.11){RESET}");
    println!();

    // Build the colored replacements once rather than once per report line.
    let complete = format!("{GREEN}COMPLETE{RESET}");
    let empty = format!("{RED}EMPTY{RESET}");
    let sparse = format!("{YELLOW}SPARSE{RESET}");
    let partial = format!("{CYAN}PARTIAL{RESET}");

    let report = domain_categories::format_categories_report(&stats);
    for line in report.lines().skip(2) {
        let colored = line
            .replace("COMPLETE", &complete)
            .replace("EMPTY", &empty)
            .replace("SPARSE", &sparse)
            .replace("PARTIAL", &partial);
        println!(" {colored}");
    }
    Ok(())
}
/// Prints the domain coverage analysis with colored status words.
///
/// Loads the full corpus registry, runs it through a `CorpusRunner` built from
/// the default `Config`, categorizes the results, and prints the text produced
/// by `domain_categories::format_domain_coverage`. The first two lines of that
/// text are dropped and this function prints its own title instead.
///
/// The visible code never produces an `Err`; the function returns `Ok(())`.
pub(crate) fn corpus_domain_coverage() -> Result<()> {
    use crate::cli::color::*;
    use crate::corpus::domain_categories;
    use crate::corpus::registry::CorpusRegistry;
    use crate::corpus::runner::CorpusRunner;

    let registry = CorpusRegistry::load_full();
    let runner = CorpusRunner::new(Config::default());
    let score = runner.run(&registry);
    let stats = domain_categories::categorize_corpus(&registry, &score.results);

    println!("{BOLD}Domain Coverage Analysis (\u{00a7}11.11){RESET}");
    println!();

    // Build the colored replacements once rather than once per report line.
    let complete = format!("{GREEN}COMPLETE{RESET}");
    let empty = format!("{RED}EMPTY{RESET}");
    let sparse = format!("{YELLOW}SPARSE{RESET}");
    let partial = format!("{CYAN}PARTIAL{RESET}");
    let gaps = format!("{YELLOW}Coverage Gaps:{RESET}");

    let report = domain_categories::format_domain_coverage(&stats, &score);
    for line in report.lines().skip(2) {
        let colored = line
            .replace("COMPLETE", &complete)
            .replace("EMPTY", &empty)
            .replace("SPARSE", &sparse)
            .replace("PARTIAL", &partial)
            .replace("Coverage Gaps:", &gaps);
        println!(" {colored}");
    }
    Ok(())
}
/// Prints the cross-category quality matrix with colored cell markers.
///
/// Loads the full corpus registry, runs it through a `CorpusRunner` built from
/// the default `Config`, categorizes the results, and prints the text produced
/// by `domain_categories::format_quality_matrix`. The first two lines of that
/// text are dropped and this function prints its own title instead. `REQ` is
/// shown in green and `N/A` is dimmed.
///
/// The visible code never produces an `Err`; the function returns `Ok(())`.
pub(crate) fn corpus_domain_matrix() -> Result<()> {
    use crate::cli::color::*;
    use crate::corpus::domain_categories;
    use crate::corpus::registry::CorpusRegistry;
    use crate::corpus::runner::CorpusRunner;

    let registry = CorpusRegistry::load_full();
    let runner = CorpusRunner::new(Config::default());
    let score = runner.run(&registry);
    let stats = domain_categories::categorize_corpus(&registry, &score.results);

    println!("{BOLD}Cross-Category Quality Matrix (\u{00a7}11.11.9){RESET}");
    println!();

    // Build the colored replacements once rather than once per report line.
    let required = format!("{GREEN}REQ{RESET}");
    let not_applicable = format!("{DIM}N/A{RESET}");

    let report = domain_categories::format_quality_matrix(&stats);
    for line in report.lines().skip(2) {
        let colored = line
            .replace("REQ", &required)
            .replace("N/A", &not_applicable);
        println!(" {colored}");
    }
    Ok(())
}
/// Prints the tier-weighted corpus scoring report with highlighted key figures.
///
/// Loads the full corpus registry, runs it through a `CorpusRunner` built from
/// the default `Config`, analyzes the score per tier, and prints the text
/// produced by `tier_analysis::format_tier_weights`. The first two lines of
/// that text are dropped and this function prints its own title instead.
/// `100.0%` is shown in green; the `Weighted Score:` and `Weight Effect:`
/// labels are shown in bold.
///
/// The visible code never produces an `Err`; the function returns `Ok(())`.
pub(crate) fn corpus_tier_weights() -> Result<()> {
    use crate::cli::color::*;
    use crate::corpus::registry::CorpusRegistry;
    use crate::corpus::runner::CorpusRunner;
    use crate::corpus::tier_analysis;

    let registry = CorpusRegistry::load_full();
    let runner = CorpusRunner::new(Config::default());
    let score = runner.run(&registry);
    let analysis = tier_analysis::analyze_tiers(&registry, &score);

    println!("{BOLD}Tier-Weighted Corpus Scoring (\u{00a7}4.3){RESET}");
    println!();

    // Build the colored replacements once rather than once per report line.
    let perfect = format!("{GREEN}100.0%{RESET}");
    let weighted_score = format!("{BOLD}Weighted Score:{RESET}");
    let weight_effect = format!("{BOLD}Weight Effect:{RESET}");

    let report = tier_analysis::format_tier_weights(&analysis);
    for line in report.lines().skip(2) {
        let colored = line
            .replace("100.0%", &perfect)
            .replace("Weighted Score:", &weighted_score)
            .replace("Weight Effect:", &weight_effect);
        println!(" {colored}");
    }
    Ok(())
}
// Textually splices `corpus_config_commands_corpus_3.rs` into this module at
// this point, so its items compile as part of this file and see the `use`
// items declared at the top.
// NOTE(review): presumably that file is where the `DatasetExportFormat` and
// `Path` imports above are used — confirm before pruning imports here.
include!("corpus_config_commands_corpus_3.rs");