use crate::corpus::registry::{CorpusEntry, CorpusFormat, CorpusRegistry};
use std::fmt;
/// Kinds of grammar violation the schema checks can report.
///
/// Each variant has a stable `GRAM-NNN` code, a short description, a suggested
/// fix pattern and the corpus format it applies to; see the inherent methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GrammarCategory {
/// `GRAM-001`: missing quoting in expansion.
MissingQuoting,
/// `GRAM-002`: bashism in POSIX output.
Bashism,
/// `GRAM-003`: tab/space confusion in a Makefile recipe.
TabSpaceConfusion,
/// `GRAM-004`: shell form in Dockerfile CMD/ENTRYPOINT.
ShellFormCmd,
/// `GRAM-005`: undefined variable reference.
UndefinedVariable,
/// `GRAM-006`: invalid POSIX arithmetic.
InvalidArithmetic,
/// `GRAM-007`: missing FROM in Dockerfile.
MissingFrom,
/// `GRAM-008`: circular Make dependency.
CircularDependency,
}
impl GrammarCategory {
pub fn code(&self) -> &'static str {
match self {
Self::MissingQuoting => "GRAM-001",
Self::Bashism => "GRAM-002",
Self::TabSpaceConfusion => "GRAM-003",
Self::ShellFormCmd => "GRAM-004",
Self::UndefinedVariable => "GRAM-005",
Self::InvalidArithmetic => "GRAM-006",
Self::MissingFrom => "GRAM-007",
Self::CircularDependency => "GRAM-008",
}
}
pub fn description(&self) -> &'static str {
match self {
Self::MissingQuoting => "Missing quoting in expansion",
Self::Bashism => "Bashism in POSIX output",
Self::TabSpaceConfusion => "Tab/space confusion in Makefile recipe",
Self::ShellFormCmd => "Shell form in Dockerfile CMD/ENTRYPOINT",
Self::UndefinedVariable => "Undefined variable reference",
Self::InvalidArithmetic => "Invalid POSIX arithmetic",
Self::MissingFrom => "Missing FROM in Dockerfile",
Self::CircularDependency => "Circular Make dependency",
}
}
pub fn fix_pattern(&self) -> &'static str {
match self {
Self::MissingQuoting => "Add double quotes around ${}",
Self::Bashism => "Replace [[ ]] with [ ]",
Self::TabSpaceConfusion => "Ensure recipe lines use \\t",
Self::ShellFormCmd => "Convert to exec form [\"cmd\", \"arg\"]",
Self::UndefinedVariable => "Add := assignment before use",
Self::InvalidArithmetic => "Replace (( )) with $(( ))",
Self::MissingFrom => "Add FROM as first instruction",
Self::CircularDependency => "Reorder targets to break cycle",
}
}
pub fn applicable_format(&self) -> CorpusFormat {
match self {
Self::MissingQuoting | Self::Bashism | Self::InvalidArithmetic => CorpusFormat::Bash,
Self::TabSpaceConfusion | Self::UndefinedVariable | Self::CircularDependency => {
CorpusFormat::Makefile
}
Self::ShellFormCmd | Self::MissingFrom => CorpusFormat::Dockerfile,
}
}
pub fn all() -> &'static [GrammarCategory] {
&[
Self::MissingQuoting,
Self::Bashism,
Self::TabSpaceConfusion,
Self::ShellFormCmd,
Self::UndefinedVariable,
Self::InvalidArithmetic,
Self::MissingFrom,
Self::CircularDependency,
]
}
}
/// Displays a category as its `GRAM-NNN` code.
impl fmt::Display for GrammarCategory {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let code = self.code();
        f.write_str(code)
    }
}
/// Validation layer at which a check runs and a violation is attributed.
///
/// The `Display` impl renders these as `L1:Lexical` .. `L4:Behavioral`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationLayer {
/// Layer 1 (displayed as `L1:Lexical`).
Lexical,
/// Layer 2 (displayed as `L2:Syntactic`).
Syntactic,
/// Layer 3 (displayed as `L3:Semantic`).
Semantic,
/// Layer 4 (displayed as `L4:Behavioral`).
Behavioral,
}
/// Displays a layer as `L<n>:<Name>`, e.g. `L2:Syntactic`.
impl fmt::Display for ValidationLayer {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the fixed label first, then emit it in one place.
        let label = match *self {
            Self::Lexical => "L1:Lexical",
            Self::Syntactic => "L2:Syntactic",
            Self::Semantic => "L3:Semantic",
            Self::Behavioral => "L4:Behavioral",
        };
        f.write_str(label)
    }
}
/// A single grammar finding reported against one corpus entry.
#[derive(Debug, Clone)]
pub struct GrammarViolation {
/// Which kind of violation was detected.
pub category: GrammarCategory,
/// The validation layer the finding is attributed to.
pub layer: ValidationLayer,
/// Identifier of the corpus entry the finding belongs to.
pub entry_id: String,
/// Line number within the checked output; the Bash validator stores 1-based values.
pub line: usize,
/// Human-readable explanation of the finding.
pub message: String,
}
/// Outcome of validating one corpus entry (as produced by `validate_entry`).
#[derive(Debug, Clone)]
pub struct SchemaResult {
/// Identifier of the validated entry.
pub entry_id: String,
/// Format of the validated entry.
pub format: CorpusFormat,
/// `validate_entry` sets this to `true` exactly when `violations` is empty.
pub valid: bool,
/// All findings collected for the entry.
pub violations: Vec<GrammarViolation>,
/// Layers the format-specific validator recorded as passed.
pub layers_passed: Vec<ValidationLayer>,
}
/// Aggregate of per-entry schema results.
///
/// NOTE(review): the code that builds this report is not visible here; the
/// field descriptions below are inferred from names and from `pass_rate` --
/// confirm against the constructor.
#[derive(Debug, Clone)]
pub struct SchemaReport {
/// Per-entry results.
pub results: Vec<SchemaResult>,
/// Denominator used by `pass_rate`; presumably the number of entries validated.
pub total_entries: usize,
/// Numerator used by `pass_rate`; presumably the number of entries with no violations.
pub valid_entries: usize,
/// Presumably the sum of violations over all results -- TODO confirm.
pub total_violations: usize,
/// Presumably the violation count per category -- TODO confirm ordering and completeness.
pub violations_by_category: Vec<(GrammarCategory, usize)>,
}
impl SchemaReport {
    /// Returns `valid_entries / total_entries` as a percentage.
    ///
    /// Yields `0.0` when `total_entries` is zero, so the result is never NaN.
    pub fn pass_rate(&self) -> f64 {
        match self.total_entries {
            0 => 0.0,
            total => (self.valid_entries as f64 / total as f64) * 100.0,
        }
    }
}
/// Validates a single corpus entry with the validator matching its format.
///
/// The format-specific validator fills in the violations and the layers that
/// passed; the entry is reported as valid only when no violation was recorded.
pub fn validate_entry(entry: &CorpusEntry) -> SchemaResult {
    let mut found = Vec::new();
    let mut passed = Vec::new();

    // Dispatch on the entry's declared format.
    match entry.format {
        CorpusFormat::Bash => {
            validate_bash_entry(entry, &mut found, &mut passed);
        }
        CorpusFormat::Makefile => {
            validate_makefile_entry(entry, &mut found, &mut passed);
        }
        CorpusFormat::Dockerfile => {
            validate_dockerfile_entry(entry, &mut found, &mut passed);
        }
    }

    // Must be computed before `found` is moved into the result.
    let valid = found.is_empty();
    SchemaResult {
        entry_id: entry.id.clone(),
        format: entry.format,
        valid,
        violations: found,
        layers_passed: passed,
    }
}
fn validate_bash_entry(
entry: &CorpusEntry,
violations: &mut Vec<GrammarViolation>,
layers_passed: &mut Vec<ValidationLayer>,
) {
let output = &entry.expected_output;
let l1_pass = !output.is_empty();
if l1_pass {
layers_passed.push(ValidationLayer::Lexical);
}
for (i, line) in output.lines().enumerate() {
let trimmed = line.trim();
if trimmed.contains("[[") && trimmed.contains("]]") {
violations.push(GrammarViolation {
category: GrammarCategory::Bashism,
layer: ValidationLayer::Syntactic,
entry_id: entry.id.clone(),
line: i + 1,
message: "Double bracket [[ ]] is a bashism; use [ ] for POSIX".into(),
});
}
if trimmed.contains("(( ") && !trimmed.contains("$((") {
violations.push(GrammarViolation {
category: GrammarCategory::InvalidArithmetic,
layer: ValidationLayer::Syntactic,
entry_id: entry.id.clone(),
line: i + 1,
message: "(( )) is bash-specific; use $(( )) for POSIX arithmetic".into(),
});
}
}
for (i, line) in output.lines().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with('#') || trimmed.starts_with("#!/") {
continue;
}
if check_unquoted_expansion(trimmed) {
violations.push(GrammarViolation {
category: GrammarCategory::MissingQuoting,
layer: ValidationLayer::Semantic,
entry_id: entry.id.clone(),
line: i + 1,
message: "Unquoted variable expansion; wrap in double quotes".into(),
});
}
}
if violations
.iter()
.all(|v| v.layer != ValidationLayer::Syntactic)
{
layers_passed.push(ValidationLayer::Syntactic);
}
if violations
.iter()
.all(|v| v.layer != ValidationLayer::Semantic)
{
layers_passed.push(ValidationLayer::Semantic);
}
}
/// Returns `true` when `line` has the shape `NAME=...` of a POSIX shell
/// variable assignment.
///
/// `NAME` is everything before the first `=` and must be a valid shell name:
/// non-empty, made only of ASCII letters, ASCII digits and `_`, and not
/// starting with a digit. Lines such as `=value`, `1abc=2` or `echo a=b` are
/// therefore not assignments. The text after `=` is not inspected.
///
/// The line is examined as given; callers are expected to strip leading
/// whitespace first if that is desired.
fn is_shell_assignment(line: &str) -> bool {
    line.split_once('=').is_some_and(|(name, _value)| {
        let mut chars = name.chars();
        // The first character decides: an empty name or a leading digit is
        // not a shell name.
        chars
            .next()
            .is_some_and(|first| first == '_' || first.is_ascii_alphabetic())
            && chars.all(|c| c == '_' || c.is_ascii_alphanumeric())
    })
}
include!("schema_enforcement_is_unquoted.rs");