use crate::meta::Section;
use anyhow::{Result, bail};
use regex::Regex;
use std::collections::{BTreeMap, BTreeSet, HashSet};
pub const KNOWN_TAGS: &[&str] = &[
"regex",
"regex-code",
"sets",
"branch-reset",
"substitution",
"transliteration",
"qw",
"quote",
"quote-like",
"qq",
"qr",
"qx",
"q",
"heredoc",
"heredoc-indented",
"heredoc-backtick",
"scalar",
"array",
"hash",
"reference",
"glob",
"typeglob",
"my",
"our",
"local",
"state",
"package",
"variable",
"declaration",
"if",
"unless",
"while",
"until",
"for",
"foreach",
"loop",
"given",
"when",
"flow",
"labels",
"continue",
"next",
"last",
"redo",
"goto",
"flipflop",
"range",
"ternary",
"sub",
"subroutine",
"function",
"method",
"attribute",
"signature",
"prototype",
"anonymous",
"closure",
"return",
"wantarray",
"builtin",
"math",
"string",
"list",
"file",
"filetest",
"io",
"pack",
"unpack",
"split",
"join",
"tr",
"sort",
"map",
"grep",
"print",
"say",
"printf",
"sprintf",
"format",
"operator",
"arithmetic",
"comparison",
"logical",
"bitwise",
"assignment",
"lvalue",
"dereference",
"arrow",
"smartmatch",
"binding",
"range-operator",
"use",
"require",
"no",
"import",
"pragma",
"strict",
"warnings",
"feature",
"experimental",
"version",
"vstring",
"constant",
"bytes",
"utf8",
"encoding",
"charnames",
"unicode",
"mro",
"class",
"field",
"method",
"bless",
"isa",
"can",
"does",
"inheritance",
"overload",
"constructor",
"destructor",
"autoload",
"special-var",
"magic",
"punctuation-var",
"english",
"try",
"catch",
"finally",
"defer",
"async",
"await",
"signatures",
"postfix",
"defined-or",
"open",
"close",
"pipe",
"socket",
"perlio",
"layers",
"system",
"exec",
"fork",
"wait",
"signal",
"alarm",
"tie",
"tied",
"untie",
"pod",
"comment",
"annotation",
"pragma",
"shebang",
"legacy",
"do",
"eval",
"block",
"expression",
"statement",
"context",
"list-context",
"scalar-context",
"void-context",
"interpolation",
"escape",
"delimiter",
"end-section",
"error",
"diagnostic",
"source-filter",
"inline",
"xs",
"ffi",
"test",
"debug",
"assertion",
"invariant",
"edge-case",
"ambiguous",
"lexer-sensitive",
"parser-sensitive",
"error-recovery",
"incomplete",
"invalid",
];
pub const KNOWN_FLAGS: &[&str] = &[
"lexer-sensitive",
"parser-sensitive",
"ambiguous",
"error-node-expected",
"experimental",
"version-gated",
"slow",
"incomplete",
"todo",
];
pub struct LintConfig {
pub max_sections_per_file: usize,
pub check_unknown_tags: bool,
pub check_unknown_flags: bool,
pub require_perl_version: bool,
}
impl Default for LintConfig {
fn default() -> Self {
Self {
max_sections_per_file: 12,
check_unknown_tags: true,
check_unknown_flags: true,
require_perl_version: false,
}
}
}
pub struct LintResult {
pub errors: Vec<String>,
pub warnings: Vec<String>,
}
impl LintResult {
pub fn is_ok(&self) -> bool {
self.errors.is_empty()
}
}
pub fn lint(sections: &[Section]) -> Result<()> {
lint_with_config(sections, &LintConfig::default())
}
pub fn lint_with_config(sections: &[Section], config: &LintConfig) -> Result<()> {
let result = check_sections(sections, config);
for warning in &result.warnings {
tracing::warn!("{}", warning);
}
for error in &result.errors {
tracing::error!("{}", error);
}
if !result.is_ok() {
bail!("Linting failed with {} errors", result.errors.len());
}
Ok(())
}
pub fn check_sections(sections: &[Section], config: &LintConfig) -> LintResult {
let mut result = LintResult { errors: Vec::new(), warnings: Vec::new() };
static ID_RE: once_cell::sync::Lazy<Option<Regex>> =
once_cell::sync::Lazy::new(|| Regex::new(r"^[a-z0-9._-]+$").ok());
let mut seen_ids = BTreeSet::new();
let mut per_file: BTreeMap<&str, usize> = BTreeMap::new();
let known_tags: HashSet<&str> = KNOWN_TAGS.iter().copied().collect();
let known_flags: HashSet<&str> = KNOWN_FLAGS.iter().copied().collect();
for section in sections {
if section.id.is_empty() {
result.errors.push(format!("Missing @id in {}: {}", section.file, section.title));
} else if !ID_RE.as_ref().is_some_and(|re| re.is_match(§ion.id)) {
result.errors.push(format!(
"Invalid @id format '{}' in {}: {} (must match [a-z0-9._-]+)",
section.id, section.file, section.title
));
}
if !section.id.is_empty() && !seen_ids.insert(§ion.id) {
result.errors.push(format!("Duplicate @id '{}' in {}", section.id, section.file));
}
*per_file.entry(§ion.file).or_default() += 1;
if config.check_unknown_tags {
for tag in §ion.tags {
if !known_tags.contains(tag.as_str()) {
result
.warnings
.push(format!("Unknown tag '{}' in {}: {}", tag, section.file, section.id));
}
}
}
if config.check_unknown_flags {
for flag in §ion.flags {
if !known_flags.contains(flag.as_str()) {
result.warnings.push(format!(
"Unknown flag '{}' in {}: {}",
flag, section.file, section.id
));
}
}
}
if config.require_perl_version && section.perl.is_none() {
result
.warnings
.push(format!("Missing @perl version in {}: {}", section.file, section.id));
}
if section.body.trim().is_empty() {
result.warnings.push(format!("Empty body in {}: {}", section.file, section.id));
}
}
for (file, count) in per_file {
if count > config.max_sections_per_file {
result.warnings.push(format!(
"File {} has {} sections (exceeds limit of {})",
file, count, config.max_sections_per_file
));
}
}
result
}