use crate::files::{CorpusPaths, get_test_files_from};
use crate::lint::KNOWN_TAGS;
use crate::meta::Section;
use crate::parse_file;
use anyhow::Result;
use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::Path;
/// Serializable summary of a test corpus.
///
/// The section-derived fields are filled by [`inventory_from_sections`]; the
/// fixture-coverage fields start out empty/`false` there and are only updated
/// by the gold-fixture scan run from [`build_inventory_from_paths`].
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct CorpusInventory {
    /// Version of this report layout (currently always written as `1`).
    pub schema_version: u32,
    /// Number of corpus files the sections were collected from.
    pub files: usize,
    /// Number of sections that were summarised.
    pub sections: usize,
    /// Number of cases; currently set to the same value as `sections`.
    pub cases: usize,
    /// Statistics about section identifiers.
    pub ids: InventoryIds,
    /// Tags seen in the corpus, split into known and unknown.
    pub tags: InventoryTags,
    /// How many times each flag string occurs across all sections.
    pub flags: BTreeMap<String, usize>,
    /// Per-section counts for a few specific flags.
    pub markers: InventoryMarkers,
    /// Generator family names, as returned by [`generator_families`].
    pub generators: Vec<String>,
    /// `true` when at least one gold fixture directory contains a concept file.
    pub concept_mapping_available: bool,
    /// `true` when at least one gold fixture directory contains `expected.json`.
    pub expectations_available: bool,
    /// Fixture directory names lacking a concept file; only populated when
    /// `concept_mapping_available` is `true`.
    pub fixtures_without_concepts: Vec<String>,
    /// Fixture directory names lacking `expected.json`; only populated when
    /// `expectations_available` is `true`.
    pub fixtures_without_expectations: Vec<String>,
}
/// Identifier statistics for the sections of a corpus.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct InventoryIds {
    /// Number of sections whose id is non-blank (duplicates are counted each time).
    pub total: usize,
    /// Number of sections whose id is empty after trimming whitespace.
    pub missing: usize,
    /// Ids that occur on more than one section, in ascending order.
    pub duplicates: Vec<String>,
}
/// Distinct tags found in a corpus, each list sorted and free of duplicates.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct InventoryTags {
    /// Tags that are listed in `KNOWN_TAGS`.
    pub known: Vec<String>,
    /// Tags that are not listed in `KNOWN_TAGS`.
    pub unknown: Vec<String>,
}
/// Counts of sections carrying particular flags.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct InventoryMarkers {
    /// Sections flagged `expected-error`.
    pub expected_error: usize,
    /// Sections flagged `wip` or `todo` (a section with both counts once).
    pub wip: usize,
    /// Sections flagged `parser-sensitive`.
    pub parser_sensitive: usize,
}
/// Builds the inventory for the corpus located by `CorpusPaths::discover`.
///
/// # Errors
///
/// Propagates any error returned by [`build_inventory_from_paths`].
pub fn build_inventory() -> Result<CorpusInventory> {
    let discovered = CorpusPaths::discover();
    build_inventory_from_paths(&discovered)
}
/// Builds a [`CorpusInventory`] for the corpus described by `paths`.
///
/// Every file returned by `get_test_files_from` is parsed with `parse_file`,
/// the collected sections are summarised by [`inventory_from_sections`], and
/// the fixture-coverage fields are then filled in from the
/// `test_corpus/gold` directory below `paths.root`.
///
/// # Errors
///
/// Returns the first error produced while parsing a corpus file or while
/// scanning the gold fixture directory.
pub fn build_inventory_from_paths(paths: &CorpusPaths) -> Result<CorpusInventory> {
    let files = get_test_files_from(paths);

    let mut sections = Vec::new();
    for file in &files {
        sections.extend(parse_file(file)?);
    }

    // The summary only needs to read the sections, so lend them as a slice.
    let mut inventory = inventory_from_sections(files.len(), &sections);

    let gold_root = paths.root.join("test_corpus/gold");
    populate_fixture_coverage(&gold_root, &mut inventory)?;

    Ok(inventory)
}
/// Summarises already-parsed `sections` into a [`CorpusInventory`].
///
/// `file_count` is copied into the report unchanged. The fixture-coverage
/// fields are left at their empty/`false` defaults; this function performs no
/// filesystem access.
///
/// All collections in the result are ordered (they are built from `BTreeMap`
/// / `BTreeSet`), so the same input always yields the same output.
pub fn inventory_from_sections(file_count: usize, sections: &[Section]) -> CorpusInventory {
    let mut id_counts: BTreeMap<&str, usize> = BTreeMap::new();
    let mut missing_ids = 0usize;
    let mut known_tags = BTreeSet::new();
    let mut unknown_tags = BTreeSet::new();
    let known_tag_set: BTreeSet<&str> = KNOWN_TAGS.iter().copied().collect();
    let mut flags: BTreeMap<String, usize> = BTreeMap::new();
    let mut markers = InventoryMarkers {
        expected_error: 0,
        wip: 0,
        parser_sensitive: 0,
    };

    for section in sections {
        // A whitespace-only id is treated the same as no id at all.
        if section.id.trim().is_empty() {
            missing_ids += 1;
        } else {
            *id_counts.entry(section.id.as_str()).or_default() += 1;
        }

        for tag in &section.tags {
            if known_tag_set.contains(tag.as_str()) {
                known_tags.insert(tag.clone());
            } else {
                unknown_tags.insert(tag.clone());
            }
        }

        for flag in &section.flags {
            *flags.entry(flag.clone()).or_default() += 1;
        }

        if section.has_flag("expected-error") {
            markers.expected_error += 1;
        }
        // `wip` and `todo` share one counter; a section with both counts once.
        if section.has_flag("wip") || section.has_flag("todo") {
            markers.wip += 1;
        }
        if section.has_flag("parser-sensitive") {
            markers.parser_sensitive += 1;
        }
    }

    // Iterating the BTreeMap yields ids in ascending order.
    let duplicates = id_counts
        .into_iter()
        .filter_map(|(id, count)| (count > 1).then_some(id.to_string()))
        .collect::<Vec<_>>();

    CorpusInventory {
        schema_version: 1,
        files: file_count,
        sections: sections.len(),
        cases: sections.len(),
        ids: InventoryIds {
            total: sections.len().saturating_sub(missing_ids),
            missing: missing_ids,
            duplicates,
        },
        tags: InventoryTags {
            known: known_tags.into_iter().collect(),
            unknown: unknown_tags.into_iter().collect(),
        },
        flags,
        markers,
        generators: generator_families(),
        concept_mapping_available: false,
        expectations_available: false,
        fixtures_without_concepts: Vec::new(),
        fixtures_without_expectations: Vec::new(),
    }
}
/// Returns the fixed list of generator family names recorded in the inventory.
///
/// The names are hard-coded and returned in the order written below, which is
/// alphabetical.
pub fn generator_families() -> Vec<String> {
    const FAMILY_NAMES: [&str; 21] = [
        "ambiguity",
        "builtins",
        "control_flow",
        "declarations",
        "expressions",
        "filetest",
        "format_statements",
        "glob",
        "heredoc",
        "io",
        "list_ops",
        "object_oriented",
        "phasers",
        "program",
        "quote_like",
        "qw",
        "regex",
        "sigils",
        "special_vars",
        "tie",
        "whitespace",
    ];

    let mut families = Vec::with_capacity(FAMILY_NAMES.len());
    for name in FAMILY_NAMES {
        families.push(String::from(name));
    }
    families
}
/// Fills the fixture-coverage fields of `inventory` from the directories
/// directly below `gold_root`.
///
/// A sub-directory counts as a fixture when it contains `fixture.pl`. For each
/// fixture the scan checks for `expected.json` and for any one of the concept
/// files (`expected_concepts.json`, `concepts.json`, `concepts.toml`).
///
/// The two `*_available` flags are always written. The matching
/// `fixtures_without_*` list is only replaced when at least one fixture has the
/// corresponding file; otherwise it is left as it was. If `gold_root` does not
/// exist, `inventory` is not modified at all.
///
/// # Errors
///
/// Returns an error if the directory cannot be listed or an entry cannot be read.
fn populate_fixture_coverage(gold_root: &Path, inventory: &mut CorpusInventory) -> Result<()> {
    const CONCEPT_FILES: [&str; 3] = ["expected_concepts.json", "concepts.json", "concepts.toml"];

    if !gold_root.exists() {
        return Ok(());
    }

    // Collect (directory name, directory path) for every fixture directory.
    let mut fixture_dirs = Vec::new();
    for dir_entry in fs::read_dir(gold_root)? {
        let dir = dir_entry?.path();
        let is_fixture = dir.is_dir() && dir.join("fixture.pl").exists();
        if !is_fixture {
            continue;
        }
        if let Some(os_name) = dir.file_name() {
            let label = os_name.to_string_lossy().to_string();
            fixture_dirs.push((label, dir));
        }
    }
    // Directory listing order is unspecified; sort by name for stable output.
    fixture_dirs.sort_by(|(left, _), (right, _)| left.cmp(right));

    let mut any_expectations = false;
    let mut any_concepts = false;
    let mut lacking_expectations = Vec::new();
    let mut lacking_concepts = Vec::new();

    for (label, dir) in fixture_dirs {
        if dir.join("expected.json").exists() {
            any_expectations = true;
        } else {
            lacking_expectations.push(label.clone());
        }

        if CONCEPT_FILES.iter().any(|candidate| dir.join(candidate).exists()) {
            any_concepts = true;
        } else {
            lacking_concepts.push(label);
        }
    }

    inventory.expectations_available = any_expectations;
    inventory.concept_mapping_available = any_concepts;
    // Only report the gaps for a kind of file that is in use somewhere.
    if any_expectations {
        inventory.fixtures_without_expectations = lacking_expectations;
    }
    if any_concepts {
        inventory.fixtures_without_concepts = lacking_concepts;
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal `Section` with the given id, tags and flags; every
    /// other field gets a fixed placeholder value.
    fn sample_section(id: &str, tags: &[&str], flags: &[&str]) -> Section {
        Section {
            id: id.to_string(),
            title: "title".to_string(),
            file: "sample.txt".to_string(),
            tags: tags.iter().map(|tag| (*tag).to_string()).collect(),
            perl: None,
            flags: flags.iter().map(|flag| (*flag).to_string()).collect(),
            body: "my $x = 1;".to_string(),
            line: Some(1),
        }
    }

    /// Two sections share an id and one has none: checks the id statistics,
    /// the known/unknown tag split and the marker counters.
    #[test]
    fn inventory_reports_missing_and_duplicate_ids() {
        let sections = vec![
            sample_section("case.1", &["regex"], &[]),
            sample_section("case.1", &["regex", "custom-tag"], &["parser-sensitive"]),
            sample_section("", &["custom-tag"], &["wip"]),
        ];

        let inventory = inventory_from_sections(2, &sections);

        assert_eq!(inventory.schema_version, 1);
        assert_eq!(inventory.files, 2);
        assert_eq!(inventory.sections, 3);
        assert_eq!(inventory.ids.total, 2);
        assert_eq!(inventory.ids.missing, 1);
        assert_eq!(inventory.ids.duplicates, vec!["case.1".to_string()]);
        assert_eq!(inventory.tags.known, vec!["regex".to_string()]);
        assert_eq!(inventory.tags.unknown, vec!["custom-tag".to_string()]);
        assert_eq!(inventory.markers.parser_sensitive, 1);
        assert_eq!(inventory.markers.wip, 1);
    }

    /// The same input must give the same report, with tag lists sorted
    /// regardless of the order the sections supply them in.
    #[test]
    fn inventory_is_deterministic() {
        let sections = vec![
            sample_section("z.case", &["z-unknown", "regex"], &["todo"]),
            sample_section("a.case", &["regex", "a-unknown"], &["parser-sensitive"]),
        ];

        let first = inventory_from_sections(1, &sections);
        let second = inventory_from_sections(1, &sections);

        assert_eq!(first, second);
        assert_eq!(
            first.tags.unknown,
            vec!["a-unknown".to_string(), "z-unknown".to_string()]
        );
        assert_eq!(first.ids.duplicates, Vec::<String>::new());
    }
}