use std::collections::{HashMap, HashSet};
use std::path::Path;
use serde::Deserialize;
use crate::parser::ParseError;
/// Upper bound on the number of categories a registry file may declare.
const MAX_CATEGORIES: usize = 10;
/// Upper bound on the number of tags listed under a single category.
const MAX_TAGS_PER_CATEGORY: usize = 30;
/// Upper bound on the number of tags across all categories combined.
const MAX_TOTAL_TAGS: usize = 200;
/// Upper bound on the length of a category or tag slug, enforced by `validate_slug`.
const MAX_SLUG_LEN: usize = 30;
/// Upper bound on the length of a category or tag label, enforced by `validate_label`.
const MAX_LABEL_LEN: usize = 60;
/// Raw, unvalidated shape of the registry document as deserialized from YAML.
///
/// Only used as an intermediate inside `TagRegistry::parse`.
#[derive(Deserialize)]
struct TagRegistryFile {
/// Top-level `categories` list, in document order.
categories: Vec<CategoryEntry>,
}
/// One raw category entry from the registry document.
#[derive(Deserialize)]
struct CategoryEntry {
/// Identifier of the category; checked with `validate_slug` during parsing.
slug: String,
/// Display name of the category; checked with `validate_label` during parsing.
label: String,
/// Tags declared under this category, in document order.
tags: Vec<TagEntry>,
}
/// One raw tag entry nested under a category in the registry document.
#[derive(Deserialize)]
struct TagEntry {
/// Identifier of the tag; checked with `validate_slug` during parsing.
slug: String,
/// Display name of the tag; checked with `validate_label` during parsing.
label: String,
}
/// A validated tag, flattened together with the category it was declared under.
#[derive(Debug, Clone)]
pub struct Tag {
/// Identifier of the tag, used as the lookup key in the registry.
pub slug: String,
/// Display name of the tag.
pub label: String,
/// Slug of the category this tag was declared under.
pub category_slug: String,
/// Display name of the category this tag was declared under.
pub category_label: String,
}
/// In-memory registry of the tags declared in `tags.yaml`.
///
/// Built by `TagRegistry::load` / `TagRegistry::parse`, or empty via
/// `TagRegistry::empty`.
#[derive(Debug)]
pub struct TagRegistry {
/// All tags, in the order they appear in the document.
tags: Vec<Tag>,
/// Maps a tag slug to the position of that tag in `tags`.
slug_index: HashMap<String, usize>,
/// Category slugs, in the order the categories appear in the document.
category_slugs: Vec<String>,
}
impl TagRegistry {
    /// Loads the tag registry from `tags.yaml` located directly under `root`.
    ///
    /// A missing file is not an error: it yields an empty registry, which
    /// [`TagRegistry::validate_tags`] treats as "no validation".
    ///
    /// # Errors
    ///
    /// Returns a single-element error list when the file is present but cannot
    /// be read (permissions, I/O failure, invalid UTF-8, ...), or whatever
    /// [`TagRegistry::parse`] reports for the file contents.
    pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
        let path = root.join("tags.yaml");
        // Read first and classify the failure afterwards instead of probing with
        // `Path::exists`: that avoids a check-then-use race, and `exists` reports
        // `false` for permission/I/O errors, which would silently disable tag
        // validation for a registry that is there but unreadable.
        match std::fs::read_to_string(&path) {
            Ok(content) => Self::parse(&content),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Self::empty()),
            Err(e) => Err(vec![ParseError {
                line: 0,
                message: format!("{}: error reading file: {e}", path.display()),
            }]),
        }
    }

    /// Parses and validates the YAML text of a registry document.
    ///
    /// Validation does not stop at the first problem: every violation found
    /// (size limits, malformed slugs/labels, duplicate slugs) is collected so
    /// the caller can report them all at once. All errors carry `line: 0`.
    ///
    /// # Errors
    ///
    /// Returns one error if the text does not deserialize into the expected
    /// structure, otherwise the full list of validation errors if there are any.
    pub fn parse(content: &str) -> Result<Self, Vec<ParseError>> {
        let file: TagRegistryFile = serde_yaml::from_str(content).map_err(|e| {
            vec![ParseError {
                line: 0,
                message: format!("tags.yaml: invalid YAML: {e}"),
            }]
        })?;

        let mut errors = Vec::new();
        if file.categories.len() > MAX_CATEGORIES {
            errors.push(ParseError {
                line: 0,
                message: format!(
                    "tags.yaml: too many categories ({}, max {MAX_CATEGORIES})",
                    file.categories.len()
                ),
            });
        }

        let mut tags = Vec::new();
        let mut slug_index = HashMap::new();
        let mut category_slugs = Vec::with_capacity(file.categories.len());
        // The duplicate-detection sets only live for this call, so they borrow
        // the slugs from `file` instead of cloning each one.
        let mut seen_category_slugs: HashSet<&str> = HashSet::new();
        let mut seen_tag_slugs: HashSet<&str> = HashSet::new();

        for cat in &file.categories {
            validate_slug(&cat.slug, "category", &mut errors);
            validate_label(&cat.label, "category", &mut errors);
            if !seen_category_slugs.insert(cat.slug.as_str()) {
                errors.push(ParseError {
                    line: 0,
                    message: format!("tags.yaml: duplicate category slug {:?}", cat.slug),
                });
            }
            if cat.tags.len() > MAX_TAGS_PER_CATEGORY {
                errors.push(ParseError {
                    line: 0,
                    message: format!(
                        "tags.yaml: category {:?} has too many tags ({}, max {MAX_TAGS_PER_CATEGORY})",
                        cat.slug,
                        cat.tags.len()
                    ),
                });
            }
            category_slugs.push(cat.slug.clone());

            for tag in &cat.tags {
                validate_slug(&tag.slug, "tag", &mut errors);
                validate_label(&tag.label, "tag", &mut errors);
                // Tag slugs must be unique across the whole file, not just
                // within their category, because lookups are by slug alone.
                if !seen_tag_slugs.insert(tag.slug.as_str()) {
                    errors.push(ParseError {
                        line: 0,
                        message: format!("tags.yaml: duplicate tag slug {:?}", tag.slug),
                    });
                }
                // A duplicate overwrites the earlier index entry here; that is
                // harmless because any recorded error rejects the whole file.
                slug_index.insert(tag.slug.clone(), tags.len());
                tags.push(Tag {
                    slug: tag.slug.clone(),
                    label: tag.label.clone(),
                    category_slug: cat.slug.clone(),
                    category_label: cat.label.clone(),
                });
            }
        }

        if tags.len() > MAX_TOTAL_TAGS {
            errors.push(ParseError {
                line: 0,
                message: format!(
                    "tags.yaml: too many total tags ({}, max {MAX_TOTAL_TAGS})",
                    tags.len()
                ),
            });
        }

        if !errors.is_empty() {
            return Err(errors);
        }
        Ok(Self {
            tags,
            slug_index,
            category_slugs,
        })
    }

    /// Creates a registry with no categories and no tags.
    pub fn empty() -> Self {
        Self {
            tags: Vec::new(),
            slug_index: HashMap::new(),
            category_slugs: Vec::new(),
        }
    }

    /// Looks up a tag by slug, returning `None` when it is not registered.
    pub fn get(&self, slug: &str) -> Option<&Tag> {
        self.slug_index
            .get(slug)
            .and_then(|&idx| self.tags.get(idx))
    }

    /// Returns `true` when a tag with this slug is registered.
    pub fn contains(&self, slug: &str) -> bool {
        self.slug_index.contains_key(slug)
    }

    /// Number of registered tags.
    pub fn len(&self) -> usize {
        self.tags.len()
    }

    /// Returns `true` when no tags are registered.
    ///
    /// This looks at tags only: a registry that declares categories but no
    /// tags is still considered empty.
    pub fn is_empty(&self) -> bool {
        self.tags.is_empty()
    }

    /// All registered tags, in document order.
    pub fn tags(&self) -> &[Tag] {
        &self.tags
    }

    /// Slugs of all categories, in document order.
    pub fn category_slugs(&self) -> &[String] {
        &self.category_slugs
    }

    /// Checks `tags` against the registry and returns one error per unknown tag.
    ///
    /// Every returned error is attributed to `line`. When the registry is empty
    /// the check is skipped entirely and no errors are returned.
    pub fn validate_tags(&self, tags: &[String], line: usize) -> Vec<ParseError> {
        if self.is_empty() {
            return Vec::new();
        }
        tags.iter()
            .filter(|tag| !self.contains(tag.as_str()))
            .map(|tag| ParseError {
                line,
                message: format!("unknown tag {tag:?} (not in tags.yaml registry)"),
            })
            .collect()
    }
}
/// Validates one slug from `tags.yaml`, appending any problems to `errors`.
///
/// `kind` is the noun used in the messages (`"category"` or `"tag"`). A slug
/// must be non-empty, at most `MAX_SLUG_LEN` characters long and kebab-case.
/// All errors are recorded with `line: 0`.
fn validate_slug(slug: &str, kind: &str, errors: &mut Vec<ParseError>) {
    if slug.is_empty() {
        errors.push(ParseError {
            line: 0,
            message: format!("tags.yaml: {kind} slug must not be empty"),
        });
        // An empty slug also fails the kebab-case check; stop here so a single
        // mistake is reported once rather than twice.
        return;
    }
    // Count characters, not bytes, so the check agrees with the "chars" wording
    // of the message even when the slug contains non-ASCII text.
    if slug.chars().count() > MAX_SLUG_LEN {
        errors.push(ParseError {
            line: 0,
            message: format!("tags.yaml: {kind} slug {slug:?} exceeds {MAX_SLUG_LEN} chars"),
        });
    }
    if !is_kebab_case(slug) {
        errors.push(ParseError {
            line: 0,
            message: format!("tags.yaml: {kind} slug {slug:?} must be kebab-case"),
        });
    }
}
/// Validates one label from `tags.yaml`, appending any problems to `errors`.
///
/// `kind` is the noun used in the messages (`"category"` or `"tag"`). A label
/// must be non-empty and at most `MAX_LABEL_LEN` characters long. All errors
/// are recorded with `line: 0`.
fn validate_label(label: &str, kind: &str, errors: &mut Vec<ParseError>) {
    // `str::len` is a byte count; labels are free-form display text and may be
    // non-ASCII, so measure in characters to match the limit's stated unit.
    if label.chars().count() > MAX_LABEL_LEN {
        errors.push(ParseError {
            line: 0,
            message: format!("tags.yaml: {kind} label {label:?} exceeds {MAX_LABEL_LEN} chars"),
        });
    }
    if label.is_empty() {
        errors.push(ParseError {
            line: 0,
            message: format!("tags.yaml: {kind} label must not be empty"),
        });
    }
}
/// Reports whether `s` is kebab-case: one or more non-empty runs of ASCII
/// lowercase letters and digits, joined by single hyphens.
fn is_kebab_case(s: &str) -> bool {
    // Splitting on '-' turns every structural rule into "no empty segment":
    // an empty input, a leading or trailing hyphen, and a doubled hyphen each
    // produce at least one empty piece.
    s.split('-').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9'))
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    // Two categories, three tags. The nesting indentation inside the raw string
    // is significant: `label` and `tags` must sit under their list item.
    const VALID_YAML: &str = r#"
categories:
  - slug: crime-type
    label: Crime Type
    tags:
      - slug: bribery
        label: Bribery
      - slug: fraud
        label: Fraud
  - slug: sector
    label: Sector
    tags:
      - slug: government
        label: Government
"#;

    #[test]
    fn parse_valid_registry() {
        let reg = TagRegistry::parse(VALID_YAML).expect("should parse");
        assert_eq!(reg.len(), 3);
        assert_eq!(reg.category_slugs().len(), 2);
        assert!(reg.contains("bribery"));
        assert!(reg.contains("fraud"));
        assert!(reg.contains("government"));
        assert!(!reg.contains("unknown"));
        let tag = reg.get("bribery").expect("should exist");
        assert_eq!(tag.label, "Bribery");
        assert_eq!(tag.category_slug, "crime-type");
    }

    #[test]
    fn validate_tags_known() {
        let reg = TagRegistry::parse(VALID_YAML).expect("should parse");
        let errors = reg.validate_tags(&["bribery".to_string(), "government".to_string()], 5);
        assert!(errors.is_empty());
    }

    #[test]
    fn validate_tags_unknown() {
        let reg = TagRegistry::parse(VALID_YAML).expect("should parse");
        let errors = reg.validate_tags(&["bribery".to_string(), "unknown".to_string()], 5);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("unknown"));
    }

    #[test]
    fn validate_tags_empty_registry() {
        let reg = TagRegistry::empty();
        let errors = reg.validate_tags(&["anything".to_string()], 5);
        assert!(errors.is_empty(), "empty registry should skip validation");
    }

    #[test]
    fn duplicate_tag_slug_error() {
        let yaml = r#"
categories:
  - slug: a
    label: A
    tags:
      - slug: dupe
        label: First
      - slug: dupe
        label: Second
"#;
        let errors = TagRegistry::parse(yaml).expect_err("should fail");
        assert!(errors.iter().any(|e| e.message.contains("duplicate tag")));
    }

    #[test]
    fn duplicate_category_slug_error() {
        let yaml = r#"
categories:
  - slug: same
    label: First
    tags:
      - slug: a
        label: A
  - slug: same
    label: Second
    tags:
      - slug: b
        label: B
"#;
        let errors = TagRegistry::parse(yaml).expect_err("should fail");
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("duplicate category"))
        );
    }

    #[test]
    fn non_kebab_slug_error() {
        let yaml = r#"
categories:
  - slug: a
    label: A
    tags:
      - slug: Not_Kebab
        label: Bad
"#;
        let errors = TagRegistry::parse(yaml).expect_err("should fail");
        assert!(errors.iter().any(|e| e.message.contains("kebab-case")));
    }

    #[test]
    fn kebab_case_rules() {
        assert!(is_kebab_case("crime-type"));
        assert!(is_kebab_case("a1-b2"));
        assert!(!is_kebab_case(""));
        assert!(!is_kebab_case("-a"));
        assert!(!is_kebab_case("a-"));
        assert!(!is_kebab_case("a--b"));
        assert!(!is_kebab_case("Upper"));
    }

    #[test]
    fn empty_registry() {
        let reg = TagRegistry::empty();
        assert!(reg.is_empty());
        assert_eq!(reg.len(), 0);
        assert!(!reg.contains("anything"));
    }
}