1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use serde::Deserialize;
5
6use crate::parser::ParseError;
7
/// Maximum number of categories a `tags.yaml` registry may declare.
const MAX_CATEGORIES: usize = 10;

/// Maximum number of tags allowed inside a single category.
const MAX_TAGS_PER_CATEGORY: usize = 30;

/// Maximum number of tags across all categories combined.
const MAX_TOTAL_TAGS: usize = 200;

/// Upper bound on the length of a category or tag slug.
const MAX_SLUG_LEN: usize = 30;

/// Upper bound on the length of a category or tag label.
const MAX_LABEL_LEN: usize = 60;
22
/// Deserialized top-level shape of `tags.yaml`: a single `categories` list.
#[derive(Deserialize)]
struct TagRegistryFile {
    /// Categories in the order they appear in the file.
    categories: Vec<CategoryEntry>,
}
28
/// One category entry as written in `tags.yaml`, before validation.
#[derive(Deserialize)]
struct CategoryEntry {
    /// Identifier of the category; validated as a kebab-case slug during parsing.
    slug: String,
    /// Display label of the category.
    label: String,
    /// Tags declared under this category.
    tags: Vec<TagEntry>,
}
36
/// One tag entry as written in `tags.yaml`, before validation.
#[derive(Deserialize)]
struct TagEntry {
    /// Identifier of the tag; validated as a kebab-case slug during parsing.
    slug: String,
    /// Display label of the tag.
    label: String,
}
43
/// A validated tag together with the category it was declared under.
#[derive(Debug, Clone)]
pub struct Tag {
    /// Slug of the tag itself.
    pub slug: String,
    /// Display label of the tag.
    pub label: String,
    /// Slug of the category that contains this tag.
    pub category_slug: String,
    /// Display label of the category that contains this tag.
    pub category_label: String,
}
52
/// In-memory registry of the tags declared in `tags.yaml`.
#[derive(Debug)]
pub struct TagRegistry {
    /// All tags, flattened across categories in file order.
    tags: Vec<Tag>,
    /// Maps a tag slug to the position of that tag in `tags`.
    slug_index: HashMap<String, usize>,
    /// Category slugs in file order.
    category_slugs: Vec<String>,
}
63
64impl TagRegistry {
65 pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
68 let path = root.join("tags.yaml");
69 if !path.exists() {
70 return Ok(Self::empty());
71 }
72
73 let content = std::fs::read_to_string(&path).map_err(|e| {
74 vec![ParseError {
75 line: 0,
76 message: format!("{}: error reading file: {e}", path.display()),
77 }]
78 })?;
79
80 Self::parse(&content)
81 }
82
83 pub fn parse(content: &str) -> Result<Self, Vec<ParseError>> {
85 let file: TagRegistryFile = serde_yaml::from_str(content).map_err(|e| {
86 vec![ParseError {
87 line: 0,
88 message: format!("tags.yaml: invalid YAML: {e}"),
89 }]
90 })?;
91
92 let mut errors = Vec::new();
93
94 if file.categories.len() > MAX_CATEGORIES {
95 errors.push(ParseError {
96 line: 0,
97 message: format!(
98 "tags.yaml: too many categories ({}, max {MAX_CATEGORIES})",
99 file.categories.len()
100 ),
101 });
102 }
103
104 let mut tags = Vec::new();
105 let mut slug_index = HashMap::new();
106 let mut category_slugs = Vec::new();
107 let mut seen_category_slugs: HashSet<String> = HashSet::new();
108 let mut seen_tag_slugs: HashSet<String> = HashSet::new();
109
110 for cat in &file.categories {
111 validate_slug(&cat.slug, "category", &mut errors);
112 validate_label(&cat.label, "category", &mut errors);
113
114 if !seen_category_slugs.insert(cat.slug.clone()) {
115 errors.push(ParseError {
116 line: 0,
117 message: format!("tags.yaml: duplicate category slug {:?}", cat.slug),
118 });
119 }
120
121 if cat.tags.len() > MAX_TAGS_PER_CATEGORY {
122 errors.push(ParseError {
123 line: 0,
124 message: format!(
125 "tags.yaml: category {:?} has too many tags ({}, max {MAX_TAGS_PER_CATEGORY})",
126 cat.slug,
127 cat.tags.len()
128 ),
129 });
130 }
131
132 category_slugs.push(cat.slug.clone());
133
134 for tag in &cat.tags {
135 validate_slug(&tag.slug, "tag", &mut errors);
136 validate_label(&tag.label, "tag", &mut errors);
137
138 if !seen_tag_slugs.insert(tag.slug.clone()) {
139 errors.push(ParseError {
140 line: 0,
141 message: format!("tags.yaml: duplicate tag slug {:?}", tag.slug),
142 });
143 }
144
145 let idx = tags.len();
146 slug_index.insert(tag.slug.clone(), idx);
147 tags.push(Tag {
148 slug: tag.slug.clone(),
149 label: tag.label.clone(),
150 category_slug: cat.slug.clone(),
151 category_label: cat.label.clone(),
152 });
153 }
154 }
155
156 if tags.len() > MAX_TOTAL_TAGS {
157 errors.push(ParseError {
158 line: 0,
159 message: format!(
160 "tags.yaml: too many total tags ({}, max {MAX_TOTAL_TAGS})",
161 tags.len()
162 ),
163 });
164 }
165
166 if !errors.is_empty() {
167 return Err(errors);
168 }
169
170 Ok(Self {
171 tags,
172 slug_index,
173 category_slugs,
174 })
175 }
176
177 pub fn empty() -> Self {
179 Self {
180 tags: Vec::new(),
181 slug_index: HashMap::new(),
182 category_slugs: Vec::new(),
183 }
184 }
185
186 pub fn get(&self, slug: &str) -> Option<&Tag> {
188 self.slug_index.get(slug).map(|&idx| &self.tags[idx])
189 }
190
191 pub fn contains(&self, slug: &str) -> bool {
193 self.slug_index.contains_key(slug)
194 }
195
196 pub fn len(&self) -> usize {
198 self.tags.len()
199 }
200
201 pub fn is_empty(&self) -> bool {
203 self.tags.is_empty()
204 }
205
206 pub fn tags(&self) -> &[Tag] {
208 &self.tags
209 }
210
211 pub fn category_slugs(&self) -> &[String] {
213 &self.category_slugs
214 }
215
216 pub fn validate_tags(&self, tags: &[String], line: usize) -> Vec<ParseError> {
219 let mut errors = Vec::new();
220
221 if self.is_empty() {
222 return errors;
224 }
225
226 for tag in tags {
227 if !self.contains(tag) {
228 errors.push(ParseError {
229 line,
230 message: format!("unknown tag {tag:?} (not in tags.yaml registry)"),
231 });
232 }
233 }
234
235 errors
236 }
237}
238
239fn validate_slug(slug: &str, kind: &str, errors: &mut Vec<ParseError>) {
240 if slug.len() > MAX_SLUG_LEN {
241 errors.push(ParseError {
242 line: 0,
243 message: format!("tags.yaml: {kind} slug {slug:?} exceeds {MAX_SLUG_LEN} chars"),
244 });
245 }
246 if slug.is_empty() {
247 errors.push(ParseError {
248 line: 0,
249 message: format!("tags.yaml: {kind} slug must not be empty"),
250 });
251 }
252 if !is_kebab_case(slug) {
253 errors.push(ParseError {
254 line: 0,
255 message: format!("tags.yaml: {kind} slug {slug:?} must be kebab-case"),
256 });
257 }
258}
259
260fn validate_label(label: &str, kind: &str, errors: &mut Vec<ParseError>) {
261 if label.len() > MAX_LABEL_LEN {
262 errors.push(ParseError {
263 line: 0,
264 message: format!("tags.yaml: {kind} label {label:?} exceeds {MAX_LABEL_LEN} chars"),
265 });
266 }
267 if label.is_empty() {
268 errors.push(ParseError {
269 line: 0,
270 message: format!("tags.yaml: {kind} label must not be empty"),
271 });
272 }
273}
274
/// Returns `true` when `s` is kebab-case: one or more non-empty runs of
/// ASCII lowercase letters or digits, joined by single hyphens.
fn is_kebab_case(s: &str) -> bool {
    // Splitting on '-' turns a leading, trailing, or doubled hyphen (and the
    // empty string itself) into an empty segment, which is rejected below.
    s.split('-').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9'))
    })
}
284
/// Unit tests for registry parsing, lookup, and tag validation.
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: indentation inside the YAML fixtures is significant. Each
    // category's `label`/`tags` keys must align with its `slug`, and tag
    // entries must be nested under `tags`.
    const VALID_YAML: &str = r#"
categories:
  - slug: crime-type
    label: Crime Type
    tags:
      - slug: bribery
        label: Bribery
      - slug: fraud
        label: Fraud
  - slug: sector
    label: Sector
    tags:
      - slug: government
        label: Government
"#;

    /// A well-formed file yields all tags and categories, indexed by slug.
    #[test]
    fn parse_valid_registry() {
        let reg = TagRegistry::parse(VALID_YAML).expect("should parse");
        assert_eq!(reg.len(), 3);
        assert_eq!(reg.category_slugs().len(), 2);
        assert!(reg.contains("bribery"));
        assert!(reg.contains("fraud"));
        assert!(reg.contains("government"));
        assert!(!reg.contains("unknown"));

        let tag = reg.get("bribery").expect("should exist");
        assert_eq!(tag.label, "Bribery");
        assert_eq!(tag.category_slug, "crime-type");
    }

    /// Registered slugs produce no validation errors.
    #[test]
    fn validate_tags_known() {
        let reg = TagRegistry::parse(VALID_YAML).expect("should parse");
        let errors = reg.validate_tags(&["bribery".to_string(), "government".to_string()], 5);
        assert!(errors.is_empty());
    }

    /// Each unregistered slug produces exactly one error naming it.
    #[test]
    fn validate_tags_unknown() {
        let reg = TagRegistry::parse(VALID_YAML).expect("should parse");
        let errors = reg.validate_tags(&["bribery".to_string(), "unknown".to_string()], 5);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("unknown"));
    }

    /// With no tags registered, validation is skipped entirely.
    #[test]
    fn validate_tags_empty_registry() {
        let reg = TagRegistry::empty();
        let errors = reg.validate_tags(&["anything".to_string()], 5);
        assert!(errors.is_empty(), "empty registry should skip validation");
    }

    /// The same tag slug appearing twice is rejected.
    #[test]
    fn duplicate_tag_slug_error() {
        let yaml = r#"
categories:
  - slug: a
    label: A
    tags:
      - slug: dupe
        label: First
      - slug: dupe
        label: Second
"#;
        let errors = TagRegistry::parse(yaml).expect_err("should fail");
        assert!(errors.iter().any(|e| e.message.contains("duplicate tag")));
    }

    /// The same category slug appearing twice is rejected.
    #[test]
    fn duplicate_category_slug_error() {
        let yaml = r#"
categories:
  - slug: same
    label: First
    tags:
      - slug: a
        label: A
  - slug: same
    label: Second
    tags:
      - slug: b
        label: B
"#;
        let errors = TagRegistry::parse(yaml).expect_err("should fail");
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("duplicate category"))
        );
    }

    /// The empty registry reports no tags and contains nothing.
    #[test]
    fn empty_registry() {
        let reg = TagRegistry::empty();
        assert!(reg.is_empty());
        assert_eq!(reg.len(), 0);
        assert!(!reg.contains("anything"));
    }
}