//! `weave_content/tags.rs`: tag registry loading and validation.

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use serde::Deserialize;
5
6use crate::parser::ParseError;
7
/// Maximum number of categories in the tag registry.
///
/// `TagRegistry::parse` reports an error when `tags.yaml` declares more.
const MAX_CATEGORIES: usize = 10;

/// Maximum tags per category.
const MAX_TAGS_PER_CATEGORY: usize = 30;

/// Maximum total tags in registry, summed over all categories.
///
/// This is lower than `MAX_CATEGORIES * MAX_TAGS_PER_CATEGORY` (300), so it
/// can be exceeded even when every per-category limit is respected.
const MAX_TOTAL_TAGS: usize = 200;

/// Maximum slug length; applied to both category slugs and tag slugs.
const MAX_SLUG_LEN: usize = 30;

/// Maximum label length; applied to both category labels and tag labels.
const MAX_LABEL_LEN: usize = 60;
22
/// YAML schema for the tag registry file.
///
/// The top level of `tags.yaml` is expected to provide a `categories` list.
#[derive(Deserialize)]
struct TagRegistryFile {
    /// Categories, in the order they appear in the file.
    categories: Vec<CategoryEntry>,
}
28
/// A tag category in the YAML file.
#[derive(Deserialize)]
struct CategoryEntry {
    /// Category identifier; checked by `validate_slug` and required to be
    /// unique among categories.
    slug: String,
    /// Category display text; checked by `validate_label`.
    label: String,
    /// Tags declared under this category, in file order.
    tags: Vec<TagEntry>,
}
36
/// A single tag in the YAML file.
#[derive(Deserialize)]
struct TagEntry {
    /// Tag identifier; checked by `validate_slug` and required to be unique
    /// across the whole file, not just within its category.
    slug: String,
    /// Tag display text; checked by `validate_label`.
    label: String,
}
43
/// A loaded tag with its category.
///
/// The category's slug and label are copied onto every tag, so a `Tag` can be
/// used without a second lookup into the registry.
#[derive(Debug, Clone)]
pub struct Tag {
    /// Identifier of the tag, as written in `tags.yaml`.
    pub slug: String,
    /// Display text of the tag.
    pub label: String,
    /// Slug of the category the tag was declared under.
    pub category_slug: String,
    /// Label of the category the tag was declared under.
    pub category_label: String,
}
52
/// Tag registry: holds all valid tags loaded from `tags.yaml`.
/// Provides slug-based lookup for validation.
#[derive(Debug)]
pub struct TagRegistry {
    /// Every tag, in file order: categories in declaration order, and within
    /// each category its tags in declaration order.
    tags: Vec<Tag>,
    /// Slug → index into `tags`.
    slug_index: HashMap<String, usize>,
    /// Category slugs for iteration, in declaration order.
    category_slugs: Vec<String>,
}
63
64impl TagRegistry {
65    /// Load a tag registry from `{root}/tags.yaml`.
66    /// Returns an empty registry if the file doesn't exist.
67    pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
68        let path = root.join("tags.yaml");
69        if !path.exists() {
70            return Ok(Self::empty());
71        }
72
73        let content = std::fs::read_to_string(&path).map_err(|e| {
74            vec![ParseError {
75                line: 0,
76                message: format!("{}: error reading file: {e}", path.display()),
77            }]
78        })?;
79
80        Self::parse(&content)
81    }
82
83    /// Parse a tag registry from YAML content.
84    pub fn parse(content: &str) -> Result<Self, Vec<ParseError>> {
85        let file: TagRegistryFile = serde_yaml::from_str(content).map_err(|e| {
86            vec![ParseError {
87                line: 0,
88                message: format!("tags.yaml: invalid YAML: {e}"),
89            }]
90        })?;
91
92        let mut errors = Vec::new();
93
94        if file.categories.len() > MAX_CATEGORIES {
95            errors.push(ParseError {
96                line: 0,
97                message: format!(
98                    "tags.yaml: too many categories ({}, max {MAX_CATEGORIES})",
99                    file.categories.len()
100                ),
101            });
102        }
103
104        let mut tags = Vec::new();
105        let mut slug_index = HashMap::new();
106        let mut category_slugs = Vec::new();
107        let mut seen_category_slugs: HashSet<String> = HashSet::new();
108        let mut seen_tag_slugs: HashSet<String> = HashSet::new();
109
110        for cat in &file.categories {
111            validate_slug(&cat.slug, "category", &mut errors);
112            validate_label(&cat.label, "category", &mut errors);
113
114            if !seen_category_slugs.insert(cat.slug.clone()) {
115                errors.push(ParseError {
116                    line: 0,
117                    message: format!("tags.yaml: duplicate category slug {:?}", cat.slug),
118                });
119            }
120
121            if cat.tags.len() > MAX_TAGS_PER_CATEGORY {
122                errors.push(ParseError {
123                    line: 0,
124                    message: format!(
125                        "tags.yaml: category {:?} has too many tags ({}, max {MAX_TAGS_PER_CATEGORY})",
126                        cat.slug,
127                        cat.tags.len()
128                    ),
129                });
130            }
131
132            category_slugs.push(cat.slug.clone());
133
134            for tag in &cat.tags {
135                validate_slug(&tag.slug, "tag", &mut errors);
136                validate_label(&tag.label, "tag", &mut errors);
137
138                if !seen_tag_slugs.insert(tag.slug.clone()) {
139                    errors.push(ParseError {
140                        line: 0,
141                        message: format!("tags.yaml: duplicate tag slug {:?}", tag.slug),
142                    });
143                }
144
145                let idx = tags.len();
146                slug_index.insert(tag.slug.clone(), idx);
147                tags.push(Tag {
148                    slug: tag.slug.clone(),
149                    label: tag.label.clone(),
150                    category_slug: cat.slug.clone(),
151                    category_label: cat.label.clone(),
152                });
153            }
154        }
155
156        if tags.len() > MAX_TOTAL_TAGS {
157            errors.push(ParseError {
158                line: 0,
159                message: format!(
160                    "tags.yaml: too many total tags ({}, max {MAX_TOTAL_TAGS})",
161                    tags.len()
162                ),
163            });
164        }
165
166        if !errors.is_empty() {
167            return Err(errors);
168        }
169
170        Ok(Self {
171            tags,
172            slug_index,
173            category_slugs,
174        })
175    }
176
177    /// Create an empty tag registry (no tags.yaml).
178    pub fn empty() -> Self {
179        Self {
180            tags: Vec::new(),
181            slug_index: HashMap::new(),
182            category_slugs: Vec::new(),
183        }
184    }
185
186    /// Look up a tag by slug. Returns None if not found.
187    pub fn get(&self, slug: &str) -> Option<&Tag> {
188        self.slug_index.get(slug).map(|&idx| &self.tags[idx])
189    }
190
191    /// Check if a tag slug is valid (exists in the registry).
192    pub fn contains(&self, slug: &str) -> bool {
193        self.slug_index.contains_key(slug)
194    }
195
196    /// Number of tags in the registry.
197    pub fn len(&self) -> usize {
198        self.tags.len()
199    }
200
201    /// Whether the registry is empty (no tags.yaml or no tags).
202    pub fn is_empty(&self) -> bool {
203        self.tags.is_empty()
204    }
205
206    /// All tags.
207    pub fn tags(&self) -> &[Tag] {
208        &self.tags
209    }
210
211    /// All category slugs.
212    pub fn category_slugs(&self) -> &[String] {
213        &self.category_slugs
214    }
215
216    /// Validate a list of tags against the registry.
217    /// Returns errors for unknown tags.
218    pub fn validate_tags(&self, tags: &[String], line: usize) -> Vec<ParseError> {
219        let mut errors = Vec::new();
220
221        if self.is_empty() {
222            // No registry loaded -- skip validation
223            return errors;
224        }
225
226        for tag in tags {
227            if !self.contains(tag) {
228                errors.push(ParseError {
229                    line,
230                    message: format!("unknown tag {tag:?} (not in tags.yaml registry)"),
231                });
232            }
233        }
234
235        errors
236    }
237}
238
239fn validate_slug(slug: &str, kind: &str, errors: &mut Vec<ParseError>) {
240    if slug.len() > MAX_SLUG_LEN {
241        errors.push(ParseError {
242            line: 0,
243            message: format!("tags.yaml: {kind} slug {slug:?} exceeds {MAX_SLUG_LEN} chars"),
244        });
245    }
246    if slug.is_empty() {
247        errors.push(ParseError {
248            line: 0,
249            message: format!("tags.yaml: {kind} slug must not be empty"),
250        });
251    }
252    if !is_kebab_case(slug) {
253        errors.push(ParseError {
254            line: 0,
255            message: format!("tags.yaml: {kind} slug {slug:?} must be kebab-case"),
256        });
257    }
258}
259
260fn validate_label(label: &str, kind: &str, errors: &mut Vec<ParseError>) {
261    if label.len() > MAX_LABEL_LEN {
262        errors.push(ParseError {
263            line: 0,
264            message: format!("tags.yaml: {kind} label {label:?} exceeds {MAX_LABEL_LEN} chars"),
265        });
266    }
267    if label.is_empty() {
268        errors.push(ParseError {
269            line: 0,
270            message: format!("tags.yaml: {kind} label must not be empty"),
271        });
272    }
273}
274
/// Check whether `s` is valid kebab-case, i.e. matches `[a-z0-9]+(-[a-z0-9]+)*`:
/// one or more non-empty runs of ASCII lowercase letters or digits, joined by
/// single hyphens.
fn is_kebab_case(s: &str) -> bool {
    // Splitting on '-' yields an empty segment exactly when the input is
    // empty, starts or ends with a hyphen, or contains a doubled hyphen.
    s.split('-').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit())
    })
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed registry: two categories holding three tags in total.
    const VALID_YAML: &str = r#"
categories:
  - slug: crime-type
    label: Crime Type
    tags:
      - slug: bribery
        label: Bribery
      - slug: fraud
        label: Fraud
  - slug: sector
    label: Sector
    tags:
      - slug: government
        label: Government
"#;

    /// Parse the shared fixture, failing the test if it is rejected.
    fn valid_registry() -> TagRegistry {
        TagRegistry::parse(VALID_YAML).expect("should parse")
    }

    /// Turn string literals into the owned list `validate_tags` expects.
    fn owned(slugs: &[&str]) -> Vec<String> {
        slugs.iter().map(|slug| (*slug).to_string()).collect()
    }

    #[test]
    fn parse_valid_registry() {
        let registry = valid_registry();

        assert_eq!(registry.len(), 3);
        assert_eq!(registry.category_slugs().len(), 2);
        for known in ["bribery", "fraud", "government"] {
            assert!(registry.contains(known));
        }
        assert!(!registry.contains("unknown"));

        let bribery = registry.get("bribery").expect("should exist");
        assert_eq!(bribery.label, "Bribery");
        assert_eq!(bribery.category_slug, "crime-type");
    }

    #[test]
    fn validate_tags_known() {
        let requested = owned(&["bribery", "government"]);
        let problems = valid_registry().validate_tags(&requested, 5);
        assert!(problems.is_empty());
    }

    #[test]
    fn validate_tags_unknown() {
        let requested = owned(&["bribery", "unknown"]);
        let problems = valid_registry().validate_tags(&requested, 5);
        assert_eq!(problems.len(), 1);
        assert!(problems[0].message.contains("unknown"));
    }

    #[test]
    fn validate_tags_empty_registry() {
        let requested = owned(&["anything"]);
        let problems = TagRegistry::empty().validate_tags(&requested, 5);
        assert!(problems.is_empty(), "empty registry should skip validation");
    }

    #[test]
    fn duplicate_tag_slug_error() {
        let yaml = r#"
categories:
  - slug: a
    label: A
    tags:
      - slug: dupe
        label: First
      - slug: dupe
        label: Second
"#;
        let problems = TagRegistry::parse(yaml).expect_err("should fail");
        let reported = problems
            .iter()
            .any(|problem| problem.message.contains("duplicate tag"));
        assert!(reported);
    }

    #[test]
    fn duplicate_category_slug_error() {
        let yaml = r#"
categories:
  - slug: same
    label: First
    tags:
      - slug: a
        label: A
  - slug: same
    label: Second
    tags:
      - slug: b
        label: B
"#;
        let problems = TagRegistry::parse(yaml).expect_err("should fail");
        let reported = problems
            .iter()
            .any(|problem| problem.message.contains("duplicate category"));
        assert!(reported);
    }

    #[test]
    fn empty_registry() {
        let registry = TagRegistry::empty();
        assert_eq!(registry.len(), 0);
        assert!(registry.is_empty());
        assert!(!registry.contains("anything"));
    }
}