Skip to main content

weave_content/
registry.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use crate::entity::{Entity, Label};
5use crate::parser::ParseError;
6
/// Maximum length of an entity filename stem (the file name without `.md`).
///
/// `validate_filename` compares this against `str::len()` of the stem, which
/// counts UTF-8 bytes; a longer stem only produces a `warning:` diagnostic.
const MAX_FILENAME_LEN: usize = 200;
9
/// A loaded entity with its source file path.
///
/// One entry is created per successfully parsed entity file; entries can also
/// be constructed directly and handed to `EntityRegistry::from_entries`.
#[derive(Debug)]
pub struct RegistryEntry {
    /// The parsed entity (name, label, fields, ...).
    pub entity: Entity,
    /// Path associated with the entity. When loaded from disk this is the
    /// path of the `.md` file the entity was read from; it is also used in
    /// duplicate-name diagnostics.
    pub path: PathBuf,
}
16
/// Entity registry: holds all shared entities loaded from `actors/` and
/// `institutions/` directories. Provides name-based lookup for cross-file
/// resolution.
///
/// Construction fails when two entries share the same name, so every name in
/// a successfully built registry maps to exactly one entry.
#[derive(Debug)]
pub struct EntityRegistry {
    /// All entries, in the order they were loaded or supplied.
    entries: Vec<RegistryEntry>,
    /// Name → index into `entries`. Names are case-sensitive.
    /// Each name points at the first entry that carried it.
    name_index: HashMap<String, usize>,
}
26
27impl EntityRegistry {
28    /// Build a registry from a content root directory.
29    ///
30    /// Scans `{root}/actors/*.md` and `{root}/institutions/*.md`, parses each
31    /// file, validates for duplicates and filename mismatches.
32    pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
33        let mut entries = Vec::new();
34        let mut errors = Vec::new();
35
36        let actor_dir = root.join("actors");
37        let institution_dir = root.join("institutions");
38
39        load_directory(&actor_dir, Label::Actor, &mut entries, &mut errors);
40        load_directory(
41            &institution_dir,
42            Label::Institution,
43            &mut entries,
44            &mut errors,
45        );
46
47        // Build name index and detect duplicates
48        let name_index = build_name_index(&entries, &mut errors);
49
50        if errors.iter().any(|e| e.message.starts_with("duplicate")) {
51            return Err(errors);
52        }
53
54        // Filename mismatch warnings are non-fatal, report via errors but don't fail
55        // (caller can filter by message prefix if needed)
56
57        if errors.iter().any(|e| !e.message.starts_with("warning:")) {
58            return Err(errors);
59        }
60
61        // Warnings only -- attach them but succeed
62        if !errors.is_empty() {
63            for err in &errors {
64                eprintln!("{err}");
65            }
66        }
67
68        Ok(Self {
69            entries,
70            name_index,
71        })
72    }
73
74    /// Build a registry from pre-parsed entries.
75    pub fn from_entries(entries: Vec<RegistryEntry>) -> Result<Self, Vec<ParseError>> {
76        let mut errors = Vec::new();
77        let name_index = build_name_index(&entries, &mut errors);
78
79        let has_errors = errors.iter().any(|e| !e.message.starts_with("warning:"));
80        if has_errors {
81            return Err(errors);
82        }
83
84        Ok(Self {
85            entries,
86            name_index,
87        })
88    }
89
90    /// Look up an entity by name. Returns None if not found.
91    pub fn get_by_name(&self, name: &str) -> Option<&RegistryEntry> {
92        self.name_index.get(name).map(|&idx| &self.entries[idx])
93    }
94
95    /// Number of entities in the registry.
96    pub fn len(&self) -> usize {
97        self.entries.len()
98    }
99
100    /// Whether the registry is empty.
101    pub fn is_empty(&self) -> bool {
102        self.entries.is_empty()
103    }
104
105    /// All entity names in the registry.
106    pub fn names(&self) -> Vec<&str> {
107        self.entries
108            .iter()
109            .map(|e| e.entity.name.as_str())
110            .collect()
111    }
112}
113
114/// Load all `.md` files from a directory, parsing each as an entity file.
115fn load_directory(
116    dir: &Path,
117    label: Label,
118    entries: &mut Vec<RegistryEntry>,
119    errors: &mut Vec<ParseError>,
120) {
121    let Ok(read_dir) = std::fs::read_dir(dir) else {
122        // Directory doesn't exist -- not an error (may have no actors or no institutions)
123        return;
124    };
125
126    let mut paths: Vec<PathBuf> = read_dir
127        .filter_map(|entry| {
128            let entry = entry.ok()?;
129            let path = entry.path();
130            if path.extension().and_then(|e| e.to_str()) == Some("md") {
131                Some(path)
132            } else {
133                None
134            }
135        })
136        .collect();
137
138    // Sort for deterministic ordering
139    paths.sort();
140
141    for path in paths {
142        load_entity_file(&path, label, entries, errors);
143    }
144}
145
146/// Load and parse a single entity file.
147fn load_entity_file(
148    path: &Path,
149    label: Label,
150    entries: &mut Vec<RegistryEntry>,
151    errors: &mut Vec<ParseError>,
152) {
153    let content = match std::fs::read_to_string(path) {
154        Ok(c) => c,
155        Err(e) => {
156            errors.push(ParseError {
157                line: 0,
158                message: format!("{}: error reading file: {e}", path.display()),
159            });
160            return;
161        }
162    };
163
164    let parsed = match crate::parser::parse_entity_file(&content) {
165        Ok(p) => p,
166        Err(parse_errors) => {
167            for err in parse_errors {
168                errors.push(ParseError {
169                    line: err.line,
170                    message: format!("{}: {}", path.display(), err.message),
171                });
172            }
173            return;
174        }
175    };
176
177    let mut field_errors = Vec::new();
178    let entity = crate::entity::parse_entity_file_body(
179        &parsed.name,
180        &parsed.body,
181        label,
182        parsed.id,
183        parsed.title_line,
184        &mut field_errors,
185    );
186
187    for err in field_errors {
188        errors.push(ParseError {
189            line: err.line,
190            message: format!("{}: {}", path.display(), err.message),
191        });
192    }
193
194    // Validate filename matches content
195    validate_filename(path, &entity, errors);
196
197    entries.push(RegistryEntry {
198        entity,
199        path: path.to_path_buf(),
200    });
201}
202
203/// Build name → index map, detecting duplicate names.
204fn build_name_index(
205    entries: &[RegistryEntry],
206    errors: &mut Vec<ParseError>,
207) -> HashMap<String, usize> {
208    let mut index = HashMap::new();
209
210    for (i, entry) in entries.iter().enumerate() {
211        let name = &entry.entity.name;
212        if let Some(&existing_idx) = index.get(name.as_str()) {
213            let existing: &RegistryEntry = &entries[existing_idx];
214            errors.push(ParseError {
215                line: entry.entity.line,
216                message: format!(
217                    "duplicate entity name {name:?} in {} (first defined in {})",
218                    entry.path.display(),
219                    existing.path.display(),
220                ),
221            });
222        } else {
223            index.insert(name.clone(), i);
224        }
225    }
226
227    index
228}
229
230/// Warn if entity filename doesn't match content.
231/// Expected: `<name>--<qualifier>.md` in kebab-case.
232fn validate_filename(path: &Path, entity: &Entity, errors: &mut Vec<ParseError>) {
233    let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
234        return;
235    };
236
237    if stem.len() > MAX_FILENAME_LEN {
238        errors.push(ParseError {
239            line: 0,
240            message: format!(
241                "warning: {}: filename stem exceeds {MAX_FILENAME_LEN} chars",
242                path.display()
243            ),
244        });
245    }
246
247    let expected_name = to_kebab_case(&entity.name);
248    let qualifier = entity
249        .fields
250        .iter()
251        .find(|(k, _)| k == "qualifier")
252        .and_then(|(_, v)| match v {
253            crate::entity::FieldValue::Single(s) => Some(s.as_str()),
254            crate::entity::FieldValue::List(_) => None,
255        });
256
257    let expected_stem = match qualifier {
258        Some(q) => format!("{expected_name}--{}", to_kebab_case(q)),
259        None => expected_name,
260    };
261
262    if stem != expected_stem {
263        errors.push(ParseError {
264            line: 0,
265            message: format!(
266                "warning: {}: filename {stem:?} doesn't match expected {expected_stem:?}",
267                path.display()
268            ),
269        });
270    }
271}
272
/// Convert a display name to kebab-case for filename comparison.
///
/// Alphanumeric characters are kept and lowercased using Unicode case
/// mapping, so `É` becomes `é` just as `A` becomes `a`. Every run of other
/// characters (spaces, punctuation, hyphens, ...) collapses into a single
/// `-`, and no separator is emitted at the start or end of the result.
///
/// An input without any alphanumeric character yields an empty string.
fn to_kebab_case(s: &str) -> String {
    let mut kebab = String::with_capacity(s.len());
    // Set when at least one separator character has been seen since the
    // last word character; the hyphen is written lazily so that trailing
    // separators never reach the output.
    let mut separator_pending = false;

    for c in s.chars() {
        if !c.is_alphanumeric() {
            separator_pending = true;
            continue;
        }

        if separator_pending && !kebab.is_empty() {
            kebab.push('-');
        }
        separator_pending = false;

        // `to_lowercase` handles non-ASCII letters and may expand to more
        // than one character, hence `extend` instead of `push`.
        kebab.extend(c.to_lowercase());
    }

    kebab
}
289
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::{Entity, FieldValue, Label};

    /// Build a bare registry entry: no fields, no id, defined on line 1.
    fn make_entry(name: &str, label: Label, path: &str) -> RegistryEntry {
        let entity = Entity {
            name: name.to_string(),
            label,
            fields: Vec::new(),
            id: None,
            line: 1,
        };
        RegistryEntry {
            entity,
            path: PathBuf::from(path),
        }
    }

    /// Distinct names load fine and are found by exact name only.
    #[test]
    fn registry_from_entries_lookup() {
        let alice = make_entry("Alice", Label::Actor, "actors/alice.md");
        let corp = make_entry("Corp Inc", Label::Institution, "institutions/corp-inc.md");

        let registry = EntityRegistry::from_entries(vec![alice, corp]).unwrap();

        assert_eq!(registry.len(), 2);
        for known in ["Alice", "Corp Inc"] {
            assert!(registry.get_by_name(known).is_some());
        }
        assert!(registry.get_by_name("Bob").is_none());
    }

    /// Two entries with the same name make construction fail.
    #[test]
    fn registry_detects_duplicate_names() {
        let first = make_entry("Alice", Label::Actor, "actors/alice-a.md");
        let second = make_entry("Alice", Label::Actor, "actors/alice-b.md");

        let errors = EntityRegistry::from_entries(vec![first, second]).unwrap_err();

        let mentions_duplicate = errors.iter().any(|e| e.message.contains("duplicate"));
        assert!(mentions_duplicate);
    }

    /// `names()` reports every registered entity.
    #[test]
    fn registry_names_list() {
        let registry = EntityRegistry::from_entries(vec![
            make_entry("Alice", Label::Actor, "actors/alice.md"),
            make_entry("Bob", Label::Actor, "actors/bob.md"),
        ])
        .unwrap();

        let names = registry.names();
        for expected in ["Alice", "Bob"] {
            assert!(names.contains(&expected));
        }
    }

    /// Display names map onto lowercase, hyphen-separated stems.
    #[test]
    fn to_kebab_case_conversion() {
        let cases = [
            ("Mark Bonnick", "mark-bonnick"),
            ("Arsenal FC", "arsenal-fc"),
            ("English Football Club", "english-football-club"),
            ("Bob", "bob"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_kebab_case(input), expected);
        }
    }

    /// A `<name>--<qualifier>` stem passes; anything else yields a warning.
    #[test]
    fn validate_filename_matching() {
        let qualifier = FieldValue::Single("Arsenal Kit Manager".to_string());
        let entity = Entity {
            name: "Mark Bonnick".to_string(),
            label: Label::Actor,
            fields: vec![("qualifier".to_string(), qualifier)],
            id: None,
            line: 1,
        };
        let mut errors = Vec::new();

        // Stem agrees with name and qualifier: nothing is reported.
        let matching = Path::new("actors/mark-bonnick--arsenal-kit-manager.md");
        validate_filename(matching, &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");

        // Stem disagrees: a warning shows up.
        let mismatching = Path::new("actors/wrong-name.md");
        validate_filename(mismatching, &entity, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("warning:")));
    }

    /// Without a qualifier the stem is just the kebab-cased name.
    #[test]
    fn validate_filename_no_qualifier() {
        let entity = Entity {
            name: "Bob".to_string(),
            label: Label::Actor,
            fields: Vec::new(),
            id: None,
            line: 1,
        };
        let mut errors = Vec::new();

        validate_filename(Path::new("actors/bob.md"), &entity, &mut errors);

        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    /// An empty entry list produces an empty, queryable registry.
    #[test]
    fn empty_registry() {
        let registry = EntityRegistry::from_entries(Vec::new()).unwrap();

        assert_eq!(registry.len(), 0);
        assert!(registry.is_empty());
        assert!(registry.get_by_name("anything").is_none());
    }
}
407}