1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use crate::entity::{Entity, Label};
5use crate::parser::ParseError;
6
/// Longest filename stem (the file name without its extension) that
/// `validate_filename` accepts without emitting a length warning.
const MAX_FILENAME_LEN: usize = 200;
9
10#[derive(Debug)]
12pub struct RegistryEntry {
13 pub entity: Entity,
14 pub path: PathBuf,
15}
16
17#[derive(Debug)]
21pub struct EntityRegistry {
22 entries: Vec<RegistryEntry>,
23 name_index: HashMap<String, usize>,
25}
26
27impl EntityRegistry {
28 pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
33 let mut entries = Vec::new();
34 let mut errors = Vec::new();
35
36 let actor_dir = root.join("actors");
37 let institution_dir = root.join("institutions");
38
39 load_directory(&actor_dir, Label::Actor, &mut entries, &mut errors);
40 load_directory(
41 &institution_dir,
42 Label::Institution,
43 &mut entries,
44 &mut errors,
45 );
46
47 let name_index = build_name_index(&entries, &mut errors);
49
50 if errors.iter().any(|e| e.message.starts_with("duplicate")) {
51 return Err(errors);
52 }
53
54 if errors.iter().any(|e| !e.message.starts_with("warning:")) {
58 return Err(errors);
59 }
60
61 if !errors.is_empty() {
63 for err in &errors {
64 eprintln!("{err}");
65 }
66 }
67
68 Ok(Self {
69 entries,
70 name_index,
71 })
72 }
73
74 pub fn from_entries(entries: Vec<RegistryEntry>) -> Result<Self, Vec<ParseError>> {
76 let mut errors = Vec::new();
77 let name_index = build_name_index(&entries, &mut errors);
78
79 let has_errors = errors.iter().any(|e| !e.message.starts_with("warning:"));
80 if has_errors {
81 return Err(errors);
82 }
83
84 Ok(Self {
85 entries,
86 name_index,
87 })
88 }
89
90 pub fn get_by_name(&self, name: &str) -> Option<&RegistryEntry> {
92 self.name_index.get(name).map(|&idx| &self.entries[idx])
93 }
94
95 pub fn len(&self) -> usize {
97 self.entries.len()
98 }
99
100 pub fn is_empty(&self) -> bool {
102 self.entries.is_empty()
103 }
104
105 pub fn names(&self) -> Vec<&str> {
107 self.entries
108 .iter()
109 .map(|e| e.entity.name.as_str())
110 .collect()
111 }
112}
113
114fn load_directory(
116 dir: &Path,
117 label: Label,
118 entries: &mut Vec<RegistryEntry>,
119 errors: &mut Vec<ParseError>,
120) {
121 let Ok(read_dir) = std::fs::read_dir(dir) else {
122 return;
124 };
125
126 let mut paths: Vec<PathBuf> = read_dir
127 .filter_map(|entry| {
128 let entry = entry.ok()?;
129 let path = entry.path();
130 if path.extension().and_then(|e| e.to_str()) == Some("md") {
131 Some(path)
132 } else {
133 None
134 }
135 })
136 .collect();
137
138 paths.sort();
140
141 for path in paths {
142 load_entity_file(&path, label, entries, errors);
143 }
144}
145
146fn load_entity_file(
148 path: &Path,
149 label: Label,
150 entries: &mut Vec<RegistryEntry>,
151 errors: &mut Vec<ParseError>,
152) {
153 let content = match std::fs::read_to_string(path) {
154 Ok(c) => c,
155 Err(e) => {
156 errors.push(ParseError {
157 line: 0,
158 message: format!("{}: error reading file: {e}", path.display()),
159 });
160 return;
161 }
162 };
163
164 let parsed = match crate::parser::parse_entity_file(&content) {
165 Ok(p) => p,
166 Err(parse_errors) => {
167 for err in parse_errors {
168 errors.push(ParseError {
169 line: err.line,
170 message: format!("{}: {}", path.display(), err.message),
171 });
172 }
173 return;
174 }
175 };
176
177 let mut field_errors = Vec::new();
178 let entity = crate::entity::parse_entity_file_body(
179 &parsed.name,
180 &parsed.body,
181 label,
182 parsed.id,
183 parsed.title_line,
184 &mut field_errors,
185 );
186
187 for err in field_errors {
188 errors.push(ParseError {
189 line: err.line,
190 message: format!("{}: {}", path.display(), err.message),
191 });
192 }
193
194 validate_filename(path, &entity, errors);
196
197 entries.push(RegistryEntry {
198 entity,
199 path: path.to_path_buf(),
200 });
201}
202
203fn build_name_index(
205 entries: &[RegistryEntry],
206 errors: &mut Vec<ParseError>,
207) -> HashMap<String, usize> {
208 let mut index = HashMap::new();
209
210 for (i, entry) in entries.iter().enumerate() {
211 let name = &entry.entity.name;
212 if let Some(&existing_idx) = index.get(name.as_str()) {
213 let existing: &RegistryEntry = &entries[existing_idx];
214 errors.push(ParseError {
215 line: entry.entity.line,
216 message: format!(
217 "duplicate entity name {name:?} in {} (first defined in {})",
218 entry.path.display(),
219 existing.path.display(),
220 ),
221 });
222 } else {
223 index.insert(name.clone(), i);
224 }
225 }
226
227 index
228}
229
230fn validate_filename(path: &Path, entity: &Entity, errors: &mut Vec<ParseError>) {
233 let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
234 return;
235 };
236
237 if stem.len() > MAX_FILENAME_LEN {
238 errors.push(ParseError {
239 line: 0,
240 message: format!(
241 "warning: {}: filename stem exceeds {MAX_FILENAME_LEN} chars",
242 path.display()
243 ),
244 });
245 }
246
247 let expected_name = to_kebab_case(&entity.name);
248 let qualifier = entity
249 .fields
250 .iter()
251 .find(|(k, _)| k == "qualifier")
252 .and_then(|(_, v)| match v {
253 crate::entity::FieldValue::Single(s) => Some(s.as_str()),
254 crate::entity::FieldValue::List(_) => None,
255 });
256
257 let expected_stem = match qualifier {
258 Some(q) => format!("{expected_name}--{}", to_kebab_case(q)),
259 None => expected_name,
260 };
261
262 if stem != expected_stem {
263 errors.push(ParseError {
264 line: 0,
265 message: format!(
266 "warning: {}: filename {stem:?} doesn't match expected {expected_stem:?}",
267 path.display()
268 ),
269 });
270 }
271}
272
/// Converts `s` to kebab-case.
///
/// Every run of non-alphanumeric characters acts as a word separator; the
/// resulting words are joined with a single `-`, so leading, trailing and
/// repeated separators never produce extra dashes. ASCII letters are
/// lower-cased; non-ASCII alphanumerics are kept unchanged.
fn to_kebab_case(s: &str) -> String {
    let words: Vec<String> = s
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(|word| word.to_ascii_lowercase())
        .collect();
    words.join("-")
}
289
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::{Entity, FieldValue, Label};

    /// Builds an entity with the given name, label and fields, no id, and
    /// line 1.
    fn make_entity(name: &str, label: Label, fields: Vec<(String, FieldValue)>) -> Entity {
        Entity {
            name: name.to_string(),
            label,
            fields,
            id: None,
            line: 1,
        }
    }

    /// Builds a field-less registry entry located at `path`.
    fn make_entry(name: &str, label: Label, path: &str) -> RegistryEntry {
        RegistryEntry {
            entity: make_entity(name, label, Vec::new()),
            path: PathBuf::from(path),
        }
    }

    #[test]
    fn registry_from_entries_lookup() {
        let registry = EntityRegistry::from_entries(vec![
            make_entry("Alice", Label::Actor, "actors/alice.md"),
            make_entry("Corp Inc", Label::Institution, "institutions/corp-inc.md"),
        ])
        .unwrap();

        assert_eq!(registry.len(), 2);
        for known in ["Alice", "Corp Inc"] {
            assert!(registry.get_by_name(known).is_some());
        }
        assert!(registry.get_by_name("Bob").is_none());
    }

    #[test]
    fn registry_detects_duplicate_names() {
        let result = EntityRegistry::from_entries(vec![
            make_entry("Alice", Label::Actor, "actors/alice-a.md"),
            make_entry("Alice", Label::Actor, "actors/alice-b.md"),
        ]);

        let errors = result.unwrap_err();
        assert!(errors.iter().any(|e| e.message.contains("duplicate")));
    }

    #[test]
    fn registry_names_list() {
        let registry = EntityRegistry::from_entries(vec![
            make_entry("Alice", Label::Actor, "actors/alice.md"),
            make_entry("Bob", Label::Actor, "actors/bob.md"),
        ])
        .unwrap();

        let names = registry.names();
        for expected in ["Alice", "Bob"] {
            assert!(names.contains(&expected));
        }
    }

    #[test]
    fn to_kebab_case_conversion() {
        let cases = [
            ("Mark Bonnick", "mark-bonnick"),
            ("Arsenal FC", "arsenal-fc"),
            ("English Football Club", "english-football-club"),
            ("Bob", "bob"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_kebab_case(input), expected);
        }
    }

    #[test]
    fn validate_filename_matching() {
        let qualifier = (
            "qualifier".to_string(),
            FieldValue::Single("Arsenal Kit Manager".to_string()),
        );
        let entity = make_entity("Mark Bonnick", Label::Actor, vec![qualifier]);
        let mut errors = Vec::new();

        // A stem of `<name>--<qualifier>` in kebab-case is accepted silently.
        let matching = Path::new("actors/mark-bonnick--arsenal-kit-manager.md");
        validate_filename(matching, &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");

        // Any other stem produces a warning.
        let mismatching = Path::new("actors/wrong-name.md");
        validate_filename(mismatching, &entity, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("warning:")));
    }

    #[test]
    fn validate_filename_no_qualifier() {
        let entity = make_entity("Bob", Label::Actor, Vec::new());

        let mut errors = Vec::new();
        validate_filename(Path::new("actors/bob.md"), &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    #[test]
    fn empty_registry() {
        let registry = EntityRegistry::from_entries(Vec::new()).unwrap();
        assert_eq!(registry.len(), 0);
        assert!(registry.is_empty());
        assert!(registry.get_by_name("anything").is_none());
    }
}