1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use rayon::prelude::*;
5
6use crate::entity::{Entity, Label};
7use crate::parser::ParseError;
8
/// Upper bound for the length of an entity file's stem (file name without
/// extension). `validate_filename` compares it against `str::len`, i.e. the
/// stem's length in bytes, and emits a warning when it is exceeded.
const MAX_FILENAME_LEN: usize = 200;
11
/// A parsed entity paired with the file path it is associated with.
#[derive(Debug)]
pub struct RegistryEntry {
    /// The parsed entity itself.
    pub entity: Entity,
    /// Path recorded for the entity's source file.
    pub path: PathBuf,
    /// Tags taken from the parsed file; `parse_entity_file` also copies them
    /// onto `entity.tags`.
    pub tags: Vec<String>,
}
19
/// In-memory collection of entities with lookup by entity name.
#[derive(Debug)]
pub struct EntityRegistry {
    /// All registered entries, in the order they were supplied or loaded.
    entries: Vec<RegistryEntry>,
    /// Maps an entity name to the position of its entry in `entries`.
    name_index: HashMap<String, usize>,
    /// Directory the registry was loaded from; `None` when the registry was
    /// built with `from_entries`.
    content_root: Option<PathBuf>,
}
31
32impl EntityRegistry {
33 pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
39 let mut entries = Vec::new();
40 let mut errors = Vec::new();
41
42 let actor_dir = root.join("people");
43 let institution_dir = root.join("organizations");
44
45 load_directory(&actor_dir, Label::Person, &mut entries, &mut errors);
46 load_directory(
47 &institution_dir,
48 Label::Organization,
49 &mut entries,
50 &mut errors,
51 );
52
53 let name_index = build_name_index(&entries, &mut errors);
55
56 if errors.iter().any(|e| e.message.starts_with("duplicate")) {
57 return Err(errors);
58 }
59
60 if errors.iter().any(|e| !e.message.starts_with("warning:")) {
64 return Err(errors);
65 }
66
67 if !errors.is_empty() {
69 for err in &errors {
70 eprintln!("{err}");
71 }
72 }
73
74 Ok(Self {
75 entries,
76 name_index,
77 content_root: Some(root.to_path_buf()),
78 })
79 }
80
81 pub fn from_entries(entries: Vec<RegistryEntry>) -> Result<Self, Vec<ParseError>> {
83 let mut errors = Vec::new();
84 let name_index = build_name_index(&entries, &mut errors);
85
86 let has_errors = errors.iter().any(|e| !e.message.starts_with("warning:"));
87 if has_errors {
88 return Err(errors);
89 }
90
91 Ok(Self {
92 entries,
93 name_index,
94 content_root: None,
95 })
96 }
97
98 pub fn get_by_name(&self, name: &str) -> Option<&RegistryEntry> {
100 self.name_index.get(name).map(|&idx| &self.entries[idx])
101 }
102
103 pub fn len(&self) -> usize {
105 self.entries.len()
106 }
107
108 pub fn is_empty(&self) -> bool {
110 self.entries.is_empty()
111 }
112
113 pub fn names(&self) -> Vec<&str> {
115 self.entries
116 .iter()
117 .map(|e| e.entity.name.as_str())
118 .collect()
119 }
120
121 pub fn entries(&self) -> &[RegistryEntry] {
123 &self.entries
124 }
125
126 pub fn slug_for(&self, entry: &RegistryEntry) -> Option<String> {
129 let root = self.content_root.as_ref()?;
130 path_to_slug(&entry.path, root)
131 }
132
133 pub fn content_root(&self) -> Option<&Path> {
135 self.content_root.as_deref()
136 }
137
138 pub fn check_filenames(&self) -> Vec<ParseError> {
142 let mut warnings = Vec::new();
143 for entry in &self.entries {
144 validate_filename(&entry.path, &entry.entity, &mut warnings);
145 }
146 warnings
147 }
148}
149
/// Derives a slug from `path` by making it relative to `content_root` and
/// dropping a trailing `.md`, if present.
///
/// Returns `None` when `path` does not start with `content_root` or when the
/// relative part is not valid UTF-8.
pub fn path_to_slug(path: &Path, content_root: &Path) -> Option<String> {
    let text = path.strip_prefix(content_root).ok()?.to_str()?;
    let slug = match text.strip_suffix(".md") {
        Some(without_ext) => without_ext,
        None => text,
    };
    Some(slug.to_owned())
}
157
158fn load_directory(
162 dir: &Path,
163 label: Label,
164 entries: &mut Vec<RegistryEntry>,
165 errors: &mut Vec<ParseError>,
166) {
167 let mut paths = Vec::new();
168 collect_md_files(dir, &mut paths, 0);
169
170 paths.sort();
172
173 let results: Vec<ParseResult> = paths
175 .par_iter()
176 .map(|path| parse_entity_file(path, label))
177 .collect();
178
179 for result in results {
181 if let Some(entry) = result.entry {
182 entries.push(entry);
183 }
184 errors.extend(result.errors);
185 }
186}
187
/// Recursively gathers the paths of files with the extension `md` below `dir`
/// into `paths`.
///
/// Directory entries are visited in file-name order. Recursion stops once
/// `depth` exceeds the internal limit of 2, so with a starting depth of 0 the
/// directory itself and two levels of sub-directories are scanned.
/// Directories and entries that cannot be read are skipped silently.
fn collect_md_files(dir: &Path, paths: &mut Vec<PathBuf>, depth: usize) {
    const MAX_DEPTH: usize = 2;
    if depth > MAX_DEPTH {
        return;
    }

    let mut children: Vec<std::fs::DirEntry> = match std::fs::read_dir(dir) {
        Ok(listing) => listing.flatten().collect(),
        Err(_) => return,
    };
    children.sort_by_cached_key(std::fs::DirEntry::file_name);

    for child in children {
        let child_path = child.path();
        if child_path.is_dir() {
            collect_md_files(&child_path, paths, depth + 1);
            continue;
        }

        let is_markdown = matches!(
            child_path.extension().and_then(|ext| ext.to_str()),
            Some("md")
        );
        if is_markdown {
            paths.push(child_path);
        }
    }
}
212
/// Outcome of parsing a single entity file.
struct ParseResult {
    /// The parsed entry, or `None` when the file could not be read or parsed.
    entry: Option<RegistryEntry>,
    /// Diagnostics produced for the file; each message includes the file path.
    errors: Vec<ParseError>,
}
218
219fn parse_entity_file(path: &Path, label: Label) -> ParseResult {
221 let content = match std::fs::read_to_string(path) {
222 Ok(c) => c,
223 Err(e) => {
224 return ParseResult {
225 entry: None,
226 errors: vec![ParseError {
227 line: 0,
228 message: format!("{}: error reading file: {e}", path.display()),
229 }],
230 };
231 }
232 };
233
234 let parsed = match crate::parser::parse_entity_file(&content) {
235 Ok(p) => p,
236 Err(parse_errors) => {
237 return ParseResult {
238 entry: None,
239 errors: parse_errors
240 .into_iter()
241 .map(|err| ParseError {
242 line: err.line,
243 message: format!("{}: {}", path.display(), err.message),
244 })
245 .collect(),
246 };
247 }
248 };
249
250 let mut field_errors = Vec::new();
251 let mut entity = crate::entity::parse_entity_file_body(
252 &parsed.name,
253 &parsed.body,
254 label,
255 parsed.id,
256 parsed.title_line,
257 &mut field_errors,
258 );
259 entity.tags.clone_from(&parsed.tags);
260
261 let mut errors: Vec<ParseError> = field_errors
262 .into_iter()
263 .map(|err| ParseError {
264 line: err.line,
265 message: format!("{}: {}", path.display(), err.message),
266 })
267 .collect();
268
269 validate_filename(path, &entity, &mut errors);
271
272 ParseResult {
273 entry: Some(RegistryEntry {
274 entity,
275 path: path.to_path_buf(),
276 tags: parsed.tags,
277 }),
278 errors,
279 }
280}
281
282fn build_name_index(
284 entries: &[RegistryEntry],
285 errors: &mut Vec<ParseError>,
286) -> HashMap<String, usize> {
287 let mut index = HashMap::new();
288
289 for (i, entry) in entries.iter().enumerate() {
290 let name = &entry.entity.name;
291 if let Some(&existing_idx) = index.get(name.as_str()) {
292 let existing: &RegistryEntry = &entries[existing_idx];
293 errors.push(ParseError {
294 line: entry.entity.line,
295 message: format!(
296 "duplicate entity name {name:?} in {} (first defined in {})",
297 entry.path.display(),
298 existing.path.display(),
299 ),
300 });
301 } else {
302 index.insert(name.clone(), i);
303 }
304 }
305
306 index
307}
308
309fn validate_filename(path: &Path, entity: &Entity, errors: &mut Vec<ParseError>) {
312 let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
313 return;
314 };
315
316 if stem.len() > MAX_FILENAME_LEN {
317 errors.push(ParseError {
318 line: 0,
319 message: format!(
320 "warning: {}: filename stem exceeds {MAX_FILENAME_LEN} chars",
321 path.display()
322 ),
323 });
324 }
325
326 let expected_name = to_kebab_case(&entity.name);
327 let qualifier = entity
328 .fields
329 .iter()
330 .find(|(k, _)| k == "qualifier")
331 .and_then(|(_, v)| match v {
332 crate::entity::FieldValue::Single(s) => Some(s.as_str()),
333 crate::entity::FieldValue::List(_) => None,
334 });
335
336 let expected_stem = match qualifier {
337 Some(q) => format!("{expected_name}--{}", to_kebab_case(q)),
338 None => expected_name,
339 };
340
341 if stem != expected_stem {
342 errors.push(ParseError {
343 line: 0,
344 message: format!(
345 "warning: {}: filename {stem:?} doesn't match expected {expected_stem:?}",
346 path.display()
347 ),
348 });
349 }
350}
351
/// Converts `s` to kebab case: alphanumeric characters are kept (ASCII
/// letters lower-cased), and every run of other characters becomes a single
/// `-`. Leading and trailing separators are dropped.
///
/// NOTE(review): only ASCII letters are lower-cased; non-ASCII uppercase
/// letters pass through unchanged — confirm this is intended.
fn to_kebab_case(s: &str) -> String {
    let mut kebab = String::with_capacity(s.len());
    let mut separator_pending = false;

    for ch in s.chars() {
        if !ch.is_alphanumeric() {
            separator_pending = true;
            continue;
        }

        // Emit a dash only between two words, never at the very start.
        if separator_pending && !kebab.is_empty() {
            kebab.push('-');
        }
        separator_pending = false;
        kebab.push(ch.to_ascii_lowercase());
    }

    kebab
}
368
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::{Entity, FieldValue, Label};

    /// Builds an entity with the given name, label and fields; everything
    /// else is left at a neutral value.
    fn make_entity(name: &str, label: Label, fields: Vec<(String, FieldValue)>) -> Entity {
        Entity {
            name: name.to_string(),
            label,
            fields,
            id: None,
            line: 1,
            tags: Vec::new(),
            slug: None,
        }
    }

    /// Builds a field-less registry entry stored at `path`.
    fn make_entry(name: &str, label: Label, path: &str) -> RegistryEntry {
        RegistryEntry {
            entity: make_entity(name, label, Vec::new()),
            path: PathBuf::from(path),
            tags: Vec::new(),
        }
    }

    #[test]
    fn registry_from_entries_lookup() {
        let registry = EntityRegistry::from_entries(vec![
            make_entry("Alice", Label::Person, "people/alice.md"),
            make_entry("Corp Inc", Label::Organization, "organizations/corp-inc.md"),
        ])
        .unwrap();

        assert_eq!(registry.len(), 2);
        for known in ["Alice", "Corp Inc"] {
            assert!(registry.get_by_name(known).is_some());
        }
        assert!(registry.get_by_name("Bob").is_none());
    }

    #[test]
    fn registry_detects_duplicate_names() {
        let clashing = vec![
            make_entry("Alice", Label::Person, "people/alice-a.md"),
            make_entry("Alice", Label::Person, "people/alice-b.md"),
        ];

        let errors = EntityRegistry::from_entries(clashing).unwrap_err();
        assert!(errors.iter().any(|e| e.message.contains("duplicate")));
    }

    #[test]
    fn registry_names_list() {
        let registry = EntityRegistry::from_entries(vec![
            make_entry("Alice", Label::Person, "people/alice.md"),
            make_entry("Bob", Label::Person, "people/bob.md"),
        ])
        .unwrap();

        let names = registry.names();
        for expected in ["Alice", "Bob"] {
            assert!(names.contains(&expected));
        }
    }

    #[test]
    fn to_kebab_case_conversion() {
        let cases = [
            ("Mark Bonnick", "mark-bonnick"),
            ("Arsenal FC", "arsenal-fc"),
            ("English Football Club", "english-football-club"),
            ("Bob", "bob"),
        ];
        for (input, expected) in cases {
            assert_eq!(to_kebab_case(input), expected);
        }
    }

    #[test]
    fn validate_filename_matching() {
        let qualifier_field = (
            "qualifier".to_string(),
            FieldValue::Single("Arsenal Kit Manager".to_string()),
        );
        let entity = make_entity("Mark Bonnick", Label::Person, vec![qualifier_field]);

        let mut errors = Vec::new();

        // A stem made of name and qualifier is accepted without diagnostics.
        let matching = Path::new("people/mark-bonnick--arsenal-kit-manager.md");
        validate_filename(matching, &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");

        // Any other stem produces a warning.
        let mismatching = Path::new("people/wrong-name.md");
        validate_filename(mismatching, &entity, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("warning:")));
    }

    #[test]
    fn validate_filename_no_qualifier() {
        let entity = make_entity("Bob", Label::Person, Vec::new());

        let mut errors = Vec::new();
        validate_filename(Path::new("people/bob.md"), &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    #[test]
    fn empty_registry() {
        let registry = EntityRegistry::from_entries(Vec::new()).unwrap();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(registry.get_by_name("anything").is_none());
    }
}