1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use rayon::prelude::*;
5
6use crate::entity::{Entity, Label};
7use crate::parser::ParseError;
8
/// Maximum length of an entity file's stem (file name without extension)
/// before `validate_filename` emits a warning. The limit is compared against
/// `str::len`, i.e. the stem's length in UTF-8 bytes.
const MAX_FILENAME_LEN: usize = 200;
11
/// One entity held by the registry, together with where it came from.
#[derive(Debug)]
pub struct RegistryEntry {
    /// The parsed entity.
    pub entity: Entity,
    /// Path of the file this entry is associated with.
    pub path: PathBuf,
    /// Tags attached to the entry. When an entry is loaded from disk these are
    /// the same tags that are stored on `entity.tags`.
    pub tags: Vec<String>,
}
19
/// A collection of entities that can be looked up by name.
#[derive(Debug)]
pub struct EntityRegistry {
    /// All registered entries, in the order they were supplied or loaded.
    entries: Vec<RegistryEntry>,
    /// Maps an entity name to the position of its entry in `entries`.
    name_index: HashMap<String, usize>,
}
29
30impl EntityRegistry {
31 pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
37 let mut entries = Vec::new();
38 let mut errors = Vec::new();
39
40 let actor_dir = root.join("people");
41 let institution_dir = root.join("organizations");
42
43 load_directory(&actor_dir, Label::Person, &mut entries, &mut errors);
44 load_directory(
45 &institution_dir,
46 Label::Organization,
47 &mut entries,
48 &mut errors,
49 );
50
51 let name_index = build_name_index(&entries, &mut errors);
53
54 if errors.iter().any(|e| e.message.starts_with("duplicate")) {
55 return Err(errors);
56 }
57
58 if errors.iter().any(|e| !e.message.starts_with("warning:")) {
62 return Err(errors);
63 }
64
65 if !errors.is_empty() {
67 for err in &errors {
68 eprintln!("{err}");
69 }
70 }
71
72 Ok(Self {
73 entries,
74 name_index,
75 })
76 }
77
78 pub fn from_entries(entries: Vec<RegistryEntry>) -> Result<Self, Vec<ParseError>> {
80 let mut errors = Vec::new();
81 let name_index = build_name_index(&entries, &mut errors);
82
83 let has_errors = errors.iter().any(|e| !e.message.starts_with("warning:"));
84 if has_errors {
85 return Err(errors);
86 }
87
88 Ok(Self {
89 entries,
90 name_index,
91 })
92 }
93
94 pub fn get_by_name(&self, name: &str) -> Option<&RegistryEntry> {
96 self.name_index.get(name).map(|&idx| &self.entries[idx])
97 }
98
99 pub fn len(&self) -> usize {
101 self.entries.len()
102 }
103
104 pub fn is_empty(&self) -> bool {
106 self.entries.is_empty()
107 }
108
109 pub fn names(&self) -> Vec<&str> {
111 self.entries
112 .iter()
113 .map(|e| e.entity.name.as_str())
114 .collect()
115 }
116
117 pub fn entries(&self) -> &[RegistryEntry] {
119 &self.entries
120 }
121}
122
123fn load_directory(
127 dir: &Path,
128 label: Label,
129 entries: &mut Vec<RegistryEntry>,
130 errors: &mut Vec<ParseError>,
131) {
132 let mut paths = Vec::new();
133 collect_md_files(dir, &mut paths, 0);
134
135 paths.sort();
137
138 let results: Vec<ParseResult> = paths
140 .par_iter()
141 .map(|path| parse_entity_file(path, label))
142 .collect();
143
144 for result in results {
146 if let Some(entry) = result.entry {
147 entries.push(entry);
148 }
149 errors.extend(result.errors);
150 }
151}
152
/// Recursively appends the paths of all `.md` files under `dir` to `paths`.
///
/// `depth` is the nesting level of `dir` (0 for the starting directory);
/// directories nested deeper than `MAX_DEPTH` levels are not visited. Within
/// each directory, entries are visited in file-name order. Directories and
/// entries that cannot be read are skipped silently.
fn collect_md_files(dir: &Path, paths: &mut Vec<PathBuf>, depth: usize) {
    const MAX_DEPTH: usize = 2;

    if depth > MAX_DEPTH {
        return;
    }

    let mut children: Vec<std::fs::DirEntry> = match std::fs::read_dir(dir) {
        Ok(listing) => listing.filter_map(|child| child.ok()).collect(),
        Err(_) => return,
    };
    // `file_name` allocates, so compute each key only once while sorting.
    children.sort_by_cached_key(|child| child.file_name());

    for child in &children {
        let child_path = child.path();

        if child_path.is_dir() {
            collect_md_files(&child_path, paths, depth + 1);
            continue;
        }

        // The extension must be exactly `md` (case-sensitive, valid UTF-8).
        let has_md_extension = matches!(
            child_path.extension().and_then(|ext| ext.to_str()),
            Some("md")
        );
        if has_md_extension {
            paths.push(child_path);
        }
    }
}
177
/// Outcome of parsing a single entity file.
struct ParseResult {
    /// The parsed entry, or `None` when the file could not be read or parsed.
    entry: Option<RegistryEntry>,
    /// Diagnostics produced for the file; may be non-empty even when `entry`
    /// is `Some`.
    errors: Vec<ParseError>,
}
183
184fn parse_entity_file(path: &Path, label: Label) -> ParseResult {
186 let content = match std::fs::read_to_string(path) {
187 Ok(c) => c,
188 Err(e) => {
189 return ParseResult {
190 entry: None,
191 errors: vec![ParseError {
192 line: 0,
193 message: format!("{}: error reading file: {e}", path.display()),
194 }],
195 };
196 }
197 };
198
199 let parsed = match crate::parser::parse_entity_file(&content) {
200 Ok(p) => p,
201 Err(parse_errors) => {
202 return ParseResult {
203 entry: None,
204 errors: parse_errors
205 .into_iter()
206 .map(|err| ParseError {
207 line: err.line,
208 message: format!("{}: {}", path.display(), err.message),
209 })
210 .collect(),
211 };
212 }
213 };
214
215 let mut field_errors = Vec::new();
216 let mut entity = crate::entity::parse_entity_file_body(
217 &parsed.name,
218 &parsed.body,
219 label,
220 parsed.id,
221 parsed.title_line,
222 &mut field_errors,
223 );
224 entity.tags = parsed.tags.clone();
225
226 let mut errors: Vec<ParseError> = field_errors
227 .into_iter()
228 .map(|err| ParseError {
229 line: err.line,
230 message: format!("{}: {}", path.display(), err.message),
231 })
232 .collect();
233
234 validate_filename(path, &entity, &mut errors);
236
237 ParseResult {
238 entry: Some(RegistryEntry {
239 entity,
240 path: path.to_path_buf(),
241 tags: parsed.tags,
242 }),
243 errors,
244 }
245}
246
247fn build_name_index(
249 entries: &[RegistryEntry],
250 errors: &mut Vec<ParseError>,
251) -> HashMap<String, usize> {
252 let mut index = HashMap::new();
253
254 for (i, entry) in entries.iter().enumerate() {
255 let name = &entry.entity.name;
256 if let Some(&existing_idx) = index.get(name.as_str()) {
257 let existing: &RegistryEntry = &entries[existing_idx];
258 errors.push(ParseError {
259 line: entry.entity.line,
260 message: format!(
261 "duplicate entity name {name:?} in {} (first defined in {})",
262 entry.path.display(),
263 existing.path.display(),
264 ),
265 });
266 } else {
267 index.insert(name.clone(), i);
268 }
269 }
270
271 index
272}
273
274fn validate_filename(path: &Path, entity: &Entity, errors: &mut Vec<ParseError>) {
277 let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
278 return;
279 };
280
281 if stem.len() > MAX_FILENAME_LEN {
282 errors.push(ParseError {
283 line: 0,
284 message: format!(
285 "warning: {}: filename stem exceeds {MAX_FILENAME_LEN} chars",
286 path.display()
287 ),
288 });
289 }
290
291 let expected_name = to_kebab_case(&entity.name);
292 let qualifier = entity
293 .fields
294 .iter()
295 .find(|(k, _)| k == "qualifier")
296 .and_then(|(_, v)| match v {
297 crate::entity::FieldValue::Single(s) => Some(s.as_str()),
298 crate::entity::FieldValue::List(_) => None,
299 });
300
301 let expected_stem = match qualifier {
302 Some(q) => format!("{expected_name}--{}", to_kebab_case(q)),
303 None => expected_name,
304 };
305
306 if stem != expected_stem {
307 errors.push(ParseError {
308 line: 0,
309 message: format!(
310 "warning: {}: filename {stem:?} doesn't match expected {expected_stem:?}",
311 path.display()
312 ),
313 });
314 }
315}
316
/// Converts `s` to kebab-case.
///
/// Alphanumeric characters are kept, with ASCII letters lowercased
/// (non-ASCII letters are kept unchanged). Every run of other characters
/// becomes a single `-`, and leading or trailing runs are dropped.
fn to_kebab_case(s: &str) -> String {
    let mut kebab = String::with_capacity(s.len());
    let mut pending_separator = false;

    for c in s.chars() {
        if !c.is_alphanumeric() {
            // Remember the separator, but never emit one before the first word.
            pending_separator = !kebab.is_empty();
            continue;
        }

        // The hyphen is written lazily so trailing separators leave no trace.
        if pending_separator {
            kebab.push('-');
            pending_separator = false;
        }
        kebab.push(c.to_ascii_lowercase());
    }

    kebab
}
333
// Unit tests for the registry constructors, name lookup, kebab-casing and
// filename validation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::{Entity, FieldValue, Label};

    /// Builds a minimal entry with the given name, label and path, and with no
    /// fields, id or tags.
    fn make_entry(name: &str, label: Label, path: &str) -> RegistryEntry {
        RegistryEntry {
            entity: Entity {
                name: name.to_string(),
                label,
                fields: Vec::new(),
                id: None,
                line: 1,
                tags: Vec::new(),
            },
            path: PathBuf::from(path),
            tags: Vec::new(),
        }
    }

    /// Entries with distinct names can be looked up by name; unknown names
    /// yield `None`.
    #[test]
    fn registry_from_entries_lookup() {
        let entries = vec![
            make_entry("Alice", Label::Person, "people/alice.md"),
            make_entry("Corp Inc", Label::Organization, "organizations/corp-inc.md"),
        ];

        let registry = EntityRegistry::from_entries(entries).unwrap();
        assert_eq!(registry.len(), 2);
        assert!(registry.get_by_name("Alice").is_some());
        assert!(registry.get_by_name("Corp Inc").is_some());
        assert!(registry.get_by_name("Bob").is_none());
    }

    /// Two entries sharing a name make construction fail with a "duplicate"
    /// diagnostic.
    #[test]
    fn registry_detects_duplicate_names() {
        let entries = vec![
            make_entry("Alice", Label::Person, "people/alice-a.md"),
            make_entry("Alice", Label::Person, "people/alice-b.md"),
        ];

        let errors = EntityRegistry::from_entries(entries).unwrap_err();
        assert!(errors.iter().any(|e| e.message.contains("duplicate")));
    }

    /// `names` reports the name of every entry.
    #[test]
    fn registry_names_list() {
        let entries = vec![
            make_entry("Alice", Label::Person, "people/alice.md"),
            make_entry("Bob", Label::Person, "people/bob.md"),
        ];

        let registry = EntityRegistry::from_entries(entries).unwrap();
        let names = registry.names();
        assert!(names.contains(&"Alice"));
        assert!(names.contains(&"Bob"));
    }

    /// Spaces become hyphens and ASCII letters are lowercased.
    #[test]
    fn to_kebab_case_conversion() {
        assert_eq!(to_kebab_case("Mark Bonnick"), "mark-bonnick");
        assert_eq!(to_kebab_case("Arsenal FC"), "arsenal-fc");
        assert_eq!(
            to_kebab_case("English Football Club"),
            "english-football-club"
        );
        assert_eq!(to_kebab_case("Bob"), "bob");
    }

    /// With a qualifier, the expected stem is `<name>--<qualifier>`; any other
    /// stem produces a warning.
    #[test]
    fn validate_filename_matching() {
        let entity = Entity {
            name: "Mark Bonnick".to_string(),
            label: Label::Person,
            fields: vec![(
                "qualifier".to_string(),
                FieldValue::Single("Arsenal Kit Manager".to_string()),
            )],
            id: None,
            line: 1,
            tags: Vec::new(),
        };

        let mut errors = Vec::new();

        // A stem that matches name and qualifier produces no diagnostics.
        validate_filename(
            Path::new("people/mark-bonnick--arsenal-kit-manager.md"),
            &entity,
            &mut errors,
        );
        assert!(errors.is_empty(), "errors: {errors:?}");

        // A mismatching stem is reported as a warning.
        validate_filename(Path::new("people/wrong-name.md"), &entity, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("warning:")));
    }

    /// Without a qualifier, the expected stem is just the kebab-cased name.
    #[test]
    fn validate_filename_no_qualifier() {
        let entity = Entity {
            name: "Bob".to_string(),
            label: Label::Person,
            fields: Vec::new(),
            id: None,
            line: 1,
            tags: Vec::new(),
        };

        let mut errors = Vec::new();
        validate_filename(Path::new("people/bob.md"), &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    /// A registry built from no entries is empty and resolves no names.
    #[test]
    fn empty_registry() {
        let registry = EntityRegistry::from_entries(Vec::new()).unwrap();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(registry.get_by_name("anything").is_none());
    }
}
455}