use std::collections::HashMap;
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use crate::entity::{Entity, Label};
use crate::parser::ParseError;
/// Longest file stem accepted without a warning.
///
/// `validate_filename` compares this against `str::len` of the stem, so the
/// limit is effectively measured in UTF-8 bytes.
const MAX_FILENAME_LEN: usize = 200;
/// One entity held by an [`EntityRegistry`], together with where it came from.
#[derive(Debug)]
pub struct RegistryEntry {
/// The entity itself.
pub entity: Entity,
/// Path associated with the entity; entries produced by the directory
/// loader store the path of the file that was parsed.
pub path: PathBuf,
/// Tags attached to the entry; the directory loader fills this from the
/// parsed file and also copies the same tags onto `entity.tags`.
pub tags: Vec<String>,
}
/// Collection of entities with lookup by entity name.
#[derive(Debug)]
pub struct EntityRegistry {
// All entries, in the order they were loaded or supplied.
entries: Vec<RegistryEntry>,
// Maps an entity name to the index of its entry in `entries`.
name_index: HashMap<String, usize>,
// Directory the registry was loaded from; `None` when the registry was
// built from in-memory entries.
content_root: Option<PathBuf>,
}
impl EntityRegistry {
    /// Loads every entity file found under `root`.
    ///
    /// Files below `root/people` are parsed as [`Label::Person`] and files
    /// below `root/organizations` as [`Label::Organization`]. After parsing,
    /// a name index is built over all loaded entries.
    ///
    /// Diagnostics whose message starts with `"warning:"` do not fail the
    /// load; they are printed to stderr and the registry is returned.
    ///
    /// # Errors
    ///
    /// Returns every collected diagnostic (warnings included) if at least one
    /// of them is not a warning, e.g. a parse failure or a duplicate entity
    /// name.
    pub fn load(root: &Path) -> Result<Self, Vec<ParseError>> {
        let mut entries = Vec::new();
        let mut errors = Vec::new();

        load_directory(
            &root.join("people"),
            Label::Person,
            &mut entries,
            &mut errors,
        );
        load_directory(
            &root.join("organizations"),
            Label::Organization,
            &mut entries,
            &mut errors,
        );

        let name_index = build_name_index(&entries, &mut errors);

        // Duplicate-name diagnostics are not prefixed with "warning:", so this
        // single check also covers them.
        if Self::has_fatal_errors(&errors) {
            return Err(errors);
        }

        // Only warnings remain at this point; surface them without failing.
        for warning in &errors {
            eprintln!("{warning}");
        }

        Ok(Self {
            entries,
            name_index,
            content_root: Some(root.to_path_buf()),
        })
    }

    /// Builds a registry from already-constructed entries.
    ///
    /// The resulting registry has no content root, so [`Self::slug_for`]
    /// returns `None` for all of its entries.
    ///
    /// # Errors
    ///
    /// Returns the collected diagnostics if building the name index produced
    /// any non-warning diagnostic (such as a duplicate entity name).
    pub fn from_entries(entries: Vec<RegistryEntry>) -> Result<Self, Vec<ParseError>> {
        let mut errors = Vec::new();
        let name_index = build_name_index(&entries, &mut errors);

        if Self::has_fatal_errors(&errors) {
            return Err(errors);
        }

        Ok(Self {
            entries,
            name_index,
            content_root: None,
        })
    }

    /// Returns `true` if any diagnostic is something other than a warning,
    /// i.e. its message does not start with `"warning:"`.
    fn has_fatal_errors(errors: &[ParseError]) -> bool {
        errors
            .iter()
            .any(|err| !err.message.starts_with("warning:"))
    }

    /// Looks up an entry by its exact entity name.
    pub fn get_by_name(&self, name: &str) -> Option<&RegistryEntry> {
        self.name_index
            .get(name)
            .and_then(|&idx| self.entries.get(idx))
    }

    /// Number of entries in the registry.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Returns `true` when the registry holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Names of all entities, in entry order.
    pub fn names(&self) -> Vec<&str> {
        self.entries
            .iter()
            .map(|entry| entry.entity.name.as_str())
            .collect()
    }

    /// All entries, in the order they were loaded or supplied.
    pub fn entries(&self) -> &[RegistryEntry] {
        &self.entries
    }

    /// Slug of `entry` relative to the registry's content root.
    ///
    /// Returns `None` when the registry has no content root or when
    /// [`path_to_slug`] cannot derive a slug from the entry's path.
    pub fn slug_for(&self, entry: &RegistryEntry) -> Option<String> {
        let root = self.content_root.as_ref()?;
        path_to_slug(&entry.path, root)
    }

    /// Directory the registry was loaded from, if any.
    pub fn content_root(&self) -> Option<&Path> {
        self.content_root.as_deref()
    }
}
/// Derives a slug from `path` by making it relative to `content_root` and
/// dropping a trailing `.md`, if present.
///
/// Returns `None` when `path` is not located under `content_root` or when the
/// relative path is not valid UTF-8.
pub fn path_to_slug(path: &Path, content_root: &Path) -> Option<String> {
    let Ok(relative) = path.strip_prefix(content_root) else {
        return None;
    };
    relative.to_str().map(|text| match text.strip_suffix(".md") {
        Some(trimmed) => trimmed.to_owned(),
        None => text.to_owned(),
    })
}
fn load_directory(
dir: &Path,
label: Label,
entries: &mut Vec<RegistryEntry>,
errors: &mut Vec<ParseError>,
) {
let mut paths = Vec::new();
collect_md_files(dir, &mut paths, 0);
paths.sort();
let results: Vec<ParseResult> = paths
.par_iter()
.map(|path| parse_entity_file(path, label))
.collect();
for result in results {
if let Some(entry) = result.entry {
entries.push(entry);
}
errors.extend(result.errors);
}
}
/// Recursively gathers paths with the extension `md` below `dir` into `paths`.
///
/// `depth` is the nesting level of `dir`; directories deeper than `MAX_DEPTH`
/// are not visited. Directories that cannot be read, and individual directory
/// entries that fail to load, are skipped silently. Within each directory,
/// entries are visited in file-name order.
fn collect_md_files(dir: &Path, paths: &mut Vec<PathBuf>, depth: usize) {
    const MAX_DEPTH: usize = 2;
    if depth > MAX_DEPTH {
        return;
    }

    let mut children: Vec<std::fs::DirEntry> = match std::fs::read_dir(dir) {
        Ok(listing) => listing.filter_map(|item| item.ok()).collect(),
        Err(_) => return,
    };
    children.sort_by_key(|child| child.file_name());

    for child in &children {
        let candidate = child.path();
        if candidate.is_dir() {
            collect_md_files(&candidate, paths, depth + 1);
            continue;
        }
        let is_markdown = candidate.extension().and_then(|ext| ext.to_str()) == Some("md");
        if is_markdown {
            paths.push(candidate);
        }
    }
}
/// Outcome of parsing a single entity file.
struct ParseResult {
// The parsed entry, or `None` when the file could not be read or parsed.
entry: Option<RegistryEntry>,
// Diagnostics produced while handling the file; may be non-empty even when
// `entry` is present.
errors: Vec<ParseError>,
}
/// Reads and parses the entity file at `path`, tagging the entity with `label`.
///
/// Every diagnostic coming from the reader or the parsers is prefixed with the
/// file path. A read or top-level parse failure yields a result without an
/// entry; field-level diagnostics and filename warnings are returned alongside
/// the entry.
fn parse_entity_file(path: &Path, label: Label) -> ParseResult {
    let shown = path.display();
    let failure = |errors: Vec<ParseError>| ParseResult {
        entry: None,
        errors,
    };

    let content = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(e) => {
            return failure(vec![ParseError {
                line: 0,
                message: format!("{shown}: error reading file: {e}"),
            }]);
        }
    };

    let parsed = match crate::parser::parse_entity_file(&content) {
        Ok(document) => document,
        Err(parse_errors) => {
            let errors: Vec<ParseError> = parse_errors
                .into_iter()
                .map(|err| ParseError {
                    line: err.line,
                    message: format!("{shown}: {}", err.message),
                })
                .collect();
            return failure(errors);
        }
    };

    let mut field_errors = Vec::new();
    let mut entity = crate::entity::parse_entity_file_body(
        &parsed.name,
        &parsed.body,
        label,
        parsed.id,
        parsed.title_line,
        &mut field_errors,
    );
    entity.tags.clone_from(&parsed.tags);

    // Re-wrap field diagnostics so each message names the offending file.
    let mut errors: Vec<ParseError> = Vec::with_capacity(field_errors.len());
    for err in field_errors {
        errors.push(ParseError {
            line: err.line,
            message: format!("{shown}: {}", err.message),
        });
    }
    validate_filename(path, &entity, &mut errors);

    let entry = RegistryEntry {
        entity,
        path: path.to_path_buf(),
        tags: parsed.tags,
    };
    ParseResult {
        entry: Some(entry),
        errors,
    }
}
/// Builds the name → index lookup table for `entries`.
///
/// The first entry with a given name wins. Each later entry with the same
/// name is left out of the index and reported in `errors` with a
/// `duplicate entity name` message naming both files.
fn build_name_index(
    entries: &[RegistryEntry],
    errors: &mut Vec<ParseError>,
) -> HashMap<String, usize> {
    let mut index: HashMap<String, usize> = HashMap::new();

    for (position, candidate) in entries.iter().enumerate() {
        let name = &candidate.entity.name;
        match index.entry(name.clone()) {
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(position);
            }
            std::collections::hash_map::Entry::Occupied(slot) => {
                let first: &RegistryEntry = &entries[*slot.get()];
                errors.push(ParseError {
                    line: candidate.entity.line,
                    message: format!(
                        "duplicate entity name {name:?} in {} (first defined in {})",
                        candidate.path.display(),
                        first.path.display(),
                    ),
                });
            }
        }
    }

    index
}
/// Checks that the file stem of `path` follows the naming convention for
/// `entity`, pushing `warning:` diagnostics onto `errors` when it does not.
///
/// The expected stem is the kebab-cased entity name, followed by `--` and the
/// kebab-cased qualifier when the first `qualifier` field holds a single
/// value. Paths without a UTF-8 file stem are not checked at all.
fn validate_filename(path: &Path, entity: &Entity, errors: &mut Vec<ParseError>) {
    let stem = match path.file_stem().and_then(|raw| raw.to_str()) {
        Some(text) => text,
        None => return,
    };

    // `len()` counts bytes, which is what the limit is compared against.
    if stem.len() > MAX_FILENAME_LEN {
        let message = format!(
            "warning: {}: filename stem exceeds {MAX_FILENAME_LEN} chars",
            path.display()
        );
        errors.push(ParseError { line: 0, message });
    }

    // Only the first `qualifier` field is considered; a list value there
    // means "no qualifier" rather than "keep looking".
    let qualifier = match entity.fields.iter().find(|(k, _)| k == "qualifier") {
        Some((_, crate::entity::FieldValue::Single(text))) => Some(text.as_str()),
        Some((_, crate::entity::FieldValue::List(_))) | None => None,
    };

    let base = to_kebab_case(&entity.name);
    let expected_stem = if let Some(q) = qualifier {
        format!("{base}--{}", to_kebab_case(q))
    } else {
        base
    };

    if stem == expected_stem {
        return;
    }
    let message = format!(
        "warning: {}: filename {stem:?} doesn't match expected {expected_stem:?}",
        path.display()
    );
    errors.push(ParseError { line: 0, message });
}
/// Converts `s` to kebab-case.
///
/// Alphanumeric characters are kept (ASCII letters are lowercased; other
/// characters are left as they are), and every run of non-alphanumeric
/// characters becomes a single `-`. Leading and trailing separators are
/// dropped.
fn to_kebab_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut pending_separator = false;

    for ch in s.chars() {
        if !ch.is_alphanumeric() {
            pending_separator = true;
            continue;
        }
        // Emit the separator lazily so that leading and trailing runs vanish.
        if pending_separator && !out.is_empty() {
            out.push('-');
        }
        pending_separator = false;
        out.push(ch.to_ascii_lowercase());
    }

    out
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::{Entity, FieldValue, Label};

    /// Builds a minimal entity with the given name, label and fields.
    fn make_entity(name: &str, label: Label, fields: Vec<(String, FieldValue)>) -> Entity {
        Entity {
            name: name.to_string(),
            label,
            fields,
            id: None,
            line: 1,
            tags: Vec::new(),
            slug: None,
        }
    }

    /// Builds a field-less registry entry located at `path`.
    fn make_entry(name: &str, label: Label, path: &str) -> RegistryEntry {
        RegistryEntry {
            entity: make_entity(name, label, Vec::new()),
            path: PathBuf::from(path),
            tags: Vec::new(),
        }
    }

    #[test]
    fn registry_from_entries_lookup() {
        let entries = vec![
            make_entry("Alice", Label::Person, "people/alice.md"),
            make_entry("Corp Inc", Label::Organization, "organizations/corp-inc.md"),
        ];
        let registry = EntityRegistry::from_entries(entries).unwrap();
        assert_eq!(registry.len(), 2);
        assert!(registry.get_by_name("Alice").is_some());
        assert!(registry.get_by_name("Corp Inc").is_some());
        assert!(registry.get_by_name("Bob").is_none());
    }

    #[test]
    fn registry_detects_duplicate_names() {
        let entries = vec![
            make_entry("Alice", Label::Person, "people/alice-a.md"),
            make_entry("Alice", Label::Person, "people/alice-b.md"),
        ];
        let errors = EntityRegistry::from_entries(entries).unwrap_err();
        assert!(errors.iter().any(|e| e.message.contains("duplicate")));
    }

    #[test]
    fn registry_names_list() {
        let entries = vec![
            make_entry("Alice", Label::Person, "people/alice.md"),
            make_entry("Bob", Label::Person, "people/bob.md"),
        ];
        let registry = EntityRegistry::from_entries(entries).unwrap();
        let names = registry.names();
        assert!(names.contains(&"Alice"));
        assert!(names.contains(&"Bob"));
    }

    #[test]
    fn registry_from_entries_has_no_content_root() {
        let entries = vec![make_entry("Alice", Label::Person, "people/alice.md")];
        let registry = EntityRegistry::from_entries(entries).unwrap();
        assert!(registry.content_root().is_none());
        // Without a content root there is nothing to make the path relative to.
        assert!(registry.slug_for(&registry.entries()[0]).is_none());
    }

    #[test]
    fn path_to_slug_strips_root_and_extension() {
        assert_eq!(
            path_to_slug(Path::new("content/people/alice.md"), Path::new("content")),
            Some("people/alice".to_string())
        );
        assert_eq!(
            path_to_slug(Path::new("elsewhere/alice.md"), Path::new("content")),
            None
        );
    }

    #[test]
    fn to_kebab_case_conversion() {
        assert_eq!(to_kebab_case("Mark Bonnick"), "mark-bonnick");
        assert_eq!(to_kebab_case("Arsenal FC"), "arsenal-fc");
        assert_eq!(
            to_kebab_case("English Football Club"),
            "english-football-club"
        );
        assert_eq!(to_kebab_case("Bob"), "bob");
    }

    #[test]
    fn to_kebab_case_collapses_and_trims_separators() {
        assert_eq!(to_kebab_case("  A.B. Smith-Jones  "), "a-b-smith-jones");
        assert_eq!(to_kebab_case(""), "");
        assert_eq!(to_kebab_case("---"), "");
    }

    #[test]
    fn validate_filename_matching() {
        let entity = make_entity(
            "Mark Bonnick",
            Label::Person,
            vec![(
                "qualifier".to_string(),
                FieldValue::Single("Arsenal Kit Manager".to_string()),
            )],
        );
        let mut errors = Vec::new();
        validate_filename(
            Path::new("people/mark-bonnick--arsenal-kit-manager.md"),
            &entity,
            &mut errors,
        );
        assert!(errors.is_empty(), "errors: {errors:?}");
        validate_filename(Path::new("people/wrong-name.md"), &entity, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("warning:")));
    }

    #[test]
    fn validate_filename_no_qualifier() {
        let entity = make_entity("Bob", Label::Person, Vec::new());
        let mut errors = Vec::new();
        validate_filename(Path::new("people/bob.md"), &entity, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    #[test]
    fn empty_registry() {
        let registry = EntityRegistry::from_entries(Vec::new()).unwrap();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
        assert!(registry.get_by_name("anything").is_none());
    }
}