Skip to main content

weave_content/
lib.rs

1#![deny(unsafe_code)]
2#![deny(clippy::unwrap_used)]
3#![deny(clippy::expect_used)]
4#![allow(clippy::missing_errors_doc)]
5
6pub mod cache;
7pub mod entity;
8pub mod nulid_gen;
9pub mod output;
10pub mod parser;
11pub mod registry;
12pub mod relationship;
13pub mod timeline;
14pub mod verifier;
15pub mod writeback;
16
17use crate::entity::Entity;
18use crate::parser::{ParseError, ParsedCase, SectionKind};
19use crate::relationship::Rel;
20
21/// Parse a case file fully: front matter, entities, relationships, timeline.
22/// Returns the parsed case, inline entities, and relationships (including NEXT from timeline).
23///
24/// When a registry is provided, relationship and timeline names are resolved
25/// against both inline events AND the global entity registry.
26pub fn parse_full(
27    content: &str,
28    reg: Option<&registry::EntityRegistry>,
29) -> Result<(ParsedCase, Vec<Entity>, Vec<Rel>), Vec<ParseError>> {
30    let case = parser::parse(content)?;
31    let mut errors = Vec::new();
32
33    let mut all_entities = Vec::new();
34    for section in &case.sections {
35        if section.kind == SectionKind::Events {
36            let entities =
37                entity::parse_entities(&section.body, section.kind, section.line, &mut errors);
38            all_entities.extend(entities);
39        }
40    }
41
42    // Build combined name list: inline events + registry entities
43    let mut entity_names: Vec<&str> = all_entities.iter().map(|e| e.name.as_str()).collect();
44    if let Some(registry) = reg {
45        for name in registry.names() {
46            if !entity_names.contains(&name) {
47                entity_names.push(name);
48            }
49        }
50    }
51
52    let event_names: Vec<&str> = all_entities
53        .iter()
54        .filter(|e| e.label == entity::Label::PublicRecord)
55        .map(|e| e.name.as_str())
56        .collect();
57
58    let mut all_rels = Vec::new();
59    for section in &case.sections {
60        if section.kind == SectionKind::Relationships {
61            let rels = relationship::parse_relationships(
62                &section.body,
63                section.line,
64                &entity_names,
65                &case.sources,
66                &mut errors,
67            );
68            all_rels.extend(rels);
69        }
70    }
71
72    for section in &case.sections {
73        if section.kind == SectionKind::Timeline {
74            let rels =
75                timeline::parse_timeline(&section.body, section.line, &event_names, &mut errors);
76            all_rels.extend(rels);
77        }
78    }
79
80    if errors.is_empty() {
81        Ok((case, all_entities, all_rels))
82    } else {
83        Err(errors)
84    }
85}
86
87/// Collect registry entities referenced by relationships in this case.
88pub fn collect_referenced_registry_entities(
89    rels: &[Rel],
90    inline_entities: &[Entity],
91    reg: &registry::EntityRegistry,
92) -> Vec<Entity> {
93    let inline_names: Vec<&str> = inline_entities.iter().map(|e| e.name.as_str()).collect();
94    let mut referenced = Vec::new();
95    let mut seen_names: Vec<String> = Vec::new();
96
97    for rel in rels {
98        for name in [&rel.source_name, &rel.target_name] {
99            if !inline_names.contains(&name.as_str())
100                && !seen_names.contains(name)
101                && let Some(entry) = reg.get_by_name(name)
102            {
103                referenced.push(entry.entity.clone());
104                seen_names.push(name.clone());
105            }
106        }
107    }
108
109    referenced
110}
111
112/// Build a `CaseOutput` from a case file path.
113/// Handles parsing and ID writeback.
114pub fn build_case_output(
115    path: &str,
116    reg: &registry::EntityRegistry,
117) -> Result<output::CaseOutput, i32> {
118    let content = match std::fs::read_to_string(path) {
119        Ok(c) => c,
120        Err(e) => {
121            eprintln!("{path}: error reading file: {e}");
122            return Err(2);
123        }
124    };
125
126    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
127        Ok(result) => result,
128        Err(errors) => {
129            for err in &errors {
130                eprintln!("{path}:{err}");
131            }
132            return Err(1);
133        }
134    };
135
136    let referenced_entities = collect_referenced_registry_entities(&rels, &entities, reg);
137
138    let build_result = match output::build_output(
139        &case.id,
140        &case.title,
141        &case.summary,
142        &case.sources,
143        &entities,
144        &rels,
145        &referenced_entities,
146    ) {
147        Ok(out) => out,
148        Err(errors) => {
149            for err in &errors {
150                eprintln!("{path}:{err}");
151            }
152            return Err(1);
153        }
154    };
155
156    let case_output = build_result.output;
157
158    // Write back generated IDs to source case file
159    if !build_result.case_pending.is_empty() {
160        let mut pending = build_result.case_pending;
161        if let Some(modified) = writeback::apply_writebacks(&content, &mut pending) {
162            if let Err(e) = writeback::write_file(std::path::Path::new(path), &modified) {
163                eprintln!("{e}");
164                return Err(2);
165            }
166            let count = pending.len();
167            eprintln!("{path}: wrote {count} generated ID(s) back to file");
168        }
169    }
170
171    // Write back generated IDs to entity files
172    if let Some(code) = writeback_registry_entities(&build_result.registry_pending, reg) {
173        return Err(code);
174    }
175
176    eprintln!(
177        "{path}: built ({} nodes, {} relationships)",
178        case_output.nodes.len(),
179        case_output.relationships.len()
180    );
181    Ok(case_output)
182}
183
184/// Write back generated IDs to registry entity files.
185/// Returns `Some(exit_code)` on error, `None` on success.
186fn writeback_registry_entities(
187    pending: &[(String, writeback::PendingId)],
188    reg: &registry::EntityRegistry,
189) -> Option<i32> {
190    for (entity_name, pending_id) in pending {
191        let Some(entry) = reg.get_by_name(entity_name) else {
192            continue;
193        };
194        let entity_path = &entry.path;
195        let entity_content = match std::fs::read_to_string(entity_path) {
196            Ok(c) => c,
197            Err(e) => {
198                eprintln!("{}: error reading file: {e}", entity_path.display());
199                return Some(2);
200            }
201        };
202
203        // Skip if the file already has an `id:` in its front matter.
204        // This happens when multiple cases reference the same shared entity:
205        // the first case writes the ID, but the in-memory registry still has
206        // `id: None`, so subsequent cases generate a new ID. Re-reading the
207        // file here catches the already-written ID and avoids duplicates.
208        if front_matter_has_id(&entity_content) {
209            continue;
210        }
211
212        let fm_end = writeback::find_front_matter_end(&entity_content);
213        let mut ids = vec![writeback::PendingId {
214            line: fm_end.unwrap_or(2),
215            id: pending_id.id.clone(),
216            kind: writeback::WriteBackKind::EntityFrontMatter,
217        }];
218        if let Some(modified) = writeback::apply_writebacks(&entity_content, &mut ids) {
219            if let Err(e) = writeback::write_file(entity_path, &modified) {
220                eprintln!("{e}");
221                return Some(2);
222            }
223            eprintln!("{}: wrote generated ID back to file", entity_path.display());
224        }
225    }
226    None
227}
228
/// Check whether a file's YAML front matter already contains a top-level `id:` field.
///
/// Front matter is the block between two `---` lines and must open the
/// document: only leading blank lines may precede the opening delimiter. A
/// `---` that appears after other content (for example a Markdown thematic
/// break) is not treated as front matter, so `id:` lines in the body never
/// count. If the closing delimiter is missing, the rest of the file is
/// scanned.
///
/// Only an unindented `id:` key counts; an indented `id:` belongs to a nested
/// mapping rather than to the entity itself.
fn front_matter_has_id(content: &str) -> bool {
    let mut lines = content
        .lines()
        .skip_while(|line| line.trim().is_empty());

    // The first non-blank line has to be the opening delimiter.
    if lines.next().map(str::trim) != Some("---") {
        return false;
    }

    lines
        // Stop at the closing delimiter; nothing after it is front matter.
        .take_while(|line| line.trim() != "---")
        // No trimming here: leading whitespace marks a nested key.
        .any(|line| line.starts_with("id:"))
}
244
/// Resolve the content root directory.
///
/// Priority: explicit `--root` flag > location derived from the given path >
/// current directory (`.`).
///
/// For a file path, the root is the nearest ancestor of the file's directory
/// that contains a `cases`, `actors` or `institutions` directory, falling back
/// to the file's own directory. For a directory path, the directory itself is
/// the root. A path that is neither yields `.`.
pub fn resolve_content_root(path: Option<&str>, root: Option<&str>) -> std::path::PathBuf {
    // Directories whose presence identifies a content root.
    const MARKER_DIRS: [&str; 3] = ["cases", "actors", "institutions"];

    if let Some(explicit) = root {
        return std::path::PathBuf::from(explicit);
    }

    let current_dir = || std::path::PathBuf::from(".");

    let Some(given) = path.map(std::path::Path::new) else {
        return current_dir();
    };

    if given.is_file() {
        return match given.parent() {
            Some(parent) => parent
                .ancestors()
                .find(|dir| MARKER_DIRS.iter().any(|marker| dir.join(marker).is_dir()))
                .unwrap_or(parent)
                .to_path_buf(),
            None => current_dir(),
        };
    }

    if given.is_dir() {
        return given.to_path_buf();
    }

    current_dir()
}
272
273/// Load entity registry from content root. Returns empty registry if no entity dirs exist.
274pub fn load_registry(content_root: &std::path::Path) -> Result<registry::EntityRegistry, i32> {
275    match registry::EntityRegistry::load(content_root) {
276        Ok(reg) => Ok(reg),
277        Err(errors) => {
278            for err in &errors {
279                eprintln!("registry: {err}");
280            }
281            Err(1)
282        }
283    }
284}
285
286/// Resolve case file paths from path argument.
287/// If path is a file, returns just that file.
288/// If path is a directory (or None), auto-discovers `cases/**/*.md`.
289pub fn resolve_case_files(
290    path: Option<&str>,
291    content_root: &std::path::Path,
292) -> Result<Vec<String>, i32> {
293    if let Some(p) = path {
294        let p_path = std::path::Path::new(p);
295        if p_path.is_file() {
296            return Ok(vec![p.to_string()]);
297        }
298        if !p_path.is_dir() {
299            eprintln!("{p}: not a file or directory");
300            return Err(2);
301        }
302    }
303
304    let cases_dir = content_root.join("cases");
305    if !cases_dir.is_dir() {
306        return Ok(Vec::new());
307    }
308
309    let mut files = Vec::new();
310    discover_md_files(&cases_dir, &mut files, 0);
311    files.sort();
312    Ok(files)
313}
314
/// Recursively collect `.md` files under `dir`, appending their paths to `files`.
///
/// `depth` is the nesting level of `dir` (callers start at 0); directories
/// deeper than level 3 are not entered, which covers `cases/year/topic/`.
/// Entries of each directory are visited in file-name order. Unreadable
/// directories or entries, and paths that are not valid UTF-8, are skipped
/// silently.
fn discover_md_files(dir: &std::path::Path, files: &mut Vec<String>, depth: usize) {
    const MAX_DEPTH: usize = 3;
    if depth > MAX_DEPTH {
        return;
    }

    let mut children: Vec<std::fs::DirEntry> = match std::fs::read_dir(dir) {
        Ok(listing) => listing.filter_map(Result::ok).collect(),
        Err(_) => return,
    };
    children.sort_by(|left, right| left.file_name().cmp(&right.file_name()));

    for child in children {
        let path = child.path();
        if path.is_dir() {
            discover_md_files(&path, files, depth + 1);
            continue;
        }

        let is_markdown = path.extension().and_then(|ext| ext.to_str()) == Some("md");
        if let Some(text) = path.to_str().filter(|_| is_markdown) {
            files.push(text.to_string());
        }
    }
}
340
#[cfg(test)]
mod tests {
    use super::front_matter_has_id;

    /// An `id:` as the only front matter field is detected.
    #[test]
    fn front_matter_has_id_present() {
        let doc = "---\nid: 01JABC000000000000000000AA\n---\n\n# Test\n";
        let found = front_matter_has_id(doc);
        assert!(found, "id in front matter should be detected");
    }

    /// Empty front matter has no `id:`.
    #[test]
    fn front_matter_has_id_absent() {
        let doc = "---\n---\n\n# Test\n";
        let found = front_matter_has_id(doc);
        assert!(!found, "empty front matter has no id");
    }

    /// An `id:` that follows other fields is still detected.
    #[test]
    fn front_matter_has_id_with_other_fields() {
        let doc = "---\nother: value\nid: 01JABC000000000000000000AA\n---\n\n# Test\n";
        let found = front_matter_has_id(doc);
        assert!(found, "id after another field should be detected");
    }

    /// A document without any front matter delimiters has no `id:`.
    #[test]
    fn front_matter_has_id_no_front_matter() {
        let doc = "# Test\n\nNo front matter here.\n";
        let found = front_matter_has_id(doc);
        assert!(!found, "document without front matter has no id");
    }

    /// `id:` appearing in the body, after the front matter closed, does not count.
    #[test]
    fn front_matter_has_id_outside_front_matter() {
        let doc = "---\n---\n\n# Test\n\n- id: some-value\n";
        let found = front_matter_has_id(doc);
        assert!(!found, "id in the body must be ignored");
    }
}