Skip to main content

weave_content/
lib.rs

1#![deny(unsafe_code)]
2#![deny(clippy::unwrap_used)]
3#![deny(clippy::expect_used)]
4#![allow(clippy::missing_errors_doc)]
5
6pub mod build_cache;
7pub mod cache;
8pub mod domain;
9pub mod entity;
10pub mod html;
11pub mod nulid_gen;
12pub mod output;
13pub mod parser;
14pub mod registry;
15pub mod relationship;
16pub mod tags;
17pub mod timeline;
18pub mod verifier;
19pub mod writeback;
20
21use std::collections::HashSet;
22
23use crate::entity::Entity;
24use crate::parser::{ParseError, ParsedCase, SectionKind};
25use crate::relationship::Rel;
26
27/// Parse a case file fully: front matter, entities, relationships, timeline.
28/// Returns the parsed case, inline entities, and relationships (including NEXT from timeline).
29///
30/// When a registry is provided, relationship and timeline names are resolved
31/// against both inline events AND the global entity registry.
32pub fn parse_full(
33    content: &str,
34    reg: Option<&registry::EntityRegistry>,
35) -> Result<(ParsedCase, Vec<Entity>, Vec<Rel>), Vec<ParseError>> {
36    let case = parser::parse(content)?;
37    let mut errors = Vec::new();
38
39    let mut all_entities = Vec::new();
40    for section in &case.sections {
41        if matches!(
42            section.kind,
43            SectionKind::Events | SectionKind::Documents | SectionKind::Assets
44        ) {
45            let entities =
46                entity::parse_entities(&section.body, section.kind, section.line, &mut errors);
47            all_entities.extend(entities);
48        }
49    }
50
51    // Build combined name set: inline events + registry entities
52    let mut entity_names: HashSet<&str> = all_entities.iter().map(|e| e.name.as_str()).collect();
53    if let Some(registry) = reg {
54        for name in registry.names() {
55            entity_names.insert(name);
56        }
57    }
58
59    let event_names: HashSet<&str> = all_entities
60        .iter()
61        .filter(|e| e.label == entity::Label::Event)
62        .map(|e| e.name.as_str())
63        .collect();
64
65    let mut all_rels = Vec::new();
66    for section in &case.sections {
67        if section.kind == SectionKind::Relationships {
68            let rels = relationship::parse_relationships(
69                &section.body,
70                section.line,
71                &entity_names,
72                &case.sources,
73                &mut errors,
74            );
75            all_rels.extend(rels);
76        }
77    }
78
79    for section in &case.sections {
80        if section.kind == SectionKind::Timeline {
81            let rels =
82                timeline::parse_timeline(&section.body, section.line, &event_names, &mut errors);
83            all_rels.extend(rels);
84        }
85    }
86
87    if errors.is_empty() {
88        Ok((case, all_entities, all_rels))
89    } else {
90        Err(errors)
91    }
92}
93
94/// Collect registry entities referenced by relationships in this case.
95pub fn collect_referenced_registry_entities(
96    rels: &[Rel],
97    inline_entities: &[Entity],
98    reg: &registry::EntityRegistry,
99) -> Vec<Entity> {
100    let inline_names: HashSet<&str> = inline_entities.iter().map(|e| e.name.as_str()).collect();
101    let mut referenced = Vec::new();
102    let mut seen_names: HashSet<String> = HashSet::new();
103
104    for rel in rels {
105        for name in [&rel.source_name, &rel.target_name] {
106            if !inline_names.contains(name.as_str())
107                && seen_names.insert(name.clone())
108                && let Some(entry) = reg.get_by_name(name)
109            {
110                referenced.push(entry.entity.clone());
111            }
112        }
113    }
114
115    referenced
116}
117
118/// Build a `CaseOutput` from a case file path.
119/// Handles parsing and ID writeback.
120pub fn build_case_output(
121    path: &str,
122    reg: &registry::EntityRegistry,
123) -> Result<output::CaseOutput, i32> {
124    let mut written = HashSet::new();
125    build_case_output_tracked(path, reg, &mut written)
126}
127
128/// Build a `CaseOutput` from a case file path, tracking which entity files
129/// have already been written back. This avoids re-reading entity files from
130/// disk when multiple cases reference the same shared entity.
131#[allow(clippy::implicit_hasher)]
132pub fn build_case_output_tracked(
133    path: &str,
134    reg: &registry::EntityRegistry,
135    written_entities: &mut HashSet<std::path::PathBuf>,
136) -> Result<output::CaseOutput, i32> {
137    let content = match std::fs::read_to_string(path) {
138        Ok(c) => c,
139        Err(e) => {
140            eprintln!("{path}: error reading file: {e}");
141            return Err(2);
142        }
143    };
144
145    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
146        Ok(result) => result,
147        Err(errors) => {
148            for err in &errors {
149                eprintln!("{path}:{err}");
150            }
151            return Err(1);
152        }
153    };
154
155    let referenced_entities = collect_referenced_registry_entities(&rels, &entities, reg);
156
157    let build_result = match output::build_output(
158        &case.id,
159        &case.title,
160        &case.summary,
161        &case.tags,
162        &case.sources,
163        &entities,
164        &rels,
165        &referenced_entities,
166    ) {
167        Ok(out) => out,
168        Err(errors) => {
169            for err in &errors {
170                eprintln!("{path}:{err}");
171            }
172            return Err(1);
173        }
174    };
175
176    let case_output = build_result.output;
177
178    // Write back generated IDs to source case file
179    if !build_result.case_pending.is_empty() {
180        let mut pending = build_result.case_pending;
181        if let Some(modified) = writeback::apply_writebacks(&content, &mut pending) {
182            if let Err(e) = writeback::write_file(std::path::Path::new(path), &modified) {
183                eprintln!("{e}");
184                return Err(2);
185            }
186            let count = pending.len();
187            eprintln!("{path}: wrote {count} generated ID(s) back to file");
188        }
189    }
190
191    // Write back generated IDs to entity files
192    if let Some(code) =
193        writeback_registry_entities(&build_result.registry_pending, reg, written_entities)
194    {
195        return Err(code);
196    }
197
198    eprintln!(
199        "{path}: built ({} nodes, {} relationships)",
200        case_output.nodes.len(),
201        case_output.relationships.len()
202    );
203    Ok(case_output)
204}
205
206/// Write back generated IDs to registry entity files.
207/// Tracks already-written paths in `written` to avoid redundant disk reads.
208/// Returns `Some(exit_code)` on error, `None` on success.
209fn writeback_registry_entities(
210    pending: &[(String, writeback::PendingId)],
211    reg: &registry::EntityRegistry,
212    written: &mut HashSet<std::path::PathBuf>,
213) -> Option<i32> {
214    for (entity_name, pending_id) in pending {
215        let Some(entry) = reg.get_by_name(entity_name) else {
216            continue;
217        };
218        let entity_path = &entry.path;
219
220        // Skip if this entity file was already written by a previous case.
221        if !written.insert(entity_path.clone()) {
222            continue;
223        }
224
225        // Also skip if the entity already has an ID in the registry
226        // (loaded from file at startup).
227        if entry.entity.id.is_some() {
228            continue;
229        }
230
231        let entity_content = match std::fs::read_to_string(entity_path) {
232            Ok(c) => c,
233            Err(e) => {
234                eprintln!("{}: error reading file: {e}", entity_path.display());
235                return Some(2);
236            }
237        };
238
239        let fm_end = writeback::find_front_matter_end(&entity_content);
240        let mut ids = vec![writeback::PendingId {
241            line: fm_end.unwrap_or(2),
242            id: pending_id.id.clone(),
243            kind: writeback::WriteBackKind::EntityFrontMatter,
244        }];
245        if let Some(modified) = writeback::apply_writebacks(&entity_content, &mut ids) {
246            if let Err(e) = writeback::write_file(entity_path, &modified) {
247                eprintln!("{e}");
248                return Some(2);
249            }
250            eprintln!("{}: wrote generated ID back to file", entity_path.display());
251        }
252    }
253    None
254}
255
/// Check whether a file's YAML front matter already contains an `id:` field.
///
/// Front matter is recognised only when the first non-blank line of `content`
/// is `---`; it runs until the next `---` line (or the end of the input if it
/// is never closed). Lines are compared after trimming surrounding whitespace.
///
/// A `---` that appears later in the document (for example a Markdown
/// thematic break) does not open front matter, so an `id:` line following it
/// is not reported.
#[cfg(test)]
fn front_matter_has_id(content: &str) -> bool {
    let mut lines = content.lines().map(str::trim);

    // The opening delimiter must be the first thing in the document.
    if lines.find(|line| !line.is_empty()) != Some("---") {
        return false;
    }

    // Scan only up to the closing delimiter; the body is never inspected.
    lines
        .take_while(|line| *line != "---")
        .any(|line| line.starts_with("id:"))
}
272
/// Resolve the content root directory.
///
/// Resolution order:
/// 1. `root`, when given, is returned as is.
/// 2. If `path` is an existing file, its parent directory and that directory's
///    ancestors are searched nearest first; the first one containing a
///    `cases`, `people`, or `organizations` directory wins. If none qualifies,
///    the file's parent directory is used.
/// 3. If `path` is an existing directory, that directory is used.
/// 4. Otherwise (no path, or a path that is neither) the result is `.`.
pub fn resolve_content_root(path: Option<&str>, root: Option<&str>) -> std::path::PathBuf {
    use std::path::{Path, PathBuf};

    // Directories whose presence marks a content root.
    const MARKER_DIRS: [&str; 3] = ["cases", "people", "organizations"];

    if let Some(explicit) = root {
        return PathBuf::from(explicit);
    }

    let Some(given) = path.map(Path::new) else {
        return PathBuf::from(".");
    };

    if given.is_file() {
        match given.parent() {
            Some(parent) => parent
                .ancestors()
                .find(|dir| MARKER_DIRS.iter().any(|marker| dir.join(marker).is_dir()))
                .unwrap_or(parent)
                .to_path_buf(),
            None => PathBuf::from("."),
        }
    } else if given.is_dir() {
        given.to_path_buf()
    } else {
        PathBuf::from(".")
    }
}
300
301/// Load entity registry from content root. Returns empty registry if no entity dirs exist.
302pub fn load_registry(content_root: &std::path::Path) -> Result<registry::EntityRegistry, i32> {
303    match registry::EntityRegistry::load(content_root) {
304        Ok(reg) => Ok(reg),
305        Err(errors) => {
306            for err in &errors {
307                eprintln!("registry: {err}");
308            }
309            Err(1)
310        }
311    }
312}
313
314/// Load tag registry from content root. Returns empty registry if no tags.yaml exists.
315pub fn load_tag_registry(content_root: &std::path::Path) -> Result<tags::TagRegistry, i32> {
316    match tags::TagRegistry::load(content_root) {
317        Ok(reg) => Ok(reg),
318        Err(errors) => {
319            for err in &errors {
320                eprintln!("tags: {err}");
321            }
322            Err(1)
323        }
324    }
325}
326
327/// Resolve case file paths from path argument.
328/// If path is a file, returns just that file.
329/// If path is a directory (or None), auto-discovers `cases/**/*.md`.
330pub fn resolve_case_files(
331    path: Option<&str>,
332    content_root: &std::path::Path,
333) -> Result<Vec<String>, i32> {
334    if let Some(p) = path {
335        let p_path = std::path::Path::new(p);
336        if p_path.is_file() {
337            return Ok(vec![p.to_string()]);
338        }
339        if !p_path.is_dir() {
340            eprintln!("{p}: not a file or directory");
341            return Err(2);
342        }
343    }
344
345    let cases_dir = content_root.join("cases");
346    if !cases_dir.is_dir() {
347        return Ok(Vec::new());
348    }
349
350    let mut files = Vec::new();
351    discover_md_files(&cases_dir, &mut files, 0);
352    files.sort();
353    Ok(files)
354}
355
/// Recursively collect the paths of `.md` files below `dir` into `files`.
///
/// Entries of each directory are visited in file-name order. Recursion stops
/// once `depth` exceeds `MAX_DEPTH` (5). Directories that cannot be listed,
/// unreadable entries, and paths that are not valid UTF-8 are skipped
/// silently.
fn discover_md_files(dir: &std::path::Path, files: &mut Vec<String>, depth: usize) {
    const MAX_DEPTH: usize = 5;

    if depth > MAX_DEPTH {
        return;
    }
    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };

    // `flatten` drops entries that failed to read; sorting keeps the traversal
    // order independent of what the filesystem happens to return.
    let mut entries: Vec<std::fs::DirEntry> = listing.flatten().collect();
    entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));

    for path in entries.iter().map(std::fs::DirEntry::path) {
        if path.is_dir() {
            discover_md_files(&path, files, depth + 1);
        } else if path.extension().and_then(std::ffi::OsStr::to_str) == Some("md") {
            // Pushes the path only when it is representable as UTF-8.
            files.extend(path.to_str().map(str::to_owned));
        }
    }
}
381
#[cfg(test)]
mod tests {
    use super::*;

    /// ID value used by the fixtures below.
    const SAMPLE_ID_FIELD: &str = "id: 01JABC000000000000000000AA";

    /// Assemble a document: `---`, one line per front matter field, `---`,
    /// then `body` verbatim.
    fn document(front_matter_fields: &[&str], body: &str) -> String {
        let mut text = String::from("---\n");
        for field in front_matter_fields {
            text.push_str(field);
            text.push('\n');
        }
        text.push_str("---\n");
        text.push_str(body);
        text
    }

    #[test]
    fn front_matter_has_id_present() {
        let content = document(&[SAMPLE_ID_FIELD], "\n# Test\n");
        assert!(front_matter_has_id(&content));
    }

    #[test]
    fn front_matter_has_id_absent() {
        let content = document(&[], "\n# Test\n");
        assert!(!front_matter_has_id(&content));
    }

    #[test]
    fn front_matter_has_id_with_other_fields() {
        let content = document(&["other: value", SAMPLE_ID_FIELD], "\n# Test\n");
        assert!(front_matter_has_id(&content));
    }

    #[test]
    fn front_matter_has_id_no_front_matter() {
        assert!(!front_matter_has_id("# Test\n\nNo front matter here.\n"));
    }

    #[test]
    fn front_matter_has_id_outside_front_matter() {
        // An `id:` that only shows up in the body must not be reported.
        let content = document(&[], "\n# Test\n\n- id: some-value\n");
        assert!(!front_matter_has_id(&content));
    }
}