Skip to main content

weave_content/
lib.rs

1#![deny(unsafe_code)]
2#![deny(clippy::unwrap_used)]
3#![deny(clippy::expect_used)]
4#![allow(clippy::missing_errors_doc)]
5
6pub mod build_cache;
7pub mod cache;
8pub mod domain;
9pub mod entity;
10pub mod html;
11pub mod nulid_gen;
12pub mod output;
13pub mod parser;
14pub mod registry;
15pub mod relationship;
16pub mod tags;
17pub mod timeline;
18pub mod verifier;
19pub mod writeback;
20
21use std::collections::HashSet;
22
23use crate::entity::Entity;
24use crate::parser::{ParseError, ParsedCase, SectionKind};
25use crate::relationship::Rel;
26
27/// Parse a case file fully: front matter, entities, relationships, timeline.
28/// Returns the parsed case, inline entities, and relationships (including NEXT from timeline).
29///
30/// When a registry is provided, relationship and timeline names are resolved
31/// against both inline events AND the global entity registry.
32pub fn parse_full(
33    content: &str,
34    reg: Option<&registry::EntityRegistry>,
35) -> Result<(ParsedCase, Vec<Entity>, Vec<Rel>), Vec<ParseError>> {
36    let case = parser::parse(content)?;
37    let mut errors = Vec::new();
38
39    let mut all_entities = Vec::new();
40    for section in &case.sections {
41        if matches!(
42            section.kind,
43            SectionKind::Events | SectionKind::Documents | SectionKind::Assets
44        ) {
45            let entities =
46                entity::parse_entities(&section.body, section.kind, section.line, &mut errors);
47            all_entities.extend(entities);
48        }
49    }
50
51    // Build combined name set: inline events + registry entities
52    let mut entity_names: HashSet<&str> = all_entities.iter().map(|e| e.name.as_str()).collect();
53    if let Some(registry) = reg {
54        for name in registry.names() {
55            entity_names.insert(name);
56        }
57    }
58
59    let event_names: HashSet<&str> = all_entities
60        .iter()
61        .filter(|e| e.label == entity::Label::Event)
62        .map(|e| e.name.as_str())
63        .collect();
64
65    let mut all_rels = Vec::new();
66    for section in &case.sections {
67        if section.kind == SectionKind::Relationships {
68            let rels = relationship::parse_relationships(
69                &section.body,
70                section.line,
71                &entity_names,
72                &case.sources,
73                &mut errors,
74            );
75            all_rels.extend(rels);
76        }
77    }
78
79    for section in &case.sections {
80        if section.kind == SectionKind::Timeline {
81            let rels =
82                timeline::parse_timeline(&section.body, section.line, &event_names, &mut errors);
83            all_rels.extend(rels);
84        }
85    }
86
87    if errors.is_empty() {
88        Ok((case, all_entities, all_rels))
89    } else {
90        Err(errors)
91    }
92}
93
94/// Collect registry entities referenced by relationships in this case.
95/// Sets the `slug` field on each entity from the registry's file path.
96pub fn collect_referenced_registry_entities(
97    rels: &[Rel],
98    inline_entities: &[Entity],
99    reg: &registry::EntityRegistry,
100) -> Vec<Entity> {
101    let inline_names: HashSet<&str> = inline_entities.iter().map(|e| e.name.as_str()).collect();
102    let mut referenced = Vec::new();
103    let mut seen_names: HashSet<String> = HashSet::new();
104
105    for rel in rels {
106        for name in [&rel.source_name, &rel.target_name] {
107            if !inline_names.contains(name.as_str())
108                && seen_names.insert(name.clone())
109                && let Some(entry) = reg.get_by_name(name)
110            {
111                let mut entity = entry.entity.clone();
112                entity.slug = reg.slug_for(entry);
113                referenced.push(entity);
114            }
115        }
116    }
117
118    referenced
119}
120
121/// Build a `CaseOutput` from a case file path.
122/// Handles parsing and ID writeback.
123pub fn build_case_output(
124    path: &str,
125    reg: &registry::EntityRegistry,
126) -> Result<output::CaseOutput, i32> {
127    let mut written = HashSet::new();
128    build_case_output_tracked(path, reg, &mut written)
129}
130
131/// Build a `CaseOutput` from a case file path, tracking which entity files
132/// have already been written back. This avoids re-reading entity files from
133/// disk when multiple cases reference the same shared entity.
134#[allow(clippy::implicit_hasher)]
135pub fn build_case_output_tracked(
136    path: &str,
137    reg: &registry::EntityRegistry,
138    written_entities: &mut HashSet<std::path::PathBuf>,
139) -> Result<output::CaseOutput, i32> {
140    let content = match std::fs::read_to_string(path) {
141        Ok(c) => c,
142        Err(e) => {
143            eprintln!("{path}: error reading file: {e}");
144            return Err(2);
145        }
146    };
147
148    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
149        Ok(result) => result,
150        Err(errors) => {
151            for err in &errors {
152                eprintln!("{path}:{err}");
153            }
154            return Err(1);
155        }
156    };
157
158    let referenced_entities = collect_referenced_registry_entities(&rels, &entities, reg);
159
160    // Resolve case NULID
161    let (case_nulid, case_nulid_generated) = match nulid_gen::resolve_id(case.nulid.as_deref(), 1) {
162        Ok(result) => result,
163        Err(err) => {
164            eprintln!("{path}:{err}");
165            return Err(1);
166        }
167    };
168    let case_nulid_str = case_nulid.to_string();
169
170    // Compute case slug from file path
171    let case_slug = reg
172        .content_root()
173        .and_then(|root| registry::path_to_slug(std::path::Path::new(path), root));
174
175    let build_result = match output::build_output(
176        &case.id,
177        &case_nulid_str,
178        &case.title,
179        &case.summary,
180        &case.tags,
181        case_slug.as_deref(),
182        case.case_type.as_deref(),
183        case.status.as_deref(),
184        &case.sources,
185        &entities,
186        &rels,
187        &referenced_entities,
188    ) {
189        Ok(out) => out,
190        Err(errors) => {
191            for err in &errors {
192                eprintln!("{path}:{err}");
193            }
194            return Err(1);
195        }
196    };
197
198    let case_output = build_result.output;
199
200    // Write back generated IDs to source case file
201    let mut case_pending = build_result.case_pending;
202    if case_nulid_generated {
203        case_pending.push(writeback::PendingId {
204            line: writeback::find_front_matter_end(&content).unwrap_or(2),
205            id: case_nulid_str.clone(),
206            kind: writeback::WriteBackKind::CaseNulid,
207        });
208    }
209    if !case_pending.is_empty()
210        && let Some(modified) = writeback::apply_writebacks(&content, &mut case_pending)
211    {
212        if let Err(e) = writeback::write_file(std::path::Path::new(path), &modified) {
213            eprintln!("{e}");
214            return Err(2);
215        }
216        let count = case_pending.len();
217        eprintln!("{path}: wrote {count} generated ID(s) back to file");
218    }
219
220    // Write back generated IDs to entity files
221    if let Some(code) =
222        writeback_registry_entities(&build_result.registry_pending, reg, written_entities)
223    {
224        return Err(code);
225    }
226
227    eprintln!(
228        "{path}: built ({} nodes, {} relationships)",
229        case_output.nodes.len(),
230        case_output.relationships.len()
231    );
232    Ok(case_output)
233}
234
235/// Write back generated IDs to registry entity files.
236/// Tracks already-written paths in `written` to avoid redundant disk reads.
237/// Returns `Some(exit_code)` on error, `None` on success.
238fn writeback_registry_entities(
239    pending: &[(String, writeback::PendingId)],
240    reg: &registry::EntityRegistry,
241    written: &mut HashSet<std::path::PathBuf>,
242) -> Option<i32> {
243    for (entity_name, pending_id) in pending {
244        let Some(entry) = reg.get_by_name(entity_name) else {
245            continue;
246        };
247        let entity_path = &entry.path;
248
249        // Skip if this entity file was already written by a previous case.
250        if !written.insert(entity_path.clone()) {
251            continue;
252        }
253
254        // Also skip if the entity already has an ID in the registry
255        // (loaded from file at startup).
256        if entry.entity.id.is_some() {
257            continue;
258        }
259
260        let entity_content = match std::fs::read_to_string(entity_path) {
261            Ok(c) => c,
262            Err(e) => {
263                eprintln!("{}: error reading file: {e}", entity_path.display());
264                return Some(2);
265            }
266        };
267
268        let fm_end = writeback::find_front_matter_end(&entity_content);
269        let mut ids = vec![writeback::PendingId {
270            line: fm_end.unwrap_or(2),
271            id: pending_id.id.clone(),
272            kind: writeback::WriteBackKind::EntityFrontMatter,
273        }];
274        if let Some(modified) = writeback::apply_writebacks(&entity_content, &mut ids) {
275            if let Err(e) = writeback::write_file(entity_path, &modified) {
276                eprintln!("{e}");
277                return Some(2);
278            }
279            eprintln!("{}: wrote generated ID back to file", entity_path.display());
280        }
281    }
282    None
283}
284
/// Check whether a file's YAML front matter already contains a top-level `id:` field.
///
/// Front matter is only recognised when its opening `---` delimiter is the
/// first non-blank line of `content`; a `---` further down (for example a
/// Markdown horizontal rule) does not start front matter. The block ends at
/// the next `---` line, or at end of input if it is never closed.
///
/// Only keys that start at column 0 count: an indented `id:` belongs to a
/// nested mapping and is ignored. Delimiter lines may carry surrounding
/// whitespace, and both `\n` and `\r\n` line endings are accepted.
#[cfg(test)]
fn front_matter_has_id(content: &str) -> bool {
    let mut lines = content
        .lines()
        .skip_while(|line| line.trim().is_empty());

    // Without an opening delimiter up front there is no front matter at all.
    if lines.next().map(str::trim) != Some("---") {
        return false;
    }

    lines
        .take_while(|line| line.trim() != "---")
        .any(|line| line.starts_with("id:"))
}
301
/// Work out which directory is the content root.
///
/// Resolution order:
/// 1. `root`, when given, is returned verbatim.
/// 2. When `path` is an existing file, the nearest ancestor of its parent
///    directory (the parent included) that contains a `cases`, `people`, or
///    `organizations` directory; if no ancestor qualifies, the parent itself.
/// 3. When `path` is an existing directory, that directory.
/// 4. Otherwise `"."`.
pub fn resolve_content_root(path: Option<&str>, root: Option<&str>) -> std::path::PathBuf {
    use std::path::{Path, PathBuf};

    // Directories whose presence marks a content root.
    const MARKER_DIRS: [&str; 3] = ["cases", "people", "organizations"];

    if let Some(explicit) = root {
        return PathBuf::from(explicit);
    }

    let current_dir = || PathBuf::from(".");
    let Some(given) = path.map(Path::new) else {
        return current_dir();
    };

    if given.is_file() {
        return match given.parent() {
            Some(parent) => parent
                .ancestors()
                .find(|dir| MARKER_DIRS.iter().any(|marker| dir.join(marker).is_dir()))
                .unwrap_or(parent)
                .to_path_buf(),
            None => current_dir(),
        };
    }
    if given.is_dir() {
        return given.to_path_buf();
    }
    current_dir()
}
329
330/// Load entity registry from content root. Returns empty registry if no entity dirs exist.
331pub fn load_registry(content_root: &std::path::Path) -> Result<registry::EntityRegistry, i32> {
332    match registry::EntityRegistry::load(content_root) {
333        Ok(reg) => Ok(reg),
334        Err(errors) => {
335            for err in &errors {
336                eprintln!("registry: {err}");
337            }
338            Err(1)
339        }
340    }
341}
342
343/// Load tag registry from content root. Returns empty registry if no tags.yaml exists.
344pub fn load_tag_registry(content_root: &std::path::Path) -> Result<tags::TagRegistry, i32> {
345    match tags::TagRegistry::load(content_root) {
346        Ok(reg) => Ok(reg),
347        Err(errors) => {
348            for err in &errors {
349                eprintln!("tags: {err}");
350            }
351            Err(1)
352        }
353    }
354}
355
356/// Resolve case file paths from path argument.
357/// If path is a file, returns just that file.
358/// If path is a directory (or None), auto-discovers `cases/**/*.md`.
359pub fn resolve_case_files(
360    path: Option<&str>,
361    content_root: &std::path::Path,
362) -> Result<Vec<String>, i32> {
363    if let Some(p) = path {
364        let p_path = std::path::Path::new(p);
365        if p_path.is_file() {
366            return Ok(vec![p.to_string()]);
367        }
368        if !p_path.is_dir() {
369            eprintln!("{p}: not a file or directory");
370            return Err(2);
371        }
372    }
373
374    let cases_dir = content_root.join("cases");
375    if !cases_dir.is_dir() {
376        return Ok(Vec::new());
377    }
378
379    let mut files = Vec::new();
380    discover_md_files(&cases_dir, &mut files, 0);
381    files.sort();
382    Ok(files)
383}
384
/// Append every `.md` file found under `dir` to `files`, recursing into subdirectories.
///
/// `depth` is the nesting level of `dir`; directories deeper than `MAX_DEPTH`
/// (5) are not scanned, which is enough for `cases/country/category/year/`.
/// Entries are visited in file-name order. Directories that cannot be read,
/// entries that fail to load, and paths that are not valid UTF-8 are skipped
/// silently.
fn discover_md_files(dir: &std::path::Path, files: &mut Vec<String>, depth: usize) {
    const MAX_DEPTH: usize = 5;
    if depth > MAX_DEPTH {
        return;
    }

    let mut entries: Vec<std::fs::DirEntry> = match std::fs::read_dir(dir) {
        Ok(listing) => listing.filter_map(Result::ok).collect(),
        Err(_) => return,
    };
    entries.sort_by_cached_key(std::fs::DirEntry::file_name);

    for child in entries.iter().map(std::fs::DirEntry::path) {
        if child.is_dir() {
            discover_md_files(&child, files, depth + 1);
            continue;
        }

        let is_markdown = child.extension().and_then(|ext| ext.to_str()) == Some("md");
        if !is_markdown {
            continue;
        }
        if let Some(text) = child.to_str() {
            files.push(text.to_string());
        }
    }
}
410
#[cfg(test)]
mod tests {
    use super::front_matter_has_id;

    /// An `id:` line as the only front-matter field is detected.
    #[test]
    fn front_matter_has_id_present() {
        let has_id = front_matter_has_id("---\nid: 01JABC000000000000000000AA\n---\n\n# Test\n");
        assert!(has_id);
    }

    /// Empty front matter has no `id:` field.
    #[test]
    fn front_matter_has_id_absent() {
        let has_id = front_matter_has_id("---\n---\n\n# Test\n");
        assert!(!has_id);
    }

    /// An `id:` line is found even when other fields come before it.
    #[test]
    fn front_matter_has_id_with_other_fields() {
        let has_id = front_matter_has_id(
            "---\nother: value\nid: 01JABC000000000000000000AA\n---\n\n# Test\n",
        );
        assert!(has_id);
    }

    /// A document without any front matter never reports an ID.
    #[test]
    fn front_matter_has_id_no_front_matter() {
        let has_id = front_matter_has_id("# Test\n\nNo front matter here.\n");
        assert!(!has_id);
    }

    /// `id:` appearing in the body, after the front matter closed, does not count.
    #[test]
    fn front_matter_has_id_outside_front_matter() {
        let has_id = front_matter_has_id("---\n---\n\n# Test\n\n- id: some-value\n");
        assert!(!has_id);
    }
}