Skip to main content

weave_content/
lib.rs

1#![deny(unsafe_code)]
2#![deny(clippy::unwrap_used)]
3#![deny(clippy::expect_used)]
4#![allow(clippy::missing_errors_doc)]
5
6pub mod build_cache;
7pub mod cache;
8pub mod domain;
9pub mod entity;
10pub mod html;
11pub mod nulid_gen;
12pub mod output;
13pub mod parser;
14pub mod registry;
15pub mod relationship;
16pub mod tags;
17pub mod timeline;
18pub mod verifier;
19pub mod writeback;
20
21use std::collections::HashSet;
22
23use crate::entity::Entity;
24use crate::parser::{ParseError, ParsedCase, SectionKind};
25use crate::relationship::Rel;
26
27/// Build a case index: scan case files for front matter `id:` and H1 title.
28///
29/// Returns a map of `case_path → (nulid, title)` used to resolve cross-case
30/// references in `## Related Cases` sections.
31pub fn build_case_index(
32    case_files: &[String],
33    content_root: &std::path::Path,
34) -> Result<std::collections::HashMap<String, (String, String)>, i32> {
35    let mut map = std::collections::HashMap::new();
36    for path in case_files {
37        let content = std::fs::read_to_string(path).map_err(|e| {
38            eprintln!("{path}: {e}");
39            1
40        })?;
41        if let Some(id) = extract_front_matter_id(&content)
42            && let Some(case_path) = case_slug_from_path(std::path::Path::new(path), content_root)
43        {
44            let title = extract_title(&content).unwrap_or_else(|| case_path.clone());
45            map.insert(case_path, (id, title));
46        }
47    }
48    Ok(map)
49}
50
/// Extract the `id:` value from YAML front matter without full parsing.
///
/// The front matter must open with a `---` line as the very first line of
/// `content` and be closed by a later line starting with `---`. Both `\n`
/// and `\r\n` line endings are accepted. Only lines between the two
/// delimiters are inspected, so an `id:` in the document body is never
/// returned, even when the front matter is empty.
///
/// Returns the first non-empty `id:` value with surrounding whitespace and
/// quotes (`"` or `'`) removed, or `None` when the front matter is missing,
/// unterminated, or has no usable `id:`.
fn extract_front_matter_id(content: &str) -> Option<String> {
    // `str::lines` strips both `\n` and `\r\n`, which makes CRLF files work.
    let mut lines = content.lines();
    if lines.next()? != "---" {
        return None;
    }

    let mut found: Option<&str> = None;
    for line in lines {
        // The first line starting with `---` closes the front matter; only
        // now is the id trusted, because an unterminated block is invalid.
        if line.starts_with("---") {
            return found.map(str::to_owned);
        }
        if found.is_some() {
            continue;
        }
        let Some(raw) = line.trim().strip_prefix("id:") else {
            continue;
        };
        let id = raw.trim().trim_matches('"').trim_matches('\'');
        if !id.is_empty() {
            found = Some(id);
        }
    }

    // Reached end of input without a closing delimiter.
    None
}
67
/// Extract the first H1 heading (`# Title`) after the front matter closing delimiter.
///
/// The front matter must open with a `---` line as the very first line of
/// `content` and be closed by a later line starting with `---`. Both `\n`
/// and `\r\n` line endings are accepted, and an empty front matter
/// (`---` immediately followed by `---`) is recognised. Lines inside the
/// front matter are never treated as headings.
///
/// Returns the trimmed text of the first line that starts with `# ` and has
/// non-empty text, or `None` when there is no terminated front matter or no
/// such heading.
fn extract_title(content: &str) -> Option<String> {
    // `str::lines` strips both `\n` and `\r\n`, which makes CRLF files work.
    let mut lines = content.lines();
    if lines.next()? != "---" {
        return None;
    }

    // Consume the front matter body up to and including the closing
    // delimiter; bail out if the block is never closed.
    lines.find(|line| line.starts_with("---"))?;

    lines
        .filter_map(|line| line.strip_prefix("# "))
        .map(str::trim)
        .find(|title| !title.is_empty())
        .map(str::to_owned)
}
83
/// Turn a case file path into its slug, relative to `<content_root>/cases`.
///
/// For example, `content/cases/id/corruption/2002/foo.md` with content root
/// `content/` becomes `id/corruption/2002/foo`. A trailing `.md` is dropped
/// when present; any other file name is kept unchanged.
///
/// Returns `None` when `path` is not located under `<content_root>/cases`
/// or when the relative part is not valid UTF-8.
pub fn case_slug_from_path(
    path: &std::path::Path,
    content_root: &std::path::Path,
) -> Option<String> {
    let relative = path.strip_prefix(content_root.join("cases")).ok()?;
    relative
        .to_str()
        .map(|raw| raw.strip_suffix(".md").unwrap_or(raw).to_owned())
}
97
98/// Parse a case file fully: front matter, entities, relationships, timeline.
99/// Returns the parsed case, inline entities, and relationships (including NEXT from timeline).
100///
101/// When a registry is provided, relationship and timeline names are resolved
102/// against both inline events AND the global entity registry.
103pub fn parse_full(
104    content: &str,
105    reg: Option<&registry::EntityRegistry>,
106) -> Result<(ParsedCase, Vec<Entity>, Vec<Rel>), Vec<ParseError>> {
107    let case = parser::parse(content)?;
108    let mut errors = Vec::new();
109
110    let mut all_entities = Vec::new();
111    for section in &case.sections {
112        if matches!(
113            section.kind,
114            SectionKind::Events | SectionKind::Documents | SectionKind::Assets
115        ) {
116            let entities =
117                entity::parse_entities(&section.body, section.kind, section.line, &mut errors);
118            all_entities.extend(entities);
119        }
120    }
121
122    // Build combined name set: inline events + registry entities
123    let mut entity_names: HashSet<&str> = all_entities.iter().map(|e| e.name.as_str()).collect();
124    if let Some(registry) = reg {
125        for name in registry.names() {
126            entity_names.insert(name);
127        }
128    }
129
130    let event_names: HashSet<&str> = all_entities
131        .iter()
132        .filter(|e| e.label == entity::Label::Event)
133        .map(|e| e.name.as_str())
134        .collect();
135
136    let mut all_rels = Vec::new();
137    for section in &case.sections {
138        if section.kind == SectionKind::Relationships {
139            let rels = relationship::parse_relationships(
140                &section.body,
141                section.line,
142                &entity_names,
143                &case.sources,
144                &mut errors,
145            );
146            all_rels.extend(rels);
147        }
148    }
149
150    for section in &case.sections {
151        if section.kind == SectionKind::Timeline {
152            let rels =
153                timeline::parse_timeline(&section.body, section.line, &event_names, &mut errors);
154            all_rels.extend(rels);
155        }
156    }
157
158    if errors.is_empty() {
159        Ok((case, all_entities, all_rels))
160    } else {
161        Err(errors)
162    }
163}
164
165/// Collect registry entities referenced by relationships in this case.
166/// Sets the `slug` field on each entity from the registry's file path.
167pub fn collect_referenced_registry_entities(
168    rels: &[Rel],
169    inline_entities: &[Entity],
170    reg: &registry::EntityRegistry,
171) -> Vec<Entity> {
172    let inline_names: HashSet<&str> = inline_entities.iter().map(|e| e.name.as_str()).collect();
173    let mut referenced = Vec::new();
174    let mut seen_names: HashSet<String> = HashSet::new();
175
176    for rel in rels {
177        for name in [&rel.source_name, &rel.target_name] {
178            if !inline_names.contains(name.as_str())
179                && seen_names.insert(name.clone())
180                && let Some(entry) = reg.get_by_name(name)
181            {
182                let mut entity = entry.entity.clone();
183                entity.slug = reg.slug_for(entry);
184                referenced.push(entity);
185            }
186        }
187    }
188
189    referenced
190}
191
192/// Build a `CaseOutput` from a case file path.
193/// Handles parsing and ID writeback.
194pub fn build_case_output(
195    path: &str,
196    reg: &registry::EntityRegistry,
197) -> Result<output::CaseOutput, i32> {
198    let mut written = HashSet::new();
199    build_case_output_tracked(path, reg, &mut written, &std::collections::HashMap::new())
200}
201
202/// Build a `CaseOutput` from a case file path, tracking which entity files
203/// have already been written back. This avoids re-reading entity files from
204/// disk when multiple cases reference the same shared entity.
205#[allow(clippy::implicit_hasher)]
206pub fn build_case_output_tracked(
207    path: &str,
208    reg: &registry::EntityRegistry,
209    written_entities: &mut HashSet<std::path::PathBuf>,
210    case_nulid_map: &std::collections::HashMap<String, (String, String)>,
211) -> Result<output::CaseOutput, i32> {
212    let content = match std::fs::read_to_string(path) {
213        Ok(c) => c,
214        Err(e) => {
215            eprintln!("{path}: error reading file: {e}");
216            return Err(2);
217        }
218    };
219
220    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
221        Ok(result) => result,
222        Err(errors) => {
223            for err in &errors {
224                eprintln!("{path}:{err}");
225            }
226            return Err(1);
227        }
228    };
229
230    let referenced_entities = collect_referenced_registry_entities(&rels, &entities, reg);
231
232    // Resolve case NULID
233    let (case_nulid, case_nulid_generated) = match nulid_gen::resolve_id(case.id.as_deref(), 1) {
234        Ok(result) => result,
235        Err(err) => {
236            eprintln!("{path}:{err}");
237            return Err(1);
238        }
239    };
240    let case_nulid_str = case_nulid.to_string();
241
242    // Compute case slug from file path
243    let case_slug = reg
244        .content_root()
245        .and_then(|root| registry::path_to_slug(std::path::Path::new(path), root));
246
247    // Derive case_id from slug (filename-based) or fall back to empty string
248    let case_id = case_slug
249        .as_deref()
250        .and_then(|s| s.rsplit('/').next())
251        .unwrap_or_default();
252
253    let build_result = match output::build_output(
254        case_id,
255        &case_nulid_str,
256        &case.title,
257        &case.summary,
258        &case.tags,
259        case_slug.as_deref(),
260        case.case_type.as_deref(),
261        case.status.as_deref(),
262        case.amounts.as_deref(),
263        &case.sources,
264        &case.related_cases,
265        case_nulid_map,
266        &entities,
267        &rels,
268        &referenced_entities,
269    ) {
270        Ok(out) => out,
271        Err(errors) => {
272            for err in &errors {
273                eprintln!("{path}:{err}");
274            }
275            return Err(1);
276        }
277    };
278
279    let case_output = build_result.output;
280
281    // Write back generated IDs to source case file
282    let mut case_pending = build_result.case_pending;
283    if case_nulid_generated {
284        case_pending.push(writeback::PendingId {
285            line: writeback::find_front_matter_end(&content).unwrap_or(2),
286            id: case_nulid_str.clone(),
287            kind: writeback::WriteBackKind::CaseId,
288        });
289    }
290    if !case_pending.is_empty()
291        && let Some(modified) = writeback::apply_writebacks(&content, &mut case_pending)
292    {
293        if let Err(e) = writeback::write_file(std::path::Path::new(path), &modified) {
294            eprintln!("{e}");
295            return Err(2);
296        }
297        let count = case_pending.len();
298        eprintln!("{path}: wrote {count} generated ID(s) back to file");
299    }
300
301    // Write back generated IDs to entity files
302    if let Some(code) =
303        writeback_registry_entities(&build_result.registry_pending, reg, written_entities)
304    {
305        return Err(code);
306    }
307
308    eprintln!(
309        "{path}: built ({} nodes, {} relationships)",
310        case_output.nodes.len(),
311        case_output.relationships.len()
312    );
313    Ok(case_output)
314}
315
316/// Write back generated IDs to registry entity files.
317/// Tracks already-written paths in `written` to avoid redundant disk reads.
318/// Returns `Some(exit_code)` on error, `None` on success.
319fn writeback_registry_entities(
320    pending: &[(String, writeback::PendingId)],
321    reg: &registry::EntityRegistry,
322    written: &mut HashSet<std::path::PathBuf>,
323) -> Option<i32> {
324    for (entity_name, pending_id) in pending {
325        let Some(entry) = reg.get_by_name(entity_name) else {
326            continue;
327        };
328        let entity_path = &entry.path;
329
330        // Skip if this entity file was already written by a previous case.
331        if !written.insert(entity_path.clone()) {
332            continue;
333        }
334
335        // Also skip if the entity already has an ID in the registry
336        // (loaded from file at startup).
337        if entry.entity.id.is_some() {
338            continue;
339        }
340
341        let entity_content = match std::fs::read_to_string(entity_path) {
342            Ok(c) => c,
343            Err(e) => {
344                eprintln!("{}: error reading file: {e}", entity_path.display());
345                return Some(2);
346            }
347        };
348
349        let fm_end = writeback::find_front_matter_end(&entity_content);
350        let mut ids = vec![writeback::PendingId {
351            line: fm_end.unwrap_or(2),
352            id: pending_id.id.clone(),
353            kind: writeback::WriteBackKind::EntityFrontMatter,
354        }];
355        if let Some(modified) = writeback::apply_writebacks(&entity_content, &mut ids) {
356            if let Err(e) = writeback::write_file(entity_path, &modified) {
357                eprintln!("{e}");
358                return Some(2);
359            }
360            eprintln!("{}: wrote generated ID back to file", entity_path.display());
361        }
362    }
363    None
364}
365
/// Check whether a file's YAML front matter already contains an `id:` field.
///
/// Front matter only counts when the first line of `content` is `---`
/// (ignoring surrounding whitespace). A `---` horizontal rule further down
/// the document does not open a front matter block. Lines are inspected up
/// to the next `---` line, or to the end of input if the block is never
/// closed.
#[cfg(test)]
fn front_matter_has_id(content: &str) -> bool {
    let mut lines = content.lines().map(str::trim);
    if lines.next() != Some("---") {
        return false;
    }
    lines
        .take_while(|line| *line != "---")
        .any(|line| line.starts_with("id:"))
}
382
/// Work out which directory is the content root.
///
/// Resolution order:
///
/// 1. `root`, when given, is returned as-is.
/// 2. When `path` is an existing file, the nearest ancestor of its parent
///    directory (the parent included) that contains a `cases`, `people` or
///    `organizations` directory; failing that, the parent directory itself.
/// 3. When `path` is an existing directory, that directory.
/// 4. Otherwise `.`.
pub fn resolve_content_root(path: Option<&str>, root: Option<&str>) -> std::path::PathBuf {
    use std::path::{Path, PathBuf};

    const MARKER_DIRS: [&str; 3] = ["cases", "people", "organizations"];

    if let Some(explicit) = root {
        return PathBuf::from(explicit);
    }
    let Some(given) = path.map(Path::new) else {
        return PathBuf::from(".");
    };

    if given.is_file() {
        let Some(parent) = given.parent() else {
            return PathBuf::from(".");
        };
        return parent
            .ancestors()
            .find(|dir| MARKER_DIRS.iter().any(|marker| dir.join(marker).is_dir()))
            .unwrap_or(parent)
            .to_path_buf();
    }
    if given.is_dir() {
        return given.to_path_buf();
    }
    PathBuf::from(".")
}
410
411/// Load entity registry from content root. Returns empty registry if no entity dirs exist.
412pub fn load_registry(content_root: &std::path::Path) -> Result<registry::EntityRegistry, i32> {
413    match registry::EntityRegistry::load(content_root) {
414        Ok(reg) => Ok(reg),
415        Err(errors) => {
416            for err in &errors {
417                eprintln!("registry: {err}");
418            }
419            Err(1)
420        }
421    }
422}
423
424/// Load tag registry from content root. Returns empty registry if no tags.yaml exists.
425pub fn load_tag_registry(content_root: &std::path::Path) -> Result<tags::TagRegistry, i32> {
426    match tags::TagRegistry::load(content_root) {
427        Ok(reg) => Ok(reg),
428        Err(errors) => {
429            for err in &errors {
430                eprintln!("tags: {err}");
431            }
432            Err(1)
433        }
434    }
435}
436
437/// Resolve case file paths from path argument.
438/// If path is a file, returns just that file.
439/// If path is a directory (or None), auto-discovers `cases/**/*.md`.
440pub fn resolve_case_files(
441    path: Option<&str>,
442    content_root: &std::path::Path,
443) -> Result<Vec<String>, i32> {
444    if let Some(p) = path {
445        let p_path = std::path::Path::new(p);
446        if p_path.is_file() {
447            return Ok(vec![p.to_string()]);
448        }
449        if !p_path.is_dir() {
450            eprintln!("{p}: not a file or directory");
451            return Err(2);
452        }
453    }
454
455    let cases_dir = content_root.join("cases");
456    if !cases_dir.is_dir() {
457        return Ok(Vec::new());
458    }
459
460    let mut files = Vec::new();
461    discover_md_files(&cases_dir, &mut files, 0);
462    files.sort();
463    Ok(files)
464}
465
/// Recursively collect the paths of `.md` files below `dir` into `files`.
///
/// Entries of each directory are visited in file-name order. Recursion stops
/// once `depth` exceeds `MAX_DEPTH` (5), with `depth` counting from the
/// value the caller passes for the starting directory. Directories that
/// cannot be read, unreadable entries and non-UTF-8 paths are skipped
/// silently.
fn discover_md_files(dir: &std::path::Path, files: &mut Vec<String>, depth: usize) {
    const MAX_DEPTH: usize = 5;

    if depth > MAX_DEPTH {
        return;
    }
    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };

    let mut children: Vec<_> = listing.filter_map(Result::ok).collect();
    children.sort_by_key(std::fs::DirEntry::file_name);

    for child in children {
        let child_path = child.path();
        if child_path.is_dir() {
            discover_md_files(&child_path, files, depth + 1);
            continue;
        }

        let is_markdown = child_path.extension().and_then(|ext| ext.to_str()) == Some("md");
        match child_path.to_str() {
            Some(s) if is_markdown => files.push(s.to_string()),
            _ => {}
        }
    }
}
491
/// Unit tests for the `front_matter_has_id` helper.
#[cfg(test)]
mod tests {
    use super::front_matter_has_id;

    /// An `id:` line as the only front matter field is detected.
    #[test]
    fn front_matter_has_id_present() {
        assert!(front_matter_has_id(
            "---\nid: 01JABC000000000000000000AA\n---\n\n# Test\n"
        ));
    }

    /// Empty front matter has no id.
    #[test]
    fn front_matter_has_id_absent() {
        assert!(!front_matter_has_id("---\n---\n\n# Test\n"));
    }

    /// The `id:` line does not have to be the first field.
    #[test]
    fn front_matter_has_id_with_other_fields() {
        assert!(front_matter_has_id(
            "---\nother: value\nid: 01JABC000000000000000000AA\n---\n\n# Test\n"
        ));
    }

    /// A document without any front matter has no id.
    #[test]
    fn front_matter_has_id_no_front_matter() {
        assert!(!front_matter_has_id("# Test\n\nNo front matter here.\n"));
    }

    /// `id:` appearing in the body should not count.
    #[test]
    fn front_matter_has_id_outside_front_matter() {
        assert!(!front_matter_has_id(
            "---\n---\n\n# Test\n\n- id: some-value\n"
        ));
    }
}