1pub mod cache;
2pub mod resolver;
3pub mod scanner;
4pub mod validator;
5
6use indexmap::IndexMap;
7use rayon::prelude::*;
8use std::collections::BTreeMap;
9use std::path::Path;
10
11use crate::config::Config;
12use crate::error::{Error, Result};
13use crate::model::{Graph, Node, RawEdge};
14use crate::parser::{self, ParsedDocument};
15
16use cache::BuildCache;
17use resolver::{build_id_set, build_path_index, resolve_edges};
18use validator::validate_supersedes_dag;
19
20pub struct BuildResult {
22 pub graph: Graph,
23 pub stats: BuildStats,
24}
25
26#[derive(Debug, serde::Serialize)]
27pub struct BuildStats {
28 pub nodes: usize,
29 pub edges: usize,
30 pub cached: usize,
31 pub parsed: usize,
32 #[serde(skip_serializing_if = "Vec::is_empty")]
33 pub warnings: Vec<String>,
34}
35
36pub fn build(root: &Path, config: &Config, full_rebuild: bool) -> Result<BuildResult> {
38 let paths = scanner::scan_scope(root, config)?;
40
41 let cache_path = root.join(&config.output.dir).join("cache.json");
55 let config_json = serde_json::to_string(config)
56 .expect("Config is defined entirely over serializable primitives");
57 let config_hash = cache::compute_hash(&format!(
58 "nodex={}\n{}",
59 env!("CARGO_PKG_VERSION"),
60 config_json
61 ));
62 let (mut cache, cache_warning) = if full_rebuild {
63 (BuildCache::default(), None)
64 } else {
65 BuildCache::load(&cache_path, &config_hash)
66 };
67 cache.config_hash = config_hash;
68
69 let read_results: Vec<(
71 std::path::PathBuf,
72 std::result::Result<String, std::io::Error>,
73 )> = paths
74 .par_iter()
75 .map(|rel_path| {
76 let abs_path = root.join(rel_path);
77 let result = std::fs::read_to_string(&abs_path);
78 (rel_path.clone(), result)
79 })
80 .collect();
81
82 let mut read_warnings = Vec::new();
83 let mut file_contents: Vec<(std::path::PathBuf, String)> = Vec::new();
84 for (rel_path, result) in read_results {
85 match result {
86 Ok(content) => file_contents.push((rel_path, content)),
87 Err(e) => read_warnings.push(format!("skipped {}: {e}", rel_path.display())),
88 }
89 }
90
91 let mut cached_count = 0usize;
93 let mut parsed_count = 0usize;
94
95 let mut cached_results: Vec<(Node, Vec<RawEdge>)> = Vec::new();
97 let mut to_parse: Vec<(std::path::PathBuf, String)> = Vec::new();
98
99 for (rel_path, content) in &file_contents {
100 if let Some(entry) = cache.get(rel_path, content) {
101 cached_results.push((
102 entry.node.clone(),
103 entry.raw_edges.iter().cloned().map(RawEdge::from).collect(),
104 ));
105 cached_count += 1;
106 } else {
107 to_parse.push((rel_path.clone(), content.clone()));
108 }
109 }
110
111 let fresh_results: Vec<Result<(std::path::PathBuf, String, ParsedDocument)>> = to_parse
113 .par_iter()
114 .map(|(rel_path, content)| {
115 let doc = parser::parse_document(rel_path, content, config)?;
116 Ok((rel_path.clone(), content.clone(), doc))
117 })
118 .collect();
119
120 let mut all_nodes: Vec<(String, Node)> = Vec::new();
121 let mut all_raw_edges: Vec<(String, std::path::PathBuf, Vec<RawEdge>)> = Vec::new();
122
123 for (node, raw_edges) in cached_results {
125 let id = node.id.clone();
126 let path = node.path.clone();
127 all_raw_edges.push((id.clone(), path, raw_edges));
128 all_nodes.push((id, node));
129 }
130
131 for result in fresh_results {
133 let (rel_path, content, doc) = result?;
134 parsed_count += 1;
135
136 cache.insert(rel_path, &content, doc.node.clone(), &doc.raw_edges);
137
138 let id = doc.node.id.clone();
139 let path = doc.node.path.clone();
140 all_raw_edges.push((id.clone(), path, doc.raw_edges));
141 all_nodes.push((id, doc.node));
142 }
143
144 {
146 let mut seen: BTreeMap<&str, &Path> = BTreeMap::new();
147 for (id, node) in &all_nodes {
148 if let Some(&first_path) = seen.get(id.as_str()) {
149 return Err(Error::DuplicateId {
150 id: id.clone(),
151 first: first_path.to_path_buf(),
152 second: node.path.clone(),
153 });
154 }
155 seen.insert(id.as_str(), &node.path);
156 }
157 }
158
159 let path_index = build_path_index(&all_nodes);
161 let id_set = build_id_set(&all_nodes);
162
163 let mut edges = Vec::new();
165 for (source_id, source_path, raw_edges) in all_raw_edges {
166 let resolved = resolve_edges(&source_id, raw_edges, &source_path, &path_index, &id_set);
167 edges.extend(resolved);
168 }
169
170 edges.extend(derive_superseded_by_edges(&all_nodes));
181
182 dedupe_edges(&mut edges);
188
189 validate_supersedes_dag(&edges)?;
191
192 edges.sort_by(|a, b| {
194 a.source
195 .cmp(&b.source)
196 .then_with(|| a.relation.cmp(&b.relation))
197 .then_with(|| a.location.cmp(&b.location))
198 });
199
200 let mut node_map = IndexMap::new();
202 all_nodes.sort_by(|a, b| a.0.cmp(&b.0));
203 for (id, node) in all_nodes {
204 node_map.insert(id, node);
205 }
206
207 let valid_paths: Vec<_> = file_contents.iter().map(|(p, _)| p.clone()).collect();
209 cache.retain_paths(&valid_paths);
210 let mut warnings = read_warnings;
211 if let Some(msg) = cache_warning {
212 warnings.push(msg);
213 }
214 if let Err(e) = cache.save(&cache_path) {
215 warnings.push(format!("cache save failed: {e}"));
216 }
217
218 let stats = BuildStats {
219 nodes: node_map.len(),
220 edges: edges.len(),
221 cached: cached_count,
222 parsed: parsed_count,
223 warnings,
224 };
225
226 Ok(BuildResult {
227 graph: Graph::new(node_map, edges),
228 stats,
229 })
230}
231
232fn derive_superseded_by_edges(
239 all_nodes: &[(String, crate::model::Node)],
240) -> Vec<crate::model::Edge> {
241 use crate::model::{Confidence, Edge, ResolvedTarget};
242 let known_ids: std::collections::BTreeSet<&str> =
243 all_nodes.iter().map(|(id, _)| id.as_str()).collect();
244 let mut out = Vec::new();
245 for (id, node) in all_nodes {
246 let Some(ref successor) = node.superseded_by else {
247 continue;
248 };
249 if !known_ids.contains(successor.as_str()) {
250 continue;
254 }
255 out.push(Edge {
256 source: successor.clone(),
257 target: ResolvedTarget::resolved(id.as_str()),
258 relation: "supersedes".to_string(),
259 confidence: Confidence::Extracted,
260 location: format!("frontmatter:superseded_by@{id}"),
261 });
262 }
263 out
264}
265
266fn dedupe_edges(edges: &mut Vec<crate::model::Edge>) {
271 use crate::model::ResolvedTarget;
272 let mut seen: std::collections::BTreeSet<(String, String, String)> =
273 std::collections::BTreeSet::new();
274 edges.retain(|edge| {
275 let target_key = match &edge.target {
276 ResolvedTarget::Resolved { id } => format!("r:{id}"),
277 ResolvedTarget::Unresolved { raw, .. } => format!("u:{raw}"),
278 };
279 seen.insert((edge.source.clone(), target_key, edge.relation.clone()))
280 });
281}