1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9#[derive(
10 Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
11)]
12#[serde(rename_all = "lowercase")]
13pub enum NodeType {
14 Source,
15 Resource,
16 External,
17 Graph,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
23pub struct EdgeType {
24 parser: String,
25 link_type: String,
26}
27
28impl EdgeType {
29 pub fn new(parser: impl Into<String>, link_type: impl Into<String>) -> Self {
30 Self {
31 parser: parser.into(),
32 link_type: link_type.into(),
33 }
34 }
35
36 #[allow(dead_code)]
37 pub fn parser(&self) -> &str {
38 &self.parser
39 }
40
41 #[allow(dead_code)]
42 pub fn link_type(&self) -> &str {
43 &self.link_type
44 }
45}
46
47impl std::fmt::Display for EdgeType {
48 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
49 write!(f, "{}:{}", self.parser, self.link_type)
50 }
51}
52
53impl std::str::FromStr for EdgeType {
54 type Err = anyhow::Error;
55 fn from_str(s: &str) -> Result<Self> {
56 let (parser, link_type) = s
57 .split_once(':')
58 .ok_or_else(|| anyhow::anyhow!("edge type must be 'parser:type', got '{s}'"))?;
59 Ok(Self::new(parser, link_type))
60 }
61}
62
63impl serde::Serialize for EdgeType {
64 fn serialize<S: serde::Serializer>(
65 &self,
66 serializer: S,
67 ) -> std::result::Result<S::Ok, S::Error> {
68 serializer.serialize_str(&self.to_string())
69 }
70}
71
72impl<'de> serde::Deserialize<'de> for EdgeType {
73 fn deserialize<D: serde::Deserializer<'de>>(
74 deserializer: D,
75 ) -> std::result::Result<Self, D::Error> {
76 let s = String::deserialize(deserializer)?;
77 s.parse().map_err(serde::de::Error::custom)
78 }
79}
80
81#[derive(Debug, Clone, serde::Serialize)]
82pub struct Node {
83 pub path: String,
84 pub node_type: NodeType,
85 pub hash: Option<String>,
86 #[serde(skip_serializing_if = "Option::is_none")]
88 pub graph: Option<String>,
89}
90
91#[derive(Debug, Clone)]
92pub struct Edge {
93 pub source: String,
94 pub target: String,
95 pub edge_type: EdgeType,
96 #[allow(dead_code)]
98 pub synthetic: bool,
99}
100
101#[derive(Debug, Default)]
102pub struct Graph {
103 pub nodes: HashMap<String, Node>,
104 pub edges: Vec<Edge>,
105 pub forward: HashMap<String, Vec<usize>>,
106 pub reverse: HashMap<String, Vec<usize>>,
107 pub child_graphs: Vec<String>,
108 pub interface: Vec<String>,
110}
111
112impl Graph {
113 pub fn new() -> Self {
114 Self::default()
115 }
116
117 #[allow(dead_code)]
119 pub fn children(&self) -> impl Iterator<Item = &Node> {
120 self.nodes
121 .values()
122 .filter(|n| n.node_type == NodeType::Graph)
123 }
124
125 #[allow(dead_code)]
127 pub fn is_interfaced(&self, path: &str) -> bool {
128 self.interface.iter().any(|e| e == path)
129 }
130
131 pub fn add_node(&mut self, node: Node) {
132 self.nodes.insert(node.path.clone(), node);
133 }
134
135 pub fn is_file_node(&self, path: &str) -> bool {
138 self.nodes
139 .get(path)
140 .is_some_and(|n| matches!(n.node_type, NodeType::Source | NodeType::Resource))
141 }
142
143 pub fn add_edge(&mut self, edge: Edge) {
144 let idx = self.edges.len();
145 self.forward
146 .entry(edge.source.clone())
147 .or_default()
148 .push(idx);
149 self.reverse
150 .entry(edge.target.clone())
151 .or_default()
152 .push(idx);
153 self.edges.push(edge);
154 }
155}
156
157pub fn hash_bytes(content: &[u8]) -> String {
159 format!("b3:{}", blake3::hash(content).to_hex())
160}
161
162pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
165 let all_files = discover(root, &config.ignore)?;
166 let child_graphs = find_child_graphs(root, &config.ignore)?;
167 let mut graph = Graph::new();
168 graph.child_graphs = child_graphs;
169 let mut pending_edges = Vec::new();
170
171 let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref());
173
174 for file in &all_files {
176 let matching: Vec<&dyn parsers::Parser> = parser_list
178 .iter()
179 .filter(|p| p.matches(file))
180 .map(|p| p.as_ref())
181 .collect();
182
183 if matching.is_empty() {
184 continue;
185 }
186
187 let file_path = root.join(file);
188 let content = std::fs::read_to_string(&file_path)?;
189 let hash = hash_bytes(content.as_bytes());
190
191 graph.add_node(Node {
192 path: file.clone(),
193 node_type: NodeType::Source,
194 hash: Some(hash),
195 graph: None,
196 });
197
198 for parser in &matching {
200 let links = parser.parse(file, &content);
201 for link in links {
202 let edge_type = EdgeType::new(parser.name(), &link.link_type);
203 if link.is_external {
204 pending_edges.push(Edge {
205 source: file.clone(),
206 target: link.target,
207 edge_type,
208 synthetic: false,
209 });
210 } else {
211 let resolved = resolve_link(file, &link.target);
212 pending_edges.push(Edge {
213 source: file.clone(),
214 target: resolved,
215 edge_type,
216 synthetic: false,
217 });
218 }
219 }
220 }
221 }
222
223 let graph_prefixes: Vec<String> = graph.child_graphs.clone();
226 for graph_dir in &graph_prefixes {
227 graph.add_node(Node {
228 path: graph_dir.clone(),
229 node_type: NodeType::Graph,
230 hash: None,
231 graph: None,
232 });
233 }
234
235 let ignore_set = if config.ignore.is_empty() {
237 None
238 } else {
239 let mut builder = globset::GlobSetBuilder::new();
240 for pattern in &config.ignore {
241 if let Ok(glob) = globset::Glob::new(pattern) {
242 builder.add(glob);
243 }
244 }
245 builder.build().ok()
246 };
247
248 let mut implicit_edges = Vec::new();
250 for edge in &pending_edges {
251 if graph.nodes.contains_key(&edge.target) {
252 continue;
253 }
254 if edge.target.starts_with("http://") || edge.target.starts_with("https://") {
255 graph.add_node(Node {
256 path: edge.target.clone(),
257 node_type: NodeType::External,
258 hash: None,
259 graph: None,
260 });
261 continue;
262 }
263
264 let in_child_graph = graph_prefixes
266 .iter()
267 .find(|s| edge.target.starts_with(s.as_str()));
268 if let Some(graph_prefix) = in_child_graph {
269 let target_path = root.join(&edge.target);
270 if target_path.is_file() {
271 let content = std::fs::read(&target_path)?;
272 let hash = hash_bytes(&content);
273 graph.add_node(Node {
274 path: edge.target.clone(),
275 node_type: NodeType::Resource,
276 hash: Some(hash),
277 graph: Some(graph_prefix.clone()),
278 });
279 implicit_edges.push(Edge {
281 source: edge.target.clone(),
282 target: graph_prefix.clone(),
283 edge_type: edge.edge_type.clone(),
284 synthetic: true,
285 });
286 }
287 continue;
288 }
289
290 if let Some(ref set) = ignore_set
292 && set.is_match(&edge.target)
293 {
294 continue;
295 }
296
297 let target_path = root.join(&edge.target);
299 if target_path.is_file() {
300 let content = std::fs::read(&target_path)?;
301 let hash = hash_bytes(&content);
302 graph.add_node(Node {
303 path: edge.target.clone(),
304 node_type: NodeType::Resource,
305 hash: Some(hash),
306 graph: None,
307 });
308 }
309 }
310
311 pending_edges.extend(implicit_edges);
313 for edge in pending_edges {
314 graph.add_edge(edge);
315 }
316
317 if let Some(ref iface) = config.interface {
319 let mut resolved = Vec::new();
320 for pattern in &iface.nodes {
321 if let Ok(glob) = globset::Glob::new(pattern) {
322 let matcher = glob.compile_matcher();
323 for path in graph.nodes.keys() {
324 if matcher.is_match(path) {
325 resolved.push(path.clone());
326 }
327 }
328 } else {
329 if graph.nodes.contains_key(pattern) {
331 resolved.push(pattern.clone());
332 }
333 }
334 }
335 resolved.sort();
336 resolved.dedup();
337 graph.interface = resolved;
338 }
339
340 Ok(graph)
341}
342
343pub fn normalize_relative_path(path: &str) -> String {
347 let mut parts: Vec<String> = Vec::new();
348 for component in Path::new(path).components() {
349 match component {
350 std::path::Component::CurDir => {}
351 std::path::Component::ParentDir => {
352 if parts.last().is_some_and(|p| p != "..") {
354 parts.pop();
355 } else {
356 parts.push("..".to_string());
357 }
358 }
359 std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
360 _ => {}
361 }
362 }
363 parts.join("/")
364}
365
366pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
369 let source_path = Path::new(source_file);
370 let source_dir = source_path.parent().unwrap_or(Path::new(""));
371 let joined = source_dir.join(raw_target);
372 normalize_relative_path(&joined.to_string_lossy())
373}
374
375#[cfg(test)]
376pub mod test_helpers {
377 use super::*;
378
379 pub fn make_node(path: &str) -> Node {
380 Node {
381 path: path.into(),
382 node_type: NodeType::Source,
383 hash: None,
384 graph: None,
385 }
386 }
387
388 pub fn make_edge(source: &str, target: &str) -> Edge {
389 Edge {
390 source: source.into(),
391 target: target.into(),
392 edge_type: EdgeType::new("markdown", "inline"),
393 synthetic: false,
394 }
395 }
396}
397
398#[cfg(test)]
399mod tests {
400 use super::*;
401
402 #[test]
403 fn normalize_simple() {
404 assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
405 }
406
407 #[test]
408 fn normalize_dot() {
409 assert_eq!(normalize_relative_path("./a/./b"), "a/b");
410 }
411
412 #[test]
413 fn normalize_dotdot() {
414 assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
415 }
416
417 #[test]
418 fn normalize_preserves_leading_dotdot() {
419 assert_eq!(normalize_relative_path("../a"), "../a");
420 }
421
422 #[test]
423 fn normalize_deep_escape() {
424 assert_eq!(normalize_relative_path("../../a"), "../../a");
425 }
426
427 #[test]
428 fn normalize_escape_after_descent() {
429 assert_eq!(
431 normalize_relative_path("guides/../../README.md"),
432 "../README.md"
433 );
434 }
435
436 #[test]
437 fn resolve_same_dir() {
438 assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
439 }
440
441 #[test]
442 fn resolve_subdir() {
443 assert_eq!(
444 resolve_link("guides/intro.md", "setup.md"),
445 "guides/setup.md"
446 );
447 }
448
449 #[test]
450 fn resolve_parent() {
451 assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
452 }
453
454 #[test]
455 fn graph_adjacency() {
456 let mut g = Graph::new();
457 g.add_node(Node {
458 path: "a.md".into(),
459 node_type: NodeType::Source,
460 hash: None,
461 graph: None,
462 });
463 g.add_node(Node {
464 path: "b.md".into(),
465 node_type: NodeType::Source,
466 hash: None,
467 graph: None,
468 });
469 g.add_edge(Edge {
470 source: "a.md".into(),
471 target: "b.md".into(),
472 edge_type: EdgeType::new("markdown", "inline"),
473 synthetic: false,
474 });
475 assert_eq!(g.forward["a.md"], vec![0]);
476 assert_eq!(g.reverse["b.md"], vec![0]);
477 assert!(!g.forward.contains_key("b.md"));
478 }
479}