1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9pub fn is_uri(target: &str) -> bool {
12 let bytes = target.as_bytes();
13 if bytes.is_empty() || !bytes[0].is_ascii_alphabetic() {
14 return false;
15 }
16 for &b in &bytes[1..] {
17 if b == b':' {
18 return true;
19 }
20 if !b.is_ascii_alphanumeric() && b != b'+' && b != b'-' && b != b'.' {
21 return false;
22 }
23 }
24 false
25}
26
27#[derive(
28 Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
29)]
30#[serde(rename_all = "lowercase")]
31pub enum NodeType {
32 File,
33 External,
34 Graph,
35}
36
37#[derive(Debug, Clone, serde::Serialize)]
38pub struct Node {
39 pub path: String,
40 pub node_type: NodeType,
41 pub hash: Option<String>,
42 #[serde(skip_serializing_if = "Option::is_none")]
44 pub graph: Option<String>,
45 #[serde(skip_serializing_if = "HashMap::is_empty")]
47 pub metadata: HashMap<String, serde_json::Value>,
48}
49
50#[derive(Debug, Clone)]
51pub struct Edge {
52 pub source: String,
53 pub target: String,
56 pub link: Option<String>,
59 pub parser: String,
61}
62
63#[derive(Debug, Clone, Default)]
66pub struct TargetProperties {
67 pub is_symlink: bool,
68 pub is_directory: bool,
69 pub symlink_target: Option<String>,
70}
71
72#[derive(Debug, Default)]
73pub struct Graph {
74 pub nodes: HashMap<String, Node>,
75 pub edges: Vec<Edge>,
76 pub forward: HashMap<String, Vec<usize>>,
77 pub reverse: HashMap<String, Vec<usize>>,
78 pub child_graphs: Vec<String>,
79 pub interface: Vec<String>,
81 pub target_properties: HashMap<String, TargetProperties>,
83}
84
85impl Graph {
86 pub fn new() -> Self {
87 Self::default()
88 }
89
90 #[allow(dead_code)]
92 pub fn children(&self) -> impl Iterator<Item = &Node> {
93 self.nodes
94 .values()
95 .filter(|n| n.node_type == NodeType::Graph)
96 }
97
98 #[allow(dead_code)]
100 pub fn is_interfaced(&self, path: &str) -> bool {
101 self.interface.iter().any(|e| e == path)
102 }
103
104 pub fn add_node(&mut self, node: Node) {
105 self.nodes.insert(node.path.clone(), node);
106 }
107
108 pub fn is_file_node(&self, path: &str) -> bool {
111 self.nodes
112 .get(path)
113 .is_some_and(|n| n.node_type == NodeType::File)
114 }
115
116 pub fn add_edge(&mut self, edge: Edge) {
117 let idx = self.edges.len();
118 self.forward
119 .entry(edge.source.clone())
120 .or_default()
121 .push(idx);
122 self.reverse
123 .entry(edge.target.clone())
124 .or_default()
125 .push(idx);
126 self.edges.push(edge);
127 }
128
129 pub fn target_props(&self, target: &str) -> Option<&TargetProperties> {
131 self.target_properties.get(target)
132 }
133}
134
135pub fn hash_bytes(content: &[u8]) -> String {
137 format!("b3:{}", blake3::hash(content).to_hex())
138}
139
140pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
147 let included_files = discover(root, &config.include, &config.exclude)?;
148 let child_graphs = find_child_graphs(root, &config.exclude)?;
149 let mut graph = Graph::new();
150 graph.child_graphs = child_graphs;
151 let mut pending_edges = Vec::new();
152
153 let mut file_text: HashMap<String, String> = HashMap::new(); for file in &included_files {
158 let file_path = root.join(file);
159 let raw = std::fs::read(&file_path)?;
160 let hash = hash_bytes(&raw);
161
162 graph.add_node(Node {
163 path: file.clone(),
164 node_type: NodeType::File,
165 hash: Some(hash),
166 graph: None,
167 metadata: HashMap::new(),
168 });
169
170 if let Ok(text) = String::from_utf8(raw) {
172 file_text.insert(file.clone(), text);
173 }
174 }
175
176 let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref(), root);
178 let mut parser_files: Vec<Vec<String>> = vec![Vec::new(); parser_list.len()];
179
180 for file in &included_files {
181 for (i, parser) in parser_list.iter().enumerate() {
182 if parser.matches(file) {
183 parser_files[i].push(file.clone());
184 }
185 }
186 }
187
188 for (i, parser) in parser_list.iter().enumerate() {
190 let files: Vec<(&str, &str)> = parser_files[i]
191 .iter()
192 .filter_map(|path| {
193 file_text
194 .get(path)
195 .map(|content| (path.as_str(), content.as_str()))
196 })
197 .collect();
198
199 if files.is_empty() {
200 continue;
201 }
202
203 let batch_results = parser.parse_batch(&files);
204
205 for (file, result) in batch_results {
206 if let Some(metadata) = result.metadata
208 && let Some(node) = graph.nodes.get_mut(&file)
209 {
210 node.metadata.insert(parser.name().to_string(), metadata);
211 }
212
213 for link in result.links {
214 let normalized = match normalize_link_target(&link) {
215 Some(n) => n,
216 None => continue, };
218
219 let target = if is_uri(&normalized.target) {
220 normalized.target
221 } else {
222 resolve_link(&file, &normalized.target)
223 };
224 let link = normalized.fragment.map(|frag| format!("{target}{frag}"));
226 pending_edges.push(Edge {
227 source: file.clone(),
228 target,
229 link,
230 parser: parser.name().to_string(),
231 });
232 }
233 }
234 }
235
236 let graph_prefixes: Vec<String> = graph.child_graphs.clone();
238 for graph_dir in &graph_prefixes {
239 graph.add_node(Node {
240 path: graph_dir.clone(),
241 node_type: NodeType::Graph,
242 hash: None,
243 graph: None,
244 metadata: HashMap::new(),
245 });
246 }
247
248 let mut implicit_edges = Vec::new();
255 for edge in &pending_edges {
256 if graph.nodes.contains_key(&edge.target) {
257 continue;
258 }
259
260 if is_uri(&edge.target) {
262 graph.add_node(Node {
263 path: edge.target.clone(),
264 node_type: NodeType::External,
265 hash: None,
266 graph: None,
267 metadata: HashMap::new(),
268 });
269 continue;
270 }
271
272 let in_child_graph = graph_prefixes
274 .iter()
275 .find(|s| edge.target.starts_with(s.as_str()));
276 if let Some(graph_prefix) = in_child_graph {
277 let target_path = root.join(&edge.target);
278 if target_path.is_file() {
279 graph.add_node(Node {
280 path: edge.target.clone(),
281 node_type: NodeType::External,
282 hash: None,
283 graph: Some(graph_prefix.clone()),
284 metadata: HashMap::new(),
285 });
286 implicit_edges.push(Edge {
288 source: edge.target.clone(),
289 target: graph_prefix.clone(),
290 link: None,
291 parser: edge.parser.clone(),
292 });
293 }
294 continue;
295 }
296
297 let target_path = root.join(&edge.target);
299 if target_path.is_file() {
300 graph.add_node(Node {
301 path: edge.target.clone(),
302 node_type: NodeType::External,
303 hash: None,
304 graph: None,
305 metadata: HashMap::new(),
306 });
307 }
308 }
311
312 pending_edges.extend(implicit_edges);
314 for edge in &pending_edges {
315 if is_uri(&edge.target) || graph.target_properties.contains_key(&edge.target) {
316 continue;
317 }
318 let target_path = root.join(&edge.target);
319 let is_symlink = target_path.is_symlink();
320 let is_directory = target_path.is_dir();
321 let symlink_target = if is_symlink {
322 std::fs::read_link(&target_path)
323 .ok()
324 .map(|p| p.to_string_lossy().to_string())
325 } else {
326 None
327 };
328 graph.target_properties.insert(
329 edge.target.clone(),
330 TargetProperties {
331 is_symlink,
332 is_directory,
333 symlink_target,
334 },
335 );
336 }
337
338 for edge in pending_edges {
340 graph.add_edge(edge);
341 }
342
343 if let Some(ref iface) = config.interface {
345 let mut resolved = Vec::new();
346 for pattern in &iface.nodes {
347 if let Ok(glob) = globset::Glob::new(pattern) {
348 let matcher = glob.compile_matcher();
349 for path in graph.nodes.keys() {
350 if matcher.is_match(path) {
351 resolved.push(path.clone());
352 }
353 }
354 } else {
355 if graph.nodes.contains_key(pattern) {
357 resolved.push(pattern.clone());
358 }
359 }
360 }
361 resolved.sort();
362 resolved.dedup();
363 graph.interface = resolved;
364 }
365
366 Ok(graph)
367}
368
369pub fn normalize_relative_path(path: &str) -> String {
373 let mut parts: Vec<String> = Vec::new();
374 for component in Path::new(path).components() {
375 match component {
376 std::path::Component::CurDir => {}
377 std::path::Component::ParentDir => {
378 if parts.last().is_some_and(|p| p != "..") {
380 parts.pop();
381 } else {
382 parts.push("..".to_string());
383 }
384 }
385 std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
386 _ => {}
387 }
388 }
389 parts.join("/")
390}
391
392struct NormalizedTarget {
394 target: String,
396 fragment: Option<String>,
398}
399
400fn normalize_link_target(raw: &str) -> Option<NormalizedTarget> {
404 let target = raw.trim();
405 if target.is_empty() {
406 return None;
407 }
408
409 if target.starts_with('#') {
411 return None;
412 }
413
414 let (base, fragment) = match target.find('#') {
416 Some(idx) => (&target[..idx], Some(target[idx..].to_string())),
417 None => (target, None),
418 };
419
420 if base.is_empty() {
422 return None;
423 }
424
425 Some(NormalizedTarget {
426 target: base.to_string(),
427 fragment,
428 })
429}
430
431pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
434 let source_path = Path::new(source_file);
435 let source_dir = source_path.parent().unwrap_or(Path::new(""));
436 let joined = source_dir.join(raw_target);
437 normalize_relative_path(&joined.to_string_lossy())
438}
439
440#[cfg(test)]
441pub mod test_helpers {
442 use super::*;
443
444 pub fn make_node(path: &str) -> Node {
445 Node {
446 path: path.into(),
447 node_type: NodeType::File,
448 hash: None,
449 graph: None,
450 metadata: HashMap::new(),
451 }
452 }
453
454 pub fn make_edge(source: &str, target: &str) -> Edge {
455 Edge {
456 source: source.into(),
457 target: target.into(),
458 link: None,
459 parser: "markdown".into(),
460 }
461 }
462}
463
464#[cfg(test)]
465mod tests {
466 use super::*;
467
468 #[test]
469 fn normalize_simple() {
470 assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
471 }
472
473 #[test]
474 fn normalize_dot() {
475 assert_eq!(normalize_relative_path("./a/./b"), "a/b");
476 }
477
478 #[test]
479 fn normalize_dotdot() {
480 assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
481 }
482
483 #[test]
484 fn normalize_preserves_leading_dotdot() {
485 assert_eq!(normalize_relative_path("../a"), "../a");
486 }
487
488 #[test]
489 fn normalize_deep_escape() {
490 assert_eq!(normalize_relative_path("../../a"), "../../a");
491 }
492
493 #[test]
494 fn normalize_escape_after_descent() {
495 assert_eq!(
497 normalize_relative_path("guides/../../README.md"),
498 "../README.md"
499 );
500 }
501
502 #[test]
503 fn resolve_same_dir() {
504 assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
505 }
506
507 #[test]
508 fn resolve_subdir() {
509 assert_eq!(
510 resolve_link("guides/intro.md", "setup.md"),
511 "guides/setup.md"
512 );
513 }
514
515 #[test]
516 fn resolve_parent() {
517 assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
518 }
519
520 #[test]
521 fn graph_adjacency() {
522 let mut g = Graph::new();
523 g.add_node(Node {
524 path: "a.md".into(),
525 node_type: NodeType::File,
526 hash: None,
527 graph: None,
528 metadata: HashMap::new(),
529 });
530 g.add_node(Node {
531 path: "b.md".into(),
532 node_type: NodeType::File,
533 hash: None,
534 graph: None,
535 metadata: HashMap::new(),
536 });
537 g.add_edge(Edge {
538 source: "a.md".into(),
539 target: "b.md".into(),
540 link: None,
541 parser: "markdown".into(),
542 });
543 assert_eq!(g.forward["a.md"], vec![0]);
544 assert_eq!(g.reverse["b.md"], vec![0]);
545 assert!(!g.forward.contains_key("b.md"));
546 }
547
548 #[test]
549 fn fragment_edge_resolves_to_node() {
550 let mut g = Graph::new();
551 g.add_node(test_helpers::make_node("a.md"));
552 g.add_node(test_helpers::make_node("b.md"));
553 g.add_edge(Edge {
554 source: "a.md".into(),
555 target: "b.md".into(),
556 link: Some("b.md#heading".into()),
557 parser: "markdown".into(),
558 });
559 assert_eq!(g.edges[0].target, "b.md");
561 assert_eq!(g.edges[0].link.as_deref(), Some("b.md#heading"));
563 assert_eq!(g.reverse["b.md"], vec![0]);
565 }
566
567 #[test]
568 fn is_uri_detects_schemes() {
569 assert!(is_uri("http://example.com"));
570 assert!(is_uri("https://example.com"));
571 assert!(is_uri("mailto:user@example.com"));
572 assert!(is_uri("ftp://files.example.com"));
573 assert!(is_uri("tel:+1234567890"));
574 assert!(is_uri("ssh://git@github.com"));
575 assert!(is_uri("custom+scheme://foo"));
576 }
577
578 #[test]
579 fn is_uri_rejects_paths() {
580 assert!(!is_uri("setup.md"));
581 assert!(!is_uri("./relative/path.md"));
582 assert!(!is_uri("../parent.md"));
583 assert!(!is_uri("#heading"));
584 assert!(!is_uri(""));
585 assert!(!is_uri("path/with:colon.md")); }
587
588 #[test]
589 fn normalize_strips_fragment() {
590 let n = normalize_link_target("file.md#heading").unwrap();
591 assert_eq!(n.target, "file.md");
592 assert_eq!(n.fragment.as_deref(), Some("#heading"));
593 }
594
595 #[test]
596 fn normalize_strips_uri_fragment() {
597 let n = normalize_link_target("https://example.com/page#section").unwrap();
598 assert_eq!(n.target, "https://example.com/page");
599 assert_eq!(n.fragment.as_deref(), Some("#section"));
600 }
601
602 #[test]
603 fn normalize_drops_anchor_only() {
604 assert!(normalize_link_target("#heading").is_none());
605 }
606
607 #[test]
608 fn normalize_drops_empty() {
609 assert!(normalize_link_target("").is_none());
610 assert!(normalize_link_target(" ").is_none());
611 }
612
613 #[test]
614 fn normalize_preserves_mailto() {
615 let n = normalize_link_target("mailto:user@example.com").unwrap();
616 assert_eq!(n.target, "mailto:user@example.com");
617 assert!(n.fragment.is_none());
618 }
619}