1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9pub fn is_uri(target: &str) -> bool {
12 let bytes = target.as_bytes();
13 if bytes.is_empty() || !bytes[0].is_ascii_alphabetic() {
14 return false;
15 }
16 for &b in &bytes[1..] {
17 if b == b':' {
18 return true;
19 }
20 if !b.is_ascii_alphanumeric() && b != b'+' && b != b'-' && b != b'.' {
21 return false;
22 }
23 }
24 false
25}
26
27#[derive(
28 Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
29)]
30#[serde(rename_all = "lowercase")]
31pub enum NodeType {
32 File,
33 Directory,
34 External,
35}
36
37#[derive(Debug, Clone, serde::Serialize)]
38pub struct Node {
39 pub path: String,
40 pub node_type: NodeType,
41 pub hash: Option<String>,
42 #[serde(skip_serializing_if = "Option::is_none")]
45 pub graph: Option<String>,
46 #[serde(skip_serializing_if = "std::ops::Not::not")]
48 pub is_graph: bool,
49 #[serde(skip_serializing_if = "HashMap::is_empty")]
51 pub metadata: HashMap<String, serde_json::Value>,
52}
53
54#[derive(Debug, Clone)]
55pub struct Edge {
56 pub source: String,
57 pub target: String,
60 pub link: Option<String>,
63 pub parser: String,
65}
66
67#[derive(Debug, Clone, Default)]
70pub struct TargetProperties {
71 pub is_symlink: bool,
72 pub is_directory: bool,
73 pub symlink_target: Option<String>,
74}
75
76#[derive(Debug, Default)]
77pub struct Graph {
78 pub nodes: HashMap<String, Node>,
79 pub edges: Vec<Edge>,
80 pub forward: HashMap<String, Vec<usize>>,
81 pub reverse: HashMap<String, Vec<usize>>,
82 pub child_graphs: Vec<String>,
83 pub interface: Vec<String>,
85 pub target_properties: HashMap<String, TargetProperties>,
87}
88
89impl Graph {
90 pub fn new() -> Self {
91 Self::default()
92 }
93
94 pub fn add_node(&mut self, node: Node) {
95 self.nodes.insert(node.path.clone(), node);
96 }
97
98 pub fn is_file_node(&self, path: &str) -> bool {
101 self.nodes
102 .get(path)
103 .is_some_and(|n| n.node_type == NodeType::File)
104 }
105
106 pub fn add_edge(&mut self, edge: Edge) {
107 let idx = self.edges.len();
108 self.forward
109 .entry(edge.source.clone())
110 .or_default()
111 .push(idx);
112 self.reverse
113 .entry(edge.target.clone())
114 .or_default()
115 .push(idx);
116 self.edges.push(edge);
117 }
118
119 pub fn target_props(&self, target: &str) -> Option<&TargetProperties> {
121 self.target_properties.get(target)
122 }
123}
124
125pub fn hash_bytes(content: &[u8]) -> String {
127 format!("b3:{}", blake3::hash(content).to_hex())
128}
129
130fn promote_interface_files(
134 root: &Path,
135 child_name: &str,
136 graph: &mut Graph,
137 implicit_edges: &mut Vec<Edge>,
138) {
139 let child_dir = root.join(child_name);
140 let config = match Config::load(&child_dir) {
141 Ok(c) => c,
142 Err(_) => return,
143 };
144
145 let (interface_files, interface_ignore) = match &config.interface {
146 Some(iface) => (&iface.files, &iface.ignore),
147 None => return,
148 };
149
150 let mut exclude_patterns = config.exclude.clone();
152 exclude_patterns.extend(interface_ignore.iter().cloned());
153
154 let included = match discover(&child_dir, interface_files, &exclude_patterns) {
155 Ok(files) => files,
156 Err(_) => return,
157 };
158
159 for file in included {
160 let node_path = format!("{child_name}/{file}");
161 if graph.nodes.contains_key(&node_path) {
162 continue;
163 }
164 let file_path = child_dir.join(&file);
165 let hash = std::fs::read(&file_path).ok().map(|c| hash_bytes(&c));
166 graph.add_node(Node {
167 path: node_path.clone(),
168 node_type: NodeType::External,
169 hash,
170 graph: Some(child_name.into()),
171 is_graph: false,
172 metadata: HashMap::new(),
173 });
174 implicit_edges.push(Edge {
175 source: node_path,
176 target: child_name.into(),
177 link: None,
178 parser: String::new(),
179 });
180 }
181}
182
183pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
190 let included_files = discover(root, &config.include, &config.exclude)?;
191 let child_graphs = find_child_graphs(root, &config.exclude)?;
192 let mut graph = Graph::new();
193 graph.child_graphs = child_graphs;
194 let mut pending_edges = Vec::new();
195
196 let mut file_text: HashMap<String, String> = HashMap::new(); for file in &included_files {
201 let file_path = root.join(file);
202 let raw = std::fs::read(&file_path)?;
203 let hash = hash_bytes(&raw);
204
205 graph.add_node(Node {
206 path: file.clone(),
207 node_type: NodeType::File,
208 hash: Some(hash),
209 graph: Some(".".into()),
210 is_graph: false,
211 metadata: HashMap::new(),
212 });
213
214 if let Ok(text) = String::from_utf8(raw) {
216 file_text.insert(file.clone(), text);
217 }
218 }
219
220 let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref(), root);
222 let mut parser_files: Vec<Vec<String>> = vec![Vec::new(); parser_list.len()];
223
224 for file in &included_files {
225 for (i, parser) in parser_list.iter().enumerate() {
226 if parser.matches(file) {
227 parser_files[i].push(file.clone());
228 }
229 }
230 }
231
232 for (i, parser) in parser_list.iter().enumerate() {
234 let files: Vec<(&str, &str)> = parser_files[i]
235 .iter()
236 .filter_map(|path| {
237 file_text
238 .get(path)
239 .map(|content| (path.as_str(), content.as_str()))
240 })
241 .collect();
242
243 if files.is_empty() {
244 continue;
245 }
246
247 let batch_results = parser.parse_batch(&files);
248
249 for (file, result) in batch_results {
250 if let Some(metadata) = result.metadata
252 && let Some(node) = graph.nodes.get_mut(&file)
253 {
254 node.metadata.insert(parser.name().to_string(), metadata);
255 }
256
257 for link in result.links {
258 let normalized = match normalize_link_target(&link) {
259 Some(n) => n,
260 None => continue, };
262
263 let target = if is_uri(&normalized.target) {
264 normalized.target
265 } else {
266 resolve_link(&file, &normalized.target)
267 };
268 let link = normalized.fragment.map(|frag| format!("{target}{frag}"));
270 pending_edges.push(Edge {
271 source: file.clone(),
272 target,
273 link,
274 parser: parser.name().to_string(),
275 });
276 }
277 }
278 }
279
280 let graph_prefixes: Vec<String> = graph.child_graphs.clone();
285
286 let mut implicit_edges = Vec::new();
294 for edge in &pending_edges {
295 if graph.nodes.contains_key(&edge.target) {
296 continue;
297 }
298
299 if is_uri(&edge.target) {
301 graph.add_node(Node {
302 path: edge.target.clone(),
303 node_type: NodeType::External,
304 hash: None,
305 graph: None,
306 is_graph: false,
307 metadata: HashMap::new(),
308 });
309 continue;
310 }
311
312 if edge.target.starts_with("../") || edge.target == ".." {
314 graph.add_node(Node {
315 path: edge.target.clone(),
316 node_type: NodeType::External,
317 hash: None,
318 graph: Some("..".into()),
319 is_graph: false,
320 metadata: HashMap::new(),
321 });
322 continue;
323 }
324
325 let in_child_graph = graph_prefixes
327 .iter()
328 .find(|s| edge.target.starts_with(&format!("{s}/")));
329 if let Some(child_name) = in_child_graph {
330 let target_path = root.join(&edge.target);
331 if target_path.is_file() {
332 let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
333 graph.add_node(Node {
334 path: edge.target.clone(),
335 node_type: NodeType::External,
336 hash,
337 graph: Some(child_name.clone()),
338 is_graph: false,
339 metadata: HashMap::new(),
340 });
341 if !graph.nodes.contains_key(child_name) {
343 let child_dir = root.join(child_name);
344 let config_hash = std::fs::read(child_dir.join("drft.toml"))
345 .ok()
346 .map(|c| hash_bytes(&c));
347 graph.add_node(Node {
348 path: child_name.clone(),
349 node_type: NodeType::Directory,
350 hash: config_hash,
351 graph: Some(".".into()),
352 is_graph: true,
353 metadata: HashMap::new(),
354 });
355 promote_interface_files(root, child_name, &mut graph, &mut implicit_edges);
357 }
358 implicit_edges.push(Edge {
360 source: edge.target.clone(),
361 target: child_name.clone(),
362 link: None,
363 parser: edge.parser.clone(),
364 });
365 }
366 continue;
367 }
368
369 let target_path = root.join(&edge.target);
370
371 if target_path.is_dir() {
373 let has_config = target_path.join("drft.toml").exists();
374 let hash = if has_config {
375 std::fs::read(target_path.join("drft.toml"))
376 .ok()
377 .map(|c| hash_bytes(&c))
378 } else {
379 None
380 };
381 graph.add_node(Node {
382 path: edge.target.clone(),
383 node_type: NodeType::Directory,
384 hash,
385 graph: Some(".".into()),
386 is_graph: has_config,
387 metadata: HashMap::new(),
388 });
389 if has_config {
390 promote_interface_files(root, &edge.target, &mut graph, &mut implicit_edges);
391 }
392 continue;
393 }
394
395 if target_path.is_file() {
397 let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
398 graph.add_node(Node {
399 path: edge.target.clone(),
400 node_type: NodeType::External,
401 hash,
402 graph: Some(".".into()),
403 is_graph: false,
404 metadata: HashMap::new(),
405 });
406 }
407 }
409
410 pending_edges.extend(implicit_edges);
412 for edge in &pending_edges {
413 if is_uri(&edge.target) || graph.target_properties.contains_key(&edge.target) {
414 continue;
415 }
416 let target_path = root.join(&edge.target);
417 let is_symlink = target_path.is_symlink();
418 let is_directory = target_path.is_dir();
419 let symlink_target = if is_symlink {
420 std::fs::read_link(&target_path)
421 .ok()
422 .map(|p| p.to_string_lossy().to_string())
423 } else {
424 None
425 };
426 graph.target_properties.insert(
427 edge.target.clone(),
428 TargetProperties {
429 is_symlink,
430 is_directory,
431 symlink_target,
432 },
433 );
434 }
435
436 for edge in pending_edges {
438 graph.add_edge(edge);
439 }
440
441 if let Some(ref iface) = config.interface {
443 let ignore_set = crate::config::compile_globs(&iface.ignore)?;
444
445 let mut resolved = Vec::new();
446 for pattern in &iface.files {
447 if let Ok(glob) = globset::Glob::new(pattern) {
448 let matcher = glob.compile_matcher();
449 for path in graph.nodes.keys() {
450 if matcher.is_match(path) {
451 resolved.push(path.clone());
452 }
453 }
454 } else {
455 if graph.nodes.contains_key(pattern) {
456 resolved.push(pattern.clone());
457 }
458 }
459 }
460
461 if let Some(ref ignore) = ignore_set {
462 resolved.retain(|p| !ignore.is_match(p));
463 }
464
465 resolved.sort();
466 resolved.dedup();
467 graph.interface = resolved;
468 }
469
470 Ok(graph)
471}
472
473pub fn normalize_relative_path(path: &str) -> String {
477 let mut parts: Vec<String> = Vec::new();
478 for component in Path::new(path).components() {
479 match component {
480 std::path::Component::CurDir => {}
481 std::path::Component::ParentDir => {
482 if parts.last().is_some_and(|p| p != "..") {
484 parts.pop();
485 } else {
486 parts.push("..".to_string());
487 }
488 }
489 std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
490 _ => {}
491 }
492 }
493 parts.join("/")
494}
495
496struct NormalizedTarget {
498 target: String,
500 fragment: Option<String>,
502}
503
504fn normalize_link_target(raw: &str) -> Option<NormalizedTarget> {
508 let target = raw.trim();
509 if target.is_empty() {
510 return None;
511 }
512
513 if target.starts_with('#') {
515 return None;
516 }
517
518 let (base, fragment) = match target.find('#') {
520 Some(idx) => (&target[..idx], Some(target[idx..].to_string())),
521 None => (target, None),
522 };
523
524 if base.is_empty() {
526 return None;
527 }
528
529 Some(NormalizedTarget {
530 target: base.to_string(),
531 fragment,
532 })
533}
534
535pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
538 let source_path = Path::new(source_file);
539 let source_dir = source_path.parent().unwrap_or(Path::new(""));
540 let joined = source_dir.join(raw_target);
541 normalize_relative_path(&joined.to_string_lossy())
542}
543
544#[cfg(test)]
545pub mod test_helpers {
546 use super::*;
547
548 pub fn make_node(path: &str) -> Node {
549 Node {
550 path: path.into(),
551 node_type: NodeType::File,
552 hash: None,
553 graph: Some(".".into()),
554 is_graph: false,
555 metadata: HashMap::new(),
556 }
557 }
558
559 pub fn make_edge(source: &str, target: &str) -> Edge {
560 Edge {
561 source: source.into(),
562 target: target.into(),
563 link: None,
564 parser: "markdown".into(),
565 }
566 }
567
568 pub fn make_enriched(graph: Graph) -> crate::analyses::EnrichedGraph {
569 crate::analyses::enrich_graph(
570 graph,
571 std::path::Path::new("."),
572 &crate::config::Config::defaults(),
573 None,
574 )
575 }
576
577 pub fn make_enriched_with_root(
578 graph: Graph,
579 root: &std::path::Path,
580 ) -> crate::analyses::EnrichedGraph {
581 crate::analyses::enrich_graph(graph, root, &crate::config::Config::defaults(), None)
582 }
583}
584
585#[cfg(test)]
586mod tests {
587 use super::*;
588
589 #[test]
590 fn normalize_simple() {
591 assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
592 }
593
594 #[test]
595 fn normalize_dot() {
596 assert_eq!(normalize_relative_path("./a/./b"), "a/b");
597 }
598
599 #[test]
600 fn normalize_dotdot() {
601 assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
602 }
603
604 #[test]
605 fn normalize_preserves_leading_dotdot() {
606 assert_eq!(normalize_relative_path("../a"), "../a");
607 }
608
609 #[test]
610 fn normalize_deep_escape() {
611 assert_eq!(normalize_relative_path("../../a"), "../../a");
612 }
613
614 #[test]
615 fn normalize_escape_after_descent() {
616 assert_eq!(
618 normalize_relative_path("guides/../../README.md"),
619 "../README.md"
620 );
621 }
622
623 #[test]
624 fn resolve_same_dir() {
625 assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
626 }
627
628 #[test]
629 fn resolve_subdir() {
630 assert_eq!(
631 resolve_link("guides/intro.md", "setup.md"),
632 "guides/setup.md"
633 );
634 }
635
636 #[test]
637 fn resolve_parent() {
638 assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
639 }
640
641 #[test]
642 fn graph_adjacency() {
643 let mut g = Graph::new();
644 g.add_node(Node {
645 path: "a.md".into(),
646 node_type: NodeType::File,
647 hash: None,
648 graph: None,
649 is_graph: false,
650 metadata: HashMap::new(),
651 });
652 g.add_node(Node {
653 path: "b.md".into(),
654 node_type: NodeType::File,
655 hash: None,
656 graph: None,
657 is_graph: false,
658 metadata: HashMap::new(),
659 });
660 g.add_edge(Edge {
661 source: "a.md".into(),
662 target: "b.md".into(),
663 link: None,
664 parser: "markdown".into(),
665 });
666 assert_eq!(g.forward["a.md"], vec![0]);
667 assert_eq!(g.reverse["b.md"], vec![0]);
668 assert!(!g.forward.contains_key("b.md"));
669 }
670
671 #[test]
672 fn fragment_edge_resolves_to_node() {
673 let mut g = Graph::new();
674 g.add_node(test_helpers::make_node("a.md"));
675 g.add_node(test_helpers::make_node("b.md"));
676 g.add_edge(Edge {
677 source: "a.md".into(),
678 target: "b.md".into(),
679 link: Some("b.md#heading".into()),
680 parser: "markdown".into(),
681 });
682 assert_eq!(g.edges[0].target, "b.md");
684 assert_eq!(g.edges[0].link.as_deref(), Some("b.md#heading"));
686 assert_eq!(g.reverse["b.md"], vec![0]);
688 }
689
690 #[test]
691 fn is_uri_detects_schemes() {
692 assert!(is_uri("http://example.com"));
693 assert!(is_uri("https://example.com"));
694 assert!(is_uri("mailto:user@example.com"));
695 assert!(is_uri("ftp://files.example.com"));
696 assert!(is_uri("tel:+1234567890"));
697 assert!(is_uri("ssh://git@github.com"));
698 assert!(is_uri("custom+scheme://foo"));
699 }
700
701 #[test]
702 fn is_uri_rejects_paths() {
703 assert!(!is_uri("setup.md"));
704 assert!(!is_uri("./relative/path.md"));
705 assert!(!is_uri("../parent.md"));
706 assert!(!is_uri("#heading"));
707 assert!(!is_uri(""));
708 assert!(!is_uri("path/with:colon.md")); }
710
711 #[test]
712 fn normalize_strips_fragment() {
713 let n = normalize_link_target("file.md#heading").unwrap();
714 assert_eq!(n.target, "file.md");
715 assert_eq!(n.fragment.as_deref(), Some("#heading"));
716 }
717
718 #[test]
719 fn normalize_strips_uri_fragment() {
720 let n = normalize_link_target("https://example.com/page#section").unwrap();
721 assert_eq!(n.target, "https://example.com/page");
722 assert_eq!(n.fragment.as_deref(), Some("#section"));
723 }
724
725 #[test]
726 fn normalize_drops_anchor_only() {
727 assert!(normalize_link_target("#heading").is_none());
728 }
729
730 #[test]
731 fn normalize_drops_empty() {
732 assert!(normalize_link_target("").is_none());
733 assert!(normalize_link_target(" ").is_none());
734 }
735
736 #[test]
737 fn normalize_preserves_mailto() {
738 let n = normalize_link_target("mailto:user@example.com").unwrap();
739 assert_eq!(n.target, "mailto:user@example.com");
740 assert!(n.fragment.is_none());
741 }
742}