1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9pub fn is_uri(target: &str) -> bool {
12 let bytes = target.as_bytes();
13 if bytes.is_empty() || !bytes[0].is_ascii_alphabetic() {
14 return false;
15 }
16 for &b in &bytes[1..] {
17 if b == b':' {
18 return true;
19 }
20 if !b.is_ascii_alphanumeric() && b != b'+' && b != b'-' && b != b'.' {
21 return false;
22 }
23 }
24 false
25}
26
27#[derive(
28 Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
29)]
30#[serde(rename_all = "lowercase")]
31pub enum NodeType {
32 File,
33 Directory,
34 External,
35}
36
37#[derive(Debug, Clone, serde::Serialize)]
38pub struct Node {
39 pub path: String,
40 pub node_type: NodeType,
41 pub hash: Option<String>,
42 #[serde(skip_serializing_if = "Option::is_none")]
45 pub graph: Option<String>,
46 #[serde(skip_serializing_if = "std::ops::Not::not")]
48 pub is_graph: bool,
49 #[serde(skip_serializing_if = "HashMap::is_empty")]
51 pub metadata: HashMap<String, serde_json::Value>,
52}
53
54#[derive(Debug, Clone)]
55pub struct Edge {
56 pub source: String,
57 pub target: String,
60 pub link: Option<String>,
63 pub parser: String,
65}
66
67#[derive(Debug, Clone, Default)]
70pub struct TargetProperties {
71 pub is_symlink: bool,
72 pub is_directory: bool,
73 pub symlink_target: Option<String>,
74}
75
76#[derive(Debug, Default)]
77pub struct Graph {
78 pub nodes: HashMap<String, Node>,
79 pub edges: Vec<Edge>,
80 pub forward: HashMap<String, Vec<usize>>,
81 pub reverse: HashMap<String, Vec<usize>>,
82 pub child_graphs: Vec<String>,
83 pub interface: Vec<String>,
85 pub target_properties: HashMap<String, TargetProperties>,
87}
88
89impl Graph {
90 pub fn new() -> Self {
91 Self::default()
92 }
93
94 pub fn add_node(&mut self, node: Node) {
95 self.nodes.insert(node.path.clone(), node);
96 }
97
98 pub fn is_file_node(&self, path: &str) -> bool {
101 self.nodes
102 .get(path)
103 .is_some_and(|n| n.node_type == NodeType::File)
104 }
105
106 pub fn add_edge(&mut self, edge: Edge) {
107 let idx = self.edges.len();
108 self.forward
109 .entry(edge.source.clone())
110 .or_default()
111 .push(idx);
112 self.reverse
113 .entry(edge.target.clone())
114 .or_default()
115 .push(idx);
116 self.edges.push(edge);
117 }
118
119 pub fn target_props(&self, target: &str) -> Option<&TargetProperties> {
121 self.target_properties.get(target)
122 }
123
124 pub fn filter_by_parsers(&self, parsers: &[String]) -> Graph {
127 let mut filtered = Graph {
128 nodes: self.nodes.clone(),
129 child_graphs: self.child_graphs.clone(),
130 interface: self.interface.clone(),
131 target_properties: self.target_properties.clone(),
132 ..Default::default()
133 };
134
135 for edge in &self.edges {
136 if parsers.iter().any(|p| p == &edge.parser) {
137 filtered.add_edge(edge.clone());
138 }
139 }
140
141 filtered
142 }
143}
144
145pub fn hash_bytes(content: &[u8]) -> String {
147 format!("b3:{}", blake3::hash(content).to_hex())
148}
149
150fn promote_interface_files(
154 root: &Path,
155 child_name: &str,
156 graph: &mut Graph,
157 implicit_edges: &mut Vec<Edge>,
158) {
159 let child_dir = root.join(child_name);
160 let config = match Config::load(&child_dir) {
161 Ok(c) => c,
162 Err(_) => return,
163 };
164
165 let (interface_files, interface_ignore) = match &config.interface {
166 Some(iface) => (&iface.files, &iface.ignore),
167 None => return,
168 };
169
170 let mut exclude_patterns = config.exclude.clone();
172 exclude_patterns.extend(interface_ignore.iter().cloned());
173
174 let included = match discover(&child_dir, interface_files, &exclude_patterns) {
175 Ok(files) => files,
176 Err(_) => return,
177 };
178
179 for file in included {
180 let node_path = format!("{child_name}/{file}");
181 if graph.nodes.contains_key(&node_path) {
182 continue;
183 }
184 let file_path = child_dir.join(&file);
185 let hash = std::fs::read(&file_path).ok().map(|c| hash_bytes(&c));
186 graph.add_node(Node {
187 path: node_path.clone(),
188 node_type: NodeType::External,
189 hash,
190 graph: Some(child_name.into()),
191 is_graph: false,
192 metadata: HashMap::new(),
193 });
194 implicit_edges.push(Edge {
195 source: node_path,
196 target: child_name.into(),
197 link: None,
198 parser: String::new(),
199 });
200 }
201}
202
203pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
210 let included_files = discover(root, &config.include, &config.exclude)?;
211 let child_graphs = find_child_graphs(root, &config.exclude)?;
212 let mut graph = Graph::new();
213 graph.child_graphs = child_graphs;
214 let mut pending_edges = Vec::new();
215
216 let mut file_text: HashMap<String, String> = HashMap::new(); for file in &included_files {
221 let file_path = root.join(file);
222 let raw = std::fs::read(&file_path)?;
223 let hash = hash_bytes(&raw);
224
225 graph.add_node(Node {
226 path: file.clone(),
227 node_type: NodeType::File,
228 hash: Some(hash),
229 graph: Some(".".into()),
230 is_graph: false,
231 metadata: HashMap::new(),
232 });
233
234 if let Ok(text) = String::from_utf8(raw) {
236 file_text.insert(file.clone(), text);
237 }
238 }
239
240 let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref(), root);
242 let mut parser_files: Vec<Vec<String>> = vec![Vec::new(); parser_list.len()];
243
244 for file in &included_files {
245 for (i, parser) in parser_list.iter().enumerate() {
246 if parser.matches(file) {
247 parser_files[i].push(file.clone());
248 }
249 }
250 }
251
252 for (i, parser) in parser_list.iter().enumerate() {
254 let files: Vec<(&str, &str)> = parser_files[i]
255 .iter()
256 .filter_map(|path| {
257 file_text
258 .get(path)
259 .map(|content| (path.as_str(), content.as_str()))
260 })
261 .collect();
262
263 if files.is_empty() {
264 continue;
265 }
266
267 let batch_results = parser.parse_batch(&files);
268
269 for (file, result) in batch_results {
270 if let Some(metadata) = result.metadata
272 && let Some(node) = graph.nodes.get_mut(&file)
273 {
274 node.metadata.insert(parser.name().to_string(), metadata);
275 }
276
277 for link in result.links {
278 let normalized = match normalize_link_target(&link) {
279 Some(n) => n,
280 None => continue, };
282
283 let target = if is_uri(&normalized.target) {
284 normalized.target
285 } else {
286 resolve_link(&file, &normalized.target)
287 };
288 let link = normalized.fragment.map(|frag| format!("{target}{frag}"));
290 pending_edges.push(Edge {
291 source: file.clone(),
292 target,
293 link,
294 parser: parser.name().to_string(),
295 });
296 }
297 }
298 }
299
300 let graph_prefixes: Vec<String> = graph.child_graphs.clone();
305
306 let mut implicit_edges = Vec::new();
314 for edge in &pending_edges {
315 if graph.nodes.contains_key(&edge.target) {
316 continue;
317 }
318
319 if is_uri(&edge.target) {
321 graph.add_node(Node {
322 path: edge.target.clone(),
323 node_type: NodeType::External,
324 hash: None,
325 graph: None,
326 is_graph: false,
327 metadata: HashMap::new(),
328 });
329 continue;
330 }
331
332 if edge.target.starts_with("../") || edge.target == ".." {
334 graph.add_node(Node {
335 path: edge.target.clone(),
336 node_type: NodeType::External,
337 hash: None,
338 graph: Some("..".into()),
339 is_graph: false,
340 metadata: HashMap::new(),
341 });
342 continue;
343 }
344
345 let in_child_graph = graph_prefixes
347 .iter()
348 .find(|s| edge.target.starts_with(&format!("{s}/")));
349 if let Some(child_name) = in_child_graph {
350 let target_path = root.join(&edge.target);
351 if target_path.is_file() {
352 let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
353 graph.add_node(Node {
354 path: edge.target.clone(),
355 node_type: NodeType::External,
356 hash,
357 graph: Some(child_name.clone()),
358 is_graph: false,
359 metadata: HashMap::new(),
360 });
361 if !graph.nodes.contains_key(child_name) {
363 let child_dir = root.join(child_name);
364 let config_hash = std::fs::read(child_dir.join("drft.toml"))
365 .ok()
366 .map(|c| hash_bytes(&c));
367 graph.add_node(Node {
368 path: child_name.clone(),
369 node_type: NodeType::Directory,
370 hash: config_hash,
371 graph: Some(".".into()),
372 is_graph: true,
373 metadata: HashMap::new(),
374 });
375 promote_interface_files(root, child_name, &mut graph, &mut implicit_edges);
377 }
378 implicit_edges.push(Edge {
380 source: edge.target.clone(),
381 target: child_name.clone(),
382 link: None,
383 parser: edge.parser.clone(),
384 });
385 }
386 continue;
387 }
388
389 let target_path = root.join(&edge.target);
390
391 if target_path.is_dir() {
393 let has_config = target_path.join("drft.toml").exists();
394 let hash = if has_config {
395 std::fs::read(target_path.join("drft.toml"))
396 .ok()
397 .map(|c| hash_bytes(&c))
398 } else {
399 None
400 };
401 graph.add_node(Node {
402 path: edge.target.clone(),
403 node_type: NodeType::Directory,
404 hash,
405 graph: Some(".".into()),
406 is_graph: has_config,
407 metadata: HashMap::new(),
408 });
409 if has_config {
410 promote_interface_files(root, &edge.target, &mut graph, &mut implicit_edges);
411 }
412 continue;
413 }
414
415 if target_path.is_file() {
417 let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
418 graph.add_node(Node {
419 path: edge.target.clone(),
420 node_type: NodeType::External,
421 hash,
422 graph: Some(".".into()),
423 is_graph: false,
424 metadata: HashMap::new(),
425 });
426 }
427 }
429
430 pending_edges.extend(implicit_edges);
432 for edge in &pending_edges {
433 if is_uri(&edge.target) || graph.target_properties.contains_key(&edge.target) {
434 continue;
435 }
436 let target_path = root.join(&edge.target);
437 let is_symlink = target_path.is_symlink();
438 let is_directory = target_path.is_dir();
439 let symlink_target = if is_symlink {
440 std::fs::read_link(&target_path)
441 .ok()
442 .map(|p| p.to_string_lossy().to_string())
443 } else {
444 None
445 };
446 graph.target_properties.insert(
447 edge.target.clone(),
448 TargetProperties {
449 is_symlink,
450 is_directory,
451 symlink_target,
452 },
453 );
454 }
455
456 for edge in pending_edges {
458 graph.add_edge(edge);
459 }
460
461 if let Some(ref iface) = config.interface {
463 let ignore_set = crate::config::compile_globs(&iface.ignore)?;
464
465 let mut resolved = Vec::new();
466 for pattern in &iface.files {
467 if let Ok(glob) = globset::Glob::new(pattern) {
468 let matcher = glob.compile_matcher();
469 for path in graph.nodes.keys() {
470 if matcher.is_match(path) {
471 resolved.push(path.clone());
472 }
473 }
474 } else {
475 if graph.nodes.contains_key(pattern) {
476 resolved.push(pattern.clone());
477 }
478 }
479 }
480
481 if let Some(ref ignore) = ignore_set {
482 resolved.retain(|p| !ignore.is_match(p));
483 }
484
485 resolved.sort();
486 resolved.dedup();
487 graph.interface = resolved;
488 }
489
490 Ok(graph)
491}
492
493pub fn normalize_relative_path(path: &str) -> String {
497 let mut parts: Vec<String> = Vec::new();
498 for component in Path::new(path).components() {
499 match component {
500 std::path::Component::CurDir => {}
501 std::path::Component::ParentDir => {
502 if parts.last().is_some_and(|p| p != "..") {
504 parts.pop();
505 } else {
506 parts.push("..".to_string());
507 }
508 }
509 std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
510 _ => {}
511 }
512 }
513 parts.join("/")
514}
515
516struct NormalizedTarget {
518 target: String,
520 fragment: Option<String>,
522}
523
524fn normalize_link_target(raw: &str) -> Option<NormalizedTarget> {
528 let target = raw.trim();
529 if target.is_empty() {
530 return None;
531 }
532
533 if target.starts_with('#') {
535 return None;
536 }
537
538 let (base, fragment) = match target.find('#') {
540 Some(idx) => (&target[..idx], Some(target[idx..].to_string())),
541 None => (target, None),
542 };
543
544 if base.is_empty() {
546 return None;
547 }
548
549 Some(NormalizedTarget {
550 target: base.to_string(),
551 fragment,
552 })
553}
554
555pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
558 let source_path = Path::new(source_file);
559 let source_dir = source_path.parent().unwrap_or(Path::new(""));
560 let joined = source_dir.join(raw_target);
561 normalize_relative_path(&joined.to_string_lossy())
562}
563
564#[cfg(test)]
565pub mod test_helpers {
566 use super::*;
567
568 pub fn make_node(path: &str) -> Node {
569 Node {
570 path: path.into(),
571 node_type: NodeType::File,
572 hash: None,
573 graph: Some(".".into()),
574 is_graph: false,
575 metadata: HashMap::new(),
576 }
577 }
578
579 pub fn make_edge(source: &str, target: &str) -> Edge {
580 Edge {
581 source: source.into(),
582 target: target.into(),
583 link: None,
584 parser: "markdown".into(),
585 }
586 }
587
588 pub fn make_enriched(graph: Graph) -> crate::analyses::EnrichedGraph {
589 crate::analyses::enrich_graph(
590 graph,
591 std::path::Path::new("."),
592 &crate::config::Config::defaults(),
593 None,
594 )
595 }
596
597 pub fn make_enriched_with_root(
598 graph: Graph,
599 root: &std::path::Path,
600 ) -> crate::analyses::EnrichedGraph {
601 crate::analyses::enrich_graph(graph, root, &crate::config::Config::defaults(), None)
602 }
603}
604
605#[cfg(test)]
606mod tests {
607 use super::*;
608
609 #[test]
610 fn normalize_simple() {
611 assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
612 }
613
614 #[test]
615 fn normalize_dot() {
616 assert_eq!(normalize_relative_path("./a/./b"), "a/b");
617 }
618
619 #[test]
620 fn normalize_dotdot() {
621 assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
622 }
623
624 #[test]
625 fn normalize_preserves_leading_dotdot() {
626 assert_eq!(normalize_relative_path("../a"), "../a");
627 }
628
629 #[test]
630 fn normalize_deep_escape() {
631 assert_eq!(normalize_relative_path("../../a"), "../../a");
632 }
633
634 #[test]
635 fn normalize_escape_after_descent() {
636 assert_eq!(
638 normalize_relative_path("guides/../../README.md"),
639 "../README.md"
640 );
641 }
642
643 #[test]
644 fn resolve_same_dir() {
645 assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
646 }
647
648 #[test]
649 fn resolve_subdir() {
650 assert_eq!(
651 resolve_link("guides/intro.md", "setup.md"),
652 "guides/setup.md"
653 );
654 }
655
656 #[test]
657 fn resolve_parent() {
658 assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
659 }
660
661 #[test]
662 fn graph_adjacency() {
663 let mut g = Graph::new();
664 g.add_node(Node {
665 path: "a.md".into(),
666 node_type: NodeType::File,
667 hash: None,
668 graph: None,
669 is_graph: false,
670 metadata: HashMap::new(),
671 });
672 g.add_node(Node {
673 path: "b.md".into(),
674 node_type: NodeType::File,
675 hash: None,
676 graph: None,
677 is_graph: false,
678 metadata: HashMap::new(),
679 });
680 g.add_edge(Edge {
681 source: "a.md".into(),
682 target: "b.md".into(),
683 link: None,
684 parser: "markdown".into(),
685 });
686 assert_eq!(g.forward["a.md"], vec![0]);
687 assert_eq!(g.reverse["b.md"], vec![0]);
688 assert!(!g.forward.contains_key("b.md"));
689 }
690
691 #[test]
692 fn fragment_edge_resolves_to_node() {
693 let mut g = Graph::new();
694 g.add_node(test_helpers::make_node("a.md"));
695 g.add_node(test_helpers::make_node("b.md"));
696 g.add_edge(Edge {
697 source: "a.md".into(),
698 target: "b.md".into(),
699 link: Some("b.md#heading".into()),
700 parser: "markdown".into(),
701 });
702 assert_eq!(g.edges[0].target, "b.md");
704 assert_eq!(g.edges[0].link.as_deref(), Some("b.md#heading"));
706 assert_eq!(g.reverse["b.md"], vec![0]);
708 }
709
710 #[test]
711 fn is_uri_detects_schemes() {
712 assert!(is_uri("http://example.com"));
713 assert!(is_uri("https://example.com"));
714 assert!(is_uri("mailto:user@example.com"));
715 assert!(is_uri("ftp://files.example.com"));
716 assert!(is_uri("tel:+1234567890"));
717 assert!(is_uri("ssh://git@github.com"));
718 assert!(is_uri("custom+scheme://foo"));
719 }
720
721 #[test]
722 fn is_uri_rejects_paths() {
723 assert!(!is_uri("setup.md"));
724 assert!(!is_uri("./relative/path.md"));
725 assert!(!is_uri("../parent.md"));
726 assert!(!is_uri("#heading"));
727 assert!(!is_uri(""));
728 assert!(!is_uri("path/with:colon.md")); }
730
731 #[test]
732 fn normalize_strips_fragment() {
733 let n = normalize_link_target("file.md#heading").unwrap();
734 assert_eq!(n.target, "file.md");
735 assert_eq!(n.fragment.as_deref(), Some("#heading"));
736 }
737
738 #[test]
739 fn normalize_strips_uri_fragment() {
740 let n = normalize_link_target("https://example.com/page#section").unwrap();
741 assert_eq!(n.target, "https://example.com/page");
742 assert_eq!(n.fragment.as_deref(), Some("#section"));
743 }
744
745 #[test]
746 fn normalize_drops_anchor_only() {
747 assert!(normalize_link_target("#heading").is_none());
748 }
749
750 #[test]
751 fn normalize_drops_empty() {
752 assert!(normalize_link_target("").is_none());
753 assert!(normalize_link_target(" ").is_none());
754 }
755
756 #[test]
757 fn normalize_preserves_mailto() {
758 let n = normalize_link_target("mailto:user@example.com").unwrap();
759 assert_eq!(n.target, "mailto:user@example.com");
760 assert!(n.fragment.is_none());
761 }
762
763 #[test]
764 fn filter_by_single_parser() {
765 let mut g = Graph::new();
766 g.add_node(test_helpers::make_node("a.md"));
767 g.add_node(test_helpers::make_node("b.md"));
768 g.add_node(test_helpers::make_node("c.md"));
769 g.add_edge(Edge {
770 source: "a.md".into(),
771 target: "b.md".into(),
772 link: None,
773 parser: "markdown".into(),
774 });
775 g.add_edge(Edge {
776 source: "a.md".into(),
777 target: "c.md".into(),
778 link: None,
779 parser: "frontmatter".into(),
780 });
781
782 let filtered = g.filter_by_parsers(&["frontmatter".into()]);
783 assert_eq!(filtered.edges.len(), 1);
784 assert_eq!(filtered.edges[0].target, "c.md");
785 assert_eq!(filtered.edges[0].parser, "frontmatter");
786 }
787
788 #[test]
789 fn filter_preserves_all_nodes() {
790 let mut g = Graph::new();
791 g.add_node(test_helpers::make_node("a.md"));
792 g.add_node(test_helpers::make_node("b.md"));
793 g.add_edge(Edge {
794 source: "a.md".into(),
795 target: "b.md".into(),
796 link: None,
797 parser: "markdown".into(),
798 });
799
800 let filtered = g.filter_by_parsers(&["frontmatter".into()]);
801 assert_eq!(filtered.nodes.len(), 2);
802 assert!(filtered.nodes.contains_key("a.md"));
803 assert!(filtered.nodes.contains_key("b.md"));
804 assert!(filtered.edges.is_empty());
805 }
806
807 #[test]
808 fn filter_rebuilds_adjacency_maps() {
809 let mut g = Graph::new();
810 g.add_node(test_helpers::make_node("a.md"));
811 g.add_node(test_helpers::make_node("b.md"));
812 g.add_node(test_helpers::make_node("c.md"));
813 g.add_edge(Edge {
814 source: "a.md".into(),
815 target: "b.md".into(),
816 link: None,
817 parser: "markdown".into(),
818 });
819 g.add_edge(Edge {
820 source: "a.md".into(),
821 target: "c.md".into(),
822 link: None,
823 parser: "frontmatter".into(),
824 });
825
826 let filtered = g.filter_by_parsers(&["frontmatter".into()]);
827 assert_eq!(filtered.forward["a.md"], vec![0]);
828 assert_eq!(filtered.reverse["c.md"], vec![0]);
829 assert!(!filtered.reverse.contains_key("b.md"));
830 }
831
832 #[test]
833 fn filter_by_multiple_parsers() {
834 let mut g = Graph::new();
835 g.add_node(test_helpers::make_node("a.md"));
836 g.add_node(test_helpers::make_node("b.md"));
837 g.add_node(test_helpers::make_node("c.md"));
838 g.add_edge(Edge {
839 source: "a.md".into(),
840 target: "b.md".into(),
841 link: None,
842 parser: "markdown".into(),
843 });
844 g.add_edge(Edge {
845 source: "a.md".into(),
846 target: "c.md".into(),
847 link: None,
848 parser: "frontmatter".into(),
849 });
850
851 let filtered = g.filter_by_parsers(&["markdown".into(), "frontmatter".into()]);
852 assert_eq!(filtered.edges.len(), 2);
853 }
854
855 #[test]
856 fn filter_empty_parsers_removes_all_edges() {
857 let mut g = Graph::new();
858 g.add_node(test_helpers::make_node("a.md"));
859 g.add_node(test_helpers::make_node("b.md"));
860 g.add_edge(test_helpers::make_edge("a.md", "b.md"));
861
862 let filtered = g.filter_by_parsers(&[]);
863 assert!(filtered.edges.is_empty());
864 assert_eq!(filtered.nodes.len(), 2);
865 }
866}