Skip to main content

drft/
graph.rs

1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9/// Check if a target string is a valid URI.
10///
11/// Uses the `url` crate (WHATWG URL Standard) for parsing, then filters to
12/// URIs that either have authority (`://`) or use a known opaque scheme.
13/// Without this filter, any `word:stuff` passes WHATWG parsing — e.g.,
14/// YAML values like `name: foo` would be treated as URIs with scheme `name`.
15pub fn is_uri(target: &str) -> bool {
16    match url::Url::parse(target) {
17        Ok(url) => {
18            if url.has_authority() {
19                return true;
20            }
21            matches!(
22                url.scheme(),
23                "mailto" | "tel" | "data" | "urn" | "javascript"
24            )
25        }
26        Err(_) => false,
27    }
28}
29
30#[derive(
31    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
32)]
33#[serde(rename_all = "lowercase")]
34pub enum NodeType {
35    File,
36    Directory,
37    External,
38}
39
40#[derive(Debug, Clone, serde::Serialize)]
41pub struct Node {
42    pub path: String,
43    pub node_type: NodeType,
44    pub hash: Option<String>,
45    /// Which graph this node belongs to — mirrors filesystem directory entries:
46    /// `"."` = local, `".."` = parent (escape), `"child"` = child graph, `None` = not on filesystem.
47    #[serde(skip_serializing_if = "Option::is_none")]
48    pub graph: Option<String>,
49    /// True when this Directory node has a drft.toml (is a drft graph).
50    #[serde(skip_serializing_if = "std::ops::Not::not")]
51    pub is_graph: bool,
52    /// Structured metadata from parsers, keyed by parser name.
53    #[serde(skip_serializing_if = "HashMap::is_empty")]
54    pub metadata: HashMap<String, serde_json::Value>,
55    /// True when this node was matched by `include` during discovery.
56    /// False for nodes discovered via edge targets (outside include, child graph files, etc.).
57    #[serde(default)]
58    pub included: bool,
59}
60
61#[derive(Debug, Clone)]
62pub struct Edge {
63    pub source: String,
64    /// Node identity — always matches a key in `graph.nodes` (or is a dangling target).
65    /// Fragment-stripped: `bar.md`, not `bar.md#heading`.
66    pub target: String,
67    /// Original link when it differs from target (e.g., `bar.md#heading`).
68    /// Absent when the link resolved to exactly the node ID.
69    pub link: Option<String>,
70    /// Which parser discovered this edge (provenance).
71    pub parser: String,
72}
73
74/// Filesystem properties of an edge target, probed during graph building.
75/// Stored per-target on the Graph, not per-edge.
76#[derive(Debug, Clone, Default, serde::Serialize)]
77pub struct TargetProperties {
78    pub is_symlink: bool,
79    pub is_directory: bool,
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub symlink_target: Option<String>,
82}
83
84#[derive(Debug, Default)]
85pub struct Graph {
86    pub nodes: HashMap<String, Node>,
87    pub edges: Vec<Edge>,
88    pub forward: HashMap<String, Vec<usize>>,
89    pub reverse: HashMap<String, Vec<usize>>,
90    pub child_graphs: Vec<String>,
91    /// Resolved interface nodes from config (empty = open graph).
92    pub interface: Vec<String>,
93    /// Filesystem properties of edge targets, keyed by node identity (fragment-stripped).
94    pub target_properties: HashMap<String, TargetProperties>,
95}
96
97impl Graph {
98    pub fn new() -> Self {
99        Self::default()
100    }
101
102    pub fn add_node(&mut self, node: Node) {
103        self.nodes.insert(node.path.clone(), node);
104    }
105
106    /// Returns true for File nodes (excludes External and Directory).
107    pub fn is_file_node(&self, path: &str) -> bool {
108        self.nodes
109            .get(path)
110            .is_some_and(|n| n.node_type == NodeType::File)
111    }
112
113    /// Returns true when the node was matched by `include` during discovery.
114    pub fn is_included_node(&self, path: &str) -> bool {
115        self.nodes.get(path).is_some_and(|n| n.included)
116    }
117
118    /// Returns true when both endpoints are included nodes (the edge stays
119    /// within the declared scope of the graph).
120    pub fn is_internal_edge(&self, edge: &Edge) -> bool {
121        self.is_included_node(&edge.source) && self.is_included_node(&edge.target)
122    }
123
124    pub fn add_edge(&mut self, edge: Edge) {
125        let idx = self.edges.len();
126        self.forward
127            .entry(edge.source.clone())
128            .or_default()
129            .push(idx);
130        self.reverse
131            .entry(edge.target.clone())
132            .or_default()
133            .push(idx);
134        self.edges.push(edge);
135    }
136
137    /// Get filesystem properties for an edge target.
138    pub fn target_props(&self, target: &str) -> Option<&TargetProperties> {
139        self.target_properties.get(target)
140    }
141
142    /// Create a new graph containing only edges from the specified parsers.
143    /// All nodes are preserved. Adjacency maps are rebuilt for the filtered edge set.
144    pub fn filter_by_parsers(&self, parsers: &[String]) -> Graph {
145        let mut filtered = Graph {
146            nodes: self.nodes.clone(),
147            child_graphs: self.child_graphs.clone(),
148            interface: self.interface.clone(),
149            target_properties: self.target_properties.clone(),
150            ..Default::default()
151        };
152
153        for edge in &self.edges {
154            if parsers.iter().any(|p| p == &edge.parser) {
155                filtered.add_edge(edge.clone());
156            }
157        }
158
159        filtered
160    }
161}
162
163/// Hash file contents with BLAKE3, returning `b3:<hex>`.
164pub fn hash_bytes(content: &[u8]) -> String {
165    format!("b3:{}", blake3::hash(content).to_hex())
166}
167
168/// Load a child graph's config, and if it declares an `[interface]`,
169/// add each interface file as a File node with a coupling edge
170/// to the child's Directory node.
171fn promote_interface_files(
172    root: &Path,
173    child_name: &str,
174    graph: &mut Graph,
175    implicit_edges: &mut Vec<Edge>,
176) {
177    let child_dir = root.join(child_name);
178    let config = match Config::load(&child_dir) {
179        Ok(c) => c,
180        Err(_) => return,
181    };
182
183    let (interface_files, interface_ignore) = match &config.interface {
184        Some(iface) => (&iface.files, &iface.ignore),
185        None => return,
186    };
187
188    // Resolve interface globs to actual files, honoring child excludes and interface ignores
189    let mut exclude_patterns = config.exclude.clone();
190    exclude_patterns.extend(interface_ignore.iter().cloned());
191
192    let included = match discover(&child_dir, interface_files, &exclude_patterns) {
193        Ok(files) => files,
194        Err(_) => return,
195    };
196
197    for file in included {
198        let node_path = format!("{child_name}/{file}");
199        if graph.nodes.contains_key(&node_path) {
200            continue;
201        }
202        let file_path = child_dir.join(&file);
203        let hash = std::fs::read(&file_path).ok().map(|c| hash_bytes(&c));
204        graph.add_node(Node {
205            path: node_path.clone(),
206            node_type: NodeType::File,
207            hash,
208            graph: Some(child_name.into()),
209            is_graph: false,
210            metadata: HashMap::new(),
211            included: false,
212        });
213        implicit_edges.push(Edge {
214            source: node_path,
215            target: child_name.into(),
216            link: None,
217            parser: String::new(),
218        });
219    }
220}
221
222/// Returns true if `target_path` resolves to a location within `canonical_root`.
223/// Uses canonicalization to resolve symlinks and normalize paths.
224/// Returns false if the path doesn't exist or escapes the root.
225fn is_within_root(target_path: &Path, canonical_root: &Path) -> bool {
226    target_path
227        .canonicalize()
228        .is_ok_and(|canonical| canonical.starts_with(canonical_root))
229}
230
231/// Build a graph from files in `root`.
232///
233/// 1. Discover File nodes via `include`/`exclude` — hash raw bytes for all.
234/// 2. Read text content for parser input (graceful skip for binary files).
235/// 3. Run parsers to extract edges.
236/// 4. Edge targets outside `include` become File nodes (`included: false`).
237pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
238    let canonical_root = root.canonicalize()?;
239    let included_files = discover(root, &config.include, &config.exclude)?;
240    let child_graphs = find_child_graphs(root, &config.exclude)?;
241    let mut graph = Graph::new();
242    graph.child_graphs = child_graphs;
243    let mut pending_edges = Vec::new();
244
245    // 1. Create File nodes for everything in include — hash raw bytes.
246    //    Separately read text content for files parsers will need.
247    let mut file_text: HashMap<String, String> = HashMap::new(); // path → text content
248
249    for file in &included_files {
250        let file_path = root.join(file);
251
252        // Safety: don't read files that resolve outside the graph root (e.g. symlinks).
253        // Still create the node so it's visible, but warn.
254        if !is_within_root(&file_path, &canonical_root) {
255            eprintln!(
256                "warn: included file '{file}' resolves outside the graph root and was not read"
257            );
258            graph.add_node(Node {
259                path: file.clone(),
260                node_type: NodeType::File,
261                hash: None,
262                graph: Some(".".into()),
263                is_graph: false,
264                metadata: HashMap::new(),
265                included: true,
266            });
267            continue;
268        }
269
270        let raw = std::fs::read(&file_path)?;
271        let hash = hash_bytes(&raw);
272
273        graph.add_node(Node {
274            path: file.clone(),
275            node_type: NodeType::File,
276            hash: Some(hash),
277            graph: Some(".".into()),
278            is_graph: false,
279            metadata: HashMap::new(),
280            included: true,
281        });
282
283        // Try to read as text for parser input — binary files just won't have text
284        if let Ok(text) = String::from_utf8(raw) {
285            file_text.insert(file.clone(), text);
286        }
287    }
288
289    // 2. Build parser registry and determine which files each parser receives
290    let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref(), root);
291    let mut parser_files: Vec<Vec<String>> = vec![Vec::new(); parser_list.len()];
292
293    for file in &included_files {
294        for (i, parser) in parser_list.iter().enumerate() {
295            if parser.matches(file) {
296                parser_files[i].push(file.clone());
297            }
298        }
299    }
300
301    // 3. Run each parser in batch mode
302    for (i, parser) in parser_list.iter().enumerate() {
303        let files: Vec<(&str, &str)> = parser_files[i]
304            .iter()
305            .filter_map(|path| {
306                file_text
307                    .get(path)
308                    .map(|content| (path.as_str(), content.as_str()))
309            })
310            .collect();
311
312        if files.is_empty() {
313            continue;
314        }
315
316        let batch_results = parser.parse_batch(&files);
317
318        for (file, result) in batch_results {
319            // Attach metadata to node if parser returned it
320            if let Some(metadata) = result.metadata
321                && let Some(node) = graph.nodes.get_mut(&file)
322            {
323                node.metadata.insert(parser.name().to_string(), metadata);
324            }
325
326            for link in result.links {
327                let normalized = match normalize_link_target(&link) {
328                    Some(n) => n,
329                    None => continue, // filtered (empty, anchor-only)
330                };
331
332                let target = if is_uri(&normalized.target) {
333                    normalized.target
334                } else {
335                    resolve_link(&file, &normalized.target)
336                };
337                // link carries the full original when it has a fragment
338                let link = normalized.fragment.map(|frag| format!("{target}{frag}"));
339                pending_edges.push(Edge {
340                    source: file.clone(),
341                    target,
342                    link,
343                    parser: parser.name().to_string(),
344                });
345            }
346        }
347    }
348
349    // 4. Edge-driven node creation.
350    //    Child graph directories are NOT pre-created — they only get nodes when
351    //    an edge references them or a file inside them. This keeps the model
352    //    uniform: edges create nodes, discovery creates files.
353    let graph_prefixes: Vec<String> = graph.child_graphs.clone();
354
355    // 5. Classify edge targets not already in the graph.
356    //    edge.target is already the node identity (fragment-stripped).
357    //    graph field uses filesystem-relative convention:
358    //      "."           — belongs to current graph
359    //      ".."          — escaped to parent graph
360    //      "research"    — belongs to child graph "research"
361    //      None          — not on the filesystem (URI)
362    let mut implicit_edges = Vec::new();
363    for edge in &pending_edges {
364        if graph.nodes.contains_key(&edge.target) {
365            continue;
366        }
367
368        // URIs → External (not on filesystem)
369        if is_uri(&edge.target) {
370            graph.add_node(Node {
371                path: edge.target.clone(),
372                node_type: NodeType::External,
373                hash: None,
374                graph: None,
375                is_graph: false,
376                metadata: HashMap::new(),
377                included: false,
378            });
379            continue;
380        }
381
382        let target_path = root.join(&edge.target);
383
384        // Safety: targets that logically escape the graph root (../, absolute paths)
385        // get a node but no filesystem access. Prevents directory traversal.
386        // Non-existent targets within root fall through to normal classification
387        // (no node created = dangling-edge).
388        let escapes_root =
389            edge.target.starts_with("../") || edge.target == ".." || edge.target.starts_with('/');
390        if escapes_root {
391            let graph_field = if edge.target.starts_with("../") || edge.target == ".." {
392                Some("..".into())
393            } else {
394                None
395            };
396            graph.add_node(Node {
397                path: edge.target.clone(),
398                node_type: NodeType::File,
399                hash: None,
400                graph: graph_field,
401                is_graph: false,
402                metadata: HashMap::new(),
403                included: false,
404            });
405            continue;
406        }
407
408        // From here: target is logically within root.
409        // Symlink check: verify the resolved path stays within root before any filesystem access.
410        if target_path.exists() && !is_within_root(&target_path, &canonical_root) {
411            graph.add_node(Node {
412                path: edge.target.clone(),
413                node_type: NodeType::File,
414                hash: None,
415                graph: Some(".".into()),
416                is_graph: false,
417                metadata: HashMap::new(),
418                included: false,
419            });
420            continue;
421        }
422
423        // Determine which graph this target belongs to.
424        // (../ and absolute paths already handled by escapes_root above)
425        let graph_field = graph_prefixes
426            .iter()
427            .find(|s| edge.target.starts_with(&format!("{s}/")))
428            .cloned();
429
430        // Child graph target
431        if let Some(ref membership) = graph_field {
432            if target_path.is_file() {
433                let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
434                graph.add_node(Node {
435                    path: edge.target.clone(),
436                    node_type: NodeType::File,
437                    hash,
438                    graph: Some(membership.clone()),
439                    is_graph: false,
440                    metadata: HashMap::new(),
441                    included: false,
442                });
443                // Ensure Directory node exists + coupling edge
444                if !graph.nodes.contains_key(membership.as_str()) {
445                    let child_dir = root.join(membership);
446                    let config_hash = std::fs::read(child_dir.join("drft.toml"))
447                        .ok()
448                        .map(|c| hash_bytes(&c));
449                    graph.add_node(Node {
450                        path: membership.clone(),
451                        node_type: NodeType::Directory,
452                        hash: config_hash,
453                        graph: Some(".".into()),
454                        is_graph: true,
455                        metadata: HashMap::new(),
456                        included: false,
457                    });
458                    promote_interface_files(root, membership, &mut graph, &mut implicit_edges);
459                }
460                implicit_edges.push(Edge {
461                    source: edge.target.clone(),
462                    target: membership.clone(),
463                    link: None,
464                    parser: edge.parser.clone(),
465                });
466            } else if target_path.is_dir() {
467                let has_config = target_path.join("drft.toml").exists();
468                graph.add_node(Node {
469                    path: edge.target.clone(),
470                    node_type: NodeType::Directory,
471                    hash: None,
472                    graph: Some(membership.clone()),
473                    is_graph: has_config,
474                    metadata: HashMap::new(),
475                    included: false,
476                });
477            }
478            // If target doesn't exist: no node created. dangling-edge handles it.
479            continue;
480        }
481
482        // Local target (within current graph scope)
483
484        // Directory on disk → Directory node
485        if target_path.is_dir() {
486            let has_config = target_path.join("drft.toml").exists();
487            let hash = if has_config {
488                std::fs::read(target_path.join("drft.toml"))
489                    .ok()
490                    .map(|c| hash_bytes(&c))
491            } else {
492                None
493            };
494            graph.add_node(Node {
495                path: edge.target.clone(),
496                node_type: NodeType::Directory,
497                hash,
498                graph: Some(".".into()),
499                is_graph: has_config,
500                metadata: HashMap::new(),
501                included: false,
502            });
503            if has_config {
504                promote_interface_files(root, &edge.target, &mut graph, &mut implicit_edges);
505            }
506            continue;
507        }
508
509        // File exists on disk but not in include → File (local, not tracked)
510        if target_path.is_file() {
511            let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
512            graph.add_node(Node {
513                path: edge.target.clone(),
514                node_type: NodeType::File,
515                hash,
516                graph: Some(".".into()),
517                is_graph: false,
518                metadata: HashMap::new(),
519                included: false,
520            });
521        }
522        // If doesn't exist: no node created. dangling-edge rule handles this.
523    }
524
525    // Probe filesystem properties for non-URI edge targets (stored per-target, not per-edge).
526    // Only probe targets within the graph root — no filesystem access for escaped targets.
527    pending_edges.extend(implicit_edges);
528    for edge in &pending_edges {
529        if is_uri(&edge.target) || graph.target_properties.contains_key(&edge.target) {
530            continue;
531        }
532        let target_path = root.join(&edge.target);
533        if !is_within_root(&target_path, &canonical_root) {
534            continue;
535        }
536        let is_symlink = target_path.is_symlink();
537        let is_directory = target_path.is_dir();
538        let symlink_target = if is_symlink {
539            std::fs::read_link(&target_path)
540                .ok()
541                .map(|p| p.to_string_lossy().to_string())
542        } else {
543            None
544        };
545        graph.target_properties.insert(
546            edge.target.clone(),
547            TargetProperties {
548                is_symlink,
549                is_directory,
550                symlink_target,
551            },
552        );
553    }
554
555    // Add all edges (explicit + implicit) to the graph.
556    for edge in pending_edges {
557        graph.add_edge(edge);
558    }
559
560    // Resolve interface from config (files included, ignore excluded)
561    if let Some(ref iface) = config.interface {
562        let ignore_set = crate::config::compile_globs(&iface.ignore)?;
563
564        let mut resolved = Vec::new();
565        for pattern in &iface.files {
566            if let Ok(glob) = globset::Glob::new(pattern) {
567                let matcher = glob.compile_matcher();
568                for path in graph.nodes.keys() {
569                    if matcher.is_match(path) {
570                        resolved.push(path.clone());
571                    }
572                }
573            } else {
574                if graph.nodes.contains_key(pattern) {
575                    resolved.push(pattern.clone());
576                }
577            }
578        }
579
580        if let Some(ref ignore) = ignore_set {
581            resolved.retain(|p| !ignore.is_match(p));
582        }
583
584        resolved.sort();
585        resolved.dedup();
586        graph.interface = resolved;
587    }
588
589    Ok(graph)
590}
591
592/// Normalize a relative path by resolving `.` and `..` components using Path APIs.
593/// Does not touch the filesystem. Always returns forward-slash separated paths.
594/// Preserves leading `..` that escape above the root — these indicate graph escape.
595pub fn normalize_relative_path(path: &str) -> String {
596    let mut parts: Vec<String> = Vec::new();
597    for component in Path::new(path).components() {
598        match component {
599            std::path::Component::CurDir => {}
600            std::path::Component::ParentDir => {
601                // Only pop if there's a normal component to pop (not a leading ..)
602                if parts.last().is_some_and(|p| p != "..") {
603                    parts.pop();
604                } else {
605                    parts.push("..".to_string());
606                }
607            }
608            std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
609            _ => {}
610        }
611    }
612    parts.join("/")
613}
614
615/// Normalized link target: the node identity and optional fragment metadata.
616struct NormalizedTarget {
617    /// The target path or URI with fragment stripped (used for node identity).
618    target: String,
619    /// The fragment portion (e.g., `#heading`), if any. Preserved as edge metadata.
620    fragment: Option<String>,
621}
622
623/// Normalize a raw link target from a parser.
624/// Returns None for targets that should be filtered (empty, anchor-only with no file target).
625/// Strips fragments for node identity but preserves them as metadata.
626fn normalize_link_target(raw: &str) -> Option<NormalizedTarget> {
627    let target = raw.trim();
628    if target.is_empty() {
629        return None;
630    }
631
632    // Anchor-only links (#heading) have no file target — drop them
633    if target.starts_with('#') {
634        return None;
635    }
636
637    // Split target and fragment at the first #
638    let (base, fragment) = match target.find('#') {
639        Some(idx) => (&target[..idx], Some(target[idx..].to_string())),
640        None => (target, None),
641    };
642
643    // After stripping fragment, if nothing remains, drop
644    if base.is_empty() {
645        return None;
646    }
647
648    Some(NormalizedTarget {
649        target: base.to_string(),
650        fragment,
651    })
652}
653
654/// Resolve a link target relative to a source file, producing a path relative to the graph root.
655/// Uses Path::join for correct platform-aware path handling.
656pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
657    let source_path = Path::new(source_file);
658    let source_dir = source_path.parent().unwrap_or(Path::new(""));
659    let joined = source_dir.join(raw_target);
660    normalize_relative_path(&joined.to_string_lossy())
661}
662
663#[cfg(test)]
664pub mod test_helpers {
665    use super::*;
666
667    pub fn make_node(path: &str) -> Node {
668        Node {
669            path: path.into(),
670            node_type: NodeType::File,
671            hash: None,
672            graph: Some(".".into()),
673            is_graph: false,
674            metadata: HashMap::new(),
675            included: true,
676        }
677    }
678
679    pub fn make_edge(source: &str, target: &str) -> Edge {
680        Edge {
681            source: source.into(),
682            target: target.into(),
683            link: None,
684            parser: "markdown".into(),
685        }
686    }
687
688    pub fn make_enriched(graph: Graph) -> crate::analyses::EnrichedGraph {
689        crate::analyses::enrich_graph(
690            graph,
691            std::path::Path::new("."),
692            &crate::config::Config::defaults(),
693            None,
694        )
695    }
696
697    pub fn make_enriched_with_root(
698        graph: Graph,
699        root: &std::path::Path,
700    ) -> crate::analyses::EnrichedGraph {
701        crate::analyses::enrich_graph(graph, root, &crate::config::Config::defaults(), None)
702    }
703}
704
705#[cfg(test)]
706mod tests {
707    use super::*;
708
709    #[test]
710    fn normalize_simple() {
711        assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
712    }
713
714    #[test]
715    fn normalize_dot() {
716        assert_eq!(normalize_relative_path("./a/./b"), "a/b");
717    }
718
719    #[test]
720    fn normalize_dotdot() {
721        assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
722    }
723
724    #[test]
725    fn normalize_preserves_leading_dotdot() {
726        assert_eq!(normalize_relative_path("../a"), "../a");
727    }
728
729    #[test]
730    fn normalize_deep_escape() {
731        assert_eq!(normalize_relative_path("../../a"), "../../a");
732    }
733
734    #[test]
735    fn normalize_escape_after_descent() {
736        // guides/../../README.md -> ../README.md (one level above root)
737        assert_eq!(
738            normalize_relative_path("guides/../../README.md"),
739            "../README.md"
740        );
741    }
742
743    #[test]
744    fn resolve_same_dir() {
745        assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
746    }
747
748    #[test]
749    fn resolve_subdir() {
750        assert_eq!(
751            resolve_link("guides/intro.md", "setup.md"),
752            "guides/setup.md"
753        );
754    }
755
756    #[test]
757    fn resolve_parent() {
758        assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
759    }
760
761    #[test]
762    fn graph_adjacency() {
763        let mut g = Graph::new();
764        g.add_node(Node {
765            path: "a.md".into(),
766            node_type: NodeType::File,
767            hash: None,
768            graph: None,
769            is_graph: false,
770            metadata: HashMap::new(),
771            included: false,
772        });
773        g.add_node(Node {
774            path: "b.md".into(),
775            node_type: NodeType::File,
776            hash: None,
777            graph: None,
778            is_graph: false,
779            metadata: HashMap::new(),
780            included: false,
781        });
782        g.add_edge(Edge {
783            source: "a.md".into(),
784            target: "b.md".into(),
785            link: None,
786            parser: "markdown".into(),
787        });
788        assert_eq!(g.forward["a.md"], vec![0]);
789        assert_eq!(g.reverse["b.md"], vec![0]);
790        assert!(!g.forward.contains_key("b.md"));
791    }
792
793    #[test]
794    fn fragment_edge_resolves_to_node() {
795        let mut g = Graph::new();
796        g.add_node(test_helpers::make_node("a.md"));
797        g.add_node(test_helpers::make_node("b.md"));
798        g.add_edge(Edge {
799            source: "a.md".into(),
800            target: "b.md".into(),
801            link: Some("b.md#heading".into()),
802            parser: "markdown".into(),
803        });
804        // target is the node ID
805        assert_eq!(g.edges[0].target, "b.md");
806        // reference carries the full original
807        assert_eq!(g.edges[0].link.as_deref(), Some("b.md#heading"));
808        // reverse map works directly on target
809        assert_eq!(g.reverse["b.md"], vec![0]);
810    }
811
812    #[test]
813    fn is_uri_detects_schemes() {
814        assert!(is_uri("http://example.com"));
815        assert!(is_uri("https://example.com"));
816        assert!(is_uri("mailto:user@example.com"));
817        assert!(is_uri("ftp://files.example.com"));
818        assert!(is_uri("tel:+1234567890"));
819        assert!(is_uri("ssh://git@github.com"));
820        assert!(is_uri("custom+scheme://foo"));
821    }
822
823    #[test]
824    fn is_uri_rejects_paths() {
825        assert!(!is_uri("setup.md"));
826        assert!(!is_uri("./relative/path.md"));
827        assert!(!is_uri("../parent.md"));
828        assert!(!is_uri("#heading"));
829        assert!(!is_uri(""));
830        assert!(!is_uri("path/with:colon.md"));
831    }
832
833    #[test]
834    fn is_uri_rejects_bare_schemes() {
835        // WHATWG parses any `word:stuff` as a valid URL, so we require
836        // authority (://) or a known opaque scheme (mailto, tel, etc.)
837        assert!(!is_uri("name: foo bar bazz"));
838        assert!(!is_uri("status: draft"));
839        assert!(!is_uri("title: My Document"));
840        assert!(!is_uri("name:foo"));
841        assert!(!is_uri("x:y"));
842    }
843
844    #[test]
845    fn normalize_strips_fragment() {
846        let n = normalize_link_target("file.md#heading").unwrap();
847        assert_eq!(n.target, "file.md");
848        assert_eq!(n.fragment.as_deref(), Some("#heading"));
849    }
850
851    #[test]
852    fn normalize_strips_uri_fragment() {
853        let n = normalize_link_target("https://example.com/page#section").unwrap();
854        assert_eq!(n.target, "https://example.com/page");
855        assert_eq!(n.fragment.as_deref(), Some("#section"));
856    }
857
858    #[test]
859    fn normalize_drops_anchor_only() {
860        assert!(normalize_link_target("#heading").is_none());
861    }
862
863    #[test]
864    fn normalize_drops_empty() {
865        assert!(normalize_link_target("").is_none());
866        assert!(normalize_link_target("  ").is_none());
867    }
868
869    #[test]
870    fn normalize_preserves_mailto() {
871        let n = normalize_link_target("mailto:user@example.com").unwrap();
872        assert_eq!(n.target, "mailto:user@example.com");
873        assert!(n.fragment.is_none());
874    }
875
876    #[test]
877    fn filter_by_single_parser() {
878        let mut g = Graph::new();
879        g.add_node(test_helpers::make_node("a.md"));
880        g.add_node(test_helpers::make_node("b.md"));
881        g.add_node(test_helpers::make_node("c.md"));
882        g.add_edge(Edge {
883            source: "a.md".into(),
884            target: "b.md".into(),
885            link: None,
886            parser: "markdown".into(),
887        });
888        g.add_edge(Edge {
889            source: "a.md".into(),
890            target: "c.md".into(),
891            link: None,
892            parser: "frontmatter".into(),
893        });
894
895        let filtered = g.filter_by_parsers(&["frontmatter".into()]);
896        assert_eq!(filtered.edges.len(), 1);
897        assert_eq!(filtered.edges[0].target, "c.md");
898        assert_eq!(filtered.edges[0].parser, "frontmatter");
899    }
900
901    #[test]
902    fn filter_preserves_all_nodes() {
903        let mut g = Graph::new();
904        g.add_node(test_helpers::make_node("a.md"));
905        g.add_node(test_helpers::make_node("b.md"));
906        g.add_edge(Edge {
907            source: "a.md".into(),
908            target: "b.md".into(),
909            link: None,
910            parser: "markdown".into(),
911        });
912
913        let filtered = g.filter_by_parsers(&["frontmatter".into()]);
914        assert_eq!(filtered.nodes.len(), 2);
915        assert!(filtered.nodes.contains_key("a.md"));
916        assert!(filtered.nodes.contains_key("b.md"));
917        assert!(filtered.edges.is_empty());
918    }
919
920    #[test]
921    fn filter_rebuilds_adjacency_maps() {
922        let mut g = Graph::new();
923        g.add_node(test_helpers::make_node("a.md"));
924        g.add_node(test_helpers::make_node("b.md"));
925        g.add_node(test_helpers::make_node("c.md"));
926        g.add_edge(Edge {
927            source: "a.md".into(),
928            target: "b.md".into(),
929            link: None,
930            parser: "markdown".into(),
931        });
932        g.add_edge(Edge {
933            source: "a.md".into(),
934            target: "c.md".into(),
935            link: None,
936            parser: "frontmatter".into(),
937        });
938
939        let filtered = g.filter_by_parsers(&["frontmatter".into()]);
940        assert_eq!(filtered.forward["a.md"], vec![0]);
941        assert_eq!(filtered.reverse["c.md"], vec![0]);
942        assert!(!filtered.reverse.contains_key("b.md"));
943    }
944
945    #[test]
946    fn filter_by_multiple_parsers() {
947        let mut g = Graph::new();
948        g.add_node(test_helpers::make_node("a.md"));
949        g.add_node(test_helpers::make_node("b.md"));
950        g.add_node(test_helpers::make_node("c.md"));
951        g.add_edge(Edge {
952            source: "a.md".into(),
953            target: "b.md".into(),
954            link: None,
955            parser: "markdown".into(),
956        });
957        g.add_edge(Edge {
958            source: "a.md".into(),
959            target: "c.md".into(),
960            link: None,
961            parser: "frontmatter".into(),
962        });
963
964        let filtered = g.filter_by_parsers(&["markdown".into(), "frontmatter".into()]);
965        assert_eq!(filtered.edges.len(), 2);
966    }
967
968    #[test]
969    fn filter_empty_parsers_removes_all_edges() {
970        let mut g = Graph::new();
971        g.add_node(test_helpers::make_node("a.md"));
972        g.add_node(test_helpers::make_node("b.md"));
973        g.add_edge(test_helpers::make_edge("a.md", "b.md"));
974
975        let filtered = g.filter_by_parsers(&[]);
976        assert!(filtered.edges.is_empty());
977        assert_eq!(filtered.nodes.len(), 2);
978    }
979}