Skip to main content

drft/
graph.rs

1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9/// Check if a target string is a URI (has a scheme per RFC 3986).
10/// A scheme is `[a-zA-Z][a-zA-Z0-9+.-]*:` — e.g., `http:`, `mailto:`, `ftp:`, `tel:`.
11pub fn is_uri(target: &str) -> bool {
12    let bytes = target.as_bytes();
13    if bytes.is_empty() || !bytes[0].is_ascii_alphabetic() {
14        return false;
15    }
16    for &b in &bytes[1..] {
17        if b == b':' {
18            return true;
19        }
20        if !b.is_ascii_alphanumeric() && b != b'+' && b != b'-' && b != b'.' {
21            return false;
22        }
23    }
24    false
25}
26
27#[derive(
28    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
29)]
30#[serde(rename_all = "lowercase")]
31pub enum NodeType {
32    File,
33    Directory,
34    External,
35}
36
37#[derive(Debug, Clone, serde::Serialize)]
38pub struct Node {
39    pub path: String,
40    pub node_type: NodeType,
41    pub hash: Option<String>,
42    /// Which graph this node belongs to — mirrors filesystem directory entries:
43    /// `"."` = local, `".."` = parent (escape), `"child"` = child graph, `None` = not on filesystem.
44    #[serde(skip_serializing_if = "Option::is_none")]
45    pub graph: Option<String>,
46    /// True when this Directory node has a drft.toml (is a drft graph).
47    #[serde(skip_serializing_if = "std::ops::Not::not")]
48    pub is_graph: bool,
49    /// Structured metadata from parsers, keyed by parser name.
50    #[serde(skip_serializing_if = "HashMap::is_empty")]
51    pub metadata: HashMap<String, serde_json::Value>,
52    /// True when this node was matched by `include` during discovery.
53    /// False for nodes discovered via edge targets (outside include, child graph files, etc.).
54    #[serde(default)]
55    pub included: bool,
56}
57
58#[derive(Debug, Clone)]
59pub struct Edge {
60    pub source: String,
61    /// Node identity — always matches a key in `graph.nodes` (or is a dangling target).
62    /// Fragment-stripped: `bar.md`, not `bar.md#heading`.
63    pub target: String,
64    /// Original link when it differs from target (e.g., `bar.md#heading`).
65    /// Absent when the link resolved to exactly the node ID.
66    pub link: Option<String>,
67    /// Which parser discovered this edge (provenance).
68    pub parser: String,
69}
70
71/// Filesystem properties of an edge target, probed during graph building.
72/// Stored per-target on the Graph, not per-edge.
73#[derive(Debug, Clone, Default)]
74pub struct TargetProperties {
75    pub is_symlink: bool,
76    pub is_directory: bool,
77    pub symlink_target: Option<String>,
78}
79
80#[derive(Debug, Default)]
81pub struct Graph {
82    pub nodes: HashMap<String, Node>,
83    pub edges: Vec<Edge>,
84    pub forward: HashMap<String, Vec<usize>>,
85    pub reverse: HashMap<String, Vec<usize>>,
86    pub child_graphs: Vec<String>,
87    /// Resolved interface nodes from config (empty = open graph).
88    pub interface: Vec<String>,
89    /// Filesystem properties of edge targets, keyed by node identity (fragment-stripped).
90    pub target_properties: HashMap<String, TargetProperties>,
91}
92
93impl Graph {
94    pub fn new() -> Self {
95        Self::default()
96    }
97
98    pub fn add_node(&mut self, node: Node) {
99        self.nodes.insert(node.path.clone(), node);
100    }
101
102    /// Returns true for File nodes (excludes External and Directory).
103    pub fn is_file_node(&self, path: &str) -> bool {
104        self.nodes
105            .get(path)
106            .is_some_and(|n| n.node_type == NodeType::File)
107    }
108
109    /// Returns true when the node was matched by `include` during discovery.
110    pub fn is_included_node(&self, path: &str) -> bool {
111        self.nodes.get(path).is_some_and(|n| n.included)
112    }
113
114    /// Returns true when both endpoints are included nodes (the edge stays
115    /// within the declared scope of the graph).
116    pub fn is_internal_edge(&self, edge: &Edge) -> bool {
117        self.is_included_node(&edge.source) && self.is_included_node(&edge.target)
118    }
119
120    pub fn add_edge(&mut self, edge: Edge) {
121        let idx = self.edges.len();
122        self.forward
123            .entry(edge.source.clone())
124            .or_default()
125            .push(idx);
126        self.reverse
127            .entry(edge.target.clone())
128            .or_default()
129            .push(idx);
130        self.edges.push(edge);
131    }
132
133    /// Get filesystem properties for an edge target.
134    pub fn target_props(&self, target: &str) -> Option<&TargetProperties> {
135        self.target_properties.get(target)
136    }
137
138    /// Create a new graph containing only edges from the specified parsers.
139    /// All nodes are preserved. Adjacency maps are rebuilt for the filtered edge set.
140    pub fn filter_by_parsers(&self, parsers: &[String]) -> Graph {
141        let mut filtered = Graph {
142            nodes: self.nodes.clone(),
143            child_graphs: self.child_graphs.clone(),
144            interface: self.interface.clone(),
145            target_properties: self.target_properties.clone(),
146            ..Default::default()
147        };
148
149        for edge in &self.edges {
150            if parsers.iter().any(|p| p == &edge.parser) {
151                filtered.add_edge(edge.clone());
152            }
153        }
154
155        filtered
156    }
157}
158
159/// Hash file contents with BLAKE3, returning `b3:<hex>`.
160pub fn hash_bytes(content: &[u8]) -> String {
161    format!("b3:{}", blake3::hash(content).to_hex())
162}
163
164/// Load a child graph's config, and if it declares an `[interface]`,
165/// add each interface file as a File node with a coupling edge
166/// to the child's Directory node.
167fn promote_interface_files(
168    root: &Path,
169    child_name: &str,
170    graph: &mut Graph,
171    implicit_edges: &mut Vec<Edge>,
172) {
173    let child_dir = root.join(child_name);
174    let config = match Config::load(&child_dir) {
175        Ok(c) => c,
176        Err(_) => return,
177    };
178
179    let (interface_files, interface_ignore) = match &config.interface {
180        Some(iface) => (&iface.files, &iface.ignore),
181        None => return,
182    };
183
184    // Resolve interface globs to actual files, honoring child excludes and interface ignores
185    let mut exclude_patterns = config.exclude.clone();
186    exclude_patterns.extend(interface_ignore.iter().cloned());
187
188    let included = match discover(&child_dir, interface_files, &exclude_patterns) {
189        Ok(files) => files,
190        Err(_) => return,
191    };
192
193    for file in included {
194        let node_path = format!("{child_name}/{file}");
195        if graph.nodes.contains_key(&node_path) {
196            continue;
197        }
198        let file_path = child_dir.join(&file);
199        let hash = std::fs::read(&file_path).ok().map(|c| hash_bytes(&c));
200        graph.add_node(Node {
201            path: node_path.clone(),
202            node_type: NodeType::File,
203            hash,
204            graph: Some(child_name.into()),
205            is_graph: false,
206            metadata: HashMap::new(),
207            included: false,
208        });
209        implicit_edges.push(Edge {
210            source: node_path,
211            target: child_name.into(),
212            link: None,
213            parser: String::new(),
214        });
215    }
216}
217
218/// Returns true if `target_path` resolves to a location within `canonical_root`.
219/// Uses canonicalization to resolve symlinks and normalize paths.
220/// Returns false if the path doesn't exist or escapes the root.
221fn is_within_root(target_path: &Path, canonical_root: &Path) -> bool {
222    target_path
223        .canonicalize()
224        .is_ok_and(|canonical| canonical.starts_with(canonical_root))
225}
226
227/// Build a graph from files in `root`.
228///
229/// 1. Discover File nodes via `include`/`exclude` — hash raw bytes for all.
230/// 2. Read text content for parser input (graceful skip for binary files).
231/// 3. Run parsers to extract edges.
232/// 4. Edge targets outside `include` become File nodes (`included: false`).
233pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
234    let canonical_root = root.canonicalize()?;
235    let included_files = discover(root, &config.include, &config.exclude)?;
236    let child_graphs = find_child_graphs(root, &config.exclude)?;
237    let mut graph = Graph::new();
238    graph.child_graphs = child_graphs;
239    let mut pending_edges = Vec::new();
240
241    // 1. Create File nodes for everything in include — hash raw bytes.
242    //    Separately read text content for files parsers will need.
243    let mut file_text: HashMap<String, String> = HashMap::new(); // path → text content
244
245    for file in &included_files {
246        let file_path = root.join(file);
247
248        // Safety: don't read files that resolve outside the graph root (e.g. symlinks).
249        // Still create the node so it's visible, but warn.
250        if !is_within_root(&file_path, &canonical_root) {
251            eprintln!(
252                "warn: included file '{file}' resolves outside the graph root and was not read"
253            );
254            graph.add_node(Node {
255                path: file.clone(),
256                node_type: NodeType::File,
257                hash: None,
258                graph: Some(".".into()),
259                is_graph: false,
260                metadata: HashMap::new(),
261                included: true,
262            });
263            continue;
264        }
265
266        let raw = std::fs::read(&file_path)?;
267        let hash = hash_bytes(&raw);
268
269        graph.add_node(Node {
270            path: file.clone(),
271            node_type: NodeType::File,
272            hash: Some(hash),
273            graph: Some(".".into()),
274            is_graph: false,
275            metadata: HashMap::new(),
276            included: true,
277        });
278
279        // Try to read as text for parser input — binary files just won't have text
280        if let Ok(text) = String::from_utf8(raw) {
281            file_text.insert(file.clone(), text);
282        }
283    }
284
285    // 2. Build parser registry and determine which files each parser receives
286    let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref(), root);
287    let mut parser_files: Vec<Vec<String>> = vec![Vec::new(); parser_list.len()];
288
289    for file in &included_files {
290        for (i, parser) in parser_list.iter().enumerate() {
291            if parser.matches(file) {
292                parser_files[i].push(file.clone());
293            }
294        }
295    }
296
297    // 3. Run each parser in batch mode
298    for (i, parser) in parser_list.iter().enumerate() {
299        let files: Vec<(&str, &str)> = parser_files[i]
300            .iter()
301            .filter_map(|path| {
302                file_text
303                    .get(path)
304                    .map(|content| (path.as_str(), content.as_str()))
305            })
306            .collect();
307
308        if files.is_empty() {
309            continue;
310        }
311
312        let batch_results = parser.parse_batch(&files);
313
314        for (file, result) in batch_results {
315            // Attach metadata to node if parser returned it
316            if let Some(metadata) = result.metadata
317                && let Some(node) = graph.nodes.get_mut(&file)
318            {
319                node.metadata.insert(parser.name().to_string(), metadata);
320            }
321
322            for link in result.links {
323                let normalized = match normalize_link_target(&link) {
324                    Some(n) => n,
325                    None => continue, // filtered (empty, anchor-only)
326                };
327
328                let target = if is_uri(&normalized.target) {
329                    normalized.target
330                } else {
331                    resolve_link(&file, &normalized.target)
332                };
333                // link carries the full original when it has a fragment
334                let link = normalized.fragment.map(|frag| format!("{target}{frag}"));
335                pending_edges.push(Edge {
336                    source: file.clone(),
337                    target,
338                    link,
339                    parser: parser.name().to_string(),
340                });
341            }
342        }
343    }
344
345    // 4. Edge-driven node creation.
346    //    Child graph directories are NOT pre-created — they only get nodes when
347    //    an edge references them or a file inside them. This keeps the model
348    //    uniform: edges create nodes, discovery creates files.
349    let graph_prefixes: Vec<String> = graph.child_graphs.clone();
350
351    // 5. Classify edge targets not already in the graph.
352    //    edge.target is already the node identity (fragment-stripped).
353    //    graph field uses filesystem-relative convention:
354    //      "."           — belongs to current graph
355    //      ".."          — escaped to parent graph
356    //      "research"    — belongs to child graph "research"
357    //      None          — not on the filesystem (URI)
358    let mut implicit_edges = Vec::new();
359    for edge in &pending_edges {
360        if graph.nodes.contains_key(&edge.target) {
361            continue;
362        }
363
364        // URIs → External (not on filesystem)
365        if is_uri(&edge.target) {
366            graph.add_node(Node {
367                path: edge.target.clone(),
368                node_type: NodeType::External,
369                hash: None,
370                graph: None,
371                is_graph: false,
372                metadata: HashMap::new(),
373                included: false,
374            });
375            continue;
376        }
377
378        let target_path = root.join(&edge.target);
379
380        // Safety: targets that logically escape the graph root (../, absolute paths)
381        // get a node but no filesystem access. Prevents directory traversal.
382        // Non-existent targets within root fall through to normal classification
383        // (no node created = dangling-edge).
384        let escapes_root =
385            edge.target.starts_with("../") || edge.target == ".." || edge.target.starts_with('/');
386        if escapes_root {
387            let graph_field = if edge.target.starts_with("../") || edge.target == ".." {
388                Some("..".into())
389            } else {
390                None
391            };
392            graph.add_node(Node {
393                path: edge.target.clone(),
394                node_type: NodeType::File,
395                hash: None,
396                graph: graph_field,
397                is_graph: false,
398                metadata: HashMap::new(),
399                included: false,
400            });
401            continue;
402        }
403
404        // From here: target is logically within root.
405        // Symlink check: verify the resolved path stays within root before any filesystem access.
406        if target_path.exists() && !is_within_root(&target_path, &canonical_root) {
407            graph.add_node(Node {
408                path: edge.target.clone(),
409                node_type: NodeType::File,
410                hash: None,
411                graph: Some(".".into()),
412                is_graph: false,
413                metadata: HashMap::new(),
414                included: false,
415            });
416            continue;
417        }
418
419        // Determine which graph this target belongs to.
420        // (../ and absolute paths already handled by escapes_root above)
421        let graph_field = graph_prefixes
422            .iter()
423            .find(|s| edge.target.starts_with(&format!("{s}/")))
424            .cloned();
425
426        // Child graph target
427        if let Some(ref membership) = graph_field {
428            if target_path.is_file() {
429                let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
430                graph.add_node(Node {
431                    path: edge.target.clone(),
432                    node_type: NodeType::File,
433                    hash,
434                    graph: Some(membership.clone()),
435                    is_graph: false,
436                    metadata: HashMap::new(),
437                    included: false,
438                });
439                // Ensure Directory node exists + coupling edge
440                if !graph.nodes.contains_key(membership.as_str()) {
441                    let child_dir = root.join(membership);
442                    let config_hash = std::fs::read(child_dir.join("drft.toml"))
443                        .ok()
444                        .map(|c| hash_bytes(&c));
445                    graph.add_node(Node {
446                        path: membership.clone(),
447                        node_type: NodeType::Directory,
448                        hash: config_hash,
449                        graph: Some(".".into()),
450                        is_graph: true,
451                        metadata: HashMap::new(),
452                        included: false,
453                    });
454                    promote_interface_files(root, membership, &mut graph, &mut implicit_edges);
455                }
456                implicit_edges.push(Edge {
457                    source: edge.target.clone(),
458                    target: membership.clone(),
459                    link: None,
460                    parser: edge.parser.clone(),
461                });
462            } else if target_path.is_dir() {
463                let has_config = target_path.join("drft.toml").exists();
464                graph.add_node(Node {
465                    path: edge.target.clone(),
466                    node_type: NodeType::Directory,
467                    hash: None,
468                    graph: Some(membership.clone()),
469                    is_graph: has_config,
470                    metadata: HashMap::new(),
471                    included: false,
472                });
473            }
474            // If target doesn't exist: no node created. dangling-edge handles it.
475            continue;
476        }
477
478        // Local target (within current graph scope)
479
480        // Directory on disk → Directory node
481        if target_path.is_dir() {
482            let has_config = target_path.join("drft.toml").exists();
483            let hash = if has_config {
484                std::fs::read(target_path.join("drft.toml"))
485                    .ok()
486                    .map(|c| hash_bytes(&c))
487            } else {
488                None
489            };
490            graph.add_node(Node {
491                path: edge.target.clone(),
492                node_type: NodeType::Directory,
493                hash,
494                graph: Some(".".into()),
495                is_graph: has_config,
496                metadata: HashMap::new(),
497                included: false,
498            });
499            if has_config {
500                promote_interface_files(root, &edge.target, &mut graph, &mut implicit_edges);
501            }
502            continue;
503        }
504
505        // File exists on disk but not in include → File (local, not tracked)
506        if target_path.is_file() {
507            let hash = std::fs::read(&target_path).ok().map(|c| hash_bytes(&c));
508            graph.add_node(Node {
509                path: edge.target.clone(),
510                node_type: NodeType::File,
511                hash,
512                graph: Some(".".into()),
513                is_graph: false,
514                metadata: HashMap::new(),
515                included: false,
516            });
517        }
518        // If doesn't exist: no node created. dangling-edge rule handles this.
519    }
520
521    // Probe filesystem properties for non-URI edge targets (stored per-target, not per-edge).
522    // Only probe targets within the graph root — no filesystem access for escaped targets.
523    pending_edges.extend(implicit_edges);
524    for edge in &pending_edges {
525        if is_uri(&edge.target) || graph.target_properties.contains_key(&edge.target) {
526            continue;
527        }
528        let target_path = root.join(&edge.target);
529        if !is_within_root(&target_path, &canonical_root) {
530            continue;
531        }
532        let is_symlink = target_path.is_symlink();
533        let is_directory = target_path.is_dir();
534        let symlink_target = if is_symlink {
535            std::fs::read_link(&target_path)
536                .ok()
537                .map(|p| p.to_string_lossy().to_string())
538        } else {
539            None
540        };
541        graph.target_properties.insert(
542            edge.target.clone(),
543            TargetProperties {
544                is_symlink,
545                is_directory,
546                symlink_target,
547            },
548        );
549    }
550
551    // Add all edges (explicit + implicit) to the graph.
552    for edge in pending_edges {
553        graph.add_edge(edge);
554    }
555
556    // Resolve interface from config (files included, ignore excluded)
557    if let Some(ref iface) = config.interface {
558        let ignore_set = crate::config::compile_globs(&iface.ignore)?;
559
560        let mut resolved = Vec::new();
561        for pattern in &iface.files {
562            if let Ok(glob) = globset::Glob::new(pattern) {
563                let matcher = glob.compile_matcher();
564                for path in graph.nodes.keys() {
565                    if matcher.is_match(path) {
566                        resolved.push(path.clone());
567                    }
568                }
569            } else {
570                if graph.nodes.contains_key(pattern) {
571                    resolved.push(pattern.clone());
572                }
573            }
574        }
575
576        if let Some(ref ignore) = ignore_set {
577            resolved.retain(|p| !ignore.is_match(p));
578        }
579
580        resolved.sort();
581        resolved.dedup();
582        graph.interface = resolved;
583    }
584
585    Ok(graph)
586}
587
588/// Normalize a relative path by resolving `.` and `..` components using Path APIs.
589/// Does not touch the filesystem. Always returns forward-slash separated paths.
590/// Preserves leading `..` that escape above the root — these indicate graph escape.
591pub fn normalize_relative_path(path: &str) -> String {
592    let mut parts: Vec<String> = Vec::new();
593    for component in Path::new(path).components() {
594        match component {
595            std::path::Component::CurDir => {}
596            std::path::Component::ParentDir => {
597                // Only pop if there's a normal component to pop (not a leading ..)
598                if parts.last().is_some_and(|p| p != "..") {
599                    parts.pop();
600                } else {
601                    parts.push("..".to_string());
602                }
603            }
604            std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
605            _ => {}
606        }
607    }
608    parts.join("/")
609}
610
611/// Normalized link target: the node identity and optional fragment metadata.
612struct NormalizedTarget {
613    /// The target path or URI with fragment stripped (used for node identity).
614    target: String,
615    /// The fragment portion (e.g., `#heading`), if any. Preserved as edge metadata.
616    fragment: Option<String>,
617}
618
619/// Normalize a raw link target from a parser.
620/// Returns None for targets that should be filtered (empty, anchor-only with no file target).
621/// Strips fragments for node identity but preserves them as metadata.
622fn normalize_link_target(raw: &str) -> Option<NormalizedTarget> {
623    let target = raw.trim();
624    if target.is_empty() {
625        return None;
626    }
627
628    // Anchor-only links (#heading) have no file target — drop them
629    if target.starts_with('#') {
630        return None;
631    }
632
633    // Split target and fragment at the first #
634    let (base, fragment) = match target.find('#') {
635        Some(idx) => (&target[..idx], Some(target[idx..].to_string())),
636        None => (target, None),
637    };
638
639    // After stripping fragment, if nothing remains, drop
640    if base.is_empty() {
641        return None;
642    }
643
644    Some(NormalizedTarget {
645        target: base.to_string(),
646        fragment,
647    })
648}
649
650/// Resolve a link target relative to a source file, producing a path relative to the graph root.
651/// Uses Path::join for correct platform-aware path handling.
652pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
653    let source_path = Path::new(source_file);
654    let source_dir = source_path.parent().unwrap_or(Path::new(""));
655    let joined = source_dir.join(raw_target);
656    normalize_relative_path(&joined.to_string_lossy())
657}
658
659#[cfg(test)]
660pub mod test_helpers {
661    use super::*;
662
663    pub fn make_node(path: &str) -> Node {
664        Node {
665            path: path.into(),
666            node_type: NodeType::File,
667            hash: None,
668            graph: Some(".".into()),
669            is_graph: false,
670            metadata: HashMap::new(),
671            included: true,
672        }
673    }
674
675    pub fn make_edge(source: &str, target: &str) -> Edge {
676        Edge {
677            source: source.into(),
678            target: target.into(),
679            link: None,
680            parser: "markdown".into(),
681        }
682    }
683
684    pub fn make_enriched(graph: Graph) -> crate::analyses::EnrichedGraph {
685        crate::analyses::enrich_graph(
686            graph,
687            std::path::Path::new("."),
688            &crate::config::Config::defaults(),
689            None,
690        )
691    }
692
693    pub fn make_enriched_with_root(
694        graph: Graph,
695        root: &std::path::Path,
696    ) -> crate::analyses::EnrichedGraph {
697        crate::analyses::enrich_graph(graph, root, &crate::config::Config::defaults(), None)
698    }
699}
700
701#[cfg(test)]
702mod tests {
703    use super::*;
704
705    #[test]
706    fn normalize_simple() {
707        assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
708    }
709
710    #[test]
711    fn normalize_dot() {
712        assert_eq!(normalize_relative_path("./a/./b"), "a/b");
713    }
714
715    #[test]
716    fn normalize_dotdot() {
717        assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
718    }
719
720    #[test]
721    fn normalize_preserves_leading_dotdot() {
722        assert_eq!(normalize_relative_path("../a"), "../a");
723    }
724
725    #[test]
726    fn normalize_deep_escape() {
727        assert_eq!(normalize_relative_path("../../a"), "../../a");
728    }
729
730    #[test]
731    fn normalize_escape_after_descent() {
732        // guides/../../README.md -> ../README.md (one level above root)
733        assert_eq!(
734            normalize_relative_path("guides/../../README.md"),
735            "../README.md"
736        );
737    }
738
739    #[test]
740    fn resolve_same_dir() {
741        assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
742    }
743
744    #[test]
745    fn resolve_subdir() {
746        assert_eq!(
747            resolve_link("guides/intro.md", "setup.md"),
748            "guides/setup.md"
749        );
750    }
751
752    #[test]
753    fn resolve_parent() {
754        assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
755    }
756
757    #[test]
758    fn graph_adjacency() {
759        let mut g = Graph::new();
760        g.add_node(Node {
761            path: "a.md".into(),
762            node_type: NodeType::File,
763            hash: None,
764            graph: None,
765            is_graph: false,
766            metadata: HashMap::new(),
767            included: false,
768        });
769        g.add_node(Node {
770            path: "b.md".into(),
771            node_type: NodeType::File,
772            hash: None,
773            graph: None,
774            is_graph: false,
775            metadata: HashMap::new(),
776            included: false,
777        });
778        g.add_edge(Edge {
779            source: "a.md".into(),
780            target: "b.md".into(),
781            link: None,
782            parser: "markdown".into(),
783        });
784        assert_eq!(g.forward["a.md"], vec![0]);
785        assert_eq!(g.reverse["b.md"], vec![0]);
786        assert!(!g.forward.contains_key("b.md"));
787    }
788
789    #[test]
790    fn fragment_edge_resolves_to_node() {
791        let mut g = Graph::new();
792        g.add_node(test_helpers::make_node("a.md"));
793        g.add_node(test_helpers::make_node("b.md"));
794        g.add_edge(Edge {
795            source: "a.md".into(),
796            target: "b.md".into(),
797            link: Some("b.md#heading".into()),
798            parser: "markdown".into(),
799        });
800        // target is the node ID
801        assert_eq!(g.edges[0].target, "b.md");
802        // reference carries the full original
803        assert_eq!(g.edges[0].link.as_deref(), Some("b.md#heading"));
804        // reverse map works directly on target
805        assert_eq!(g.reverse["b.md"], vec![0]);
806    }
807
808    #[test]
809    fn is_uri_detects_schemes() {
810        assert!(is_uri("http://example.com"));
811        assert!(is_uri("https://example.com"));
812        assert!(is_uri("mailto:user@example.com"));
813        assert!(is_uri("ftp://files.example.com"));
814        assert!(is_uri("tel:+1234567890"));
815        assert!(is_uri("ssh://git@github.com"));
816        assert!(is_uri("custom+scheme://foo"));
817    }
818
819    #[test]
820    fn is_uri_rejects_paths() {
821        assert!(!is_uri("setup.md"));
822        assert!(!is_uri("./relative/path.md"));
823        assert!(!is_uri("../parent.md"));
824        assert!(!is_uri("#heading"));
825        assert!(!is_uri(""));
826        assert!(!is_uri("path/with:colon.md")); // colon after slash = not a scheme
827    }
828
829    #[test]
830    fn normalize_strips_fragment() {
831        let n = normalize_link_target("file.md#heading").unwrap();
832        assert_eq!(n.target, "file.md");
833        assert_eq!(n.fragment.as_deref(), Some("#heading"));
834    }
835
836    #[test]
837    fn normalize_strips_uri_fragment() {
838        let n = normalize_link_target("https://example.com/page#section").unwrap();
839        assert_eq!(n.target, "https://example.com/page");
840        assert_eq!(n.fragment.as_deref(), Some("#section"));
841    }
842
843    #[test]
844    fn normalize_drops_anchor_only() {
845        assert!(normalize_link_target("#heading").is_none());
846    }
847
848    #[test]
849    fn normalize_drops_empty() {
850        assert!(normalize_link_target("").is_none());
851        assert!(normalize_link_target("  ").is_none());
852    }
853
854    #[test]
855    fn normalize_preserves_mailto() {
856        let n = normalize_link_target("mailto:user@example.com").unwrap();
857        assert_eq!(n.target, "mailto:user@example.com");
858        assert!(n.fragment.is_none());
859    }
860
861    #[test]
862    fn filter_by_single_parser() {
863        let mut g = Graph::new();
864        g.add_node(test_helpers::make_node("a.md"));
865        g.add_node(test_helpers::make_node("b.md"));
866        g.add_node(test_helpers::make_node("c.md"));
867        g.add_edge(Edge {
868            source: "a.md".into(),
869            target: "b.md".into(),
870            link: None,
871            parser: "markdown".into(),
872        });
873        g.add_edge(Edge {
874            source: "a.md".into(),
875            target: "c.md".into(),
876            link: None,
877            parser: "frontmatter".into(),
878        });
879
880        let filtered = g.filter_by_parsers(&["frontmatter".into()]);
881        assert_eq!(filtered.edges.len(), 1);
882        assert_eq!(filtered.edges[0].target, "c.md");
883        assert_eq!(filtered.edges[0].parser, "frontmatter");
884    }
885
886    #[test]
887    fn filter_preserves_all_nodes() {
888        let mut g = Graph::new();
889        g.add_node(test_helpers::make_node("a.md"));
890        g.add_node(test_helpers::make_node("b.md"));
891        g.add_edge(Edge {
892            source: "a.md".into(),
893            target: "b.md".into(),
894            link: None,
895            parser: "markdown".into(),
896        });
897
898        let filtered = g.filter_by_parsers(&["frontmatter".into()]);
899        assert_eq!(filtered.nodes.len(), 2);
900        assert!(filtered.nodes.contains_key("a.md"));
901        assert!(filtered.nodes.contains_key("b.md"));
902        assert!(filtered.edges.is_empty());
903    }
904
905    #[test]
906    fn filter_rebuilds_adjacency_maps() {
907        let mut g = Graph::new();
908        g.add_node(test_helpers::make_node("a.md"));
909        g.add_node(test_helpers::make_node("b.md"));
910        g.add_node(test_helpers::make_node("c.md"));
911        g.add_edge(Edge {
912            source: "a.md".into(),
913            target: "b.md".into(),
914            link: None,
915            parser: "markdown".into(),
916        });
917        g.add_edge(Edge {
918            source: "a.md".into(),
919            target: "c.md".into(),
920            link: None,
921            parser: "frontmatter".into(),
922        });
923
924        let filtered = g.filter_by_parsers(&["frontmatter".into()]);
925        assert_eq!(filtered.forward["a.md"], vec![0]);
926        assert_eq!(filtered.reverse["c.md"], vec![0]);
927        assert!(!filtered.reverse.contains_key("b.md"));
928    }
929
930    #[test]
931    fn filter_by_multiple_parsers() {
932        let mut g = Graph::new();
933        g.add_node(test_helpers::make_node("a.md"));
934        g.add_node(test_helpers::make_node("b.md"));
935        g.add_node(test_helpers::make_node("c.md"));
936        g.add_edge(Edge {
937            source: "a.md".into(),
938            target: "b.md".into(),
939            link: None,
940            parser: "markdown".into(),
941        });
942        g.add_edge(Edge {
943            source: "a.md".into(),
944            target: "c.md".into(),
945            link: None,
946            parser: "frontmatter".into(),
947        });
948
949        let filtered = g.filter_by_parsers(&["markdown".into(), "frontmatter".into()]);
950        assert_eq!(filtered.edges.len(), 2);
951    }
952
953    #[test]
954    fn filter_empty_parsers_removes_all_edges() {
955        let mut g = Graph::new();
956        g.add_node(test_helpers::make_node("a.md"));
957        g.add_node(test_helpers::make_node("b.md"));
958        g.add_edge(test_helpers::make_edge("a.md", "b.md"));
959
960        let filtered = g.filter_by_parsers(&[]);
961        assert!(filtered.edges.is_empty());
962        assert_eq!(filtered.nodes.len(), 2);
963    }
964}