Skip to main content

drft/
graph.rs

1use anyhow::Result;
2use std::collections::HashMap;
3use std::path::Path;
4
5use crate::config::Config;
6use crate::discovery::{discover, find_child_graphs};
7use crate::parsers;
8
9#[derive(
10    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
11)]
12#[serde(rename_all = "lowercase")]
13pub enum NodeType {
14    Source,
15    Resource,
16    External,
17    Graph,
18}
19
20/// Namespaced edge type in the format `parser:type`.
21/// Validated on construction — accessors are infallible.
22#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
23pub struct EdgeType {
24    parser: String,
25    link_type: String,
26}
27
28impl EdgeType {
29    pub fn new(parser: impl Into<String>, link_type: impl Into<String>) -> Self {
30        Self {
31            parser: parser.into(),
32            link_type: link_type.into(),
33        }
34    }
35
36    #[allow(dead_code)]
37    pub fn parser(&self) -> &str {
38        &self.parser
39    }
40
41    #[allow(dead_code)]
42    pub fn link_type(&self) -> &str {
43        &self.link_type
44    }
45}
46
47impl std::fmt::Display for EdgeType {
48    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
49        write!(f, "{}:{}", self.parser, self.link_type)
50    }
51}
52
53impl std::str::FromStr for EdgeType {
54    type Err = anyhow::Error;
55    fn from_str(s: &str) -> Result<Self> {
56        let (parser, link_type) = s
57            .split_once(':')
58            .ok_or_else(|| anyhow::anyhow!("edge type must be 'parser:type', got '{s}'"))?;
59        Ok(Self::new(parser, link_type))
60    }
61}
62
63impl serde::Serialize for EdgeType {
64    fn serialize<S: serde::Serializer>(
65        &self,
66        serializer: S,
67    ) -> std::result::Result<S::Ok, S::Error> {
68        serializer.serialize_str(&self.to_string())
69    }
70}
71
72impl<'de> serde::Deserialize<'de> for EdgeType {
73    fn deserialize<D: serde::Deserializer<'de>>(
74        deserializer: D,
75    ) -> std::result::Result<Self, D::Error> {
76        let s = String::deserialize(deserializer)?;
77        s.parse().map_err(serde::de::Error::custom)
78    }
79}
80
81#[derive(Debug, Clone, serde::Serialize)]
82pub struct Node {
83    pub path: String,
84    pub node_type: NodeType,
85    pub hash: Option<String>,
86    /// If set, this node lives in a child graph (value is the child graph's path).
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub graph: Option<String>,
89}
90
91#[derive(Debug, Clone)]
92pub struct Edge {
93    pub source: String,
94    pub target: String,
95    pub edge_type: EdgeType,
96    /// True if created by graph builder (e.g., child-graph coupling edges), not parsed from content.
97    #[allow(dead_code)]
98    pub synthetic: bool,
99}
100
101#[derive(Debug, Default)]
102pub struct Graph {
103    pub nodes: HashMap<String, Node>,
104    pub edges: Vec<Edge>,
105    pub forward: HashMap<String, Vec<usize>>,
106    pub reverse: HashMap<String, Vec<usize>>,
107    pub child_graphs: Vec<String>,
108    /// Resolved interface nodes from config (empty = open graph).
109    pub interface: Vec<String>,
110}
111
112impl Graph {
113    pub fn new() -> Self {
114        Self::default()
115    }
116
117    /// Child graphs (nodes with type Graph).
118    #[allow(dead_code)]
119    pub fn children(&self) -> impl Iterator<Item = &Node> {
120        self.nodes
121            .values()
122            .filter(|n| n.node_type == NodeType::Graph)
123    }
124
125    /// Whether a node is part of this graph's interface.
126    #[allow(dead_code)]
127    pub fn is_interfaced(&self, path: &str) -> bool {
128        self.interface.iter().any(|e| e == path)
129    }
130
131    pub fn add_node(&mut self, node: Node) {
132        self.nodes.insert(node.path.clone(), node);
133    }
134
135    /// Returns true for Source and Resource nodes (excludes External and Graph).
136    /// Used by structural analyses that operate only on file-backed nodes.
137    pub fn is_file_node(&self, path: &str) -> bool {
138        self.nodes
139            .get(path)
140            .is_some_and(|n| matches!(n.node_type, NodeType::Source | NodeType::Resource))
141    }
142
143    pub fn add_edge(&mut self, edge: Edge) {
144        let idx = self.edges.len();
145        self.forward
146            .entry(edge.source.clone())
147            .or_default()
148            .push(idx);
149        self.reverse
150            .entry(edge.target.clone())
151            .or_default()
152            .push(idx);
153        self.edges.push(edge);
154    }
155}
156
157/// Hash file contents with BLAKE3, returning `b3:<hex>`.
158pub fn hash_bytes(content: &[u8]) -> String {
159    format!("b3:{}", blake3::hash(content).to_hex())
160}
161
162/// Build a graph from files in `root`, using configured parsers to extract links.
163/// Computes BLAKE3 content hashes for all nodes.
164pub fn build_graph(root: &Path, config: &Config) -> Result<Graph> {
165    let all_files = discover(root, &config.ignore)?;
166    let child_graphs = find_child_graphs(root, &config.ignore)?;
167    let mut graph = Graph::new();
168    graph.child_graphs = child_graphs;
169    let mut pending_edges = Vec::new();
170
171    // Build parser registry from config
172    let parser_list = parsers::build_parsers(&config.parsers, config.config_dir.as_deref());
173
174    // For each file, find matching parsers, run them, collect links
175    for file in &all_files {
176        // Find all parsers that match this file
177        let matching: Vec<&dyn parsers::Parser> = parser_list
178            .iter()
179            .filter(|p| p.matches(file))
180            .map(|p| p.as_ref())
181            .collect();
182
183        if matching.is_empty() {
184            continue;
185        }
186
187        let file_path = root.join(file);
188        let content = std::fs::read_to_string(&file_path)?;
189        let hash = hash_bytes(content.as_bytes());
190
191        graph.add_node(Node {
192            path: file.clone(),
193            node_type: NodeType::Source,
194            hash: Some(hash),
195            graph: None,
196        });
197
198        // Run all matching parsers
199        for parser in &matching {
200            let links = parser.parse(file, &content);
201            for link in links {
202                let edge_type = EdgeType::new(parser.name(), &link.link_type);
203                if link.is_external {
204                    pending_edges.push(Edge {
205                        source: file.clone(),
206                        target: link.target,
207                        edge_type,
208                        synthetic: false,
209                    });
210                } else {
211                    let resolved = resolve_link(file, &link.target);
212                    pending_edges.push(Edge {
213                        source: file.clone(),
214                        target: resolved,
215                        edge_type,
216                        synthetic: false,
217                    });
218                }
219            }
220        }
221    }
222
223    // Create Graph nodes for child graphs — any directory with drft.toml or drft.lock.
224    // No hash: staleness within a child graph is the child's concern, not the parent's.
225    let graph_prefixes: Vec<String> = graph.child_graphs.clone();
226    for graph_dir in &graph_prefixes {
227        graph.add_node(Node {
228            path: graph_dir.clone(),
229            node_type: NodeType::Graph,
230            hash: None,
231            graph: None,
232        });
233    }
234
235    // Build ignore set for filtering asset nodes
236    let ignore_set = if config.ignore.is_empty() {
237        None
238    } else {
239        let mut builder = globset::GlobSetBuilder::new();
240        for pattern in &config.ignore {
241            if let Ok(glob) = globset::Glob::new(pattern) {
242                builder.add(glob);
243            }
244        }
245        builder.build().ok()
246    };
247
248    // Create External, child-graph projection, and Resource nodes for non-Source targets
249    let mut implicit_edges = Vec::new();
250    for edge in &pending_edges {
251        if graph.nodes.contains_key(&edge.target) {
252            continue;
253        }
254        if edge.target.starts_with("http://") || edge.target.starts_with("https://") {
255            graph.add_node(Node {
256                path: edge.target.clone(),
257                node_type: NodeType::External,
258                hash: None,
259                graph: None,
260            });
261            continue;
262        }
263
264        // Check if target is inside a child graph → Source/Resource with graph field
265        let in_child_graph = graph_prefixes
266            .iter()
267            .find(|s| edge.target.starts_with(s.as_str()));
268        if let Some(graph_prefix) = in_child_graph {
269            let target_path = root.join(&edge.target);
270            if target_path.is_file() {
271                let content = std::fs::read(&target_path)?;
272                let hash = hash_bytes(&content);
273                graph.add_node(Node {
274                    path: edge.target.clone(),
275                    node_type: NodeType::Resource,
276                    hash: Some(hash),
277                    graph: Some(graph_prefix.clone()),
278                });
279                // Synthetic coupling edge: child-graph node → Graph node
280                implicit_edges.push(Edge {
281                    source: edge.target.clone(),
282                    target: graph_prefix.clone(),
283                    edge_type: edge.edge_type.clone(),
284                    synthetic: true,
285                });
286            }
287            continue;
288        }
289
290        // Skip ignored files — they should not become Resource nodes
291        if let Some(ref set) = ignore_set
292            && set.is_match(&edge.target)
293        {
294            continue;
295        }
296
297        // Regular Resource node
298        let target_path = root.join(&edge.target);
299        if target_path.is_file() {
300            let content = std::fs::read(&target_path)?;
301            let hash = hash_bytes(&content);
302            graph.add_node(Node {
303                path: edge.target.clone(),
304                node_type: NodeType::Resource,
305                hash: Some(hash),
306                graph: None,
307            });
308        }
309    }
310
311    // Add all edges (explicit + implicit) to the graph
312    pending_edges.extend(implicit_edges);
313    for edge in pending_edges {
314        graph.add_edge(edge);
315    }
316
317    // Resolve interface from config
318    if let Some(ref iface) = config.interface {
319        let mut resolved = Vec::new();
320        for pattern in &iface.nodes {
321            if let Ok(glob) = globset::Glob::new(pattern) {
322                let matcher = glob.compile_matcher();
323                for path in graph.nodes.keys() {
324                    if matcher.is_match(path) {
325                        resolved.push(path.clone());
326                    }
327                }
328            } else {
329                // Treat as literal path
330                if graph.nodes.contains_key(pattern) {
331                    resolved.push(pattern.clone());
332                }
333            }
334        }
335        resolved.sort();
336        resolved.dedup();
337        graph.interface = resolved;
338    }
339
340    Ok(graph)
341}
342
343/// Normalize a relative path by resolving `.` and `..` components using Path APIs.
344/// Does not touch the filesystem. Always returns forward-slash separated paths.
345/// Preserves leading `..` that escape above the root — these indicate graph escape.
346pub fn normalize_relative_path(path: &str) -> String {
347    let mut parts: Vec<String> = Vec::new();
348    for component in Path::new(path).components() {
349        match component {
350            std::path::Component::CurDir => {}
351            std::path::Component::ParentDir => {
352                // Only pop if there's a normal component to pop (not a leading ..)
353                if parts.last().is_some_and(|p| p != "..") {
354                    parts.pop();
355                } else {
356                    parts.push("..".to_string());
357                }
358            }
359            std::path::Component::Normal(c) => parts.push(c.to_string_lossy().to_string()),
360            _ => {}
361        }
362    }
363    parts.join("/")
364}
365
366/// Resolve a link target relative to a source file, producing a path relative to the graph root.
367/// Uses Path::join for correct platform-aware path handling.
368pub fn resolve_link(source_file: &str, raw_target: &str) -> String {
369    let source_path = Path::new(source_file);
370    let source_dir = source_path.parent().unwrap_or(Path::new(""));
371    let joined = source_dir.join(raw_target);
372    normalize_relative_path(&joined.to_string_lossy())
373}
374
375#[cfg(test)]
376pub mod test_helpers {
377    use super::*;
378
379    pub fn make_node(path: &str) -> Node {
380        Node {
381            path: path.into(),
382            node_type: NodeType::Source,
383            hash: None,
384            graph: None,
385        }
386    }
387
388    pub fn make_edge(source: &str, target: &str) -> Edge {
389        Edge {
390            source: source.into(),
391            target: target.into(),
392            edge_type: EdgeType::new("markdown", "inline"),
393            synthetic: false,
394        }
395    }
396}
397
398#[cfg(test)]
399mod tests {
400    use super::*;
401
402    #[test]
403    fn normalize_simple() {
404        assert_eq!(normalize_relative_path("a/b/c"), "a/b/c");
405    }
406
407    #[test]
408    fn normalize_dot() {
409        assert_eq!(normalize_relative_path("./a/./b"), "a/b");
410    }
411
412    #[test]
413    fn normalize_dotdot() {
414        assert_eq!(normalize_relative_path("a/b/../c"), "a/c");
415    }
416
417    #[test]
418    fn normalize_preserves_leading_dotdot() {
419        assert_eq!(normalize_relative_path("../a"), "../a");
420    }
421
422    #[test]
423    fn normalize_deep_escape() {
424        assert_eq!(normalize_relative_path("../../a"), "../../a");
425    }
426
427    #[test]
428    fn normalize_escape_after_descent() {
429        // guides/../../README.md -> ../README.md (one level above root)
430        assert_eq!(
431            normalize_relative_path("guides/../../README.md"),
432            "../README.md"
433        );
434    }
435
436    #[test]
437    fn resolve_same_dir() {
438        assert_eq!(resolve_link("index.md", "setup.md"), "setup.md");
439    }
440
441    #[test]
442    fn resolve_subdir() {
443        assert_eq!(
444            resolve_link("guides/intro.md", "setup.md"),
445            "guides/setup.md"
446        );
447    }
448
449    #[test]
450    fn resolve_parent() {
451        assert_eq!(resolve_link("guides/intro.md", "../config.md"), "config.md");
452    }
453
454    #[test]
455    fn graph_adjacency() {
456        let mut g = Graph::new();
457        g.add_node(Node {
458            path: "a.md".into(),
459            node_type: NodeType::Source,
460            hash: None,
461            graph: None,
462        });
463        g.add_node(Node {
464            path: "b.md".into(),
465            node_type: NodeType::Source,
466            hash: None,
467            graph: None,
468        });
469        g.add_edge(Edge {
470            source: "a.md".into(),
471            target: "b.md".into(),
472            edge_type: EdgeType::new("markdown", "inline"),
473            synthetic: false,
474        });
475        assert_eq!(g.forward["a.md"], vec![0]);
476        assert_eq!(g.reverse["b.md"], vec![0]);
477        assert!(!g.forward.contains_key("b.md"));
478    }
479}