Skip to main content

ought_spec/
graph.rs

1use std::collections::{HashMap, VecDeque};
2use std::path::{Component, Path, PathBuf};
3
4use crate::parser::{OughtMdParser, Parser};
5use crate::types::{ParseError, Spec};
6
7/// A directed graph of spec files, built from `requires:` references.
8///
9/// Resolves cross-file dependencies at construction time, rejects cycles,
10/// and provides topological ordering for downstream execution.
11#[derive(Debug)]
12pub struct SpecGraph {
13    specs: Vec<Spec>,
14    /// Directed edges `(dependent_idx, dependency_idx)` into `specs`. Resolved
15    /// once at construction and reused by `topological_order` and anything
16    /// else that needs to walk the `requires:` graph.
17    edges: Vec<(usize, usize)>,
18}
19
20impl SpecGraph {
21    /// Discover and parse every `.ought.md` file under the given roots using
22    /// the default [`OughtMdParser`], then build the graph.
23    pub fn from_roots(roots: &[PathBuf]) -> Result<Self, Vec<ParseError>> {
24        Self::from_roots_with(&OughtMdParser, roots)
25    }
26
27    /// Same as [`SpecGraph::from_roots`] but parses with a caller-supplied
28    /// [`Parser`]. Useful for tests and for plugging in alternative spec
29    /// formats.
30    pub fn from_roots_with(
31        parser: &dyn Parser,
32        roots: &[PathBuf],
33    ) -> Result<Self, Vec<ParseError>> {
34        let files = collect_files(roots);
35        let (specs, mut errors) = parse_all(parser, &files);
36
37        match Self::from_specs(specs) {
38            Ok(graph) => {
39                if errors.is_empty() {
40                    Ok(graph)
41                } else {
42                    Err(errors)
43                }
44            }
45            Err(graph_errors) => {
46                errors.extend(graph_errors);
47                Err(errors)
48            }
49        }
50    }
51
52    /// Build a graph from already-parsed specs. Validates that every
53    /// `requires:` entry resolves to a spec in the set and rejects cycles.
54    ///
55    /// Prefer this when callers have their own parsing pipeline (tests, the
56    /// viewer, any consumer that wants to inspect or transform specs before
57    /// graph construction).
58    pub fn from_specs(specs: Vec<Spec>) -> Result<Self, Vec<ParseError>> {
59        let (edges, mut errors) = resolve_references(&specs);
60        errors.extend(detect_cycles(&specs, &edges));
61
62        if errors.is_empty() {
63            Ok(Self { specs, edges })
64        } else {
65            Err(errors)
66        }
67    }
68
69    /// All parsed specs.
70    pub fn specs(&self) -> &[Spec] {
71        &self.specs
72    }
73
74    /// Specs in topological order (dependencies before dependents). Uses
75    /// Kahn's algorithm over the pre-resolved edges.
76    pub fn topological_order(&self) -> Vec<&Spec> {
77        let n = self.specs.len();
78        if n == 0 {
79            return Vec::new();
80        }
81
82        let mut in_degree = vec![0usize; n];
83        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); n];
84
85        for &(dependent, dependency) in &self.edges {
86            // Edge direction for ordering: dependency must come before
87            // dependent, so walk from dependency → dependent.
88            adjacency[dependency].push(dependent);
89            in_degree[dependent] += 1;
90        }
91
92        let mut queue: VecDeque<usize> = in_degree
93            .iter()
94            .enumerate()
95            .filter_map(|(i, &d)| if d == 0 { Some(i) } else { None })
96            .collect();
97
98        let mut order = Vec::with_capacity(n);
99        while let Some(node) = queue.pop_front() {
100            order.push(&self.specs[node]);
101            for &neighbor in &adjacency[node] {
102                in_degree[neighbor] -= 1;
103                if in_degree[neighbor] == 0 {
104                    queue.push_back(neighbor);
105                }
106            }
107        }
108
109        // If order.len() < n there's a cycle, but construction already
110        // rejects those — so in practice order always covers every spec.
111        order
112    }
113
114    /// Look up a spec by its source file path.
115    pub fn get_by_path(&self, path: &Path) -> Option<&Spec> {
116        self.specs.iter().find(|s| s.source_path == path)
117    }
118}
119
120/// Walk each root directory and gather every `*.ought.md` file, deduplicated
121/// and sorted for determinism.
122fn collect_files(roots: &[PathBuf]) -> Vec<PathBuf> {
123    let mut all_files = Vec::new();
124    for root in roots {
125        all_files.extend(collect_ought_files(root));
126    }
127    all_files.sort();
128    all_files.dedup();
129    all_files
130}
131
132/// Parse every file with the given parser, accumulating specs and any
133/// per-file parse errors separately.
134fn parse_all(parser: &dyn Parser, files: &[PathBuf]) -> (Vec<Spec>, Vec<ParseError>) {
135    let mut specs = Vec::new();
136    let mut errors = Vec::new();
137    for file in files {
138        match parser.parse_file(file) {
139            Ok(spec) => specs.push(spec),
140            Err(errs) => errors.extend(errs),
141        }
142    }
143    (specs, errors)
144}
145
146/// Recursively walk a directory and collect all files matching `*.ought.md`.
147fn collect_ought_files(dir: &Path) -> Vec<PathBuf> {
148    let mut results = Vec::new();
149    if let Ok(entries) = std::fs::read_dir(dir) {
150        for entry in entries.flatten() {
151            let path = entry.path();
152            if path.is_dir() {
153                results.extend(collect_ought_files(&path));
154            } else if let Some(name) = path.file_name().and_then(|n| n.to_str())
155                && name.ends_with(".ought.md")
156            {
157                results.push(path);
158            }
159        }
160    }
161    results
162}
163
164/// Collapse `.` and `..` components without touching the filesystem so a
165/// path like `ought/analysis/../engine/parser.ought.md` compares equal to the
166/// `ought/engine/parser.ought.md` produced by directory traversal.
167fn normalize_path(path: &Path) -> PathBuf {
168    let mut out = PathBuf::new();
169    for component in path.components() {
170        match component {
171            Component::CurDir => {}
172            Component::ParentDir => {
173                if !out.pop() {
174                    out.push("..");
175                }
176            }
177            other => out.push(other.as_os_str()),
178        }
179    }
180    out
181}
182
183/// Resolve every `requires:` reference in `specs` to a target index.
184///
185/// Returns `(edges, errors)` where each edge is `(dependent_idx, dependency_idx)`
186/// and errors flag references that didn't match any spec in the set.
187fn resolve_references(specs: &[Spec]) -> (Vec<(usize, usize)>, Vec<ParseError>) {
188    let path_to_idx: HashMap<&PathBuf, usize> = specs
189        .iter()
190        .enumerate()
191        .map(|(i, s)| (&s.source_path, i))
192        .collect();
193
194    let mut edges = Vec::new();
195    let mut errors = Vec::new();
196
197    for (i, spec) in specs.iter().enumerate() {
198        for req in &spec.metadata.requires {
199            // Resolve the requires path relative to the spec's directory, then
200            // fall back to a raw lookup for refs written as absolute or
201            // already-root-relative paths.
202            let base_dir = spec.source_path.parent().unwrap_or(Path::new(""));
203            let resolved = normalize_path(&base_dir.join(&req.path));
204
205            match path_to_idx
206                .get(&resolved)
207                .or_else(|| path_to_idx.get(&req.path))
208            {
209                Some(&j) => edges.push((i, j)),
210                None => errors.push(ParseError {
211                    file: spec.source_path.clone(),
212                    line: 0,
213                    message: format!(
214                        "unresolved cross-reference: '{}' (resolved to '{}')",
215                        req.path.display(),
216                        resolved.display()
217                    ),
218                }),
219            }
220        }
221    }
222
223    (edges, errors)
224}
225
226/// Detect cycles in the dependency graph formed by `edges`. Uses DFS colouring.
227fn detect_cycles(specs: &[Spec], edges: &[(usize, usize)]) -> Vec<ParseError> {
228    let n = specs.len();
229    let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); n];
230    for &(dependent, dependency) in edges {
231        // Walk from dependent → dependency so a cycle is detected the same
232        // way it was historically (starts at a node, follows its requires).
233        adjacency[dependent].push(dependency);
234    }
235
236    let mut errors = Vec::new();
237    let mut visited = vec![0u8; n]; // 0=unvisited, 1=in-progress, 2=done
238
239    fn dfs(
240        node: usize,
241        adjacency: &[Vec<usize>],
242        visited: &mut [u8],
243        path: &mut Vec<usize>,
244        specs: &[Spec],
245        errors: &mut Vec<ParseError>,
246    ) {
247        visited[node] = 1;
248        path.push(node);
249
250        for &neighbor in &adjacency[node] {
251            if visited[neighbor] == 1 {
252                // Found a cycle
253                let cycle_start = path.iter().position(|&n| n == neighbor).unwrap();
254                let cycle_names: Vec<String> = path[cycle_start..]
255                    .iter()
256                    .map(|&i| specs[i].source_path.display().to_string())
257                    .collect();
258                errors.push(ParseError {
259                    file: specs[node].source_path.clone(),
260                    line: 0,
261                    message: format!(
262                        "circular dependency detected: {}",
263                        cycle_names.join(" -> ")
264                    ),
265                });
266            } else if visited[neighbor] == 0 {
267                dfs(neighbor, adjacency, visited, path, specs, errors);
268            }
269        }
270
271        path.pop();
272        visited[node] = 2;
273    }
274
275    let mut path = Vec::new();
276    for i in 0..n {
277        if visited[i] == 0 {
278            dfs(i, &adjacency, &mut visited, &mut path, specs, &mut errors);
279        }
280    }
281
282    errors
283}
284
285#[cfg(test)]
286mod tests {
287    use super::*;
288
289    use crate::types::{Metadata, SpecRef};
290
291    /// Build a minimal `Spec` at `path` that `requires:` each of `deps`.
292    /// Sections are empty — we only care about `metadata.requires` here.
293    fn spec(path: &str, deps: &[&str]) -> Spec {
294        let requires = deps
295            .iter()
296            .map(|d| SpecRef {
297                label: d.to_string(),
298                path: PathBuf::from(d),
299                anchor: None,
300            })
301            .collect();
302        Spec {
303            name: path.to_string(),
304            metadata: Metadata {
305                context: None,
306                sources: Vec::new(),
307                schemas: Vec::new(),
308                requires,
309            },
310            sections: Vec::new(),
311            source_path: PathBuf::from(path),
312        }
313    }
314
315    fn names(specs: &[&Spec]) -> Vec<String> {
316        specs.iter().map(|s| s.source_path.display().to_string()).collect()
317    }
318
319    #[test]
320    fn empty_graph_is_valid() {
321        let graph = SpecGraph::from_specs(vec![]).expect("empty graph must build");
322        assert!(graph.specs().is_empty());
323        assert!(graph.topological_order().is_empty());
324    }
325
326    #[test]
327    fn single_spec_no_requires_orders_to_itself() {
328        let graph = SpecGraph::from_specs(vec![spec("a.ought.md", &[])]).unwrap();
329        let order = graph.topological_order();
330        assert_eq!(names(&order), vec!["a.ought.md"]);
331    }
332
333    #[test]
334    fn linear_dependency_orders_dependency_before_dependent() {
335        // a requires b → b must come first
336        let graph = SpecGraph::from_specs(vec![
337            spec("a.ought.md", &["b.ought.md"]),
338            spec("b.ought.md", &[]),
339        ])
340        .unwrap();
341        let order = names(&graph.topological_order());
342        let pos_a = order.iter().position(|p| p == "a.ought.md").unwrap();
343        let pos_b = order.iter().position(|p| p == "b.ought.md").unwrap();
344        assert!(pos_b < pos_a, "b must precede a; got {order:?}");
345    }
346
347    #[test]
348    fn diamond_graph_orders_correctly() {
349        // d → {b, c}, b → a, c → a. Expect a first, d last.
350        let graph = SpecGraph::from_specs(vec![
351            spec("a.ought.md", &[]),
352            spec("b.ought.md", &["a.ought.md"]),
353            spec("c.ought.md", &["a.ought.md"]),
354            spec("d.ought.md", &["b.ought.md", "c.ought.md"]),
355        ])
356        .unwrap();
357        let order = names(&graph.topological_order());
358        let pos = |p: &str| order.iter().position(|x| x == p).unwrap();
359        assert!(pos("a.ought.md") < pos("b.ought.md"));
360        assert!(pos("a.ought.md") < pos("c.ought.md"));
361        assert!(pos("b.ought.md") < pos("d.ought.md"));
362        assert!(pos("c.ought.md") < pos("d.ought.md"));
363    }
364
365    #[test]
366    fn direct_cycle_is_rejected() {
367        // a → b, b → a
368        let errors = SpecGraph::from_specs(vec![
369            spec("a.ought.md", &["b.ought.md"]),
370            spec("b.ought.md", &["a.ought.md"]),
371        ])
372        .expect_err("cycle must be rejected");
373        assert!(
374            errors.iter().any(|e| e.message.contains("circular dependency")),
375            "expected circular-dependency error; got {errors:?}"
376        );
377    }
378
379    #[test]
380    fn self_loop_is_rejected() {
381        let errors = SpecGraph::from_specs(vec![spec("a.ought.md", &["a.ought.md"])])
382            .expect_err("self-loop must be rejected");
383        assert!(
384            errors.iter().any(|e| e.message.contains("circular dependency")),
385            "expected circular-dependency error; got {errors:?}"
386        );
387    }
388
389    #[test]
390    fn unresolved_reference_is_rejected() {
391        let errors = SpecGraph::from_specs(vec![spec("a.ought.md", &["missing.ought.md"])])
392            .expect_err("unresolved ref must be rejected");
393        assert!(
394            errors
395                .iter()
396                .any(|e| e.message.contains("unresolved") && e.message.contains("missing.ought.md")),
397            "expected unresolved-reference error; got {errors:?}"
398        );
399    }
400
401    #[test]
402    fn unrelated_specs_have_no_ordering_constraint() {
403        // Two disconnected specs — both appear in the order, either first.
404        let graph = SpecGraph::from_specs(vec![
405            spec("a.ought.md", &[]),
406            spec("b.ought.md", &[]),
407        ])
408        .unwrap();
409        let order = names(&graph.topological_order());
410        assert_eq!(order.len(), 2);
411        assert!(order.contains(&"a.ought.md".to_string()));
412        assert!(order.contains(&"b.ought.md".to_string()));
413    }
414}