Skip to main content

straymark_core/
graph.rs

1//! Typed, bidirectional, orphan-preserving knowledge graph over StrayMark
2//! documents.
3//!
4//! Generalizes the CLI's original `audit_engine::build_traceability` adjacency
5//! (forward-only, `related`-only, orphan-dropping) into the graph model of
6//! Loom Spec 001 §3: one node per document, typed edges derived from the
7//! frontmatter field that produced them, a `resolved` flag for dangling
8//! references, and bidirectional adjacency.
9//!
10//! Determinism: nodes keep the input document order; edges keep declaration
11//! order (per document, fields in a fixed order; within a field, list order).
12
13use std::collections::HashMap;
14use std::path::PathBuf;
15
16use serde::Serialize;
17
18use crate::document::StrayMarkDocument;
19
20/// The frontmatter field an edge was derived from (Spec 001 §3.2).
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
22#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
23pub enum EdgeType {
24    /// `related` — stored directed, semantically undirected.
25    RelatedTo,
26    /// `supersedes` — directed, semantic.
27    Supersedes,
28    /// `alternatives_documented` — directed.
29    DocumentsAlternative,
30    /// `api_changes` — node → external API string (usually unresolved).
31    ChangesApi,
32    /// `originating_ailogs` — document origin.
33    OriginatesFrom,
34}
35
36impl EdgeType {
37    /// Stable string form (matches the serialized representation).
38    pub fn as_str(&self) -> &'static str {
39        match self {
40            EdgeType::RelatedTo => "RELATED_TO",
41            EdgeType::Supersedes => "SUPERSEDES",
42            EdgeType::DocumentsAlternative => "DOCUMENTS_ALTERNATIVE",
43            EdgeType::ChangesApi => "CHANGES_API",
44            EdgeType::OriginatesFrom => "ORIGINATES_FROM",
45        }
46    }
47}
48
49/// One node per discovered document, carrying the metadata the visualization
50/// surfaces need (Spec 001 §3.1).
51#[derive(Debug, Clone, Serialize)]
52pub struct Node {
53    /// Frontmatter `id`, falling back to the filename stem.
54    pub id: String,
55    /// True when `id` came from frontmatter (not the filename fallback).
56    pub has_explicit_id: bool,
57    pub doc_type: String,
58    pub title: String,
59    pub status: String,
60    pub risk_level: String,
61    pub created: Option<String>,
62    pub agent: Option<String>,
63    pub tags: Vec<String>,
64    pub path: PathBuf,
65    /// Incoming edges (resolved references from other documents).
66    pub degree_in: usize,
67    /// Outgoing edges (including unresolved/dangling references).
68    pub degree_out: usize,
69}
70
71impl Node {
72    /// A document nothing links to and that links to nothing.
73    pub fn is_orphan(&self) -> bool {
74        self.degree_in + self.degree_out == 0
75    }
76}
77
78/// A directed, typed edge. `source`/`target` are node ids; an edge whose
79/// target id is not present in the corpus is kept with `resolved: false`
80/// (a dangling reference — a first-class signal, never silently dropped).
81#[derive(Debug, Clone, Serialize)]
82pub struct Edge {
83    pub source: String,
84    pub target: String,
85    pub edge_type: EdgeType,
86    pub resolved: bool,
87}
88
89/// The typed, bidirectional document multigraph.
90#[derive(Debug, Default)]
91pub struct Graph {
92    /// Nodes in input (document discovery) order.
93    pub nodes: Vec<Node>,
94    /// Edges in declaration order.
95    pub edges: Vec<Edge>,
96    node_index: HashMap<String, usize>,
97    out_edges: Vec<Vec<usize>>,
98    in_edges: Vec<Vec<usize>>,
99}
100
101/// (field accessor, edge type) pairs in fixed declaration order.
102fn edge_sources(
103    doc: &StrayMarkDocument,
104) -> impl Iterator<Item = (EdgeType, &Vec<String>)> {
105    let fm = &doc.frontmatter;
106    [
107        (EdgeType::RelatedTo, fm.related.as_ref()),
108        (EdgeType::Supersedes, fm.supersedes.as_ref()),
109        (EdgeType::DocumentsAlternative, fm.alternatives_documented.as_ref()),
110        (EdgeType::ChangesApi, fm.api_changes.as_ref()),
111        (EdgeType::OriginatesFrom, fm.originating_ailogs.as_ref()),
112    ]
113    .into_iter()
114    .filter_map(|(et, list)| list.map(|l| (et, l)))
115}
116
117impl Graph {
118    /// Build the graph from parsed documents. Never drops a node or an edge:
119    /// orphan documents become isolated nodes; references to ids absent from
120    /// the corpus become `resolved: false` edges.
121    pub fn build(docs: &[&StrayMarkDocument]) -> Graph {
122        let mut graph = Graph::default();
123
124        // Pass 1 — nodes, in input order.
125        for doc in docs {
126            let (id, has_explicit_id) = match &doc.frontmatter.id {
127                Some(id) => (id.clone(), true),
128                None => (
129                    doc.path
130                        .file_stem()
131                        .and_then(|s| s.to_str())
132                        .unwrap_or(&doc.filename)
133                        .to_string(),
134                    false,
135                ),
136            };
137            let fm = &doc.frontmatter;
138            let node = Node {
139                id: id.clone(),
140                has_explicit_id,
141                doc_type: doc.doc_type.prefix().to_string(),
142                title: fm.title.clone().unwrap_or_else(|| "Untitled".into()),
143                status: fm.status.clone().unwrap_or_else(|| "unknown".into()),
144                risk_level: fm.risk_level.clone().unwrap_or_else(|| "unset".into()),
145                created: fm.created.clone(),
146                agent: fm.agent.clone(),
147                tags: fm.tags.clone().unwrap_or_default(),
148                path: doc.path.clone(),
149                degree_in: 0,
150                degree_out: 0,
151            };
152            // First document wins on id collision (mirrors no strong guarantee
153            // in the legacy code; collisions are a corpus defect surfaced by
154            // `straymark validate`).
155            graph.node_index.entry(id).or_insert(graph.nodes.len());
156            graph.nodes.push(node);
157            graph.out_edges.push(Vec::new());
158            graph.in_edges.push(Vec::new());
159        }
160
161        // Pass 2 — typed edges, in declaration order.
162        for (source_idx, doc) in docs.iter().enumerate() {
163            let source_id = graph.nodes[source_idx].id.clone();
164            for (edge_type, targets) in edge_sources(doc) {
165                for target in targets {
166                    let target_idx = graph.node_index.get(target.as_str()).copied();
167                    let edge_idx = graph.edges.len();
168                    graph.edges.push(Edge {
169                        source: source_id.clone(),
170                        target: target.clone(),
171                        edge_type,
172                        resolved: target_idx.is_some(),
173                    });
174                    graph.out_edges[source_idx].push(edge_idx);
175                    graph.nodes[source_idx].degree_out += 1;
176                    if let Some(t) = target_idx {
177                        graph.in_edges[t].push(edge_idx);
178                        graph.nodes[t].degree_in += 1;
179                    }
180                }
181            }
182        }
183
184        graph
185    }
186
187    /// Node lookup by id.
188    pub fn node(&self, id: &str) -> Option<&Node> {
189        self.node_index.get(id).map(|&i| &self.nodes[i])
190    }
191
192    /// Outgoing edges of a node, in declaration order.
193    pub fn out_edges(&self, id: &str) -> impl Iterator<Item = &Edge> {
194        self.node_index
195            .get(id)
196            .map(|&i| self.out_edges[i].as_slice())
197            .unwrap_or(&[])
198            .iter()
199            .map(|&e| &self.edges[e])
200    }
201
202    /// Incoming (resolved) edges of a node.
203    pub fn in_edges(&self, id: &str) -> impl Iterator<Item = &Edge> {
204        self.node_index
205            .get(id)
206            .map(|&i| self.in_edges[i].as_slice())
207            .unwrap_or(&[])
208            .iter()
209            .map(|&e| &self.edges[e])
210    }
211
212    /// Documents with no links in either direction, in input order.
213    pub fn orphans(&self) -> impl Iterator<Item = &Node> {
214        self.nodes.iter().filter(|n| n.is_orphan())
215    }
216
217    /// Edges whose target id is absent from the corpus, in declaration order.
218    pub fn dangling_edges(&self) -> impl Iterator<Item = &Edge> {
219        self.edges.iter().filter(|e| !e.resolved)
220    }
221}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226    use crate::document::{DocType, Frontmatter};
227    use std::path::PathBuf;
228
229    fn make_doc(filename: &str, doc_type: DocType, fm: Frontmatter) -> StrayMarkDocument {
230        StrayMarkDocument {
231            path: PathBuf::from(format!(".straymark/test/{}", filename)),
232            filename: filename.to_string(),
233            doc_type,
234            frontmatter: fm,
235            body: String::new(),
236        }
237    }
238
239    #[test]
240    fn test_empty_graph() {
241        let g = Graph::build(&[]);
242        assert!(g.nodes.is_empty());
243        assert!(g.edges.is_empty());
244    }
245
246    #[test]
247    fn test_typed_edges_and_bidirectional_adjacency() {
248        let req = make_doc(
249            "REQ-2026-03-01-001-login.md",
250            DocType::Req,
251            Frontmatter {
252                id: Some("REQ-2026-03-01-001".into()),
253                related: Some(vec!["ADR-2026-03-02-001".into()]),
254                ..Default::default()
255            },
256        );
257        let adr = make_doc(
258            "ADR-2026-03-02-001-jwt.md",
259            DocType::Adr,
260            Frontmatter {
261                id: Some("ADR-2026-03-02-001".into()),
262                supersedes: Some(vec!["ADR-2026-01-01-001".into()]),
263                api_changes: Some(vec!["POST /login".into()]),
264                ..Default::default()
265            },
266        );
267        let old_adr = make_doc(
268            "ADR-2026-01-01-001-old.md",
269            DocType::Adr,
270            Frontmatter {
271                id: Some("ADR-2026-01-01-001".into()),
272                ..Default::default()
273            },
274        );
275        let docs = [&req, &adr, &old_adr];
276        let g = Graph::build(&docs);
277
278        assert_eq!(g.nodes.len(), 3);
279        assert_eq!(g.edges.len(), 3);
280
281        let types: Vec<EdgeType> = g.edges.iter().map(|e| e.edge_type).collect();
282        assert_eq!(
283            types,
284            vec![EdgeType::RelatedTo, EdgeType::Supersedes, EdgeType::ChangesApi]
285        );
286
287        // Bidirectional: "what links TO the old ADR" answers in one lookup.
288        let incoming: Vec<&str> = g
289            .in_edges("ADR-2026-01-01-001")
290            .map(|e| e.source.as_str())
291            .collect();
292        assert_eq!(incoming, vec!["ADR-2026-03-02-001"]);
293
294        // api_changes points at an external string → unresolved, but kept.
295        let api_edge = g.edges.iter().find(|e| e.edge_type == EdgeType::ChangesApi).unwrap();
296        assert!(!api_edge.resolved);
297        assert_eq!(api_edge.target, "POST /login");
298    }
299
300    #[test]
301    fn test_orphans_preserved() {
302        let orphan = make_doc(
303            "TDE-2026-04-01-001-orphan.md",
304            DocType::Tde,
305            Frontmatter {
306                id: Some("TDE-2026-04-01-001".into()),
307                ..Default::default()
308            },
309        );
310        let docs = [&orphan];
311        let g = Graph::build(&docs);
312        assert_eq!(g.nodes.len(), 1);
313        assert_eq!(g.orphans().count(), 1);
314        assert!(g.node("TDE-2026-04-01-001").unwrap().is_orphan());
315    }
316
317    #[test]
318    fn test_dangling_reference_surfaced() {
319        let doc = make_doc(
320            "ADR-2026-03-02-001-jwt.md",
321            DocType::Adr,
322            Frontmatter {
323                id: Some("ADR-2026-03-02-001".into()),
324                related: Some(vec!["MISSING-2026-01-01-001".into()]),
325                ..Default::default()
326            },
327        );
328        let docs = [&doc];
329        let g = Graph::build(&docs);
330        assert_eq!(g.edges.len(), 1);
331        assert!(!g.edges[0].resolved);
332        assert_eq!(g.dangling_edges().count(), 1);
333        // Degree out counts the attempt; the node is not an orphan.
334        assert!(!g.node("ADR-2026-03-02-001").unwrap().is_orphan());
335    }
336
337    #[test]
338    fn test_filename_stem_fallback_id() {
339        let doc = make_doc(
340            "AILOG-2026-03-03-001-impl.md",
341            DocType::Ailog,
342            Frontmatter::default(),
343        );
344        let docs = [&doc];
345        let g = Graph::build(&docs);
346        let node = &g.nodes[0];
347        assert_eq!(node.id, "AILOG-2026-03-03-001-impl");
348        assert!(!node.has_explicit_id);
349    }
350
351    #[test]
352    fn test_originating_ailogs_edge() {
353        let ailog = make_doc(
354            "AILOG-2026-03-03-001-impl.md",
355            DocType::Ailog,
356            Frontmatter {
357                id: Some("AILOG-2026-03-03-001".into()),
358                ..Default::default()
359            },
360        );
361        let adr = make_doc(
362            "ADR-2026-03-05-001-followup.md",
363            DocType::Adr,
364            Frontmatter {
365                id: Some("ADR-2026-03-05-001".into()),
366                originating_ailogs: Some(vec!["AILOG-2026-03-03-001".into()]),
367                ..Default::default()
368            },
369        );
370        let docs = [&ailog, &adr];
371        let g = Graph::build(&docs);
372        let e = &g.edges[0];
373        assert_eq!(e.edge_type, EdgeType::OriginatesFrom);
374        assert!(e.resolved);
375        assert_eq!(e.source, "ADR-2026-03-05-001");
376        assert_eq!(e.target, "AILOG-2026-03-03-001");
377    }
378
379    #[test]
380    fn test_deterministic_order() {
381        let a = make_doc(
382            "REQ-2026-03-01-001-a.md",
383            DocType::Req,
384            Frontmatter {
385                id: Some("REQ-2026-03-01-001".into()),
386                related: Some(vec!["B-2".into(), "B-1".into()]),
387                ..Default::default()
388            },
389        );
390        let docs = [&a];
391        let g = Graph::build(&docs);
392        let targets: Vec<&str> = g.edges.iter().map(|e| e.target.as_str()).collect();
393        // Declaration order of the `related` list is preserved.
394        assert_eq!(targets, vec!["B-2", "B-1"]);
395    }
396}