Skip to main content

straymark_core/
graph.rs

1//! Typed, bidirectional, orphan-preserving knowledge graph over StrayMark
2//! documents.
3//!
4//! Generalizes the CLI's original `audit_engine::build_traceability` adjacency
5//! (forward-only, `related`-only, orphan-dropping) into the graph model of
6//! Loom Spec 001 §3: one node per document, typed edges derived from the
7//! frontmatter field that produced them, a `resolved` flag for dangling
8//! references, and bidirectional adjacency.
9//!
10//! Determinism: nodes keep the input document order; edges keep declaration
11//! order (per document, fields in a fixed order; within a field, list order).
12
13use std::collections::HashMap;
14use std::path::PathBuf;
15
16use serde::Serialize;
17
18use crate::document::StrayMarkDocument;
19
20/// The frontmatter field an edge was derived from (Spec 001 §3.2).
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
22#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
23pub enum EdgeType {
24    /// `related` — stored directed, semantically undirected.
25    RelatedTo,
26    /// `supersedes` — directed, semantic.
27    Supersedes,
28    /// `alternatives_documented` — directed.
29    DocumentsAlternative,
30    /// `api_changes` — node → external API string (usually unresolved).
31    ChangesApi,
32    /// `originating_ailogs` — document origin.
33    OriginatesFrom,
34}
35
36impl EdgeType {
37    /// Stable string form (matches the serialized representation).
38    pub fn as_str(&self) -> &'static str {
39        match self {
40            EdgeType::RelatedTo => "RELATED_TO",
41            EdgeType::Supersedes => "SUPERSEDES",
42            EdgeType::DocumentsAlternative => "DOCUMENTS_ALTERNATIVE",
43            EdgeType::ChangesApi => "CHANGES_API",
44            EdgeType::OriginatesFrom => "ORIGINATES_FROM",
45        }
46    }
47}
48
49/// One node per discovered document, carrying the metadata the visualization
50/// surfaces need (Spec 001 §3.1).
51#[derive(Debug, Clone, Serialize)]
52pub struct Node {
53    /// Frontmatter `id`, falling back to the filename stem.
54    pub id: String,
55    /// True when `id` came from frontmatter (not the filename fallback).
56    pub has_explicit_id: bool,
57    pub doc_type: String,
58    pub title: String,
59    pub status: String,
60    pub risk_level: String,
61    pub created: Option<String>,
62    pub agent: Option<String>,
63    pub tags: Vec<String>,
64    pub path: PathBuf,
65    /// Incoming edges (resolved references from other documents).
66    pub degree_in: usize,
67    /// Outgoing edges (including unresolved/dangling references).
68    pub degree_out: usize,
69}
70
71impl Node {
72    /// A document nothing links to and that links to nothing.
73    pub fn is_orphan(&self) -> bool {
74        self.degree_in + self.degree_out == 0
75    }
76}
77
78/// A directed, typed edge. `source`/`target` are node ids; an edge whose
79/// target id is not present in the corpus is kept with `resolved: false`
80/// (a dangling reference — a first-class signal, never silently dropped).
81#[derive(Debug, Clone, Serialize)]
82pub struct Edge {
83    pub source: String,
84    pub target: String,
85    pub edge_type: EdgeType,
86    pub resolved: bool,
87}
88
89/// The typed, bidirectional document multigraph.
90#[derive(Debug, Default)]
91pub struct Graph {
92    /// Nodes in input (document discovery) order.
93    pub nodes: Vec<Node>,
94    /// Edges in declaration order.
95    pub edges: Vec<Edge>,
96    node_index: HashMap<String, usize>,
97    out_edges: Vec<Vec<usize>>,
98    in_edges: Vec<Vec<usize>>,
99}
100
101/// (field accessor, edge type) pairs in fixed declaration order.
102fn edge_sources(
103    doc: &StrayMarkDocument,
104) -> impl Iterator<Item = (EdgeType, &Vec<String>)> {
105    let fm = &doc.frontmatter;
106    [
107        (EdgeType::RelatedTo, fm.related.as_ref()),
108        (EdgeType::Supersedes, fm.supersedes.as_ref()),
109        (EdgeType::DocumentsAlternative, fm.alternatives_documented.as_ref()),
110        (EdgeType::ChangesApi, fm.api_changes.as_ref()),
111        (EdgeType::OriginatesFrom, fm.originating_ailogs.as_ref()),
112    ]
113    .into_iter()
114    .filter_map(|(et, list)| list.map(|l| (et, l)))
115}
116
117impl Graph {
118    /// Build the graph from parsed documents. Never drops a node or an edge:
119    /// orphan documents become isolated nodes; references to ids absent from
120    /// the corpus become `resolved: false` edges.
121    pub fn build(docs: &[&StrayMarkDocument]) -> Graph {
122        let mut graph = Graph::default();
123
124        // Pass 1 — nodes, in input order.
125        for doc in docs {
126            let (id, has_explicit_id) = match &doc.frontmatter.id {
127                Some(id) => (id.clone(), true),
128                None => (
129                    doc.path
130                        .file_stem()
131                        .and_then(|s| s.to_str())
132                        .unwrap_or(&doc.filename)
133                        .to_string(),
134                    false,
135                ),
136            };
137            let fm = &doc.frontmatter;
138            let node = Node {
139                id: id.clone(),
140                has_explicit_id,
141                doc_type: doc.doc_type.prefix().to_string(),
142                title: fm.title.clone().unwrap_or_else(|| "Untitled".into()),
143                status: fm.status.clone().unwrap_or_else(|| "unknown".into()),
144                risk_level: fm.risk_level.clone().unwrap_or_else(|| "unset".into()),
145                created: fm.created.clone(),
146                agent: fm.agent.clone(),
147                tags: fm.tags.clone().unwrap_or_default(),
148                path: doc.path.clone(),
149                degree_in: 0,
150                degree_out: 0,
151            };
152            // First document wins on id collision (mirrors no strong guarantee
153            // in the legacy code; collisions are a corpus defect surfaced by
154            // `straymark validate`).
155            graph.node_index.entry(id).or_insert(graph.nodes.len());
156            graph.nodes.push(node);
157            graph.out_edges.push(Vec::new());
158            graph.in_edges.push(Vec::new());
159        }
160
161        // Pass 2 — typed edges, in declaration order.
162        for (source_idx, doc) in docs.iter().enumerate() {
163            let source_id = graph.nodes[source_idx].id.clone();
164            for (edge_type, targets) in edge_sources(doc) {
165                for target in targets {
166                    let target_idx = graph.node_index.get(target.as_str()).copied();
167                    let edge_idx = graph.edges.len();
168                    graph.edges.push(Edge {
169                        source: source_id.clone(),
170                        target: target.clone(),
171                        edge_type,
172                        resolved: target_idx.is_some(),
173                    });
174                    graph.out_edges[source_idx].push(edge_idx);
175                    graph.nodes[source_idx].degree_out += 1;
176                    if let Some(t) = target_idx {
177                        graph.in_edges[t].push(edge_idx);
178                        graph.nodes[t].degree_in += 1;
179                    }
180                }
181            }
182        }
183
184        graph
185    }
186
187    /// Node lookup by id.
188    pub fn node(&self, id: &str) -> Option<&Node> {
189        self.node_index.get(id).map(|&i| &self.nodes[i])
190    }
191
192    /// Outgoing edges of a node, in declaration order.
193    pub fn out_edges(&self, id: &str) -> impl Iterator<Item = &Edge> {
194        self.node_index
195            .get(id)
196            .map(|&i| self.out_edges[i].as_slice())
197            .unwrap_or(&[])
198            .iter()
199            .map(|&e| &self.edges[e])
200    }
201
202    /// Incoming (resolved) edges of a node.
203    pub fn in_edges(&self, id: &str) -> impl Iterator<Item = &Edge> {
204        self.node_index
205            .get(id)
206            .map(|&i| self.in_edges[i].as_slice())
207            .unwrap_or(&[])
208            .iter()
209            .map(|&e| &self.edges[e])
210    }
211
212    /// Documents with no links in either direction, in input order.
213    pub fn orphans(&self) -> impl Iterator<Item = &Node> {
214        self.nodes.iter().filter(|n| n.is_orphan())
215    }
216
217    /// Edges whose target id is absent from the corpus, in declaration order.
218    pub fn dangling_edges(&self) -> impl Iterator<Item = &Edge> {
219        self.edges.iter().filter(|e| !e.resolved)
220    }
221
222    /// The thread of a node (Spec 001 §3.3, story S2): *"everything it links
223    /// to and everything that links to it, transitively"* — its transitive
224    /// **descendants** (following out-edges) plus its transitive **ancestors**
225    /// (following in-edges), optionally bounded by `depth` per direction.
226    ///
227    /// Deliberately NOT the undirected connected component: that would also
228    /// pull in "siblings" (documents merely co-citing a shared neighbor), and
229    /// in tightly-linked corpora it degenerates into highlighting everything.
230    /// The thread is the node's line of reasoning, not its neighborhood.
231    ///
232    /// Returns the node ids and edge indices (into [`Graph::edges`]) to
233    /// highlight. The edge set is every edge whose endpoints both belong to
234    /// the thread, plus the dangling out-edges of thread members.
235    pub fn thread(&self, id: &str, depth: Option<usize>) -> Option<Thread> {
236        let &start = self.node_index.get(id)?;
237
238        let mut in_thread = vec![false; self.nodes.len()];
239        let mut node_ids = Vec::new();
240        in_thread[start] = true;
241        node_ids.push(self.nodes[start].id.clone());
242
243        // Two bounded BFS passes: forward over out-edges, backward over
244        // in-edges. `Forward` follows what the document links to; `Backward`
245        // follows what links to the document.
246        for forward in [true, false] {
247            let mut visited = vec![false; self.nodes.len()];
248            visited[start] = true;
249            let mut frontier = vec![start];
250            let mut level = 0usize;
251            while !frontier.is_empty() && depth.map_or(true, |d| level < d) {
252                let mut next = Vec::new();
253                for idx in frontier {
254                    let edges = if forward { &self.out_edges[idx] } else { &self.in_edges[idx] };
255                    for &e in edges {
256                        let edge = &self.edges[e];
257                        let neighbor = if forward { edge.target.as_str() } else { edge.source.as_str() };
258                        if let Some(&n) = self.node_index.get(neighbor) {
259                            if !visited[n] {
260                                visited[n] = true;
261                                if !in_thread[n] {
262                                    in_thread[n] = true;
263                                    node_ids.push(self.nodes[n].id.clone());
264                                }
265                                next.push(n);
266                            }
267                        }
268                    }
269                }
270                frontier = next;
271                level += 1;
272            }
273        }
274
275        // Highlight every edge internal to the thread (including ones not
276        // walked, e.g. an ancestor linking directly to a descendant), plus
277        // dangling out-edges of thread members.
278        let edge_ids: Vec<usize> = self
279            .edges
280            .iter()
281            .enumerate()
282            .filter(|(_, edge)| {
283                let src_in = self.node_index.get(edge.source.as_str()).is_some_and(|&i| in_thread[i]);
284                if !src_in {
285                    return false;
286                }
287                match self.node_index.get(edge.target.as_str()) {
288                    Some(&t) => in_thread[t],
289                    None => !edge.resolved, // dangling out-edge of a member
290                }
291            })
292            .map(|(i, _)| i)
293            .collect();
294
295        Some(Thread { node_ids, edge_ids })
296    }
297}
298
299/// The highlight set for a node's thread (Spec 001 §3.3): node ids plus edge
300/// indices into [`Graph::edges`].
301#[derive(Debug, Clone, Serialize)]
302pub struct Thread {
303    pub node_ids: Vec<String>,
304    pub edge_ids: Vec<usize>,
305}
306
307#[cfg(test)]
308mod tests {
309    use super::*;
310    use crate::document::{DocType, Frontmatter};
311    use std::path::PathBuf;
312
313    fn make_doc(filename: &str, doc_type: DocType, fm: Frontmatter) -> StrayMarkDocument {
314        StrayMarkDocument {
315            path: PathBuf::from(format!(".straymark/test/{}", filename)),
316            filename: filename.to_string(),
317            doc_type,
318            frontmatter: fm,
319            body: String::new(),
320        }
321    }
322
323    #[test]
324    fn test_empty_graph() {
325        let g = Graph::build(&[]);
326        assert!(g.nodes.is_empty());
327        assert!(g.edges.is_empty());
328    }
329
330    #[test]
331    fn test_typed_edges_and_bidirectional_adjacency() {
332        let req = make_doc(
333            "REQ-2026-03-01-001-login.md",
334            DocType::Req,
335            Frontmatter {
336                id: Some("REQ-2026-03-01-001".into()),
337                related: Some(vec!["ADR-2026-03-02-001".into()]),
338                ..Default::default()
339            },
340        );
341        let adr = make_doc(
342            "ADR-2026-03-02-001-jwt.md",
343            DocType::Adr,
344            Frontmatter {
345                id: Some("ADR-2026-03-02-001".into()),
346                supersedes: Some(vec!["ADR-2026-01-01-001".into()]),
347                api_changes: Some(vec!["POST /login".into()]),
348                ..Default::default()
349            },
350        );
351        let old_adr = make_doc(
352            "ADR-2026-01-01-001-old.md",
353            DocType::Adr,
354            Frontmatter {
355                id: Some("ADR-2026-01-01-001".into()),
356                ..Default::default()
357            },
358        );
359        let docs = [&req, &adr, &old_adr];
360        let g = Graph::build(&docs);
361
362        assert_eq!(g.nodes.len(), 3);
363        assert_eq!(g.edges.len(), 3);
364
365        let types: Vec<EdgeType> = g.edges.iter().map(|e| e.edge_type).collect();
366        assert_eq!(
367            types,
368            vec![EdgeType::RelatedTo, EdgeType::Supersedes, EdgeType::ChangesApi]
369        );
370
371        // Bidirectional: "what links TO the old ADR" answers in one lookup.
372        let incoming: Vec<&str> = g
373            .in_edges("ADR-2026-01-01-001")
374            .map(|e| e.source.as_str())
375            .collect();
376        assert_eq!(incoming, vec!["ADR-2026-03-02-001"]);
377
378        // api_changes points at an external string → unresolved, but kept.
379        let api_edge = g.edges.iter().find(|e| e.edge_type == EdgeType::ChangesApi).unwrap();
380        assert!(!api_edge.resolved);
381        assert_eq!(api_edge.target, "POST /login");
382    }
383
384    #[test]
385    fn test_orphans_preserved() {
386        let orphan = make_doc(
387            "TDE-2026-04-01-001-orphan.md",
388            DocType::Tde,
389            Frontmatter {
390                id: Some("TDE-2026-04-01-001".into()),
391                ..Default::default()
392            },
393        );
394        let docs = [&orphan];
395        let g = Graph::build(&docs);
396        assert_eq!(g.nodes.len(), 1);
397        assert_eq!(g.orphans().count(), 1);
398        assert!(g.node("TDE-2026-04-01-001").unwrap().is_orphan());
399    }
400
401    #[test]
402    fn test_dangling_reference_surfaced() {
403        let doc = make_doc(
404            "ADR-2026-03-02-001-jwt.md",
405            DocType::Adr,
406            Frontmatter {
407                id: Some("ADR-2026-03-02-001".into()),
408                related: Some(vec!["MISSING-2026-01-01-001".into()]),
409                ..Default::default()
410            },
411        );
412        let docs = [&doc];
413        let g = Graph::build(&docs);
414        assert_eq!(g.edges.len(), 1);
415        assert!(!g.edges[0].resolved);
416        assert_eq!(g.dangling_edges().count(), 1);
417        // Degree out counts the attempt; the node is not an orphan.
418        assert!(!g.node("ADR-2026-03-02-001").unwrap().is_orphan());
419    }
420
421    #[test]
422    fn test_filename_stem_fallback_id() {
423        let doc = make_doc(
424            "AILOG-2026-03-03-001-impl.md",
425            DocType::Ailog,
426            Frontmatter::default(),
427        );
428        let docs = [&doc];
429        let g = Graph::build(&docs);
430        let node = &g.nodes[0];
431        assert_eq!(node.id, "AILOG-2026-03-03-001-impl");
432        assert!(!node.has_explicit_id);
433    }
434
435    #[test]
436    fn test_originating_ailogs_edge() {
437        let ailog = make_doc(
438            "AILOG-2026-03-03-001-impl.md",
439            DocType::Ailog,
440            Frontmatter {
441                id: Some("AILOG-2026-03-03-001".into()),
442                ..Default::default()
443            },
444        );
445        let adr = make_doc(
446            "ADR-2026-03-05-001-followup.md",
447            DocType::Adr,
448            Frontmatter {
449                id: Some("ADR-2026-03-05-001".into()),
450                originating_ailogs: Some(vec!["AILOG-2026-03-03-001".into()]),
451                ..Default::default()
452            },
453        );
454        let docs = [&ailog, &adr];
455        let g = Graph::build(&docs);
456        let e = &g.edges[0];
457        assert_eq!(e.edge_type, EdgeType::OriginatesFrom);
458        assert!(e.resolved);
459        assert_eq!(e.source, "ADR-2026-03-05-001");
460        assert_eq!(e.target, "AILOG-2026-03-03-001");
461    }
462
463    #[test]
464    fn test_thread_full_component_and_depth() {
465        // REQ → ADR → AILOG chain; TDE isolated.
466        let req = make_doc(
467            "REQ-1-a.md",
468            DocType::Req,
469            Frontmatter {
470                id: Some("REQ-1".into()),
471                related: Some(vec!["ADR-1".into()]),
472                ..Default::default()
473            },
474        );
475        let adr = make_doc(
476            "ADR-1-b.md",
477            DocType::Adr,
478            Frontmatter {
479                id: Some("ADR-1".into()),
480                related: Some(vec!["AILOG-1".into()]),
481                ..Default::default()
482            },
483        );
484        let ailog = make_doc(
485            "AILOG-1-c.md",
486            DocType::Ailog,
487            Frontmatter {
488                id: Some("AILOG-1".into()),
489                ..Default::default()
490            },
491        );
492        let tde = make_doc(
493            "TDE-1-d.md",
494            DocType::Tde,
495            Frontmatter {
496                id: Some("TDE-1".into()),
497                ..Default::default()
498            },
499        );
500        let docs = [&req, &adr, &ailog, &tde];
501        let g = Graph::build(&docs);
502
503        // Full component from the middle node reaches both ends, not the orphan.
504        let t = g.thread("ADR-1", None).unwrap();
505        let mut nodes = t.node_ids.clone();
506        nodes.sort();
507        assert_eq!(nodes, vec!["ADR-1", "AILOG-1", "REQ-1"]);
508        assert_eq!(t.edge_ids.len(), 2);
509
510        // Depth 1 from REQ: only the direct neighbor.
511        let t1 = g.thread("REQ-1", Some(1)).unwrap();
512        let mut nodes1 = t1.node_ids.clone();
513        nodes1.sort();
514        assert_eq!(nodes1, vec!["ADR-1", "REQ-1"]);
515
516        // Unknown id → None; isolated node → itself only.
517        assert!(g.thread("NOPE", None).is_none());
518        let t_orphan = g.thread("TDE-1", None).unwrap();
519        assert_eq!(t_orphan.node_ids, vec!["TDE-1"]);
520        assert!(t_orphan.edge_ids.is_empty());
521    }
522
523    #[test]
524    fn test_thread_excludes_siblings() {
525        // A → C ← B: B is a "sibling" of A (co-cites C) — not part of A's
526        // line of reasoning, so it must NOT light up (S2 semantics).
527        let a = make_doc(
528            "REQ-A-a.md",
529            DocType::Req,
530            Frontmatter {
531                id: Some("REQ-A".into()),
532                related: Some(vec!["ADR-C".into()]),
533                ..Default::default()
534            },
535        );
536        let b = make_doc(
537            "REQ-B-b.md",
538            DocType::Req,
539            Frontmatter {
540                id: Some("REQ-B".into()),
541                related: Some(vec!["ADR-C".into()]),
542                ..Default::default()
543            },
544        );
545        let c = make_doc(
546            "ADR-C-c.md",
547            DocType::Adr,
548            Frontmatter {
549                id: Some("ADR-C".into()),
550                ..Default::default()
551            },
552        );
553        let docs = [&a, &b, &c];
554        let g = Graph::build(&docs);
555
556        let t = g.thread("REQ-A", None).unwrap();
557        let mut nodes = t.node_ids.clone();
558        nodes.sort();
559        assert_eq!(nodes, vec!["ADR-C", "REQ-A"]); // B excluded
560        assert_eq!(t.edge_ids, vec![0]); // only A→C, not B→C
561
562        // From C, both citers are ancestors — they DO light up.
563        let tc = g.thread("ADR-C", None).unwrap();
564        let mut nodes_c = tc.node_ids.clone();
565        nodes_c.sort();
566        assert_eq!(nodes_c, vec!["ADR-C", "REQ-A", "REQ-B"]);
567    }
568
569    #[test]
570    fn test_thread_includes_dangling_edges() {
571        let doc = make_doc(
572            "ADR-1-a.md",
573            DocType::Adr,
574            Frontmatter {
575                id: Some("ADR-1".into()),
576                related: Some(vec!["MISSING-1".into()]),
577                ..Default::default()
578            },
579        );
580        let docs = [&doc];
581        let g = Graph::build(&docs);
582        let t = g.thread("ADR-1", None).unwrap();
583        assert_eq!(t.node_ids, vec!["ADR-1"]);
584        assert_eq!(t.edge_ids, vec![0]); // the dangling edge belongs to the thread
585    }
586
587    #[test]
588    fn test_deterministic_order() {
589        let a = make_doc(
590            "REQ-2026-03-01-001-a.md",
591            DocType::Req,
592            Frontmatter {
593                id: Some("REQ-2026-03-01-001".into()),
594                related: Some(vec!["B-2".into(), "B-1".into()]),
595                ..Default::default()
596            },
597        );
598        let docs = [&a];
599        let g = Graph::build(&docs);
600        let targets: Vec<&str> = g.edges.iter().map(|e| e.target.as_str()).collect();
601        // Declaration order of the `related` list is preserved.
602        assert_eq!(targets, vec!["B-2", "B-1"]);
603    }
604}