Skip to main content

fabryk_graph/
extractor.rs

1//! GraphExtractor trait for domain-specific graph extraction.
2//!
3//! This module defines the core abstraction that enables Fabryk to support
4//! multiple knowledge domains. Each domain implements `GraphExtractor` to
5//! define how its content is transformed into graph nodes and edges.
6//!
7//! # Design Philosophy
8//!
9//! The trait separates extraction (parsing) from conversion (mapping):
10//!
11//! - `extract_node()` / `extract_edges()`: Parse domain-specific data
12//! - `to_graph_node()` / `to_graph_edges()`: Convert to generic types
13//!
14//! This separation keeps `GraphBuilder` domain-agnostic while allowing
15//! full customization of content interpretation.
16
17use crate::{Edge, Node};
18use fabryk_core::Result;
19use std::path::Path;
20
21/// Trait for extracting graph data from domain-specific content.
22///
23/// Each knowledge domain (music theory, math, etc.) implements this trait
24/// to define how its markdown files with frontmatter are transformed into
25/// graph nodes and edges.
26///
27/// # Associated Types
28///
29/// - `NodeData`: Domain-specific node information (e.g., `ConceptCard`)
30/// - `EdgeData`: Domain-specific relationship information (e.g., `RelatedConcepts`)
31///
32/// # Lifecycle
33///
34/// For each content file, `GraphBuilder` calls:
35///
36/// 1. `extract_node()` - Parse frontmatter + content into `NodeData`
37/// 2. `extract_edges()` - Parse relationship data into `EdgeData`
38/// 3. `to_graph_node()` - Convert `NodeData` to generic `Node`
39/// 4. `to_graph_edges()` - Convert `EdgeData` to generic `Vec<Edge>`
40pub trait GraphExtractor: Send + Sync {
41    /// Domain-specific node data extracted from content.
42    type NodeData: Clone + Send + Sync;
43
44    /// Domain-specific edge/relationship data extracted from content.
45    type EdgeData: Clone + Send + Sync;
46
47    /// Extract node data from a content file.
48    ///
49    /// # Arguments
50    ///
51    /// * `base_path` - Root directory for content
52    /// * `file_path` - Full path to the file being processed
53    /// * `frontmatter` - Parsed YAML frontmatter as generic Value
54    /// * `content` - Markdown body (after frontmatter)
55    fn extract_node(
56        &self,
57        base_path: &Path,
58        file_path: &Path,
59        frontmatter: &yaml_serde::Value,
60        content: &str,
61    ) -> Result<Self::NodeData>;
62
63    /// Extract relationship/edge data from content.
64    ///
65    /// Returns `Ok(None)` if no relationships found (valid for leaf nodes).
66    fn extract_edges(
67        &self,
68        frontmatter: &yaml_serde::Value,
69        content: &str,
70    ) -> Result<Option<Self::EdgeData>>;
71
72    /// Convert domain node data to a generic graph Node.
73    fn to_graph_node(&self, node_data: &Self::NodeData) -> Node;
74
75    /// Convert domain edge data to generic graph Edges.
76    fn to_graph_edges(&self, from_id: &str, edge_data: &Self::EdgeData) -> Vec<Edge>;
77
78    /// Returns the content glob pattern for this domain.
79    ///
80    /// Used by `GraphBuilder` to discover content files.
81    /// Default: `"**/*.md"` (all markdown files recursively).
82    fn content_glob(&self) -> &str {
83        "**/*.md"
84    }
85
86    /// Returns the name of this extractor for logging/debugging.
87    fn name(&self) -> &str {
88        "unnamed"
89    }
90}
91
92// ============================================================================
93// Mock extractor for testing
94// ============================================================================
95
/// A simple mock extractor for testing.
///
/// Extracts minimal data from content files with simple frontmatter.
/// Only compiled for tests or when the `test-utils` feature is enabled.
#[cfg(any(test, feature = "test-utils"))]
pub mod mock {
    use super::*;
    use crate::Relationship;

    /// Mock node data for testing.
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct MockNodeData {
        /// Node identifier, derived from the content file's path.
        pub id: String,
        /// Frontmatter `title`, or the id when no string title is present.
        pub title: String,
        /// Frontmatter `category`, when present as a string.
        pub category: Option<String>,
    }

    /// Mock edge data for testing.
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct MockEdgeData {
        /// Targets listed under the `prerequisites` frontmatter key.
        pub prerequisites: Vec<String>,
        /// Targets listed under the `related` frontmatter key.
        pub related: Vec<String>,
    }

    /// Mock extractor that expects simple frontmatter.
    ///
    /// Expected frontmatter format:
    /// ```yaml
    /// title: "Node Title"
    /// category: "optional-category"
    /// prerequisites:
    ///   - prereq-id-1
    /// related:
    ///   - related-id-1
    /// ```
    #[derive(Clone, Debug, Default)]
    pub struct MockExtractor;

    /// Collect the string entries of the sequence stored under `key`.
    ///
    /// Yields an empty list when the key is absent or does not hold a
    /// sequence; entries that are not strings are skipped.
    fn string_list(frontmatter: &yaml_serde::Value, key: &str) -> Vec<String> {
        frontmatter
            .get(key)
            .and_then(|value| value.as_sequence())
            .map(|items| {
                items
                    .iter()
                    .filter_map(|item| item.as_str())
                    .map(String::from)
                    .collect()
            })
            .unwrap_or_default()
    }

    impl GraphExtractor for MockExtractor {
        type NodeData = MockNodeData;
        type EdgeData = MockEdgeData;

        /// Build node data from the file path and the `title` / `category`
        /// frontmatter keys.
        ///
        /// The id comes from `id_from_path(file_path)`; when that yields
        /// nothing a parse error (`"no file stem"`) is returned. A missing or
        /// non-string `title` falls back to the id.
        fn extract_node(
            &self,
            _base_path: &Path,
            file_path: &Path,
            frontmatter: &yaml_serde::Value,
            _content: &str,
        ) -> Result<Self::NodeData> {
            let id = fabryk_core::util::ids::id_from_path(file_path)
                .ok_or_else(|| fabryk_core::Error::parse("no file stem"))?;

            let title = frontmatter
                .get("title")
                .and_then(|v| v.as_str())
                .unwrap_or(&id)
                .to_string();

            let category = frontmatter
                .get("category")
                .and_then(|v| v.as_str())
                .map(String::from);

            Ok(MockNodeData {
                id,
                title,
                category,
            })
        }

        /// Read the `prerequisites` and `related` lists from the frontmatter.
        ///
        /// Returns `Ok(None)` when both lists end up empty.
        fn extract_edges(
            &self,
            frontmatter: &yaml_serde::Value,
            _content: &str,
        ) -> Result<Option<Self::EdgeData>> {
            let prerequisites = string_list(frontmatter, "prerequisites");
            let related = string_list(frontmatter, "related");

            if prerequisites.is_empty() && related.is_empty() {
                return Ok(None);
            }

            Ok(Some(MockEdgeData {
                prerequisites,
                related,
            }))
        }

        /// Create a `Node` from the id and title, attaching the category
        /// only when one was extracted.
        fn to_graph_node(&self, node_data: &Self::NodeData) -> Node {
            let node = Node::new(&node_data.id, &node_data.title);
            match &node_data.category {
                Some(cat) => node.with_category(cat),
                None => node,
            }
        }

        /// Emit one `Prerequisite` edge per prerequisite followed by one
        /// `RelatesTo` edge per related entry, all originating at `from_id`.
        fn to_graph_edges(&self, from_id: &str, edge_data: &Self::EdgeData) -> Vec<Edge> {
            let prerequisite_edges = edge_data
                .prerequisites
                .iter()
                .map(|target| Edge::new(from_id, target, Relationship::Prerequisite));

            let related_edges = edge_data
                .related
                .iter()
                .map(|target| Edge::new(from_id, target, Relationship::RelatesTo));

            // Chaining keeps prerequisites ahead of related entries and lets
            // `collect` size the vector once from the combined length.
            prerequisite_edges.chain(related_edges).collect()
        }

        fn name(&self) -> &str {
            "mock"
        }
    }
}
229
230// ============================================================================
231// Tests
232// ============================================================================
233
#[cfg(test)]
mod tests {
    use super::mock::*;
    use super::*;
    use crate::Relationship;
    use std::path::PathBuf;

    /// Extractor that overrides none of the provided trait methods, so the
    /// trait's own defaults can be observed.
    struct DefaultsOnlyExtractor;

    impl GraphExtractor for DefaultsOnlyExtractor {
        type NodeData = ();
        type EdgeData = ();

        fn extract_node(
            &self,
            _base_path: &Path,
            _file_path: &Path,
            _frontmatter: &yaml_serde::Value,
            _content: &str,
        ) -> Result<Self::NodeData> {
            Ok(())
        }

        fn extract_edges(
            &self,
            _frontmatter: &yaml_serde::Value,
            _content: &str,
        ) -> Result<Option<Self::EdgeData>> {
            Ok(None)
        }

        fn to_graph_node(&self, _node_data: &Self::NodeData) -> Node {
            unreachable!("DefaultsOnlyExtractor is only used to inspect provided methods")
        }

        fn to_graph_edges(&self, _from_id: &str, _edge_data: &Self::EdgeData) -> Vec<Edge> {
            Vec::new()
        }
    }

    /// Frontmatter with a title, a category, two prerequisites and one
    /// related entry.
    fn sample_frontmatter() -> yaml_serde::Value {
        yaml_serde::from_str(
            r#"
title: "Test Concept"
category: "test-category"
prerequisites:
  - prereq-a
  - prereq-b
related:
  - related-x
"#,
        )
        .unwrap()
    }

    #[test]
    fn test_mock_extractor_extract_node() {
        let extractor = MockExtractor;
        let base_path = PathBuf::from("/data/concepts");
        let file_path = PathBuf::from("/data/concepts/harmony/test-concept.md");
        let frontmatter = sample_frontmatter();

        let node_data = extractor
            .extract_node(&base_path, &file_path, &frontmatter, "content")
            .unwrap();

        assert_eq!(node_data.id, "test-concept");
        assert_eq!(node_data.title, "Test Concept");
        assert_eq!(node_data.category, Some("test-category".to_string()));
    }

    #[test]
    fn test_mock_extractor_extract_node_title_falls_back_to_id() {
        let extractor = MockExtractor;
        let base_path = PathBuf::from("/data/concepts");
        let file_path = PathBuf::from("/data/concepts/harmony/test-concept.md");
        // No `title` key: the extractor should reuse the id as the title.
        let frontmatter: yaml_serde::Value = yaml_serde::from_str("category: misc").unwrap();

        let node_data = extractor
            .extract_node(&base_path, &file_path, &frontmatter, "content")
            .unwrap();

        assert_eq!(node_data.id, "test-concept");
        assert_eq!(node_data.title, "test-concept");
        assert_eq!(node_data.category, Some("misc".to_string()));
    }

    #[test]
    fn test_mock_extractor_extract_edges() {
        let extractor = MockExtractor;
        let frontmatter = sample_frontmatter();

        let edge_data = extractor
            .extract_edges(&frontmatter, "content")
            .unwrap()
            .unwrap();

        assert_eq!(edge_data.prerequisites, vec!["prereq-a", "prereq-b"]);
        assert_eq!(edge_data.related, vec!["related-x"]);
    }

    #[test]
    fn test_mock_extractor_extract_edges_none() {
        let extractor = MockExtractor;
        let frontmatter = yaml_serde::from_str("title: Test").unwrap();

        let edge_data = extractor.extract_edges(&frontmatter, "content").unwrap();
        assert!(edge_data.is_none());
    }

    #[test]
    fn test_mock_extractor_to_graph_node() {
        let extractor = MockExtractor;
        let node_data = MockNodeData {
            id: "test-id".to_string(),
            title: "Test Title".to_string(),
            category: Some("test-cat".to_string()),
        };

        let node = extractor.to_graph_node(&node_data);

        assert_eq!(node.id, "test-id");
        assert_eq!(node.title, "Test Title");
        assert_eq!(node.category, Some("test-cat".to_string()));
    }

    #[test]
    fn test_mock_extractor_to_graph_node_no_category() {
        let extractor = MockExtractor;
        let node_data = MockNodeData {
            id: "x".to_string(),
            title: "X".to_string(),
            category: None,
        };

        let node = extractor.to_graph_node(&node_data);
        assert!(node.category.is_none());
    }

    #[test]
    fn test_mock_extractor_to_graph_edges() {
        let extractor = MockExtractor;
        let edge_data = MockEdgeData {
            prerequisites: vec!["a".to_string(), "b".to_string()],
            related: vec!["x".to_string()],
        };

        let edges = extractor.to_graph_edges("from-node", &edge_data);

        assert_eq!(edges.len(), 3);

        assert!(
            edges
                .iter()
                .any(|e| e.to == "a" && e.relationship == Relationship::Prerequisite)
        );
        assert!(
            edges
                .iter()
                .any(|e| e.to == "b" && e.relationship == Relationship::Prerequisite)
        );
        assert!(
            edges
                .iter()
                .any(|e| e.to == "x" && e.relationship == Relationship::RelatesTo)
        );

        // All edges should have from_id set
        assert!(edges.iter().all(|e| e.from == "from-node"));
    }

    #[test]
    fn test_mock_extractor_to_graph_edges_empty() {
        let extractor = MockExtractor;
        let edge_data = MockEdgeData {
            prerequisites: vec![],
            related: vec![],
        };

        let edges = extractor.to_graph_edges("from-node", &edge_data);
        assert!(edges.is_empty());
    }

    #[test]
    fn test_extractor_default_methods() {
        // MockExtractor keeps the default glob but overrides `name()`.
        let extractor = MockExtractor;
        assert_eq!(extractor.content_glob(), "**/*.md");
        assert_eq!(extractor.name(), "mock");

        // An extractor that overrides nothing exposes both trait defaults.
        let bare = DefaultsOnlyExtractor;
        assert_eq!(bare.content_glob(), "**/*.md");
        assert_eq!(bare.name(), "unnamed");
    }
}