fabryk_vector/
extractor.rs1use crate::types::VectorDocument;
16use fabryk_core::Result;
17use std::path::Path;
18
19pub trait VectorExtractor: Send + Sync {
35 fn extract_document(
53 &self,
54 base_path: &Path,
55 file_path: &Path,
56 frontmatter: &yaml_serde::Value,
57 content: &str,
58 ) -> Result<VectorDocument>;
59
60 fn content_glob(&self) -> &str {
65 "**/*.md"
66 }
67
68 fn name(&self) -> &str {
70 "unnamed"
71 }
72}
73
/// Stateless mock extractor.
///
/// A unit struct: it carries no configuration, so every value is
/// interchangeable with every other.
#[derive(Debug, Clone, Default)]
pub struct MockVectorExtractor;
83
84impl VectorExtractor for MockVectorExtractor {
85 fn extract_document(
86 &self,
87 _base_path: &Path,
88 file_path: &Path,
89 frontmatter: &yaml_serde::Value,
90 content: &str,
91 ) -> Result<VectorDocument> {
92 let id = fabryk_core::util::ids::id_from_path(file_path)
93 .ok_or_else(|| fabryk_core::Error::parse("no file stem"))?;
94
95 let title = frontmatter
96 .get("title")
97 .and_then(|v| v.as_str())
98 .unwrap_or(&id);
99
100 let category = frontmatter
101 .get("category")
102 .and_then(|v| v.as_str())
103 .map(String::from);
104
105 let text = format!("{} | {}", title, content.trim());
107
108 let mut doc = VectorDocument::new(id, text);
109 if let Some(cat) = category {
110 doc = doc.with_category(cat);
111 }
112
113 if let Some(tier) = frontmatter.get("tier").and_then(|v| v.as_str()) {
115 doc = doc.with_metadata("tier", tier);
116 }
117
118 Ok(doc)
119 }
120
121 fn name(&self) -> &str {
122 "mock"
123 }
124}
125
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Frontmatter that sets every key the mock extractor reads.
    const FULL_FRONTMATTER: &str = r#"
title: "Test Concept"
category: "test-category"
tier: "beginner"
"#;

    /// Runs the mock extractor once and returns the resulting document.
    ///
    /// Panics when `yaml` does not parse or when extraction fails, which is
    /// the desired outcome inside a test.
    fn run_mock(base: &str, file: &str, yaml: &str, body: &str) -> VectorDocument {
        let base_dir = PathBuf::from(base);
        let source = PathBuf::from(file);
        let meta: yaml_serde::Value = yaml_serde::from_str(yaml).unwrap();

        MockVectorExtractor
            .extract_document(&base_dir, &source, &meta, body)
            .unwrap()
    }

    /// Every frontmatter key is present: id, text, category and tier metadata
    /// are all populated.
    #[test]
    fn test_mock_extractor_extract_document() {
        let doc = run_mock(
            "/data/concepts",
            "/data/concepts/test-concept.md",
            FULL_FRONTMATTER,
            "Body content here.",
        );

        assert_eq!(doc.id, "test-concept");
        assert!(doc.text.contains("Test Concept"));
        assert!(doc.text.contains("Body content here."));
        assert_eq!(doc.category, Some("test-category".to_string()));
        assert_eq!(doc.metadata.get("tier").unwrap(), "beginner");
    }

    /// Only `title` is set: no category, no metadata, and the text has the
    /// exact `"<title> | <content>"` shape.
    #[test]
    fn test_mock_extractor_minimal_frontmatter() {
        let doc = run_mock("/data", "/data/simple.md", "title: Simple", "Content");

        assert_eq!(doc.id, "simple");
        assert_eq!(doc.text, "Simple | Content");
        assert!(doc.category.is_none());
        assert!(doc.metadata.is_empty());
    }

    /// Without a `title` key the id takes the title's place in the text.
    #[test]
    fn test_mock_extractor_no_title() {
        let doc = run_mock("/data", "/data/no-title.md", "category: test", "Content");

        assert!(doc.text.contains("no-title"));
    }

    /// The mock keeps the trait's default glob and overrides the name.
    #[test]
    fn test_mock_extractor_defaults() {
        let mock = MockVectorExtractor;

        assert_eq!(mock.content_glob(), "**/*.md");
        assert_eq!(mock.name(), "mock");
    }

    /// Compile-time check: the nested function only type-checks while
    /// `dyn VectorExtractor` is a valid type. Nothing runs at test time.
    #[test]
    fn test_trait_object_safety() {
        fn _accepts_trait_object(_: &dyn VectorExtractor) {}
    }
}