data_modelling_core/import/knowledge.rs
1//! Knowledge Base importer
2//!
3//! Parses Knowledge Base article YAML files (.kb.yaml) and converts them to KnowledgeArticle models.
4//! Also handles the knowledge index file (knowledge.yaml).
5
6use super::ImportError;
7use crate::models::knowledge::{KnowledgeArticle, KnowledgeIndex};
8
9#[cfg(feature = "schema-validation")]
10use crate::validation::schema::validate_knowledge_internal;
11
/// Knowledge importer for parsing Knowledge Base article YAML files
///
/// The importer is a stateless unit struct, so it is cheap to copy and can be
/// debug-printed when embedded in other types.
#[derive(Debug, Clone, Copy)]
pub struct KnowledgeImporter;
14
15impl KnowledgeImporter {
16 /// Create a new Knowledge importer instance
17 pub fn new() -> Self {
18 Self
19 }
20
21 /// Import a knowledge article from YAML content
22 ///
23 /// Optionally validates against the JSON schema if the `schema-validation` feature is enabled.
24 ///
25 /// # Arguments
26 ///
27 /// * `yaml_content` - Knowledge article YAML content as a string
28 ///
29 /// # Returns
30 ///
31 /// A `KnowledgeArticle` parsed from the YAML content
32 ///
33 /// # Example
34 ///
35 /// ```rust
36 /// use data_modelling_core::import::knowledge::KnowledgeImporter;
37 ///
38 /// let importer = KnowledgeImporter::new();
39 /// let yaml = r#"
40 /// id: 660e8400-e29b-41d4-a716-446655440000
41 /// number: 1
42 /// title: "Data Classification Guide"
43 /// articleType: guide
44 /// status: published
45 /// summary: "This guide explains data classification."
46 /// content: "Data classification is essential for governance."
47 /// authors:
48 /// - "data-governance@example.com"
49 /// createdAt: "2024-01-15T10:00:00Z"
50 /// updatedAt: "2024-01-15T10:00:00Z"
51 /// "#;
52 /// let article = importer.import(yaml).unwrap();
53 /// assert_eq!(article.title, "Data Classification Guide");
54 /// ```
55 pub fn import(&self, yaml_content: &str) -> Result<KnowledgeArticle, ImportError> {
56 // Validate against JSON Schema if feature is enabled
57 #[cfg(feature = "schema-validation")]
58 {
59 validate_knowledge_internal(yaml_content).map_err(ImportError::ValidationError)?;
60 }
61
62 // Parse the YAML content
63 KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
64 ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
65 })
66 }
67
68 /// Import a knowledge article without schema validation
69 ///
70 /// Use this when you want to skip schema validation for performance
71 /// or when importing from a trusted source.
72 ///
73 /// # Arguments
74 ///
75 /// * `yaml_content` - Knowledge article YAML content as a string
76 ///
77 /// # Returns
78 ///
79 /// A `KnowledgeArticle` parsed from the YAML content
80 pub fn import_without_validation(
81 &self,
82 yaml_content: &str,
83 ) -> Result<KnowledgeArticle, ImportError> {
84 KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
85 ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
86 })
87 }
88
89 /// Import a knowledge index from YAML content
90 ///
91 /// # Arguments
92 ///
93 /// * `yaml_content` - Knowledge index YAML content (knowledge.yaml)
94 ///
95 /// # Returns
96 ///
97 /// A `KnowledgeIndex` parsed from the YAML content
98 ///
99 /// # Example
100 ///
101 /// ```rust
102 /// use data_modelling_core::import::knowledge::KnowledgeImporter;
103 ///
104 /// let importer = KnowledgeImporter::new();
105 /// let yaml = r#"
106 /// schemaVersion: "1.0"
107 /// articles: []
108 /// nextNumber: 1
109 /// "#;
110 /// let index = importer.import_index(yaml).unwrap();
111 /// assert_eq!(index.next_number, 1);
112 /// ```
113 pub fn import_index(&self, yaml_content: &str) -> Result<KnowledgeIndex, ImportError> {
114 KnowledgeIndex::from_yaml(yaml_content).map_err(|e| {
115 ImportError::ParseError(format!("Failed to parse knowledge index YAML: {}", e))
116 })
117 }
118
119 /// Import multiple knowledge articles from a directory
120 ///
121 /// Loads all `.kb.yaml` files from the specified directory.
122 ///
123 /// # Arguments
124 ///
125 /// * `dir_path` - Path to the directory containing knowledge article files
126 ///
127 /// # Returns
128 ///
129 /// A vector of parsed `KnowledgeArticle` objects and any import errors
130 pub fn import_from_directory(
131 &self,
132 dir_path: &std::path::Path,
133 ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
134 let mut articles = Vec::new();
135 let mut errors = Vec::new();
136
137 if !dir_path.exists() {
138 return Err(ImportError::IoError(format!(
139 "Directory does not exist: {}",
140 dir_path.display()
141 )));
142 }
143
144 if !dir_path.is_dir() {
145 return Err(ImportError::IoError(format!(
146 "Path is not a directory: {}",
147 dir_path.display()
148 )));
149 }
150
151 // Read all .kb.yaml files
152 let entries = std::fs::read_dir(dir_path)
153 .map_err(|e| ImportError::IoError(format!("Failed to read directory: {}", e)))?;
154
155 for entry in entries.flatten() {
156 let path = entry.path();
157 if path.extension().and_then(|s| s.to_str()) == Some("yaml")
158 && path
159 .file_name()
160 .and_then(|s| s.to_str())
161 .is_some_and(|name| name.ends_with(".kb.yaml"))
162 {
163 match std::fs::read_to_string(&path) {
164 Ok(content) => match self.import(&content) {
165 Ok(article) => articles.push(article),
166 Err(e) => errors.push(ImportError::ParseError(format!(
167 "Failed to import {}: {}",
168 path.display(),
169 e
170 ))),
171 },
172 Err(e) => errors.push(ImportError::IoError(format!(
173 "Failed to read {}: {}",
174 path.display(),
175 e
176 ))),
177 }
178 }
179 }
180
181 // Sort articles by number
182 articles.sort_by(|a, b| a.number.cmp(&b.number));
183
184 Ok((articles, errors))
185 }
186
187 /// Import articles filtered by domain
188 ///
189 /// # Arguments
190 ///
191 /// * `dir_path` - Path to the directory containing knowledge article files
192 /// * `domain` - Domain to filter by
193 ///
194 /// # Returns
195 ///
196 /// A vector of parsed `KnowledgeArticle` objects for the specified domain
197 pub fn import_by_domain(
198 &self,
199 dir_path: &std::path::Path,
200 domain: &str,
201 ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
202 let (articles, errors) = self.import_from_directory(dir_path)?;
203
204 let filtered: Vec<KnowledgeArticle> = articles
205 .into_iter()
206 .filter(|a| a.domain.as_deref() == Some(domain))
207 .collect();
208
209 Ok((filtered, errors))
210 }
211}
212
213impl Default for KnowledgeImporter {
214 fn default() -> Self {
215 Self::new()
216 }
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Knowledge article document used by the article import test.
    const ARTICLE_YAML: &str = r#"
id: 660e8400-e29b-41d4-a716-446655440000
number: 1
title: "Data Classification Guide"
articleType: guide
status: published
summary: "This guide explains data classification."
content: "Data classification is essential for governance."
authors:
  - "data-governance@example.com"
createdAt: "2024-01-15T10:00:00Z"
updatedAt: "2024-01-15T10:00:00Z"
"#;

    /// Knowledge index document with no articles.
    const INDEX_YAML: &str = r#"
schemaVersion: "1.0"
articles: []
nextNumber: 1
"#;

    /// A well-formed article parses and exposes its title and number.
    #[test]
    fn test_import_knowledge_article() {
        let article = KnowledgeImporter::new()
            .import_without_validation(ARTICLE_YAML)
            .expect("knowledge article YAML should parse");

        assert_eq!(article.title, "Data Classification Guide");
        assert_eq!(article.number, 1);
    }

    /// A well-formed index parses and exposes its counter and schema version.
    #[test]
    fn test_import_knowledge_index() {
        let index = KnowledgeImporter::new()
            .import_index(INDEX_YAML)
            .expect("knowledge index YAML should parse");

        assert_eq!(index.next_number, 1);
        assert_eq!(index.schema_version, "1.0");
    }

    /// Malformed YAML is reported as an error rather than a panic.
    #[test]
    fn test_import_invalid_yaml() {
        let outcome =
            KnowledgeImporter::new().import_without_validation("not: valid: yaml: at: all");

        assert!(outcome.is_err());
    }
}
268}