data_modelling_sdk/import/knowledge.rs
1//! Knowledge Base importer
2//!
3//! Parses Knowledge Base article YAML files (.kb.yaml) and converts them to KnowledgeArticle models.
4//! Also handles the knowledge index file (knowledge.yaml).
5
6use super::ImportError;
7use crate::models::knowledge::{KnowledgeArticle, KnowledgeIndex};
8
9#[cfg(feature = "schema-validation")]
10use crate::validation::schema::validate_knowledge_internal;
11
/// Knowledge importer for parsing Knowledge Base article YAML files
///
/// The importer is a field-less unit struct, so it carries no state and is
/// free to copy or clone.
#[derive(Debug, Clone, Copy)]
pub struct KnowledgeImporter;
14
15impl KnowledgeImporter {
16 /// Create a new Knowledge importer instance
17 pub fn new() -> Self {
18 Self
19 }
20
21 /// Import a knowledge article from YAML content
22 ///
23 /// Optionally validates against the JSON schema if the `schema-validation` feature is enabled.
24 ///
25 /// # Arguments
26 ///
27 /// * `yaml_content` - Knowledge article YAML content as a string
28 ///
29 /// # Returns
30 ///
31 /// A `KnowledgeArticle` parsed from the YAML content
32 ///
33 /// # Example
34 ///
35 /// ```rust
36 /// use data_modelling_sdk::import::knowledge::KnowledgeImporter;
37 ///
38 /// let importer = KnowledgeImporter::new();
39 /// let yaml = r#"
40 /// id: 660e8400-e29b-41d4-a716-446655440000
41 /// number: "KB-0001"
42 /// title: "Data Classification Guide"
43 /// article_type: guide
44 /// status: published
45 /// summary: "This guide explains data classification."
46 /// content: "Data classification is essential for governance."
47 /// author: "data-governance@example.com"
48 /// created_at: "2024-01-15T10:00:00Z"
49 /// updated_at: "2024-01-15T10:00:00Z"
50 /// "#;
51 /// let article = importer.import(yaml).unwrap();
52 /// assert_eq!(article.title, "Data Classification Guide");
53 /// ```
54 pub fn import(&self, yaml_content: &str) -> Result<KnowledgeArticle, ImportError> {
55 // Validate against JSON Schema if feature is enabled
56 #[cfg(feature = "schema-validation")]
57 {
58 validate_knowledge_internal(yaml_content).map_err(ImportError::ValidationError)?;
59 }
60
61 // Parse the YAML content
62 KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
63 ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
64 })
65 }
66
67 /// Import a knowledge article without schema validation
68 ///
69 /// Use this when you want to skip schema validation for performance
70 /// or when importing from a trusted source.
71 ///
72 /// # Arguments
73 ///
74 /// * `yaml_content` - Knowledge article YAML content as a string
75 ///
76 /// # Returns
77 ///
78 /// A `KnowledgeArticle` parsed from the YAML content
79 pub fn import_without_validation(
80 &self,
81 yaml_content: &str,
82 ) -> Result<KnowledgeArticle, ImportError> {
83 KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
84 ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
85 })
86 }
87
88 /// Import a knowledge index from YAML content
89 ///
90 /// # Arguments
91 ///
92 /// * `yaml_content` - Knowledge index YAML content (knowledge.yaml)
93 ///
94 /// # Returns
95 ///
96 /// A `KnowledgeIndex` parsed from the YAML content
97 ///
98 /// # Example
99 ///
100 /// ```rust
101 /// use data_modelling_sdk::import::knowledge::KnowledgeImporter;
102 ///
103 /// let importer = KnowledgeImporter::new();
104 /// let yaml = r#"
105 /// schema_version: "1.0"
106 /// articles: []
107 /// next_number: 1
108 /// "#;
109 /// let index = importer.import_index(yaml).unwrap();
110 /// assert_eq!(index.next_number, 1);
111 /// ```
112 pub fn import_index(&self, yaml_content: &str) -> Result<KnowledgeIndex, ImportError> {
113 KnowledgeIndex::from_yaml(yaml_content).map_err(|e| {
114 ImportError::ParseError(format!("Failed to parse knowledge index YAML: {}", e))
115 })
116 }
117
118 /// Import multiple knowledge articles from a directory
119 ///
120 /// Loads all `.kb.yaml` files from the specified directory.
121 ///
122 /// # Arguments
123 ///
124 /// * `dir_path` - Path to the directory containing knowledge article files
125 ///
126 /// # Returns
127 ///
128 /// A vector of parsed `KnowledgeArticle` objects and any import errors
129 pub fn import_from_directory(
130 &self,
131 dir_path: &std::path::Path,
132 ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
133 let mut articles = Vec::new();
134 let mut errors = Vec::new();
135
136 if !dir_path.exists() {
137 return Err(ImportError::IoError(format!(
138 "Directory does not exist: {}",
139 dir_path.display()
140 )));
141 }
142
143 if !dir_path.is_dir() {
144 return Err(ImportError::IoError(format!(
145 "Path is not a directory: {}",
146 dir_path.display()
147 )));
148 }
149
150 // Read all .kb.yaml files
151 let entries = std::fs::read_dir(dir_path)
152 .map_err(|e| ImportError::IoError(format!("Failed to read directory: {}", e)))?;
153
154 for entry in entries.flatten() {
155 let path = entry.path();
156 if path.extension().and_then(|s| s.to_str()) == Some("yaml")
157 && path
158 .file_name()
159 .and_then(|s| s.to_str())
160 .is_some_and(|name| name.ends_with(".kb.yaml"))
161 {
162 match std::fs::read_to_string(&path) {
163 Ok(content) => match self.import(&content) {
164 Ok(article) => articles.push(article),
165 Err(e) => errors.push(ImportError::ParseError(format!(
166 "Failed to import {}: {}",
167 path.display(),
168 e
169 ))),
170 },
171 Err(e) => errors.push(ImportError::IoError(format!(
172 "Failed to read {}: {}",
173 path.display(),
174 e
175 ))),
176 }
177 }
178 }
179
180 // Sort articles by number
181 articles.sort_by(|a, b| a.number.cmp(&b.number));
182
183 Ok((articles, errors))
184 }
185
186 /// Import articles filtered by domain
187 ///
188 /// # Arguments
189 ///
190 /// * `dir_path` - Path to the directory containing knowledge article files
191 /// * `domain` - Domain to filter by
192 ///
193 /// # Returns
194 ///
195 /// A vector of parsed `KnowledgeArticle` objects for the specified domain
196 pub fn import_by_domain(
197 &self,
198 dir_path: &std::path::Path,
199 domain: &str,
200 ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
201 let (articles, errors) = self.import_from_directory(dir_path)?;
202
203 let filtered: Vec<KnowledgeArticle> = articles
204 .into_iter()
205 .filter(|a| a.domain.as_deref() == Some(domain))
206 .collect();
207
208 Ok((filtered, errors))
209 }
210}
211
212impl Default for KnowledgeImporter {
213 fn default() -> Self {
214 Self::new()
215 }
216}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed article parses and exposes its title and number.
    #[test]
    fn test_import_knowledge_article() {
        let yaml = r#"
id: 660e8400-e29b-41d4-a716-446655440000
number: "KB-0001"
title: "Data Classification Guide"
article_type: guide
status: published
summary: "This guide explains data classification."
content: "Data classification is essential for governance."
author: "data-governance@example.com"
created_at: "2024-01-15T10:00:00Z"
updated_at: "2024-01-15T10:00:00Z"
"#;
        let article = KnowledgeImporter::new()
            .import_without_validation(yaml)
            .expect("well-formed article YAML should parse");

        assert_eq!(article.title, "Data Classification Guide");
        assert_eq!(article.number, "KB-0001");
    }

    /// A minimal index parses and exposes its counter and schema version.
    #[test]
    fn test_import_knowledge_index() {
        let yaml = r#"
schema_version: "1.0"
articles: []
next_number: 1
"#;
        let index = KnowledgeImporter::new()
            .import_index(yaml)
            .expect("well-formed index YAML should parse");

        assert_eq!(index.next_number, 1);
        assert_eq!(index.schema_version, "1.0");
    }

    /// Input that cannot be parsed as an article is reported as an error.
    #[test]
    fn test_import_invalid_yaml() {
        let outcome =
            KnowledgeImporter::new().import_without_validation("not: valid: yaml: at: all");

        assert!(outcome.is_err());
    }
}
266}