data_modelling_sdk/import/
knowledge.rs

1//! Knowledge Base importer
2//!
3//! Parses Knowledge Base article YAML files (.kb.yaml) and converts them to KnowledgeArticle models.
4//! Also handles the knowledge index file (knowledge.yaml).
5
6use super::ImportError;
7use crate::models::knowledge::{KnowledgeArticle, KnowledgeIndex};
8
9#[cfg(feature = "schema-validation")]
10use crate::validation::schema::validate_knowledge_internal;
11
/// Importer for Knowledge Base YAML documents.
///
/// This is a stateless unit struct: it carries no configuration, so every
/// instance behaves the same. Its methods parse individual article documents
/// and the knowledge index document from YAML text.
///
/// `Debug`, `Clone` and `Copy` are derived so the importer can be logged,
/// embedded in other `#[derive(Debug)]` types, and passed around by value.
#[derive(Debug, Clone, Copy)]
pub struct KnowledgeImporter;
14
15impl KnowledgeImporter {
16    /// Create a new Knowledge importer instance
17    pub fn new() -> Self {
18        Self
19    }
20
21    /// Import a knowledge article from YAML content
22    ///
23    /// Optionally validates against the JSON schema if the `schema-validation` feature is enabled.
24    ///
25    /// # Arguments
26    ///
27    /// * `yaml_content` - Knowledge article YAML content as a string
28    ///
29    /// # Returns
30    ///
31    /// A `KnowledgeArticle` parsed from the YAML content
32    ///
33    /// # Example
34    ///
35    /// ```rust
36    /// use data_modelling_sdk::import::knowledge::KnowledgeImporter;
37    ///
38    /// let importer = KnowledgeImporter::new();
39    /// let yaml = r#"
40    /// id: 660e8400-e29b-41d4-a716-446655440000
41    /// number: "KB-0001"
42    /// title: "Data Classification Guide"
43    /// article_type: guide
44    /// status: published
45    /// summary: "This guide explains data classification."
46    /// content: "Data classification is essential for governance."
47    /// author: "data-governance@example.com"
48    /// created_at: "2024-01-15T10:00:00Z"
49    /// updated_at: "2024-01-15T10:00:00Z"
50    /// "#;
51    /// let article = importer.import(yaml).unwrap();
52    /// assert_eq!(article.title, "Data Classification Guide");
53    /// ```
54    pub fn import(&self, yaml_content: &str) -> Result<KnowledgeArticle, ImportError> {
55        // Validate against JSON Schema if feature is enabled
56        #[cfg(feature = "schema-validation")]
57        {
58            validate_knowledge_internal(yaml_content).map_err(ImportError::ValidationError)?;
59        }
60
61        // Parse the YAML content
62        KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
63            ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
64        })
65    }
66
67    /// Import a knowledge article without schema validation
68    ///
69    /// Use this when you want to skip schema validation for performance
70    /// or when importing from a trusted source.
71    ///
72    /// # Arguments
73    ///
74    /// * `yaml_content` - Knowledge article YAML content as a string
75    ///
76    /// # Returns
77    ///
78    /// A `KnowledgeArticle` parsed from the YAML content
79    pub fn import_without_validation(
80        &self,
81        yaml_content: &str,
82    ) -> Result<KnowledgeArticle, ImportError> {
83        KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
84            ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
85        })
86    }
87
88    /// Import a knowledge index from YAML content
89    ///
90    /// # Arguments
91    ///
92    /// * `yaml_content` - Knowledge index YAML content (knowledge.yaml)
93    ///
94    /// # Returns
95    ///
96    /// A `KnowledgeIndex` parsed from the YAML content
97    ///
98    /// # Example
99    ///
100    /// ```rust
101    /// use data_modelling_sdk::import::knowledge::KnowledgeImporter;
102    ///
103    /// let importer = KnowledgeImporter::new();
104    /// let yaml = r#"
105    /// schema_version: "1.0"
106    /// articles: []
107    /// next_number: 1
108    /// "#;
109    /// let index = importer.import_index(yaml).unwrap();
110    /// assert_eq!(index.next_number, 1);
111    /// ```
112    pub fn import_index(&self, yaml_content: &str) -> Result<KnowledgeIndex, ImportError> {
113        KnowledgeIndex::from_yaml(yaml_content).map_err(|e| {
114            ImportError::ParseError(format!("Failed to parse knowledge index YAML: {}", e))
115        })
116    }
117
118    /// Import multiple knowledge articles from a directory
119    ///
120    /// Loads all `.kb.yaml` files from the specified directory.
121    ///
122    /// # Arguments
123    ///
124    /// * `dir_path` - Path to the directory containing knowledge article files
125    ///
126    /// # Returns
127    ///
128    /// A vector of parsed `KnowledgeArticle` objects and any import errors
129    pub fn import_from_directory(
130        &self,
131        dir_path: &std::path::Path,
132    ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
133        let mut articles = Vec::new();
134        let mut errors = Vec::new();
135
136        if !dir_path.exists() {
137            return Err(ImportError::IoError(format!(
138                "Directory does not exist: {}",
139                dir_path.display()
140            )));
141        }
142
143        if !dir_path.is_dir() {
144            return Err(ImportError::IoError(format!(
145                "Path is not a directory: {}",
146                dir_path.display()
147            )));
148        }
149
150        // Read all .kb.yaml files
151        let entries = std::fs::read_dir(dir_path)
152            .map_err(|e| ImportError::IoError(format!("Failed to read directory: {}", e)))?;
153
154        for entry in entries.flatten() {
155            let path = entry.path();
156            if path.extension().and_then(|s| s.to_str()) == Some("yaml")
157                && path
158                    .file_name()
159                    .and_then(|s| s.to_str())
160                    .is_some_and(|name| name.ends_with(".kb.yaml"))
161            {
162                match std::fs::read_to_string(&path) {
163                    Ok(content) => match self.import(&content) {
164                        Ok(article) => articles.push(article),
165                        Err(e) => errors.push(ImportError::ParseError(format!(
166                            "Failed to import {}: {}",
167                            path.display(),
168                            e
169                        ))),
170                    },
171                    Err(e) => errors.push(ImportError::IoError(format!(
172                        "Failed to read {}: {}",
173                        path.display(),
174                        e
175                    ))),
176                }
177            }
178        }
179
180        // Sort articles by number
181        articles.sort_by(|a, b| a.number.cmp(&b.number));
182
183        Ok((articles, errors))
184    }
185
186    /// Import articles filtered by domain
187    ///
188    /// # Arguments
189    ///
190    /// * `dir_path` - Path to the directory containing knowledge article files
191    /// * `domain` - Domain to filter by
192    ///
193    /// # Returns
194    ///
195    /// A vector of parsed `KnowledgeArticle` objects for the specified domain
196    pub fn import_by_domain(
197        &self,
198        dir_path: &std::path::Path,
199        domain: &str,
200    ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
201        let (articles, errors) = self.import_from_directory(dir_path)?;
202
203        let filtered: Vec<KnowledgeArticle> = articles
204            .into_iter()
205            .filter(|a| a.domain.as_deref() == Some(domain))
206            .collect();
207
208        Ok((filtered, errors))
209    }
210}
211
212impl Default for KnowledgeImporter {
213    fn default() -> Self {
214        Self::new()
215    }
216}
217
218#[cfg(test)]
219mod tests {
220    use super::*;
221
222    #[test]
223    fn test_import_knowledge_article() {
224        let importer = KnowledgeImporter::new();
225        let yaml = r#"
226id: 660e8400-e29b-41d4-a716-446655440000
227number: "KB-0001"
228title: "Data Classification Guide"
229article_type: guide
230status: published
231summary: "This guide explains data classification."
232content: "Data classification is essential for governance."
233author: "data-governance@example.com"
234created_at: "2024-01-15T10:00:00Z"
235updated_at: "2024-01-15T10:00:00Z"
236"#;
237        let result = importer.import_without_validation(yaml);
238        assert!(result.is_ok());
239        let article = result.unwrap();
240        assert_eq!(article.title, "Data Classification Guide");
241        assert_eq!(article.number, "KB-0001");
242    }
243
244    #[test]
245    fn test_import_knowledge_index() {
246        let importer = KnowledgeImporter::new();
247        let yaml = r#"
248schema_version: "1.0"
249articles: []
250next_number: 1
251"#;
252        let result = importer.import_index(yaml);
253        assert!(result.is_ok());
254        let index = result.unwrap();
255        assert_eq!(index.next_number, 1);
256        assert_eq!(index.schema_version, "1.0");
257    }
258
259    #[test]
260    fn test_import_invalid_yaml() {
261        let importer = KnowledgeImporter::new();
262        let yaml = "not: valid: yaml: at: all";
263        let result = importer.import_without_validation(yaml);
264        assert!(result.is_err());
265    }
266}