data_modelling_core/import/
knowledge.rs

//! Knowledge Base importer
//!
//! Parses Knowledge Base article YAML files (.kb.yaml) and converts them to KnowledgeArticle models.
//! Also handles the knowledge index file (knowledge.yaml).
5
6use super::ImportError;
7use crate::models::knowledge::{KnowledgeArticle, KnowledgeIndex};
8
9#[cfg(feature = "schema-validation")]
10use crate::validation::schema::validate_knowledge_internal;
11
/// Knowledge importer for parsing Knowledge Base article YAML files.
///
/// This is a unit struct with no fields, so constructing or cloning it is free.
/// `Debug` and `Clone` are derived so the importer can be logged and embedded in
/// types that derive those traits themselves.
#[derive(Debug, Clone)]
pub struct KnowledgeImporter;
14
15impl KnowledgeImporter {
16    /// Create a new Knowledge importer instance
17    pub fn new() -> Self {
18        Self
19    }
20
21    /// Import a knowledge article from YAML content
22    ///
23    /// Optionally validates against the JSON schema if the `schema-validation` feature is enabled.
24    ///
25    /// # Arguments
26    ///
27    /// * `yaml_content` - Knowledge article YAML content as a string
28    ///
29    /// # Returns
30    ///
31    /// A `KnowledgeArticle` parsed from the YAML content
32    ///
33    /// # Example
34    ///
35    /// ```rust
36    /// use data_modelling_core::import::knowledge::KnowledgeImporter;
37    ///
38    /// let importer = KnowledgeImporter::new();
39    /// let yaml = r#"
40    /// id: 660e8400-e29b-41d4-a716-446655440000
41    /// number: 1
42    /// title: "Data Classification Guide"
43    /// articleType: guide
44    /// status: published
45    /// summary: "This guide explains data classification."
46    /// content: "Data classification is essential for governance."
47    /// authors:
48    ///   - "data-governance@example.com"
49    /// createdAt: "2024-01-15T10:00:00Z"
50    /// updatedAt: "2024-01-15T10:00:00Z"
51    /// "#;
52    /// let article = importer.import(yaml).unwrap();
53    /// assert_eq!(article.title, "Data Classification Guide");
54    /// ```
55    pub fn import(&self, yaml_content: &str) -> Result<KnowledgeArticle, ImportError> {
56        // Validate against JSON Schema if feature is enabled
57        #[cfg(feature = "schema-validation")]
58        {
59            validate_knowledge_internal(yaml_content).map_err(ImportError::ValidationError)?;
60        }
61
62        // Parse the YAML content
63        KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
64            ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
65        })
66    }
67
68    /// Import a knowledge article without schema validation
69    ///
70    /// Use this when you want to skip schema validation for performance
71    /// or when importing from a trusted source.
72    ///
73    /// # Arguments
74    ///
75    /// * `yaml_content` - Knowledge article YAML content as a string
76    ///
77    /// # Returns
78    ///
79    /// A `KnowledgeArticle` parsed from the YAML content
80    pub fn import_without_validation(
81        &self,
82        yaml_content: &str,
83    ) -> Result<KnowledgeArticle, ImportError> {
84        KnowledgeArticle::from_yaml(yaml_content).map_err(|e| {
85            ImportError::ParseError(format!("Failed to parse knowledge article YAML: {}", e))
86        })
87    }
88
89    /// Import a knowledge index from YAML content
90    ///
91    /// # Arguments
92    ///
93    /// * `yaml_content` - Knowledge index YAML content (knowledge.yaml)
94    ///
95    /// # Returns
96    ///
97    /// A `KnowledgeIndex` parsed from the YAML content
98    ///
99    /// # Example
100    ///
101    /// ```rust
102    /// use data_modelling_core::import::knowledge::KnowledgeImporter;
103    ///
104    /// let importer = KnowledgeImporter::new();
105    /// let yaml = r#"
106    /// schemaVersion: "1.0"
107    /// articles: []
108    /// nextNumber: 1
109    /// "#;
110    /// let index = importer.import_index(yaml).unwrap();
111    /// assert_eq!(index.next_number, 1);
112    /// ```
113    pub fn import_index(&self, yaml_content: &str) -> Result<KnowledgeIndex, ImportError> {
114        KnowledgeIndex::from_yaml(yaml_content).map_err(|e| {
115            ImportError::ParseError(format!("Failed to parse knowledge index YAML: {}", e))
116        })
117    }
118
119    /// Import multiple knowledge articles from a directory
120    ///
121    /// Loads all `.kb.yaml` files from the specified directory.
122    ///
123    /// # Arguments
124    ///
125    /// * `dir_path` - Path to the directory containing knowledge article files
126    ///
127    /// # Returns
128    ///
129    /// A vector of parsed `KnowledgeArticle` objects and any import errors
130    pub fn import_from_directory(
131        &self,
132        dir_path: &std::path::Path,
133    ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
134        let mut articles = Vec::new();
135        let mut errors = Vec::new();
136
137        if !dir_path.exists() {
138            return Err(ImportError::IoError(format!(
139                "Directory does not exist: {}",
140                dir_path.display()
141            )));
142        }
143
144        if !dir_path.is_dir() {
145            return Err(ImportError::IoError(format!(
146                "Path is not a directory: {}",
147                dir_path.display()
148            )));
149        }
150
151        // Read all .kb.yaml files
152        let entries = std::fs::read_dir(dir_path)
153            .map_err(|e| ImportError::IoError(format!("Failed to read directory: {}", e)))?;
154
155        for entry in entries.flatten() {
156            let path = entry.path();
157            if path.extension().and_then(|s| s.to_str()) == Some("yaml")
158                && path
159                    .file_name()
160                    .and_then(|s| s.to_str())
161                    .is_some_and(|name| name.ends_with(".kb.yaml"))
162            {
163                match std::fs::read_to_string(&path) {
164                    Ok(content) => match self.import(&content) {
165                        Ok(article) => articles.push(article),
166                        Err(e) => errors.push(ImportError::ParseError(format!(
167                            "Failed to import {}: {}",
168                            path.display(),
169                            e
170                        ))),
171                    },
172                    Err(e) => errors.push(ImportError::IoError(format!(
173                        "Failed to read {}: {}",
174                        path.display(),
175                        e
176                    ))),
177                }
178            }
179        }
180
181        // Sort articles by number
182        articles.sort_by(|a, b| a.number.cmp(&b.number));
183
184        Ok((articles, errors))
185    }
186
187    /// Import articles filtered by domain
188    ///
189    /// # Arguments
190    ///
191    /// * `dir_path` - Path to the directory containing knowledge article files
192    /// * `domain` - Domain to filter by
193    ///
194    /// # Returns
195    ///
196    /// A vector of parsed `KnowledgeArticle` objects for the specified domain
197    pub fn import_by_domain(
198        &self,
199        dir_path: &std::path::Path,
200        domain: &str,
201    ) -> Result<(Vec<KnowledgeArticle>, Vec<ImportError>), ImportError> {
202        let (articles, errors) = self.import_from_directory(dir_path)?;
203
204        let filtered: Vec<KnowledgeArticle> = articles
205            .into_iter()
206            .filter(|a| a.domain.as_deref() == Some(domain))
207            .collect();
208
209        Ok((filtered, errors))
210    }
211}
212
213impl Default for KnowledgeImporter {
214    fn default() -> Self {
215        Self::new()
216    }
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Article document used by the article import test.
    const ARTICLE_YAML: &str = r#"
id: 660e8400-e29b-41d4-a716-446655440000
number: 1
title: "Data Classification Guide"
articleType: guide
status: published
summary: "This guide explains data classification."
content: "Data classification is essential for governance."
authors:
  - "data-governance@example.com"
createdAt: "2024-01-15T10:00:00Z"
updatedAt: "2024-01-15T10:00:00Z"
"#;

    /// Empty knowledge index document used by the index import test.
    const INDEX_YAML: &str = r#"
schemaVersion: "1.0"
articles: []
nextNumber: 1
"#;

    /// A well-formed article parses and exposes its title and number.
    #[test]
    fn test_import_knowledge_article() {
        let article = KnowledgeImporter::new()
            .import_without_validation(ARTICLE_YAML)
            .expect("article YAML should parse");

        assert_eq!(article.title, "Data Classification Guide");
        assert_eq!(article.number, 1);
    }

    /// A well-formed index parses and exposes its counter and schema version.
    #[test]
    fn test_import_knowledge_index() {
        let index = KnowledgeImporter::new()
            .import_index(INDEX_YAML)
            .expect("index YAML should parse");

        assert_eq!(index.next_number, 1);
        assert_eq!(index.schema_version, "1.0");
    }

    /// Malformed YAML is rejected with an error instead of panicking.
    #[test]
    fn test_import_invalid_yaml() {
        let outcome =
            KnowledgeImporter::new().import_without_validation("not: valid: yaml: at: all");

        assert!(outcome.is_err());
    }
}