Skip to main content

systemprompt_content/config/
validated.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use systemprompt_identifiers::{CategoryId, SourceId};
4use systemprompt_models::{
5    Category, ContentConfigError, ContentConfigErrors, ContentConfigRaw, ContentRouting,
6    ContentSourceConfigRaw, IndexingConfig, Metadata, SitemapConfig, SourceBranding,
7};
8
/// Source name reported by `determine_source` for the site root (`/`).
const SOURCE_WEB: &str = "web";
/// Source name reported by `determine_source` when no enabled sitemap pattern
/// matches the requested path.
const SOURCE_UNKNOWN: &str = "unknown";
11
12#[derive(Debug, Clone)]
13pub struct ContentConfigValidated {
14    content_sources: HashMap<String, ContentSourceConfigValidated>,
15    metadata: Metadata,
16    categories: HashMap<String, Category>,
17    base_path: PathBuf,
18}
19
20#[derive(Debug, Clone)]
21pub struct ContentSourceConfigValidated {
22    pub path: PathBuf,
23    pub source_id: SourceId,
24    pub category_id: CategoryId,
25    pub enabled: bool,
26    pub description: String,
27    pub allowed_content_types: Vec<String>,
28    pub indexing: IndexingConfig,
29    pub sitemap: Option<SitemapConfig>,
30    pub branding: Option<SourceBranding>,
31}
32
33pub type ValidationResult = Result<ContentConfigValidated, ContentConfigErrors>;
34
35impl ContentConfigValidated {
36    pub fn from_raw(raw: ContentConfigRaw, base_path: PathBuf) -> ValidationResult {
37        let mut errors = ContentConfigErrors::new();
38
39        let categories = validate_categories(&raw.categories, &mut errors);
40        let content_sources = validate_sources(&raw, &categories, &base_path, &mut errors);
41
42        errors.into_result(Self {
43            content_sources,
44            metadata: raw.metadata,
45            categories,
46            base_path,
47        })
48    }
49
50    pub const fn content_sources(&self) -> &HashMap<String, ContentSourceConfigValidated> {
51        &self.content_sources
52    }
53
54    pub const fn metadata(&self) -> &Metadata {
55        &self.metadata
56    }
57
58    pub const fn categories(&self) -> &HashMap<String, Category> {
59        &self.categories
60    }
61
62    pub const fn base_path(&self) -> &PathBuf {
63        &self.base_path
64    }
65
66    pub fn is_html_page(&self, path: &str) -> bool {
67        if path == "/" {
68            return true;
69        }
70
71        self.content_sources
72            .values()
73            .filter(|source| source.enabled)
74            .filter_map(|source| source.sitemap.as_ref())
75            .filter(|sitemap| sitemap.enabled)
76            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
77    }
78
79    fn matches_url_pattern(pattern: &str, path: &str) -> bool {
80        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
81        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
82
83        if pattern_parts.len() != path_parts.len() {
84            return false;
85        }
86
87        pattern_parts
88            .iter()
89            .zip(path_parts.iter())
90            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
91    }
92
93    pub fn determine_source(&self, path: &str) -> String {
94        if path == "/" {
95            return SOURCE_WEB.to_string();
96        }
97
98        self.content_sources
99            .iter()
100            .filter(|(_, source)| source.enabled)
101            .find_map(|(name, source)| {
102                source.sitemap.as_ref().and_then(|sitemap| {
103                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
104                        .then(|| name.clone())
105                })
106            })
107            .unwrap_or_else(|| SOURCE_UNKNOWN.to_string())
108    }
109
110    pub fn resolve_slug(&self, path: &str) -> Option<String> {
111        self.content_sources
112            .values()
113            .filter(|source| source.enabled)
114            .filter_map(|source| source.sitemap.as_ref())
115            .filter(|sitemap| sitemap.enabled)
116            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
117    }
118}
119
/// Extracts the slug portion of `path` according to a sitemap URL `pattern`.
///
/// Everything in `pattern` before its first `{` is treated as a literal prefix
/// (a pattern without `{` is used as the prefix in full). If `path` starts with
/// that prefix, the remainder is cut at the first `?` or `#` — dropping any
/// query string or fragment — and trailing `/` characters are then removed.
///
/// Returns `None` when `path` does not start with the prefix or when nothing is
/// left after the cleanup.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    // `split` always yields at least one item, so `?` never bails out here.
    let prefix = pattern.split('{').next()?;
    let remainder = path.strip_prefix(prefix)?;

    // Drop the query string / fragment BEFORE trimming slashes; otherwise a URL
    // such as `/blog/post/?utm=1` would keep its trailing slash in the slug.
    let without_suffix = remainder
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(remainder);
    let slug = without_suffix.trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_string())
}
127
128impl ContentRouting for ContentConfigValidated {
129    fn is_html_page(&self, path: &str) -> bool {
130        ContentConfigValidated::is_html_page(self, path)
131    }
132
133    fn determine_source(&self, path: &str) -> String {
134        ContentConfigValidated::determine_source(self, path)
135    }
136
137    fn resolve_slug(&self, path: &str) -> Option<String> {
138        ContentConfigValidated::resolve_slug(self, path)
139    }
140}
141
142fn validate_categories(
143    raw: &HashMap<String, Category>,
144    errors: &mut ContentConfigErrors,
145) -> HashMap<String, Category> {
146    let mut validated = HashMap::new();
147
148    for (id, cat) in raw {
149        if cat.name.is_empty() {
150            errors.push(ContentConfigError::Validation {
151                field: format!("categories.{id}.name"),
152                message: "Category name cannot be empty".to_string(),
153                suggestion: Some("Provide a non-empty name".to_string()),
154            });
155            continue;
156        }
157        validated.insert(id.clone(), cat.clone());
158    }
159
160    validated
161}
162
163fn validate_sources(
164    raw: &ContentConfigRaw,
165    categories: &HashMap<String, Category>,
166    base_path: &Path,
167    errors: &mut ContentConfigErrors,
168) -> HashMap<String, ContentSourceConfigValidated> {
169    let mut validated = HashMap::new();
170
171    for (name, source) in &raw.content_sources {
172        if let Some(validated_source) =
173            validate_single_source(name, source, categories, base_path, errors)
174        {
175            validated.insert(name.clone(), validated_source);
176        }
177    }
178
179    validated
180}
181
182fn validate_single_source(
183    name: &str,
184    source: &ContentSourceConfigRaw,
185    categories: &HashMap<String, Category>,
186    base_path: &Path,
187    errors: &mut ContentConfigErrors,
188) -> Option<ContentSourceConfigValidated> {
189    let field_prefix = format!("content_sources.{name}");
190
191    if source.path.is_empty() {
192        errors.push(ContentConfigError::Validation {
193            field: format!("{field_prefix}.path"),
194            message: "Source path is required".to_string(),
195            suggestion: Some("Add a path to the content directory".to_string()),
196        });
197        return None;
198    }
199
200    if source.source_id.as_str().is_empty() {
201        errors.push(ContentConfigError::Validation {
202            field: format!("{field_prefix}.source_id"),
203            message: "source_id is required".to_string(),
204            suggestion: Some("Add a unique source_id".to_string()),
205        });
206        return None;
207    }
208
209    if source.category_id.as_str().is_empty() {
210        errors.push(ContentConfigError::Validation {
211            field: format!("{field_prefix}.category_id"),
212            message: "category_id is required".to_string(),
213            suggestion: Some("Add a category_id that references a defined category".to_string()),
214        });
215        return None;
216    }
217
218    if !categories.contains_key(source.category_id.as_str()) {
219        errors.push(ContentConfigError::Validation {
220            field: format!("{field_prefix}.category_id"),
221            message: format!("Referenced category '{}' not found", source.category_id),
222            suggestion: Some("Add this category to the categories section".to_string()),
223        });
224    }
225
226    let resolved_path = if source.path.starts_with('/') {
227        PathBuf::from(&source.path)
228    } else {
229        base_path.join(&source.path)
230    };
231
232    let Ok(canonical_path) = std::fs::canonicalize(&resolved_path) else {
233        errors.push(ContentConfigError::Validation {
234            field: format!("{field_prefix}.path"),
235            message: "Content source directory does not exist".to_string(),
236            suggestion: Some("Create the directory or fix the path".to_string()),
237        });
238        return None;
239    };
240
241    Some(ContentSourceConfigValidated {
242        path: canonical_path,
243        source_id: source.source_id.clone(),
244        category_id: source.category_id.clone(),
245        enabled: source.enabled,
246        description: source.description.clone(),
247        allowed_content_types: source.allowed_content_types.clone(),
248        indexing: source.indexing.unwrap_or(IndexingConfig {
249            clear_before: false,
250            recursive: false,
251            override_existing: false,
252        }),
253        sitemap: source.sitemap.clone(),
254        branding: source.branding.clone(),
255    })
256}