Skip to main content

systemprompt_content/config/
validated.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use systemprompt_identifiers::{CategoryId, SourceId};
4use systemprompt_models::services::{ContentConfig, ServicesConfig};
5use systemprompt_models::{
6    Category, ContentConfigError, ContentConfigErrors, ContentConfigRaw, ContentRouting,
7    ContentSourceConfigRaw, IndexingConfig, Metadata, SitemapConfig, SourceBranding,
8};
9
/// Source name reported by `determine_source` for the site root (`/`).
const SOURCE_WEB: &str = "web";
/// Source name reported by `determine_source` when no enabled sitemap pattern matches the path.
const SOURCE_UNKNOWN: &str = "unknown";
12
/// Content configuration that has been through validation.
///
/// Instances are built by the `from_*` constructors; all fields are private
/// and exposed through read-only accessors.
#[derive(Debug, Clone)]
pub struct ContentConfigValidated {
    /// Sources that passed validation, keyed by their name in the raw configuration.
    content_sources: HashMap<String, ContentSourceConfigValidated>,
    /// Metadata carried over unchanged from the raw configuration.
    metadata: Metadata,
    /// Categories that passed validation, keyed by category id.
    categories: HashMap<String, Category>,
    /// Base path supplied at construction; relative source paths are joined onto it.
    base_path: PathBuf,
}
20
/// A single content source after validation.
#[derive(Debug, Clone)]
pub struct ContentSourceConfigValidated {
    /// Canonicalized filesystem path of the source (resolved against the base path when relative).
    pub path: PathBuf,
    /// Identifier of the source; validation guarantees it is non-empty.
    pub source_id: SourceId,
    /// Identifier of the category the source belongs to; validation guarantees it is non-empty.
    pub category_id: CategoryId,
    /// Whether the source is active; disabled sources are skipped by the routing helpers.
    pub enabled: bool,
    /// Description copied verbatim from the raw configuration.
    pub description: String,
    /// Content types copied verbatim from the raw configuration.
    pub allowed_content_types: Vec<String>,
    /// Indexing options; every flag is `false` when the raw configuration omits them.
    pub indexing: IndexingConfig,
    /// Optional sitemap settings; its `enabled` flag and `url_pattern` drive URL routing.
    pub sitemap: Option<SitemapConfig>,
    /// Optional branding copied verbatim from the raw configuration.
    pub branding: Option<SourceBranding>,
}
33
/// Outcome of validating a content configuration: the validated configuration
/// on success, or the collected validation errors.
pub type ValidationResult = Result<ContentConfigValidated, ContentConfigErrors>;
35
36impl ContentConfigValidated {
37    pub fn from_raw(raw: ContentConfigRaw, base_path: PathBuf) -> ValidationResult {
38        let mut errors = ContentConfigErrors::new();
39
40        let categories = validate_categories(&raw.categories, &mut errors);
41        let content_sources = validate_sources(&raw, &categories, &base_path, &mut errors);
42
43        errors.into_result(Self {
44            content_sources,
45            metadata: raw.metadata,
46            categories,
47            base_path,
48        })
49    }
50
51    pub fn from_content_config(config: &ContentConfig, base_path: PathBuf) -> ValidationResult {
52        Self::from_raw(config.raw.clone(), base_path)
53    }
54
55    pub fn from_services_config(services: &ServicesConfig, base_path: PathBuf) -> ValidationResult {
56        Self::from_content_config(&services.content, base_path)
57    }
58
59    pub const fn content_sources(&self) -> &HashMap<String, ContentSourceConfigValidated> {
60        &self.content_sources
61    }
62
63    pub const fn metadata(&self) -> &Metadata {
64        &self.metadata
65    }
66
67    pub const fn categories(&self) -> &HashMap<String, Category> {
68        &self.categories
69    }
70
71    pub const fn base_path(&self) -> &PathBuf {
72        &self.base_path
73    }
74
75    pub fn is_html_page(&self, path: &str) -> bool {
76        if path == "/" {
77            return true;
78        }
79
80        self.content_sources
81            .values()
82            .filter(|source| source.enabled)
83            .filter_map(|source| source.sitemap.as_ref())
84            .filter(|sitemap| sitemap.enabled)
85            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
86    }
87
88    fn matches_url_pattern(pattern: &str, path: &str) -> bool {
89        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
90        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
91
92        if pattern_parts.len() != path_parts.len() {
93            return false;
94        }
95
96        pattern_parts
97            .iter()
98            .zip(path_parts.iter())
99            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
100    }
101
102    pub fn determine_source(&self, path: &str) -> String {
103        if path == "/" {
104            return SOURCE_WEB.to_string();
105        }
106
107        self.content_sources
108            .iter()
109            .filter(|(_, source)| source.enabled)
110            .find_map(|(name, source)| {
111                source.sitemap.as_ref().and_then(|sitemap| {
112                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
113                        .then(|| name.clone())
114                })
115            })
116            .unwrap_or_else(|| SOURCE_UNKNOWN.to_string())
117    }
118
119    pub fn resolve_slug(&self, path: &str) -> Option<String> {
120        self.content_sources
121            .values()
122            .filter(|source| source.enabled)
123            .filter_map(|source| source.sitemap.as_ref())
124            .filter(|sitemap| sitemap.enabled)
125            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
126    }
127}
128
/// Extracts the slug portion of `path` for a sitemap URL `pattern`.
///
/// The text of `pattern` before its first `{` is the literal prefix that
/// `path` must start with. The remainder of the path, with any query string
/// or fragment removed and trailing slashes trimmed, is the slug.
///
/// Returns `None` when `path` does not start with the prefix or when the
/// remaining slug is empty.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let rest = path.strip_prefix(prefix)?;
    // Cut the query string / fragment first: otherwise the slash in
    // `/blog/post/?x=1` is not at the end yet and would survive in the slug.
    let rest = rest.split(['?', '#']).next().unwrap_or(rest);
    let slug = rest.trim_end_matches('/');
    (!slug.is_empty()).then(|| slug.to_string())
}
136
137impl ContentRouting for ContentConfigValidated {
138    fn is_html_page(&self, path: &str) -> bool {
139        ContentConfigValidated::is_html_page(self, path)
140    }
141
142    fn determine_source(&self, path: &str) -> String {
143        ContentConfigValidated::determine_source(self, path)
144    }
145
146    fn resolve_slug(&self, path: &str) -> Option<String> {
147        ContentConfigValidated::resolve_slug(self, path)
148    }
149}
150
151fn validate_categories(
152    raw: &HashMap<String, Category>,
153    errors: &mut ContentConfigErrors,
154) -> HashMap<String, Category> {
155    let mut validated = HashMap::new();
156
157    for (id, cat) in raw {
158        if cat.name.is_empty() {
159            errors.push(ContentConfigError::Validation {
160                field: format!("categories.{id}.name"),
161                message: "Category name cannot be empty".to_string(),
162                suggestion: Some("Provide a non-empty name".to_string()),
163            });
164            continue;
165        }
166        validated.insert(id.clone(), cat.clone());
167    }
168
169    validated
170}
171
172fn validate_sources(
173    raw: &ContentConfigRaw,
174    categories: &HashMap<String, Category>,
175    base_path: &Path,
176    errors: &mut ContentConfigErrors,
177) -> HashMap<String, ContentSourceConfigValidated> {
178    let mut validated = HashMap::new();
179
180    for (name, source) in &raw.content_sources {
181        if let Some(validated_source) =
182            validate_single_source(name, source, categories, base_path, errors)
183        {
184            validated.insert(name.clone(), validated_source);
185        }
186    }
187
188    validated
189}
190
191fn validate_single_source(
192    name: &str,
193    source: &ContentSourceConfigRaw,
194    categories: &HashMap<String, Category>,
195    base_path: &Path,
196    errors: &mut ContentConfigErrors,
197) -> Option<ContentSourceConfigValidated> {
198    let field_prefix = format!("content_sources.{name}");
199
200    if source.path.is_empty() {
201        errors.push(ContentConfigError::Validation {
202            field: format!("{field_prefix}.path"),
203            message: "Source path is required".to_string(),
204            suggestion: Some("Add a path to the content directory".to_string()),
205        });
206        return None;
207    }
208
209    if source.source_id.as_str().is_empty() {
210        errors.push(ContentConfigError::Validation {
211            field: format!("{field_prefix}.source_id"),
212            message: "source_id is required".to_string(),
213            suggestion: Some("Add a unique source_id".to_string()),
214        });
215        return None;
216    }
217
218    if source.category_id.as_str().is_empty() {
219        errors.push(ContentConfigError::Validation {
220            field: format!("{field_prefix}.category_id"),
221            message: "category_id is required".to_string(),
222            suggestion: Some("Add a category_id that references a defined category".to_string()),
223        });
224        return None;
225    }
226
227    if !categories.contains_key(source.category_id.as_str()) {
228        errors.push(ContentConfigError::Validation {
229            field: format!("{field_prefix}.category_id"),
230            message: format!("Referenced category '{}' not found", source.category_id),
231            suggestion: Some("Add this category to the categories section".to_string()),
232        });
233    }
234
235    let resolved_path = if source.path.starts_with('/') {
236        PathBuf::from(&source.path)
237    } else {
238        base_path.join(&source.path)
239    };
240
241    let Ok(canonical_path) = std::fs::canonicalize(&resolved_path) else {
242        errors.push(ContentConfigError::Validation {
243            field: format!("{field_prefix}.path"),
244            message: "Content source directory does not exist".to_string(),
245            suggestion: Some("Create the directory or fix the path".to_string()),
246        });
247        return None;
248    };
249
250    Some(ContentSourceConfigValidated {
251        path: canonical_path,
252        source_id: source.source_id.clone(),
253        category_id: source.category_id.clone(),
254        enabled: source.enabled,
255        description: source.description.clone(),
256        allowed_content_types: source.allowed_content_types.clone(),
257        indexing: source.indexing.unwrap_or(IndexingConfig {
258            clear_before: false,
259            recursive: false,
260            override_existing: false,
261        }),
262        sitemap: source.sitemap.clone(),
263        branding: source.branding.clone(),
264    })
265}