systemprompt_content/config/
validated.rs1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use systemprompt_identifiers::{CategoryId, SourceId};
4use systemprompt_models::{
5 Category, ContentConfigError, ContentConfigErrors, ContentConfigRaw, ContentRouting,
6 ContentSourceConfigRaw, IndexingConfig, Metadata, SitemapConfig, SourceBranding,
7};
8
/// Source name reported by `determine_source` for the site root path (`/`).
const SOURCE_WEB: &str = "web";
/// Source name reported by `determine_source` when no enabled source's
/// sitemap pattern matches the requested path.
const SOURCE_UNKNOWN: &str = "unknown";
11
12#[derive(Debug, Clone)]
13pub struct ContentConfigValidated {
14 content_sources: HashMap<String, ContentSourceConfigValidated>,
15 metadata: Metadata,
16 categories: HashMap<String, Category>,
17 base_path: PathBuf,
18}
19
20#[derive(Debug, Clone)]
21pub struct ContentSourceConfigValidated {
22 pub path: PathBuf,
23 pub source_id: SourceId,
24 pub category_id: CategoryId,
25 pub enabled: bool,
26 pub description: String,
27 pub allowed_content_types: Vec<String>,
28 pub indexing: IndexingConfig,
29 pub sitemap: Option<SitemapConfig>,
30 pub branding: Option<SourceBranding>,
31}
32
33pub type ValidationResult = Result<ContentConfigValidated, ContentConfigErrors>;
34
35impl ContentConfigValidated {
36 pub fn from_raw(raw: ContentConfigRaw, base_path: PathBuf) -> ValidationResult {
37 let mut errors = ContentConfigErrors::new();
38
39 let categories = validate_categories(&raw.categories, &mut errors);
40 let content_sources = validate_sources(&raw, &categories, &base_path, &mut errors);
41
42 errors.into_result(Self {
43 content_sources,
44 metadata: raw.metadata,
45 categories,
46 base_path,
47 })
48 }
49
50 pub const fn content_sources(&self) -> &HashMap<String, ContentSourceConfigValidated> {
51 &self.content_sources
52 }
53
54 pub const fn metadata(&self) -> &Metadata {
55 &self.metadata
56 }
57
58 pub const fn categories(&self) -> &HashMap<String, Category> {
59 &self.categories
60 }
61
62 pub const fn base_path(&self) -> &PathBuf {
63 &self.base_path
64 }
65
66 pub fn is_html_page(&self, path: &str) -> bool {
67 if path == "/" {
68 return true;
69 }
70
71 self.content_sources
72 .values()
73 .filter(|source| source.enabled)
74 .filter_map(|source| source.sitemap.as_ref())
75 .filter(|sitemap| sitemap.enabled)
76 .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
77 }
78
79 fn matches_url_pattern(pattern: &str, path: &str) -> bool {
80 let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
81 let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
82
83 if pattern_parts.len() != path_parts.len() {
84 return false;
85 }
86
87 pattern_parts
88 .iter()
89 .zip(path_parts.iter())
90 .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
91 }
92
93 pub fn determine_source(&self, path: &str) -> String {
94 if path == "/" {
95 return SOURCE_WEB.to_string();
96 }
97
98 self.content_sources
99 .iter()
100 .filter(|(_, source)| source.enabled)
101 .find_map(|(name, source)| {
102 source.sitemap.as_ref().and_then(|sitemap| {
103 (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
104 .then(|| name.clone())
105 })
106 })
107 .unwrap_or_else(|| SOURCE_UNKNOWN.to_string())
108 }
109
110 pub fn resolve_slug(&self, path: &str) -> Option<String> {
111 self.content_sources
112 .values()
113 .filter(|source| source.enabled)
114 .filter_map(|source| source.sitemap.as_ref())
115 .filter(|sitemap| sitemap.enabled)
116 .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
117 }
118}
119
/// Extracts the slug portion of `path` for a URL `pattern` such as
/// `/blog/{slug}`.
///
/// The literal text of `pattern` before its first `{` is treated as a prefix
/// that `path` must start with. The remainder of `path` is cut at the first
/// `?` or `#` (query string / fragment), and only then are trailing `/`
/// characters removed, so `/blog/post/?x=1` yields `post` rather than `post/`.
///
/// Returns `None` when `path` does not start with the prefix or when nothing
/// is left after stripping.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let remainder = path.strip_prefix(prefix)?;

    // Drop the query string and fragment first; trimming the trailing slash
    // before this step would miss a slash that sits right before `?` or `#`.
    let without_suffix = remainder
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(remainder);
    let slug = without_suffix.trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_string())
}
127
128impl ContentRouting for ContentConfigValidated {
129 fn is_html_page(&self, path: &str) -> bool {
130 ContentConfigValidated::is_html_page(self, path)
131 }
132
133 fn determine_source(&self, path: &str) -> String {
134 ContentConfigValidated::determine_source(self, path)
135 }
136
137 fn resolve_slug(&self, path: &str) -> Option<String> {
138 ContentConfigValidated::resolve_slug(self, path)
139 }
140}
141
142fn validate_categories(
143 raw: &HashMap<String, Category>,
144 errors: &mut ContentConfigErrors,
145) -> HashMap<String, Category> {
146 let mut validated = HashMap::new();
147
148 for (id, cat) in raw {
149 if cat.name.is_empty() {
150 errors.push(ContentConfigError::Validation {
151 field: format!("categories.{id}.name"),
152 message: "Category name cannot be empty".to_string(),
153 suggestion: Some("Provide a non-empty name".to_string()),
154 });
155 continue;
156 }
157 validated.insert(id.clone(), cat.clone());
158 }
159
160 validated
161}
162
163fn validate_sources(
164 raw: &ContentConfigRaw,
165 categories: &HashMap<String, Category>,
166 base_path: &Path,
167 errors: &mut ContentConfigErrors,
168) -> HashMap<String, ContentSourceConfigValidated> {
169 let mut validated = HashMap::new();
170
171 for (name, source) in &raw.content_sources {
172 if let Some(validated_source) =
173 validate_single_source(name, source, categories, base_path, errors)
174 {
175 validated.insert(name.clone(), validated_source);
176 }
177 }
178
179 validated
180}
181
182fn validate_single_source(
183 name: &str,
184 source: &ContentSourceConfigRaw,
185 categories: &HashMap<String, Category>,
186 base_path: &Path,
187 errors: &mut ContentConfigErrors,
188) -> Option<ContentSourceConfigValidated> {
189 let field_prefix = format!("content_sources.{name}");
190
191 if source.path.is_empty() {
192 errors.push(ContentConfigError::Validation {
193 field: format!("{field_prefix}.path"),
194 message: "Source path is required".to_string(),
195 suggestion: Some("Add a path to the content directory".to_string()),
196 });
197 return None;
198 }
199
200 if source.source_id.as_str().is_empty() {
201 errors.push(ContentConfigError::Validation {
202 field: format!("{field_prefix}.source_id"),
203 message: "source_id is required".to_string(),
204 suggestion: Some("Add a unique source_id".to_string()),
205 });
206 return None;
207 }
208
209 if source.category_id.as_str().is_empty() {
210 errors.push(ContentConfigError::Validation {
211 field: format!("{field_prefix}.category_id"),
212 message: "category_id is required".to_string(),
213 suggestion: Some("Add a category_id that references a defined category".to_string()),
214 });
215 return None;
216 }
217
218 if !categories.contains_key(source.category_id.as_str()) {
219 errors.push(ContentConfigError::Validation {
220 field: format!("{field_prefix}.category_id"),
221 message: format!("Referenced category '{}' not found", source.category_id),
222 suggestion: Some("Add this category to the categories section".to_string()),
223 });
224 }
225
226 let resolved_path = if source.path.starts_with('/') {
227 PathBuf::from(&source.path)
228 } else {
229 base_path.join(&source.path)
230 };
231
232 let Ok(canonical_path) = std::fs::canonicalize(&resolved_path) else {
233 errors.push(ContentConfigError::Validation {
234 field: format!("{field_prefix}.path"),
235 message: "Content source directory does not exist".to_string(),
236 suggestion: Some("Create the directory or fix the path".to_string()),
237 });
238 return None;
239 };
240
241 Some(ContentSourceConfigValidated {
242 path: canonical_path,
243 source_id: source.source_id.clone(),
244 category_id: source.category_id.clone(),
245 enabled: source.enabled,
246 description: source.description.clone(),
247 allowed_content_types: source.allowed_content_types.clone(),
248 indexing: source.indexing.unwrap_or(IndexingConfig {
249 clear_before: false,
250 recursive: false,
251 override_existing: false,
252 }),
253 sitemap: source.sitemap.clone(),
254 branding: source.branding.clone(),
255 })
256}