systemprompt_content/config/
validated.rs1use std::collections::HashMap;
10use std::path::{Path, PathBuf};
11use systemprompt_identifiers::{CategoryId, SourceId};
12use systemprompt_models::{
13 Category, ContentConfigError, ContentConfigErrors, ContentConfigRaw, ContentRouting,
14 ContentSourceConfigRaw, IndexingConfig, Metadata, SitemapConfig, SourceBranding,
15};
16
/// Source name returned by `determine_source` for the root path `/`.
const SOURCE_WEB: &str = "web";
/// Source name returned by `determine_source` when no enabled sitemap pattern matches the path.
const SOURCE_UNKNOWN: &str = "unknown";
19
/// Content configuration that has passed validation via
/// `ContentConfigValidated::from_raw`.
///
/// Fields are private; read them through the accessor methods.
#[derive(Debug, Clone)]
pub struct ContentConfigValidated {
    /// Validated content sources, keyed by the source name used in the raw config.
    content_sources: HashMap<String, ContentSourceConfigValidated>,
    /// Metadata taken over unchanged from the raw config.
    metadata: Metadata,
    /// Categories that passed validation, keyed by category id.
    categories: HashMap<String, Category>,
    /// Base directory against which relative source paths were resolved.
    base_path: PathBuf,
}
27
/// A single content source after validation.
///
/// Apart from `path` and `indexing`, the fields are copied from the raw
/// source configuration.
#[derive(Debug, Clone)]
pub struct ContentSourceConfigValidated {
    /// Canonicalized filesystem path of the content source.
    pub path: PathBuf,
    /// Identifier of the source; validation rejects an empty value.
    pub source_id: SourceId,
    /// Identifier of the category the source refers to; validation rejects an empty value.
    pub category_id: CategoryId,
    /// Disabled sources are skipped by the routing helpers on `ContentConfigValidated`.
    pub enabled: bool,
    /// Description copied from the raw config.
    pub description: String,
    /// Allowed content types copied from the raw config.
    pub allowed_content_types: Vec<String>,
    /// Indexing options; all flags are `false` when the raw config does not provide them.
    pub indexing: IndexingConfig,
    /// Optional sitemap settings; its `url_pattern` drives the routing helpers.
    pub sitemap: Option<SitemapConfig>,
    /// Optional branding settings copied from the raw config.
    pub branding: Option<SourceBranding>,
}
40
/// Result of validating a raw content configuration: the validated config, or
/// the collected validation errors.
pub type ValidationResult = Result<ContentConfigValidated, ContentConfigErrors>;
42
43impl ContentConfigValidated {
44 pub fn from_raw(raw: ContentConfigRaw, base_path: PathBuf) -> ValidationResult {
45 let mut errors = ContentConfigErrors::new();
46
47 let categories = validate_categories(&raw.categories, &mut errors);
48 let content_sources = validate_sources(&raw, &categories, &base_path, &mut errors);
49
50 errors.into_result(Self {
51 content_sources,
52 metadata: raw.metadata,
53 categories,
54 base_path,
55 })
56 }
57
58 pub const fn content_sources(&self) -> &HashMap<String, ContentSourceConfigValidated> {
59 &self.content_sources
60 }
61
62 pub const fn metadata(&self) -> &Metadata {
63 &self.metadata
64 }
65
66 pub const fn categories(&self) -> &HashMap<String, Category> {
67 &self.categories
68 }
69
70 pub const fn base_path(&self) -> &PathBuf {
71 &self.base_path
72 }
73
74 pub fn is_html_page(&self, path: &str) -> bool {
75 if path == "/" {
76 return true;
77 }
78
79 self.content_sources
80 .values()
81 .filter(|source| source.enabled)
82 .filter_map(|source| source.sitemap.as_ref())
83 .filter(|sitemap| sitemap.enabled)
84 .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
85 }
86
87 fn matches_url_pattern(pattern: &str, path: &str) -> bool {
88 let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
89 let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
90
91 if pattern_parts.len() != path_parts.len() {
92 return false;
93 }
94
95 pattern_parts
96 .iter()
97 .zip(path_parts.iter())
98 .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
99 }
100
101 pub fn determine_source(&self, path: &str) -> String {
102 if path == "/" {
103 return SOURCE_WEB.to_owned();
104 }
105
106 self.content_sources
107 .iter()
108 .filter(|(_, source)| source.enabled)
109 .find_map(|(name, source)| {
110 source.sitemap.as_ref().and_then(|sitemap| {
111 (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
112 .then(|| name.clone())
113 })
114 })
115 .unwrap_or_else(|| SOURCE_UNKNOWN.to_owned())
116 }
117
118 pub fn resolve_slug(&self, path: &str) -> Option<String> {
119 self.content_sources
120 .values()
121 .filter(|source| source.enabled)
122 .filter_map(|source| source.sitemap.as_ref())
123 .filter(|sitemap| sitemap.enabled)
124 .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
125 }
126}
127
/// Extracts the slug part of `path` according to `pattern`.
///
/// The literal prefix of `pattern` (everything before the first `{`, or the
/// whole pattern when it has no placeholder) must be a prefix of `path`;
/// otherwise `None` is returned. From the remainder, the query string (`?…`)
/// and fragment (`#…`) are removed first and trailing slashes are trimmed
/// afterwards, so `/blog/foo/?x=1` yields `foo` rather than `foo/`.
///
/// Returns `None` when the resulting slug is empty.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let remainder = path.strip_prefix(prefix)?;

    // Cut at the first `?` or `#` BEFORE trimming slashes: a slash that
    // precedes the query/fragment is only "trailing" once those are gone.
    let slug = remainder
        .split(|c: char| matches!(c, '?' | '#'))
        .next()
        .unwrap_or(remainder)
        .trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_owned())
}
135
136impl ContentRouting for ContentConfigValidated {
137 fn is_html_page(&self, path: &str) -> bool {
138 ContentConfigValidated::is_html_page(self, path)
139 }
140
141 fn determine_source(&self, path: &str) -> String {
142 ContentConfigValidated::determine_source(self, path)
143 }
144
145 fn resolve_slug(&self, path: &str) -> Option<String> {
146 ContentConfigValidated::resolve_slug(self, path)
147 }
148}
149
150fn validate_categories(
151 raw: &HashMap<String, Category>,
152 errors: &mut ContentConfigErrors,
153) -> HashMap<String, Category> {
154 let mut validated = HashMap::new();
155
156 for (id, cat) in raw {
157 if cat.name.is_empty() {
158 errors.push(ContentConfigError::Validation {
159 field: format!("categories.{id}.name"),
160 message: "Category name cannot be empty".to_owned(),
161 suggestion: Some("Provide a non-empty name".to_owned()),
162 });
163 continue;
164 }
165 validated.insert(id.clone(), cat.clone());
166 }
167
168 validated
169}
170
171fn validate_sources(
172 raw: &ContentConfigRaw,
173 categories: &HashMap<String, Category>,
174 base_path: &Path,
175 errors: &mut ContentConfigErrors,
176) -> HashMap<String, ContentSourceConfigValidated> {
177 let mut validated = HashMap::new();
178
179 for (name, source) in &raw.content_sources {
180 if let Some(validated_source) =
181 validate_single_source(name, source, categories, base_path, errors)
182 {
183 validated.insert(name.clone(), validated_source);
184 }
185 }
186
187 validated
188}
189
190fn validate_single_source(
191 name: &str,
192 source: &ContentSourceConfigRaw,
193 categories: &HashMap<String, Category>,
194 base_path: &Path,
195 errors: &mut ContentConfigErrors,
196) -> Option<ContentSourceConfigValidated> {
197 let field_prefix = format!("content_sources.{name}");
198
199 if source.path.is_empty() {
200 errors.push(ContentConfigError::Validation {
201 field: format!("{field_prefix}.path"),
202 message: "Source path is required".to_owned(),
203 suggestion: Some("Add a path to the content directory".to_owned()),
204 });
205 return None;
206 }
207
208 if source.source_id.as_str().is_empty() {
209 errors.push(ContentConfigError::Validation {
210 field: format!("{field_prefix}.source_id"),
211 message: "source_id is required".to_owned(),
212 suggestion: Some("Add a unique source_id".to_owned()),
213 });
214 return None;
215 }
216
217 if source.category_id.as_str().is_empty() {
218 errors.push(ContentConfigError::Validation {
219 field: format!("{field_prefix}.category_id"),
220 message: "category_id is required".to_owned(),
221 suggestion: Some("Add a category_id that references a defined category".to_owned()),
222 });
223 return None;
224 }
225
226 if !categories.contains_key(source.category_id.as_str()) {
227 errors.push(ContentConfigError::Validation {
228 field: format!("{field_prefix}.category_id"),
229 message: format!("Referenced category '{}' not found", source.category_id),
230 suggestion: Some("Add this category to the categories section".to_owned()),
231 });
232 }
233
234 let resolved_path = if source.path.starts_with('/') {
235 PathBuf::from(&source.path)
236 } else {
237 base_path.join(&source.path)
238 };
239
240 let Ok(canonical_path) = std::fs::canonicalize(&resolved_path) else {
241 errors.push(ContentConfigError::Validation {
242 field: format!("{field_prefix}.path"),
243 message: "Content source directory does not exist".to_owned(),
244 suggestion: Some("Create the directory or fix the path".to_owned()),
245 });
246 return None;
247 };
248
249 Some(ContentSourceConfigValidated {
250 path: canonical_path,
251 source_id: source.source_id.clone(),
252 category_id: source.category_id.clone(),
253 enabled: source.enabled,
254 description: source.description.clone(),
255 allowed_content_types: source.allowed_content_types.clone(),
256 indexing: source.indexing.unwrap_or(IndexingConfig {
257 clear_before: false,
258 recursive: false,
259 override_existing: false,
260 }),
261 sitemap: source.sitemap.clone(),
262 branding: source.branding.clone(),
263 })
264}