Skip to main content

systemprompt_content/config/
validated.rs

//! Validated content configuration.
//!
//! [`ContentConfigValidated`] is the verified form of the raw config: category
//! references are resolved, source paths are canonicalised against the base
//! path, and the [`systemprompt_models::ContentRouting`] implementation answers
//! URL routing questions (HTML-page detection, source resolution, slug
//! extraction) from each source's sitemap pattern.
8
9use std::collections::HashMap;
10use std::path::{Path, PathBuf};
11use systemprompt_identifiers::{CategoryId, SourceId};
12use systemprompt_models::{
13    Category, ContentConfigError, ContentConfigErrors, ContentConfigRaw, ContentRouting,
14    ContentSourceConfigRaw, IndexingConfig, Metadata, SitemapConfig, SourceBranding,
15};
16
/// Source name reported by `determine_source` for the site root (`"/"`).
const SOURCE_WEB: &str = "web";
/// Source name reported by `determine_source` when no enabled sitemap pattern
/// matches the requested path.
const SOURCE_UNKNOWN: &str = "unknown";
19
20#[derive(Debug, Clone)]
21pub struct ContentConfigValidated {
22    content_sources: HashMap<String, ContentSourceConfigValidated>,
23    metadata: Metadata,
24    categories: HashMap<String, Category>,
25    base_path: PathBuf,
26}
27
28#[derive(Debug, Clone)]
29pub struct ContentSourceConfigValidated {
30    pub path: PathBuf,
31    pub source_id: SourceId,
32    pub category_id: CategoryId,
33    pub enabled: bool,
34    pub description: String,
35    pub allowed_content_types: Vec<String>,
36    pub indexing: IndexingConfig,
37    pub sitemap: Option<SitemapConfig>,
38    pub branding: Option<SourceBranding>,
39}
40
41pub type ValidationResult = Result<ContentConfigValidated, ContentConfigErrors>;
42
43impl ContentConfigValidated {
44    pub fn from_raw(raw: ContentConfigRaw, base_path: PathBuf) -> ValidationResult {
45        let mut errors = ContentConfigErrors::new();
46
47        let categories = validate_categories(&raw.categories, &mut errors);
48        let content_sources = validate_sources(&raw, &categories, &base_path, &mut errors);
49
50        errors.into_result(Self {
51            content_sources,
52            metadata: raw.metadata,
53            categories,
54            base_path,
55        })
56    }
57
58    pub const fn content_sources(&self) -> &HashMap<String, ContentSourceConfigValidated> {
59        &self.content_sources
60    }
61
62    pub const fn metadata(&self) -> &Metadata {
63        &self.metadata
64    }
65
66    pub const fn categories(&self) -> &HashMap<String, Category> {
67        &self.categories
68    }
69
70    pub const fn base_path(&self) -> &PathBuf {
71        &self.base_path
72    }
73
74    pub fn is_html_page(&self, path: &str) -> bool {
75        if path == "/" {
76            return true;
77        }
78
79        self.content_sources
80            .values()
81            .filter(|source| source.enabled)
82            .filter_map(|source| source.sitemap.as_ref())
83            .filter(|sitemap| sitemap.enabled)
84            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
85    }
86
87    fn matches_url_pattern(pattern: &str, path: &str) -> bool {
88        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
89        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
90
91        if pattern_parts.len() != path_parts.len() {
92            return false;
93        }
94
95        pattern_parts
96            .iter()
97            .zip(path_parts.iter())
98            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
99    }
100
101    pub fn determine_source(&self, path: &str) -> String {
102        if path == "/" {
103            return SOURCE_WEB.to_owned();
104        }
105
106        self.content_sources
107            .iter()
108            .filter(|(_, source)| source.enabled)
109            .find_map(|(name, source)| {
110                source.sitemap.as_ref().and_then(|sitemap| {
111                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
112                        .then(|| name.clone())
113                })
114            })
115            .unwrap_or_else(|| SOURCE_UNKNOWN.to_owned())
116    }
117
118    pub fn resolve_slug(&self, path: &str) -> Option<String> {
119        self.content_sources
120            .values()
121            .filter(|source| source.enabled)
122            .filter_map(|source| source.sitemap.as_ref())
123            .filter(|sitemap| sitemap.enabled)
124            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
125    }
126}
127
/// Extracts the slug portion of `path` according to `pattern`.
///
/// The literal text of `pattern` before its first `{` is treated as a prefix
/// that `path` must start with. What follows the prefix, minus any query
/// string (`?...`), fragment (`#...`) and trailing slashes, is the slug.
///
/// Returns `None` when `path` does not start with the prefix or the resulting
/// slug is empty.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let remainder = path.strip_prefix(prefix)?;

    // Cut the query string / fragment *before* trimming slashes; otherwise a
    // path like `/blog/foo/?x=1` keeps its trailing slash and yields `foo/`.
    let without_suffix = remainder.split(['?', '#']).next().unwrap_or(remainder);
    let slug = without_suffix.trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_owned())
}
135
136impl ContentRouting for ContentConfigValidated {
137    fn is_html_page(&self, path: &str) -> bool {
138        ContentConfigValidated::is_html_page(self, path)
139    }
140
141    fn determine_source(&self, path: &str) -> String {
142        ContentConfigValidated::determine_source(self, path)
143    }
144
145    fn resolve_slug(&self, path: &str) -> Option<String> {
146        ContentConfigValidated::resolve_slug(self, path)
147    }
148}
149
150fn validate_categories(
151    raw: &HashMap<String, Category>,
152    errors: &mut ContentConfigErrors,
153) -> HashMap<String, Category> {
154    let mut validated = HashMap::new();
155
156    for (id, cat) in raw {
157        if cat.name.is_empty() {
158            errors.push(ContentConfigError::Validation {
159                field: format!("categories.{id}.name"),
160                message: "Category name cannot be empty".to_owned(),
161                suggestion: Some("Provide a non-empty name".to_owned()),
162            });
163            continue;
164        }
165        validated.insert(id.clone(), cat.clone());
166    }
167
168    validated
169}
170
171fn validate_sources(
172    raw: &ContentConfigRaw,
173    categories: &HashMap<String, Category>,
174    base_path: &Path,
175    errors: &mut ContentConfigErrors,
176) -> HashMap<String, ContentSourceConfigValidated> {
177    let mut validated = HashMap::new();
178
179    for (name, source) in &raw.content_sources {
180        if let Some(validated_source) =
181            validate_single_source(name, source, categories, base_path, errors)
182        {
183            validated.insert(name.clone(), validated_source);
184        }
185    }
186
187    validated
188}
189
190fn validate_single_source(
191    name: &str,
192    source: &ContentSourceConfigRaw,
193    categories: &HashMap<String, Category>,
194    base_path: &Path,
195    errors: &mut ContentConfigErrors,
196) -> Option<ContentSourceConfigValidated> {
197    let field_prefix = format!("content_sources.{name}");
198
199    if source.path.is_empty() {
200        errors.push(ContentConfigError::Validation {
201            field: format!("{field_prefix}.path"),
202            message: "Source path is required".to_owned(),
203            suggestion: Some("Add a path to the content directory".to_owned()),
204        });
205        return None;
206    }
207
208    if source.source_id.as_str().is_empty() {
209        errors.push(ContentConfigError::Validation {
210            field: format!("{field_prefix}.source_id"),
211            message: "source_id is required".to_owned(),
212            suggestion: Some("Add a unique source_id".to_owned()),
213        });
214        return None;
215    }
216
217    if source.category_id.as_str().is_empty() {
218        errors.push(ContentConfigError::Validation {
219            field: format!("{field_prefix}.category_id"),
220            message: "category_id is required".to_owned(),
221            suggestion: Some("Add a category_id that references a defined category".to_owned()),
222        });
223        return None;
224    }
225
226    if !categories.contains_key(source.category_id.as_str()) {
227        errors.push(ContentConfigError::Validation {
228            field: format!("{field_prefix}.category_id"),
229            message: format!("Referenced category '{}' not found", source.category_id),
230            suggestion: Some("Add this category to the categories section".to_owned()),
231        });
232    }
233
234    let resolved_path = if source.path.starts_with('/') {
235        PathBuf::from(&source.path)
236    } else {
237        base_path.join(&source.path)
238    };
239
240    let Ok(canonical_path) = std::fs::canonicalize(&resolved_path) else {
241        errors.push(ContentConfigError::Validation {
242            field: format!("{field_prefix}.path"),
243            message: "Content source directory does not exist".to_owned(),
244            suggestion: Some("Create the directory or fix the path".to_owned()),
245        });
246        return None;
247    };
248
249    Some(ContentSourceConfigValidated {
250        path: canonical_path,
251        source_id: source.source_id.clone(),
252        category_id: source.category_id.clone(),
253        enabled: source.enabled,
254        description: source.description.clone(),
255        allowed_content_types: source.allowed_content_types.clone(),
256        indexing: source.indexing.unwrap_or(IndexingConfig {
257            clear_before: false,
258            recursive: false,
259            override_existing: false,
260        }),
261        sitemap: source.sitemap.clone(),
262        branding: source.branding.clone(),
263    })
264}