Skip to main content

systemprompt_models/
content_config.rs

1//! Content-configuration model and routing trait.
2//!
3//! The deserialized `content.yaml` shape — sources, categories,
4//! organization metadata, sitemap and structured-data settings — plus
5//! the [`ContentRouting`] trait that maps request paths to content
6//! sources. Parsing returns [`ContentConfigError`].
7
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::Arc;
12use systemprompt_identifiers::{CategoryId, SourceId};
13use thiserror::Error;
14
/// Maps request paths onto content sources.
///
/// Implementors must be `Send + Sync`, so a router can be shared between
/// threads.
pub trait ContentRouting: Send + Sync {
    /// Reports whether `path` should be treated as an HTML page.
    fn is_html_page(&self, path: &str) -> bool;

    /// Returns the name of the content source responsible for `path`.
    fn determine_source(&self, path: &str) -> String;

    /// Extracts the content slug addressed by `path`.
    ///
    /// The provided implementation ignores its argument and always yields
    /// `None`; implementors that can resolve slugs override it.
    fn resolve_slug(&self, _path: &str) -> Option<String> {
        Option::None
    }
}
22
23impl<T: ContentRouting + ?Sized> ContentRouting for Arc<T> {
24    fn is_html_page(&self, path: &str) -> bool {
25        (**self).is_html_page(path)
26    }
27
28    fn determine_source(&self, path: &str) -> String {
29        (**self).determine_source(path)
30    }
31
32    fn resolve_slug(&self, path: &str) -> Option<String> {
33        (**self).resolve_slug(path)
34    }
35}
36
37#[derive(Debug, Clone, Error)]
38pub enum ContentConfigError {
39    #[error("IO error reading {path}: {message}")]
40    Io { path: PathBuf, message: String },
41
42    #[error("YAML parse error in {path}: {message}")]
43    Parse { path: PathBuf, message: String },
44
45    #[error("Validation error in {field}: {message}")]
46    Validation {
47        field: String,
48        message: String,
49        suggestion: Option<String>,
50    },
51}
52
53#[derive(Debug, Default)]
54pub struct ContentConfigErrors {
55    errors: Vec<ContentConfigError>,
56}
57
58impl ContentConfigErrors {
59    pub fn new() -> Self {
60        Self::default()
61    }
62
63    pub fn push(&mut self, error: ContentConfigError) {
64        self.errors.push(error);
65    }
66
67    pub fn is_empty(&self) -> bool {
68        self.errors.is_empty()
69    }
70
71    pub fn errors(&self) -> &[ContentConfigError] {
72        &self.errors
73    }
74
75    pub fn into_result<T>(self, value: T) -> Result<T, Self> {
76        if self.is_empty() {
77            Ok(value)
78        } else {
79            Err(self)
80        }
81    }
82}
83
84impl std::fmt::Display for ContentConfigErrors {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        for (i, error) in self.errors.iter().enumerate() {
87            if i > 0 {
88                writeln!(f)?;
89            }
90            write!(f, "  - {error}")?;
91        }
92        Ok(())
93    }
94}
95
96impl std::error::Error for ContentConfigErrors {}
97
98#[derive(Debug, Clone, Default, Serialize, Deserialize)]
99pub struct ContentConfigRaw {
100    #[serde(default)]
101    pub content_sources: HashMap<String, ContentSourceConfigRaw>,
102    #[serde(default)]
103    pub metadata: Metadata,
104    #[serde(default)]
105    pub categories: HashMap<String, Category>,
106}
107
108impl ContentConfigRaw {
109    pub fn matches_url_pattern(pattern: &str, path: &str) -> bool {
110        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
111        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
112
113        if pattern_parts.len() != path_parts.len() {
114            return false;
115        }
116
117        pattern_parts
118            .iter()
119            .zip(path_parts.iter())
120            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
121    }
122}
123
124impl ContentRouting for ContentConfigRaw {
125    fn is_html_page(&self, path: &str) -> bool {
126        if path == "/" {
127            return true;
128        }
129
130        let matches_sitemap = self
131            .content_sources
132            .values()
133            .filter(|source| source.enabled)
134            .filter_map(|source| source.sitemap.as_ref())
135            .filter(|sitemap| sitemap.enabled)
136            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path));
137
138        if matches_sitemap {
139            return true;
140        }
141
142        !path.contains('.')
143            && !path.starts_with("/api/")
144            && !path.starts_with("/track/")
145            && !path.starts_with("/.well-known/")
146    }
147
148    fn determine_source(&self, path: &str) -> String {
149        if path == "/" {
150            return "web".to_string();
151        }
152
153        self.content_sources
154            .iter()
155            .filter(|(_, source)| source.enabled)
156            .find_map(|(name, source)| {
157                source.sitemap.as_ref().and_then(|sitemap| {
158                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
159                        .then(|| name.clone())
160                })
161            })
162            .unwrap_or_else(|| "unknown".to_string())
163    }
164
165    fn resolve_slug(&self, path: &str) -> Option<String> {
166        self.content_sources
167            .values()
168            .filter(|source| source.enabled)
169            .filter_map(|source| source.sitemap.as_ref())
170            .filter(|sitemap| sitemap.enabled)
171            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
172    }
173}
174
/// Extracts the slug portion of `path` according to `pattern`.
///
/// The text of `pattern` before its first `{` is treated as a literal
/// prefix (a pattern with no `{` is used as the prefix in its entirety).
/// If `path` starts with that prefix, the remainder — with any `?query` or
/// `#fragment` removed and trailing `/` characters trimmed — is the slug.
///
/// Returns `None` when `path` does not start with the prefix or when the
/// resulting slug is empty.
///
/// The query/fragment is removed *before* trailing slashes are trimmed, so
/// `/blog/post/?ref=x` resolves to `post` rather than `post/`.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    // `split` always yields at least one piece, so this never falls back in
    // practice; the fallback just avoids an unreachable early return.
    let prefix = pattern.split('{').next().unwrap_or(pattern);
    let remainder = path.strip_prefix(prefix)?;

    // Drop everything from the first `?` or `#` onwards.
    let without_suffix = remainder
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(remainder);

    let slug = without_suffix.trim_end_matches('/');
    (!slug.is_empty()).then(|| slug.to_string())
}
182
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct ContentSourceConfigRaw {
185    pub path: String,
186    pub source_id: SourceId,
187    pub category_id: CategoryId,
188    pub enabled: bool,
189    #[serde(default)]
190    pub description: String,
191    #[serde(default)]
192    pub allowed_content_types: Vec<String>,
193    #[serde(default)]
194    pub indexing: Option<IndexingConfig>,
195    #[serde(default)]
196    pub sitemap: Option<SitemapConfig>,
197    #[serde(default)]
198    pub branding: Option<SourceBranding>,
199}
200
201#[derive(Debug, Clone, Serialize, Deserialize, Default)]
202pub struct SourceBranding {
203    #[serde(default)]
204    pub name: Option<String>,
205    #[serde(default)]
206    pub description: Option<String>,
207    #[serde(default)]
208    pub image: Option<String>,
209    #[serde(default)]
210    pub keywords: Option<String>,
211}
212
213#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
214pub struct IndexingConfig {
215    #[serde(default)]
216    pub clear_before: bool,
217    #[serde(default)]
218    pub recursive: bool,
219    #[serde(default)]
220    pub override_existing: bool,
221}
222
223#[derive(Debug, Clone, Serialize, Deserialize)]
224pub struct SitemapConfig {
225    pub enabled: bool,
226    pub url_pattern: String,
227    pub priority: f32,
228    pub changefreq: String,
229    #[serde(default)]
230    pub fetch_from: String,
231    #[serde(default)]
232    pub parent_route: Option<ParentRoute>,
233}
234
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct ParentRoute {
237    pub enabled: bool,
238    pub url: String,
239    pub priority: f32,
240    pub changefreq: String,
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize, Default)]
244pub struct Metadata {
245    #[serde(default)]
246    pub default_author: String,
247    #[serde(default)]
248    pub structured_data: StructuredData,
249}
250
251#[derive(Debug, Clone, Serialize, Deserialize, Default)]
252pub struct StructuredData {
253    #[serde(default)]
254    pub organization: OrganizationData,
255    #[serde(default)]
256    pub article: ArticleDefaults,
257}
258
259#[derive(Debug, Clone, Serialize, Deserialize, Default)]
260pub struct OrganizationData {
261    #[serde(default)]
262    pub name: String,
263    #[serde(default)]
264    pub url: String,
265    #[serde(default)]
266    pub logo: String,
267}
268
269#[derive(Debug, Clone, Serialize, Deserialize, Default)]
270pub struct ArticleDefaults {
271    #[serde(default, rename = "type")]
272    pub article_type: String,
273    #[serde(default)]
274    pub article_section: String,
275}
276
277#[derive(Debug, Clone, Serialize, Deserialize, Default)]
278pub struct Category {
279    #[serde(default)]
280    pub name: String,
281    #[serde(default)]
282    pub description: String,
283}