Skip to main content

systemprompt_models/
content_config.rs

1//! `content_config` module — see crate-level docs for context.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6use std::sync::Arc;
7use systemprompt_identifiers::{CategoryId, SourceId};
8use thiserror::Error;
9
/// Path-based routing decisions for served content.
///
/// Implementors must be `Send + Sync`.
pub trait ContentRouting: Send + Sync {
    /// Reports whether `path` should be treated as an HTML page.
    fn is_html_page(&self, path: &str) -> bool;

    /// Returns the name of the content source that `path` is attributed to.
    fn determine_source(&self, path: &str) -> String;

    /// Attempts to derive a content slug from `path`.
    ///
    /// The provided implementation ignores `path` and always yields `None`;
    /// implementors override it when they can resolve slugs.
    fn resolve_slug(&self, _path: &str) -> Option<String> {
        None
    }
}
17
18impl<T: ContentRouting + ?Sized> ContentRouting for Arc<T> {
19    fn is_html_page(&self, path: &str) -> bool {
20        (**self).is_html_page(path)
21    }
22
23    fn determine_source(&self, path: &str) -> String {
24        (**self).determine_source(path)
25    }
26
27    fn resolve_slug(&self, path: &str) -> Option<String> {
28        (**self).resolve_slug(path)
29    }
30}
31
32#[derive(Debug, Clone, Error)]
33pub enum ContentConfigError {
34    #[error("IO error reading {path}: {message}")]
35    Io { path: PathBuf, message: String },
36
37    #[error("YAML parse error in {path}: {message}")]
38    Parse { path: PathBuf, message: String },
39
40    #[error("Validation error in {field}: {message}")]
41    Validation {
42        field: String,
43        message: String,
44        suggestion: Option<String>,
45    },
46}
47
48#[derive(Debug, Default)]
49pub struct ContentConfigErrors {
50    errors: Vec<ContentConfigError>,
51}
52
53impl ContentConfigErrors {
54    pub fn new() -> Self {
55        Self::default()
56    }
57
58    pub fn push(&mut self, error: ContentConfigError) {
59        self.errors.push(error);
60    }
61
62    pub fn is_empty(&self) -> bool {
63        self.errors.is_empty()
64    }
65
66    pub fn errors(&self) -> &[ContentConfigError] {
67        &self.errors
68    }
69
70    pub fn into_result<T>(self, value: T) -> Result<T, Self> {
71        if self.is_empty() {
72            Ok(value)
73        } else {
74            Err(self)
75        }
76    }
77}
78
79impl std::fmt::Display for ContentConfigErrors {
80    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81        for (i, error) in self.errors.iter().enumerate() {
82            if i > 0 {
83                writeln!(f)?;
84            }
85            write!(f, "  - {error}")?;
86        }
87        Ok(())
88    }
89}
90
91impl std::error::Error for ContentConfigErrors {}
92
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
94pub struct ContentConfigRaw {
95    #[serde(default)]
96    pub content_sources: HashMap<String, ContentSourceConfigRaw>,
97    #[serde(default)]
98    pub metadata: Metadata,
99    #[serde(default)]
100    pub categories: HashMap<String, Category>,
101}
102
103impl ContentConfigRaw {
104    pub fn matches_url_pattern(pattern: &str, path: &str) -> bool {
105        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
106        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
107
108        if pattern_parts.len() != path_parts.len() {
109            return false;
110        }
111
112        pattern_parts
113            .iter()
114            .zip(path_parts.iter())
115            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
116    }
117}
118
119impl ContentRouting for ContentConfigRaw {
120    fn is_html_page(&self, path: &str) -> bool {
121        if path == "/" {
122            return true;
123        }
124
125        let matches_sitemap = self
126            .content_sources
127            .values()
128            .filter(|source| source.enabled)
129            .filter_map(|source| source.sitemap.as_ref())
130            .filter(|sitemap| sitemap.enabled)
131            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path));
132
133        if matches_sitemap {
134            return true;
135        }
136
137        !path.contains('.')
138            && !path.starts_with("/api/")
139            && !path.starts_with("/track/")
140            && !path.starts_with("/.well-known/")
141    }
142
143    fn determine_source(&self, path: &str) -> String {
144        if path == "/" {
145            return "web".to_string();
146        }
147
148        self.content_sources
149            .iter()
150            .filter(|(_, source)| source.enabled)
151            .find_map(|(name, source)| {
152                source.sitemap.as_ref().and_then(|sitemap| {
153                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
154                        .then(|| name.clone())
155                })
156            })
157            .unwrap_or_else(|| "unknown".to_string())
158    }
159
160    fn resolve_slug(&self, path: &str) -> Option<String> {
161        self.content_sources
162            .values()
163            .filter(|source| source.enabled)
164            .filter_map(|source| source.sitemap.as_ref())
165            .filter(|sitemap| sitemap.enabled)
166            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
167    }
168}
169
/// Extracts the slug portion of `path` for a sitemap URL `pattern`.
///
/// The literal prefix of `pattern` (everything before its first `{`) must be
/// a prefix of `path`; otherwise `None` is returned. The slug is the rest of
/// `path` up to the first `?` or `#`, with trailing `/` characters removed.
/// An empty slug yields `None`.
///
/// The query/fragment is cut off *before* trailing slashes are trimmed, so
/// `/blog/post/?x=1` resolves to `post` rather than `post/`.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let remainder = path.strip_prefix(prefix)?;

    // Keep only the part ahead of any query string or fragment.
    let without_suffix = remainder
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(remainder);

    let slug = without_suffix.trim_end_matches('/');
    (!slug.is_empty()).then(|| slug.to_string())
}
177
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct ContentSourceConfigRaw {
180    pub path: String,
181    pub source_id: SourceId,
182    pub category_id: CategoryId,
183    pub enabled: bool,
184    #[serde(default)]
185    pub description: String,
186    #[serde(default)]
187    pub allowed_content_types: Vec<String>,
188    #[serde(default)]
189    pub indexing: Option<IndexingConfig>,
190    #[serde(default)]
191    pub sitemap: Option<SitemapConfig>,
192    #[serde(default)]
193    pub branding: Option<SourceBranding>,
194}
195
196#[derive(Debug, Clone, Serialize, Deserialize, Default)]
197pub struct SourceBranding {
198    #[serde(default)]
199    pub name: Option<String>,
200    #[serde(default)]
201    pub description: Option<String>,
202    #[serde(default)]
203    pub image: Option<String>,
204    #[serde(default)]
205    pub keywords: Option<String>,
206}
207
208#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
209pub struct IndexingConfig {
210    #[serde(default)]
211    pub clear_before: bool,
212    #[serde(default)]
213    pub recursive: bool,
214    #[serde(default)]
215    pub override_existing: bool,
216}
217
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct SitemapConfig {
220    pub enabled: bool,
221    pub url_pattern: String,
222    pub priority: f32,
223    pub changefreq: String,
224    #[serde(default)]
225    pub fetch_from: String,
226    #[serde(default)]
227    pub parent_route: Option<ParentRoute>,
228}
229
230#[derive(Debug, Clone, Serialize, Deserialize)]
231pub struct ParentRoute {
232    pub enabled: bool,
233    pub url: String,
234    pub priority: f32,
235    pub changefreq: String,
236}
237
238#[derive(Debug, Clone, Serialize, Deserialize, Default)]
239pub struct Metadata {
240    #[serde(default)]
241    pub default_author: String,
242    #[serde(default)]
243    pub language: String,
244    #[serde(default)]
245    pub structured_data: StructuredData,
246}
247
248#[derive(Debug, Clone, Serialize, Deserialize, Default)]
249pub struct StructuredData {
250    #[serde(default)]
251    pub organization: OrganizationData,
252    #[serde(default)]
253    pub article: ArticleDefaults,
254}
255
256#[derive(Debug, Clone, Serialize, Deserialize, Default)]
257pub struct OrganizationData {
258    #[serde(default)]
259    pub name: String,
260    #[serde(default)]
261    pub url: String,
262    #[serde(default)]
263    pub logo: String,
264}
265
266#[derive(Debug, Clone, Serialize, Deserialize, Default)]
267pub struct ArticleDefaults {
268    #[serde(default, rename = "type")]
269    pub article_type: String,
270    #[serde(default)]
271    pub article_section: String,
272    #[serde(default)]
273    pub language: String,
274}
275
276#[derive(Debug, Clone, Serialize, Deserialize, Default)]
277pub struct Category {
278    #[serde(default)]
279    pub name: String,
280    #[serde(default)]
281    pub description: String,
282}