Skip to main content

systemprompt_models/
content_config.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::sync::Arc;
5use systemprompt_identifiers::{CategoryId, SourceId};
6use thiserror::Error;
7
/// Routing decisions that can be answered for a request path.
///
/// Implementors must be `Send + Sync`.
pub trait ContentRouting: Send + Sync {
    /// Reports whether `path` should be treated as an HTML page.
    fn is_html_page(&self, path: &str) -> bool;
    /// Returns the name of the content source associated with `path`.
    fn determine_source(&self, path: &str) -> String;
    /// Extracts a slug from `path`, when the implementor supports it.
    ///
    /// The default implementation ignores `path` and always returns `None`.
    fn resolve_slug(&self, _path: &str) -> Option<String> {
        None
    }
}
15
16impl<T: ContentRouting + ?Sized> ContentRouting for Arc<T> {
17    fn is_html_page(&self, path: &str) -> bool {
18        (**self).is_html_page(path)
19    }
20
21    fn determine_source(&self, path: &str) -> String {
22        (**self).determine_source(path)
23    }
24
25    fn resolve_slug(&self, path: &str) -> Option<String> {
26        (**self).resolve_slug(path)
27    }
28}
29
/// A single problem found while loading or validating content configuration.
///
/// Each variant carries its details as owned `PathBuf`/`String` values; the
/// `#[error(...)]` attributes define the rendered message.
#[derive(Debug, Clone, Error)]
pub enum ContentConfigError {
    /// Reading the file at `path` failed; `message` holds the failure text.
    #[error("IO error reading {path}: {message}")]
    Io { path: PathBuf, message: String },

    /// The file at `path` could not be parsed as YAML; `message` holds the parser text.
    #[error("YAML parse error in {path}: {message}")]
    Parse { path: PathBuf, message: String },

    /// A configuration value identified by `field` failed validation.
    ///
    /// Note that `suggestion` is carried alongside the error but is not part
    /// of the rendered message; callers must read it explicitly.
    #[error("Validation error in {field}: {message}")]
    Validation {
        field: String,
        message: String,
        suggestion: Option<String>,
    },
}
45
/// An ordered collection of [`ContentConfigError`] values.
///
/// The default value is an empty collection.
#[derive(Debug, Default)]
pub struct ContentConfigErrors {
    // Errors in insertion order; exposed read-only through `errors()`.
    errors: Vec<ContentConfigError>,
}
50
51impl ContentConfigErrors {
52    pub fn new() -> Self {
53        Self::default()
54    }
55
56    pub fn push(&mut self, error: ContentConfigError) {
57        self.errors.push(error);
58    }
59
60    pub fn is_empty(&self) -> bool {
61        self.errors.is_empty()
62    }
63
64    pub fn errors(&self) -> &[ContentConfigError] {
65        &self.errors
66    }
67
68    pub fn into_result<T>(self, value: T) -> Result<T, Self> {
69        if self.is_empty() {
70            Ok(value)
71        } else {
72            Err(self)
73        }
74    }
75}
76
77impl std::fmt::Display for ContentConfigErrors {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        for (i, error) in self.errors.iter().enumerate() {
80            if i > 0 {
81                writeln!(f)?;
82            }
83            write!(f, "  - {error}")?;
84        }
85        Ok(())
86    }
87}
88
// Marks the collection as a standard error type, building on the `Debug`
// derive and the `Display` impl in this file. No `source()` override is
// provided, so the default applies.
impl std::error::Error for ContentConfigErrors {}
90
/// Raw, (de)serializable content configuration.
///
/// Every field is marked `#[serde(default)]`, so an input that omits all of
/// them still deserializes into empty maps and default metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentConfigRaw {
    /// Content sources keyed by name; the key is what `determine_source`
    /// returns for a matching path.
    #[serde(default)]
    pub content_sources: HashMap<String, ContentSourceConfigRaw>,
    /// Metadata defaults (see [`Metadata`]).
    #[serde(default)]
    pub metadata: Metadata,
    /// Categories keyed by a string identifier.
    // NOTE(review): presumably the keys correspond to `CategoryId` values — confirm.
    #[serde(default)]
    pub categories: HashMap<String, Category>,
}
100
101impl ContentConfigRaw {
102    pub fn matches_url_pattern(pattern: &str, path: &str) -> bool {
103        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
104        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
105
106        if pattern_parts.len() != path_parts.len() {
107            return false;
108        }
109
110        pattern_parts
111            .iter()
112            .zip(path_parts.iter())
113            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
114    }
115}
116
117impl ContentRouting for ContentConfigRaw {
118    fn is_html_page(&self, path: &str) -> bool {
119        if path == "/" {
120            return true;
121        }
122
123        let matches_sitemap = self
124            .content_sources
125            .values()
126            .filter(|source| source.enabled)
127            .filter_map(|source| source.sitemap.as_ref())
128            .filter(|sitemap| sitemap.enabled)
129            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path));
130
131        if matches_sitemap {
132            return true;
133        }
134
135        !path.contains('.')
136            && !path.starts_with("/api/")
137            && !path.starts_with("/track/")
138            && !path.starts_with("/.well-known/")
139    }
140
141    fn determine_source(&self, path: &str) -> String {
142        if path == "/" {
143            return "web".to_string();
144        }
145
146        self.content_sources
147            .iter()
148            .filter(|(_, source)| source.enabled)
149            .find_map(|(name, source)| {
150                source.sitemap.as_ref().and_then(|sitemap| {
151                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
152                        .then(|| name.clone())
153                })
154            })
155            .unwrap_or_else(|| "unknown".to_string())
156    }
157
158    fn resolve_slug(&self, path: &str) -> Option<String> {
159        self.content_sources
160            .values()
161            .filter(|source| source.enabled)
162            .filter_map(|source| source.sitemap.as_ref())
163            .filter(|sitemap| sitemap.enabled)
164            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
165    }
166}
167
/// Extracts the slug portion of `path` according to `pattern`.
///
/// The text of `pattern` before its first `{` is treated as a literal prefix
/// that `path` must start with. Whatever follows that prefix in `path`,
/// with any query string (`?…`) or fragment (`#…`) removed and trailing
/// slashes trimmed, is the slug.
///
/// Returns `None` when `path` does not start with the prefix or when the
/// remaining slug is empty.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let remainder = path.strip_prefix(prefix)?;

    // Cut the query/fragment first. Trimming slashes beforehand would miss a
    // slash that sits directly in front of `?` or `#` (e.g. `post/?x=1`).
    let without_suffix = remainder
        .split(&['?', '#'][..])
        .next()
        .unwrap_or(remainder);
    let slug = without_suffix.trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_string())
}
175
/// Raw, (de)serializable configuration of a single content source.
///
/// `path`, `source_id`, `category_id` and `enabled` carry no
/// `#[serde(default)]` and must be present in the input; every other field
/// falls back to its `Default` value when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentSourceConfigRaw {
    // NOTE(review): presumably the location the source's content is read from — confirm.
    pub path: String,
    /// Identifier of this source.
    pub source_id: SourceId,
    /// Identifier of the category this source is assigned to.
    pub category_id: CategoryId,
    /// Disabled sources are skipped by the `ContentRouting` implementation
    /// for `ContentConfigRaw`.
    pub enabled: bool,
    /// Free-form description; empty when omitted.
    #[serde(default)]
    pub description: String,
    /// Content type names allowed for this source; empty when omitted.
    #[serde(default)]
    pub allowed_content_types: Vec<String>,
    /// Optional indexing flags (see [`IndexingConfig`]).
    #[serde(default)]
    pub indexing: Option<IndexingConfig>,
    /// Optional sitemap settings; its `url_pattern` drives path routing.
    #[serde(default)]
    pub sitemap: Option<SitemapConfig>,
    /// Optional branding overrides (see [`SourceBranding`]).
    #[serde(default)]
    pub branding: Option<SourceBranding>,
}
193
/// Optional branding values attached to a content source.
///
/// Every field is optional and defaults to `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SourceBranding {
    /// Display name, if provided.
    #[serde(default)]
    pub name: Option<String>,
    /// Description text, if provided.
    #[serde(default)]
    pub description: Option<String>,
    // NOTE(review): stored as a plain string; whether this is a URL or a file path is not visible here.
    #[serde(default)]
    pub image: Option<String>,
    // NOTE(review): a single string, not a list; the separator convention is not visible here.
    #[serde(default)]
    pub keywords: Option<String>,
}
205
/// Boolean switches describing how a source should be indexed.
///
/// All flags default to `false`. The type is `Copy`.
// NOTE(review): the indexer that consumes these flags is not in this file;
// the per-flag notes below are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct IndexingConfig {
    // Presumably: remove existing entries before indexing.
    #[serde(default)]
    pub clear_before: bool,
    // Presumably: descend into subdirectories.
    #[serde(default)]
    pub recursive: bool,
    // Presumably: replace entries that already exist.
    #[serde(default)]
    pub override_existing: bool,
}
215
/// Sitemap settings of a content source.
///
/// `enabled`, `url_pattern`, `priority` and `changefreq` are required on
/// deserialization; `fetch_from` and `parent_route` are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SitemapConfig {
    /// Disabled sitemaps are ignored by path routing.
    pub enabled: bool,
    /// Slash-separated pattern matched by
    /// `ContentConfigRaw::matches_url_pattern`; a `{slug}` segment matches
    /// any single path segment.
    pub url_pattern: String,
    // NOTE(review): presumably the sitemap `<priority>` value — confirm range with the generator.
    pub priority: f32,
    // NOTE(review): presumably the sitemap `<changefreq>` value — confirm.
    pub changefreq: String,
    // NOTE(review): meaning not visible in this file; empty when omitted.
    #[serde(default)]
    pub fetch_from: String,
    /// Optional settings for a parent route (see [`ParentRoute`]).
    #[serde(default)]
    pub parent_route: Option<ParentRoute>,
}
227
/// Settings for the parent route of a sitemap entry.
///
/// All four fields are required on deserialization.
// NOTE(review): presumably describes the listing page above the `{slug}` pages — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParentRoute {
    /// Whether the parent route is active.
    pub enabled: bool,
    /// URL of the parent route.
    pub url: String,
    // NOTE(review): presumably the sitemap `<priority>` value — confirm.
    pub priority: f32,
    // NOTE(review): presumably the sitemap `<changefreq>` value — confirm.
    pub changefreq: String,
}
235
/// Metadata defaults of the content configuration.
///
/// Every field is optional on deserialization and defaults to an empty value.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Metadata {
    /// Author name used as the default; empty when omitted.
    #[serde(default)]
    pub default_author: String,
    // NOTE(review): presumably a language tag such as `en` — confirm expected format.
    #[serde(default)]
    pub language: String,
    /// Structured-data defaults (see [`StructuredData`]).
    #[serde(default)]
    pub structured_data: StructuredData,
}
245
/// Groups the structured-data defaults for the organization and for articles.
///
/// Both fields default to their empty `Default` values when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StructuredData {
    /// Organization details (see [`OrganizationData`]).
    #[serde(default)]
    pub organization: OrganizationData,
    /// Article defaults (see [`ArticleDefaults`]).
    #[serde(default)]
    pub article: ArticleDefaults,
}
253
/// Organization details used in structured data.
///
/// Every field defaults to an empty string when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct OrganizationData {
    /// Organization name.
    #[serde(default)]
    pub name: String,
    /// Organization URL.
    #[serde(default)]
    pub url: String,
    // NOTE(review): stored as a plain string; presumably a logo URL or path — confirm.
    #[serde(default)]
    pub logo: String,
}
263
/// Default values applied to article structured data.
///
/// Every field defaults to an empty string when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ArticleDefaults {
    /// Serialized and deserialized under the key `type` (a Rust keyword,
    /// hence the rename).
    #[serde(default, rename = "type")]
    pub article_type: String,
    /// Section name assigned to articles by default.
    #[serde(default)]
    pub article_section: String,
    // NOTE(review): presumably a language tag; relation to `Metadata::language` is not visible here.
    #[serde(default)]
    pub language: String,
}
273
/// A content category as stored in `ContentConfigRaw::categories`.
///
/// Both fields default to an empty string when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Category {
    /// Category name.
    #[serde(default)]
    pub name: String,
    /// Category description.
    #[serde(default)]
    pub description: String,
}