Skip to main content

systemprompt_models/
content_config.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::sync::Arc;
5use systemprompt_identifiers::{CategoryId, SourceId};
6use thiserror::Error;
7
/// Path-based routing queries answered by a content configuration.
///
/// Implementors must be `Send + Sync` (supertrait bounds), so a router can be shared
/// between threads.
pub trait ContentRouting: Send + Sync {
    /// Reports whether `path` should be treated as an HTML page.
    fn is_html_page(&self, path: &str) -> bool;

    /// Returns the name of the content source responsible for `path`.
    ///
    /// What is returned when no source matches is up to the implementor.
    fn determine_source(&self, path: &str) -> String;
}
12
13impl<T: ContentRouting + ?Sized> ContentRouting for Arc<T> {
14    fn is_html_page(&self, path: &str) -> bool {
15        (**self).is_html_page(path)
16    }
17
18    fn determine_source(&self, path: &str) -> String {
19        (**self).determine_source(path)
20    }
21}
22
23#[derive(Debug, Clone, Error)]
24pub enum ContentConfigError {
25    #[error("IO error reading {path}: {message}")]
26    Io { path: PathBuf, message: String },
27
28    #[error("YAML parse error in {path}: {message}")]
29    Parse { path: PathBuf, message: String },
30
31    #[error("Validation error in {field}: {message}")]
32    Validation {
33        field: String,
34        message: String,
35        suggestion: Option<String>,
36    },
37}
38
39#[derive(Debug, Default)]
40pub struct ContentConfigErrors {
41    errors: Vec<ContentConfigError>,
42}
43
44impl ContentConfigErrors {
45    pub fn new() -> Self {
46        Self::default()
47    }
48
49    pub fn push(&mut self, error: ContentConfigError) {
50        self.errors.push(error);
51    }
52
53    pub fn is_empty(&self) -> bool {
54        self.errors.is_empty()
55    }
56
57    pub fn errors(&self) -> &[ContentConfigError] {
58        &self.errors
59    }
60
61    pub fn into_result<T>(self, value: T) -> Result<T, Self> {
62        if self.is_empty() {
63            Ok(value)
64        } else {
65            Err(self)
66        }
67    }
68}
69
70impl std::fmt::Display for ContentConfigErrors {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        for (i, error) in self.errors.iter().enumerate() {
73            if i > 0 {
74                writeln!(f)?;
75            }
76            write!(f, "  - {error}")?;
77        }
78        Ok(())
79    }
80}
81
82impl std::error::Error for ContentConfigErrors {}
83
84#[derive(Debug, Clone, Serialize, Deserialize)]
85pub struct ContentConfigRaw {
86    #[serde(default)]
87    pub content_sources: HashMap<String, ContentSourceConfigRaw>,
88    #[serde(default)]
89    pub metadata: Metadata,
90    #[serde(default)]
91    pub categories: HashMap<String, Category>,
92}
93
94impl ContentConfigRaw {
95    pub fn matches_url_pattern(pattern: &str, path: &str) -> bool {
96        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
97        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
98
99        if pattern_parts.len() != path_parts.len() {
100            return false;
101        }
102
103        pattern_parts
104            .iter()
105            .zip(path_parts.iter())
106            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
107    }
108}
109
110impl ContentRouting for ContentConfigRaw {
111    fn is_html_page(&self, path: &str) -> bool {
112        if path == "/" {
113            return true;
114        }
115
116        self.content_sources
117            .values()
118            .filter(|source| source.enabled)
119            .filter_map(|source| source.sitemap.as_ref())
120            .filter(|sitemap| sitemap.enabled)
121            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
122    }
123
124    fn determine_source(&self, path: &str) -> String {
125        if path == "/" {
126            return "web".to_string();
127        }
128
129        self.content_sources
130            .iter()
131            .filter(|(_, source)| source.enabled)
132            .find_map(|(name, source)| {
133                source.sitemap.as_ref().and_then(|sitemap| {
134                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
135                        .then(|| name.clone())
136                })
137            })
138            .unwrap_or_else(|| "unknown".to_string())
139    }
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct ContentSourceConfigRaw {
144    pub path: String,
145    pub source_id: SourceId,
146    pub category_id: CategoryId,
147    pub enabled: bool,
148    #[serde(default)]
149    pub description: String,
150    #[serde(default)]
151    pub allowed_content_types: Vec<String>,
152    #[serde(default)]
153    pub indexing: Option<IndexingConfig>,
154    #[serde(default)]
155    pub sitemap: Option<SitemapConfig>,
156    #[serde(default)]
157    pub branding: Option<SourceBranding>,
158}
159
160#[derive(Debug, Clone, Serialize, Deserialize, Default)]
161pub struct SourceBranding {
162    #[serde(default)]
163    pub name: Option<String>,
164    #[serde(default)]
165    pub description: Option<String>,
166    #[serde(default)]
167    pub image: Option<String>,
168    #[serde(default)]
169    pub keywords: Option<String>,
170}
171
172#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
173pub struct IndexingConfig {
174    #[serde(default)]
175    pub clear_before: bool,
176    #[serde(default)]
177    pub recursive: bool,
178    #[serde(default)]
179    pub override_existing: bool,
180}
181
182#[derive(Debug, Clone, Serialize, Deserialize)]
183pub struct SitemapConfig {
184    pub enabled: bool,
185    pub url_pattern: String,
186    pub priority: f32,
187    pub changefreq: String,
188    #[serde(default)]
189    pub fetch_from: String,
190    #[serde(default)]
191    pub parent_route: Option<ParentRoute>,
192}
193
194#[derive(Debug, Clone, Serialize, Deserialize)]
195pub struct ParentRoute {
196    pub enabled: bool,
197    pub url: String,
198    pub priority: f32,
199    pub changefreq: String,
200}
201
202#[derive(Debug, Clone, Serialize, Deserialize, Default)]
203pub struct Metadata {
204    #[serde(default)]
205    pub default_author: String,
206    #[serde(default)]
207    pub language: String,
208    #[serde(default)]
209    pub structured_data: StructuredData,
210}
211
212#[derive(Debug, Clone, Serialize, Deserialize, Default)]
213pub struct StructuredData {
214    #[serde(default)]
215    pub organization: OrganizationData,
216    #[serde(default)]
217    pub article: ArticleDefaults,
218}
219
220#[derive(Debug, Clone, Serialize, Deserialize, Default)]
221pub struct OrganizationData {
222    #[serde(default)]
223    pub name: String,
224    #[serde(default)]
225    pub url: String,
226    #[serde(default)]
227    pub logo: String,
228}
229
230#[derive(Debug, Clone, Serialize, Deserialize, Default)]
231pub struct ArticleDefaults {
232    #[serde(default, rename = "type")]
233    pub article_type: String,
234    #[serde(default)]
235    pub article_section: String,
236    #[serde(default)]
237    pub language: String,
238}
239
240#[derive(Debug, Clone, Serialize, Deserialize, Default)]
241pub struct Category {
242    #[serde(default)]
243    pub name: String,
244    #[serde(default)]
245    pub description: String,
246}