1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6use std::sync::Arc;
7use systemprompt_identifiers::{CategoryId, SourceId};
8use thiserror::Error;
9
/// Routing decisions for request paths.
///
/// Implementors must be `Send + Sync`, so a router can be shared across
/// threads (for example behind an `Arc`).
pub trait ContentRouting: Send + Sync {
    /// Reports whether `path` should be treated as an HTML page.
    fn is_html_page(&self, path: &str) -> bool;

    /// Returns the name of the source responsible for `path`.
    fn determine_source(&self, path: &str) -> String;

    /// Resolves the content slug for `path`, if the implementor supports it.
    ///
    /// The provided implementation ignores the path and always yields `None`.
    fn resolve_slug(&self, _path: &str) -> Option<String> {
        Option::None
    }
}
17
18impl<T: ContentRouting + ?Sized> ContentRouting for Arc<T> {
19 fn is_html_page(&self, path: &str) -> bool {
20 (**self).is_html_page(path)
21 }
22
23 fn determine_source(&self, path: &str) -> String {
24 (**self).determine_source(path)
25 }
26
27 fn resolve_slug(&self, path: &str) -> Option<String> {
28 (**self).resolve_slug(path)
29 }
30}
31
32#[derive(Debug, Clone, Error)]
33pub enum ContentConfigError {
34 #[error("IO error reading {path}: {message}")]
35 Io { path: PathBuf, message: String },
36
37 #[error("YAML parse error in {path}: {message}")]
38 Parse { path: PathBuf, message: String },
39
40 #[error("Validation error in {field}: {message}")]
41 Validation {
42 field: String,
43 message: String,
44 suggestion: Option<String>,
45 },
46}
47
48#[derive(Debug, Default)]
49pub struct ContentConfigErrors {
50 errors: Vec<ContentConfigError>,
51}
52
53impl ContentConfigErrors {
54 pub fn new() -> Self {
55 Self::default()
56 }
57
58 pub fn push(&mut self, error: ContentConfigError) {
59 self.errors.push(error);
60 }
61
62 pub fn is_empty(&self) -> bool {
63 self.errors.is_empty()
64 }
65
66 pub fn errors(&self) -> &[ContentConfigError] {
67 &self.errors
68 }
69
70 pub fn into_result<T>(self, value: T) -> Result<T, Self> {
71 if self.is_empty() {
72 Ok(value)
73 } else {
74 Err(self)
75 }
76 }
77}
78
79impl std::fmt::Display for ContentConfigErrors {
80 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81 for (i, error) in self.errors.iter().enumerate() {
82 if i > 0 {
83 writeln!(f)?;
84 }
85 write!(f, " - {error}")?;
86 }
87 Ok(())
88 }
89}
90
91impl std::error::Error for ContentConfigErrors {}
92
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
94pub struct ContentConfigRaw {
95 #[serde(default)]
96 pub content_sources: HashMap<String, ContentSourceConfigRaw>,
97 #[serde(default)]
98 pub metadata: Metadata,
99 #[serde(default)]
100 pub categories: HashMap<String, Category>,
101}
102
103impl ContentConfigRaw {
104 pub fn matches_url_pattern(pattern: &str, path: &str) -> bool {
105 let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
106 let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
107
108 if pattern_parts.len() != path_parts.len() {
109 return false;
110 }
111
112 pattern_parts
113 .iter()
114 .zip(path_parts.iter())
115 .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
116 }
117}
118
119impl ContentRouting for ContentConfigRaw {
120 fn is_html_page(&self, path: &str) -> bool {
121 if path == "/" {
122 return true;
123 }
124
125 let matches_sitemap = self
126 .content_sources
127 .values()
128 .filter(|source| source.enabled)
129 .filter_map(|source| source.sitemap.as_ref())
130 .filter(|sitemap| sitemap.enabled)
131 .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path));
132
133 if matches_sitemap {
134 return true;
135 }
136
137 !path.contains('.')
138 && !path.starts_with("/api/")
139 && !path.starts_with("/track/")
140 && !path.starts_with("/.well-known/")
141 }
142
143 fn determine_source(&self, path: &str) -> String {
144 if path == "/" {
145 return "web".to_string();
146 }
147
148 self.content_sources
149 .iter()
150 .filter(|(_, source)| source.enabled)
151 .find_map(|(name, source)| {
152 source.sitemap.as_ref().and_then(|sitemap| {
153 (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
154 .then(|| name.clone())
155 })
156 })
157 .unwrap_or_else(|| "unknown".to_string())
158 }
159
160 fn resolve_slug(&self, path: &str) -> Option<String> {
161 self.content_sources
162 .values()
163 .filter(|source| source.enabled)
164 .filter_map(|source| source.sitemap.as_ref())
165 .filter(|sitemap| sitemap.enabled)
166 .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
167 }
168}
169
/// Extracts the slug part of `path` for a sitemap URL `pattern`.
///
/// The text of `pattern` before its first `{` is used as a literal prefix
/// that `path` must start with (a pattern without `{` is used as the prefix in
/// full). The slug is whatever follows that prefix, cut at the first `?` or
/// `#`, with trailing `/` characters removed.
///
/// Returns `None` when `path` does not start with the prefix or when the
/// remaining slug is empty.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    // `split` always yields at least one item, so `prefix` is the whole
    // pattern when it contains no `{`.
    let prefix = pattern.split('{').next()?;
    let rest = path.strip_prefix(prefix)?;

    // Drop the query string / fragment BEFORE trimming slashes. Trimming first
    // left a trailing `/` on inputs such as `/blog/post/?utm=1`, because the
    // string did not yet end with `/` at that point.
    let slug = rest
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(rest)
        .trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_string())
}
177
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct ContentSourceConfigRaw {
180 pub path: String,
181 pub source_id: SourceId,
182 pub category_id: CategoryId,
183 pub enabled: bool,
184 #[serde(default)]
185 pub description: String,
186 #[serde(default)]
187 pub allowed_content_types: Vec<String>,
188 #[serde(default)]
189 pub indexing: Option<IndexingConfig>,
190 #[serde(default)]
191 pub sitemap: Option<SitemapConfig>,
192 #[serde(default)]
193 pub branding: Option<SourceBranding>,
194}
195
196#[derive(Debug, Clone, Serialize, Deserialize, Default)]
197pub struct SourceBranding {
198 #[serde(default)]
199 pub name: Option<String>,
200 #[serde(default)]
201 pub description: Option<String>,
202 #[serde(default)]
203 pub image: Option<String>,
204 #[serde(default)]
205 pub keywords: Option<String>,
206}
207
208#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
209pub struct IndexingConfig {
210 #[serde(default)]
211 pub clear_before: bool,
212 #[serde(default)]
213 pub recursive: bool,
214 #[serde(default)]
215 pub override_existing: bool,
216}
217
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct SitemapConfig {
220 pub enabled: bool,
221 pub url_pattern: String,
222 pub priority: f32,
223 pub changefreq: String,
224 #[serde(default)]
225 pub fetch_from: String,
226 #[serde(default)]
227 pub parent_route: Option<ParentRoute>,
228}
229
230#[derive(Debug, Clone, Serialize, Deserialize)]
231pub struct ParentRoute {
232 pub enabled: bool,
233 pub url: String,
234 pub priority: f32,
235 pub changefreq: String,
236}
237
238#[derive(Debug, Clone, Serialize, Deserialize, Default)]
239pub struct Metadata {
240 #[serde(default)]
241 pub default_author: String,
242 #[serde(default)]
243 pub language: String,
244 #[serde(default)]
245 pub structured_data: StructuredData,
246}
247
248#[derive(Debug, Clone, Serialize, Deserialize, Default)]
249pub struct StructuredData {
250 #[serde(default)]
251 pub organization: OrganizationData,
252 #[serde(default)]
253 pub article: ArticleDefaults,
254}
255
256#[derive(Debug, Clone, Serialize, Deserialize, Default)]
257pub struct OrganizationData {
258 #[serde(default)]
259 pub name: String,
260 #[serde(default)]
261 pub url: String,
262 #[serde(default)]
263 pub logo: String,
264}
265
266#[derive(Debug, Clone, Serialize, Deserialize, Default)]
267pub struct ArticleDefaults {
268 #[serde(default, rename = "type")]
269 pub article_type: String,
270 #[serde(default)]
271 pub article_section: String,
272 #[serde(default)]
273 pub language: String,
274}
275
276#[derive(Debug, Clone, Serialize, Deserialize, Default)]
277pub struct Category {
278 #[serde(default)]
279 pub name: String,
280 #[serde(default)]
281 pub description: String,
282}