1use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::Arc;
12use systemprompt_identifiers::{CategoryId, SourceId};
13use thiserror::Error;
14
/// Routing decisions for incoming request paths.
///
/// Implementors must be `Send + Sync`, so a router can be shared across threads.
pub trait ContentRouting: Send + Sync {
    /// Returns `true` when `path` should be treated as an HTML page.
    fn is_html_page(&self, path: &str) -> bool;

    /// Returns the name of the content source responsible for `path`.
    fn determine_source(&self, path: &str) -> String;

    /// Extracts the content slug addressed by `path`.
    ///
    /// The provided implementation resolves nothing and always returns `None`;
    /// implementors that know about URL patterns override it.
    fn resolve_slug(&self, _path: &str) -> Option<String> {
        None
    }
}
22
23impl<T: ContentRouting + ?Sized> ContentRouting for Arc<T> {
24 fn is_html_page(&self, path: &str) -> bool {
25 (**self).is_html_page(path)
26 }
27
28 fn determine_source(&self, path: &str) -> String {
29 (**self).determine_source(path)
30 }
31
32 fn resolve_slug(&self, path: &str) -> Option<String> {
33 (**self).resolve_slug(path)
34 }
35}
36
37#[derive(Debug, Clone, Error)]
38pub enum ContentConfigError {
39 #[error("IO error reading {path}: {message}")]
40 Io { path: PathBuf, message: String },
41
42 #[error("YAML parse error in {path}: {message}")]
43 Parse { path: PathBuf, message: String },
44
45 #[error("Validation error in {field}: {message}")]
46 Validation {
47 field: String,
48 message: String,
49 suggestion: Option<String>,
50 },
51}
52
53#[derive(Debug, Default)]
54pub struct ContentConfigErrors {
55 errors: Vec<ContentConfigError>,
56}
57
58impl ContentConfigErrors {
59 pub fn new() -> Self {
60 Self::default()
61 }
62
63 pub fn push(&mut self, error: ContentConfigError) {
64 self.errors.push(error);
65 }
66
67 pub fn is_empty(&self) -> bool {
68 self.errors.is_empty()
69 }
70
71 pub fn errors(&self) -> &[ContentConfigError] {
72 &self.errors
73 }
74
75 pub fn into_result<T>(self, value: T) -> Result<T, Self> {
76 if self.is_empty() {
77 Ok(value)
78 } else {
79 Err(self)
80 }
81 }
82}
83
84impl std::fmt::Display for ContentConfigErrors {
85 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86 for (i, error) in self.errors.iter().enumerate() {
87 if i > 0 {
88 writeln!(f)?;
89 }
90 write!(f, " - {error}")?;
91 }
92 Ok(())
93 }
94}
95
96impl std::error::Error for ContentConfigErrors {}
97
98#[derive(Debug, Clone, Default, Serialize, Deserialize)]
99pub struct ContentConfigRaw {
100 #[serde(default)]
101 pub content_sources: HashMap<String, ContentSourceConfigRaw>,
102 #[serde(default)]
103 pub metadata: Metadata,
104 #[serde(default)]
105 pub categories: HashMap<String, Category>,
106}
107
108impl ContentConfigRaw {
109 pub fn matches_url_pattern(pattern: &str, path: &str) -> bool {
110 let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
111 let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
112
113 if pattern_parts.len() != path_parts.len() {
114 return false;
115 }
116
117 pattern_parts
118 .iter()
119 .zip(path_parts.iter())
120 .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
121 }
122}
123
124impl ContentRouting for ContentConfigRaw {
125 fn is_html_page(&self, path: &str) -> bool {
126 if path == "/" {
127 return true;
128 }
129
130 let matches_sitemap = self
131 .content_sources
132 .values()
133 .filter(|source| source.enabled)
134 .filter_map(|source| source.sitemap.as_ref())
135 .filter(|sitemap| sitemap.enabled)
136 .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path));
137
138 if matches_sitemap {
139 return true;
140 }
141
142 !path.contains('.')
143 && !path.starts_with("/api/")
144 && !path.starts_with("/track/")
145 && !path.starts_with("/.well-known/")
146 }
147
148 fn determine_source(&self, path: &str) -> String {
149 if path == "/" {
150 return "web".to_string();
151 }
152
153 self.content_sources
154 .iter()
155 .filter(|(_, source)| source.enabled)
156 .find_map(|(name, source)| {
157 source.sitemap.as_ref().and_then(|sitemap| {
158 (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
159 .then(|| name.clone())
160 })
161 })
162 .unwrap_or_else(|| "unknown".to_string())
163 }
164
165 fn resolve_slug(&self, path: &str) -> Option<String> {
166 self.content_sources
167 .values()
168 .filter(|source| source.enabled)
169 .filter_map(|source| source.sitemap.as_ref())
170 .filter(|sitemap| sitemap.enabled)
171 .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
172 }
173}
174
/// Extracts the slug that follows the literal prefix of `pattern` in `path`.
///
/// The prefix is everything in `pattern` before its first `{`. Returns `None` when
/// `pattern` has no placeholder, when `path` does not start with the prefix, or
/// when nothing is left after removing a `?query` / `#fragment` suffix and any
/// trailing slashes. The slug is whatever remains, so it may itself contain `/`.
fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    // Without a placeholder there is no slug position; `split_once` reports that
    // as `None` instead of treating the whole pattern as a prefix.
    let (prefix, _placeholder) = pattern.split_once('{')?;
    let remainder = path.strip_prefix(prefix)?;

    // Drop the query/fragment first so that a slash directly in front of it
    // (`post/?x=1`) is trimmed as well.
    let remainder = remainder
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(remainder);
    let slug = remainder.trim_end_matches('/');

    (!slug.is_empty()).then(|| slug.to_owned())
}
182
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct ContentSourceConfigRaw {
185 pub path: String,
186 pub source_id: SourceId,
187 pub category_id: CategoryId,
188 pub enabled: bool,
189 #[serde(default)]
190 pub description: String,
191 #[serde(default)]
192 pub allowed_content_types: Vec<String>,
193 #[serde(default)]
194 pub indexing: Option<IndexingConfig>,
195 #[serde(default)]
196 pub sitemap: Option<SitemapConfig>,
197 #[serde(default)]
198 pub branding: Option<SourceBranding>,
199}
200
201#[derive(Debug, Clone, Serialize, Deserialize, Default)]
202pub struct SourceBranding {
203 #[serde(default)]
204 pub name: Option<String>,
205 #[serde(default)]
206 pub description: Option<String>,
207 #[serde(default)]
208 pub image: Option<String>,
209 #[serde(default)]
210 pub keywords: Option<String>,
211}
212
213#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
214pub struct IndexingConfig {
215 #[serde(default)]
216 pub clear_before: bool,
217 #[serde(default)]
218 pub recursive: bool,
219 #[serde(default)]
220 pub override_existing: bool,
221}
222
223#[derive(Debug, Clone, Serialize, Deserialize)]
224pub struct SitemapConfig {
225 pub enabled: bool,
226 pub url_pattern: String,
227 pub priority: f32,
228 pub changefreq: String,
229 #[serde(default)]
230 pub fetch_from: String,
231 #[serde(default)]
232 pub parent_route: Option<ParentRoute>,
233}
234
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct ParentRoute {
237 pub enabled: bool,
238 pub url: String,
239 pub priority: f32,
240 pub changefreq: String,
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize, Default)]
244pub struct Metadata {
245 #[serde(default)]
246 pub default_author: String,
247 #[serde(default)]
248 pub structured_data: StructuredData,
249}
250
251#[derive(Debug, Clone, Serialize, Deserialize, Default)]
252pub struct StructuredData {
253 #[serde(default)]
254 pub organization: OrganizationData,
255 #[serde(default)]
256 pub article: ArticleDefaults,
257}
258
259#[derive(Debug, Clone, Serialize, Deserialize, Default)]
260pub struct OrganizationData {
261 #[serde(default)]
262 pub name: String,
263 #[serde(default)]
264 pub url: String,
265 #[serde(default)]
266 pub logo: String,
267}
268
269#[derive(Debug, Clone, Serialize, Deserialize, Default)]
270pub struct ArticleDefaults {
271 #[serde(default, rename = "type")]
272 pub article_type: String,
273 #[serde(default)]
274 pub article_section: String,
275}
276
277#[derive(Debug, Clone, Serialize, Deserialize, Default)]
278pub struct Category {
279 #[serde(default)]
280 pub name: String,
281 #[serde(default)]
282 pub description: String,
283}