systemprompt_content/config/
ready.rs1use chrono::{DateTime, Utc};
2use sha2::{Digest, Sha256};
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5use systemprompt_identifiers::{CategoryId, SourceId};
6use systemprompt_models::ContentRouting;
7use walkdir::WalkDir;
8
9use crate::models::ContentMetadata;
10use crate::services::validate_content_metadata;
11use crate::ContentError;
12
13use super::validated::{ContentConfigValidated, ContentSourceConfigValidated};
14
15#[derive(Debug, Clone)]
16pub struct ContentReady {
17 config: ContentConfigValidated,
18 content_by_slug: HashMap<String, ParsedContent>,
19 content_by_source: HashMap<SourceId, Vec<ParsedContent>>,
20 stats: LoadStats,
21}
22
23#[derive(Debug, Clone)]
24pub struct ParsedContent {
25 pub slug: String,
26 pub title: String,
27 pub description: String,
28 pub body: String,
29 pub author: String,
30 pub published_at: DateTime<Utc>,
31 pub keywords: String,
32 pub kind: String,
33 pub image: Option<String>,
34 pub category_id: CategoryId,
35 pub source_id: SourceId,
36 pub version_hash: String,
37 pub file_path: PathBuf,
38}
39
40#[derive(Debug, Clone, Default)]
41pub struct LoadStats {
42 pub files_found: usize,
43 pub files_loaded: usize,
44 pub files_with_errors: usize,
45 pub load_time_ms: u64,
46 pub source_stats: HashMap<String, SourceLoadStats>,
47}
48
/// Counters describing the load of a single content source.
#[derive(Clone, Copy, Debug, Default)]
pub struct SourceLoadStats {
    /// Number of markdown files discovered in the source directory.
    pub files_found: usize,
    /// Number of files that parsed and validated successfully.
    pub files_loaded: usize,
    /// Number of files that failed to parse or validate.
    pub errors: usize,
}
55
56impl ContentReady {
57 pub fn from_validated(config: ContentConfigValidated) -> Self {
58 let start_time = std::time::Instant::now();
59 let mut content_by_slug = HashMap::new();
60 let mut content_by_source: HashMap<SourceId, Vec<ParsedContent>> = HashMap::new();
61 let mut stats = LoadStats::default();
62
63 for (source_name, source_config) in config.content_sources() {
64 if !source_config.enabled {
65 continue;
66 }
67
68 let mut source_stats = SourceLoadStats::default();
69
70 let files = scan_markdown_files(&source_config.path, source_config.indexing.recursive);
71 source_stats.files_found = files.len();
72 stats.files_found += files.len();
73
74 for file_path in files {
75 match parse_content_file(&file_path, source_config) {
76 Ok(content) => {
77 let slug = content.slug.clone();
78 let source_id = content.source_id.clone();
79
80 content_by_source
81 .entry(source_id)
82 .or_default()
83 .push(content.clone());
84
85 content_by_slug.insert(slug, content);
86
87 source_stats.files_loaded += 1;
88 stats.files_loaded += 1;
89 },
90 Err(e) => {
91 tracing::warn!(
92 file = %file_path.display(),
93 error = %e,
94 "Failed to parse content file"
95 );
96 source_stats.errors += 1;
97 stats.files_with_errors += 1;
98 },
99 }
100 }
101
102 stats.source_stats.insert(source_name.clone(), source_stats);
103 }
104
105 stats.load_time_ms = start_time.elapsed().as_millis() as u64;
106
107 Self {
108 config,
109 content_by_slug,
110 content_by_source,
111 stats,
112 }
113 }
114
115 pub const fn config(&self) -> &ContentConfigValidated {
116 &self.config
117 }
118
119 pub const fn stats(&self) -> &LoadStats {
120 &self.stats
121 }
122
123 pub fn get_by_slug(&self, slug: &str) -> Option<&ParsedContent> {
124 self.content_by_slug.get(slug)
125 }
126
127 pub fn get_by_source(&self, source_id: &SourceId) -> Option<&Vec<ParsedContent>> {
128 self.content_by_source.get(source_id)
129 }
130
131 pub fn all_content(&self) -> impl Iterator<Item = &ParsedContent> {
132 self.content_by_slug.values()
133 }
134
135 pub fn content_count(&self) -> usize {
136 self.content_by_slug.len()
137 }
138}
139
140impl ContentRouting for ContentReady {
141 fn is_html_page(&self, path: &str) -> bool {
142 self.config.is_html_page(path)
143 }
144
145 fn determine_source(&self, path: &str) -> String {
146 self.config.determine_source(path)
147 }
148}
149
150fn scan_markdown_files(dir: &Path, recursive: bool) -> Vec<PathBuf> {
151 let walker = if recursive {
152 WalkDir::new(dir).min_depth(1)
153 } else {
154 WalkDir::new(dir).min_depth(1).max_depth(1)
155 };
156
157 walker
158 .into_iter()
159 .filter_map(Result::ok)
160 .filter(|e| e.file_type().is_file())
161 .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
162 .map(|e| e.path().to_path_buf())
163 .collect()
164}
165
166fn parse_content_file(
167 file_path: &Path,
168 source_config: &ContentSourceConfigValidated,
169) -> Result<ParsedContent, ContentError> {
170 let markdown_text = std::fs::read_to_string(file_path).map_err(ContentError::Io)?;
171
172 let (metadata, body) = parse_frontmatter(&markdown_text)?;
173
174 validate_content_metadata(&metadata)?;
175
176 let published_at = parse_date(&metadata.published_at)?;
177
178 let category_id = metadata.category.as_ref().map_or_else(
179 || source_config.category_id.clone(),
180 |c| CategoryId::new(c.clone()),
181 );
182
183 let version_hash = compute_version_hash(&metadata.title, &body, &metadata.description);
184
185 Ok(ParsedContent {
186 slug: metadata.slug,
187 title: metadata.title,
188 description: metadata.description,
189 body,
190 author: metadata.author,
191 published_at,
192 keywords: metadata.keywords,
193 kind: metadata.kind,
194 image: metadata.image,
195 category_id,
196 source_id: source_config.source_id.clone(),
197 version_hash,
198 file_path: file_path.to_path_buf(),
199 })
200}
201
202fn parse_frontmatter(markdown: &str) -> Result<(ContentMetadata, String), ContentError> {
203 let parts: Vec<&str> = markdown.splitn(3, "---").collect();
204
205 if parts.len() < 3 {
206 return Err(ContentError::Parse(
207 "Invalid frontmatter format - missing '---' delimiters".to_string(),
208 ));
209 }
210
211 let metadata: ContentMetadata = serde_yaml::from_str(parts[1]).map_err(ContentError::Yaml)?;
212
213 Ok((metadata, parts[2].trim().to_string()))
214}
215
216fn parse_date(date_str: &str) -> Result<DateTime<Utc>, ContentError> {
217 chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d")
218 .map_err(|e| ContentError::Parse(format!("Invalid date '{}': {}", date_str, e)))?
219 .and_hms_opt(0, 0, 0)
220 .ok_or_else(|| ContentError::Parse("Failed to create datetime".to_string()))?
221 .and_local_timezone(Utc)
222 .single()
223 .ok_or_else(|| ContentError::Parse("Ambiguous timezone conversion".to_string()))
224}
225
226fn compute_version_hash(title: &str, body: &str, description: &str) -> String {
227 let mut hasher = Sha256::new();
228 hasher.update(title.as_bytes());
229 hasher.update(body.as_bytes());
230 hasher.update(description.as_bytes());
231 format!("{:x}", hasher.finalize())
232}