// systemprompt_content/config/ready.rs
1use chrono::{DateTime, Utc};
2use sha2::{Digest, Sha256};
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5use systemprompt_identifiers::{CategoryId, SourceId};
6use systemprompt_models::ContentRouting;
7use walkdir::WalkDir;
8
9use crate::models::ContentMetadata;
10use crate::services::validate_content_metadata;
11use crate::ContentError;
12
13use super::validated::{ContentConfigValidated, ContentSourceConfigValidated};
14
/// Fully loaded content state: the validated configuration together with all
/// markdown content parsed from its enabled sources, indexed for fast lookup.
#[derive(Debug, Clone)]
pub struct ContentReady {
    /// The validated configuration this content was loaded from.
    config: ContentConfigValidated,
    /// Content keyed by slug. Slugs that collide across files overwrite
    /// earlier entries (plain `HashMap::insert` during loading).
    content_by_slug: HashMap<String, ParsedContent>,
    /// Content grouped by the source it was loaded from.
    content_by_source: HashMap<SourceId, Vec<ParsedContent>>,
    /// Counters gathered while loading (file counts, errors, timing).
    stats: LoadStats,
}
22
/// A single markdown content file after frontmatter parsing and validation.
#[derive(Debug, Clone)]
pub struct ParsedContent {
    /// URL slug taken from the frontmatter.
    pub slug: String,
    pub title: String,
    pub description: String,
    /// Markdown body (everything after the closing frontmatter delimiter, trimmed).
    pub body: String,
    pub author: String,
    /// Publication date parsed from a `YYYY-MM-DD` frontmatter field,
    /// interpreted as midnight UTC.
    pub published_at: DateTime<Utc>,
    pub keywords: String,
    pub kind: String,
    /// Optional image reference from the frontmatter.
    pub image: Option<String>,
    /// Category from the frontmatter if present, otherwise the source's default.
    pub category_id: CategoryId,
    /// The source this file was loaded under.
    pub source_id: SourceId,
    /// SHA-256 hex digest over title + body + description; changes when any of
    /// those change, so it can be used for change detection.
    pub version_hash: String,
    /// Filesystem path of the originating `.md` file.
    pub file_path: PathBuf,
}
39
/// Aggregate statistics for one content-loading pass.
#[derive(Debug, Clone, Default)]
pub struct LoadStats {
    /// Total `.md` files discovered across all enabled sources.
    pub files_found: usize,
    /// Files that parsed and validated successfully.
    pub files_loaded: usize,
    /// Files that failed parsing or validation (logged, not fatal).
    pub files_with_errors: usize,
    /// Wall-clock duration of the whole load, in milliseconds.
    pub load_time_ms: u64,
    /// Per-source breakdown, keyed by source name.
    pub source_stats: HashMap<String, SourceLoadStats>,
}
48
/// Load statistics for a single content source.
#[derive(Debug, Clone, Copy, Default)]
pub struct SourceLoadStats {
    /// `.md` files discovered under this source's path.
    pub files_found: usize,
    /// Files from this source that loaded successfully.
    pub files_loaded: usize,
    /// Files from this source that failed to parse or validate.
    pub errors: usize,
}
55
56impl ContentReady {
57    pub fn from_validated(config: ContentConfigValidated) -> Self {
58        let start_time = std::time::Instant::now();
59        let mut content_by_slug = HashMap::new();
60        let mut content_by_source: HashMap<SourceId, Vec<ParsedContent>> = HashMap::new();
61        let mut stats = LoadStats::default();
62
63        for (source_name, source_config) in config.content_sources() {
64            if !source_config.enabled {
65                continue;
66            }
67
68            let mut source_stats = SourceLoadStats::default();
69
70            let files = scan_markdown_files(&source_config.path, source_config.indexing.recursive);
71            source_stats.files_found = files.len();
72            stats.files_found += files.len();
73
74            for file_path in files {
75                match parse_content_file(&file_path, source_config) {
76                    Ok(content) => {
77                        let slug = content.slug.clone();
78                        let source_id = content.source_id.clone();
79
80                        content_by_source
81                            .entry(source_id)
82                            .or_default()
83                            .push(content.clone());
84
85                        content_by_slug.insert(slug, content);
86
87                        source_stats.files_loaded += 1;
88                        stats.files_loaded += 1;
89                    },
90                    Err(e) => {
91                        tracing::warn!(
92                            file = %file_path.display(),
93                            error = %e,
94                            "Failed to parse content file"
95                        );
96                        source_stats.errors += 1;
97                        stats.files_with_errors += 1;
98                    },
99                }
100            }
101
102            stats.source_stats.insert(source_name.clone(), source_stats);
103        }
104
105        stats.load_time_ms = start_time.elapsed().as_millis() as u64;
106
107        Self {
108            config,
109            content_by_slug,
110            content_by_source,
111            stats,
112        }
113    }
114
115    pub const fn config(&self) -> &ContentConfigValidated {
116        &self.config
117    }
118
119    pub const fn stats(&self) -> &LoadStats {
120        &self.stats
121    }
122
123    pub fn get_by_slug(&self, slug: &str) -> Option<&ParsedContent> {
124        self.content_by_slug.get(slug)
125    }
126
127    pub fn get_by_source(&self, source_id: &SourceId) -> Option<&Vec<ParsedContent>> {
128        self.content_by_source.get(source_id)
129    }
130
131    pub fn all_content(&self) -> impl Iterator<Item = &ParsedContent> {
132        self.content_by_slug.values()
133    }
134
135    pub fn content_count(&self) -> usize {
136        self.content_by_slug.len()
137    }
138}
139
/// Routing decisions are delegated wholesale to the validated configuration;
/// the loaded content itself plays no part in routing.
impl ContentRouting for ContentReady {
    fn is_html_page(&self, path: &str) -> bool {
        self.config.is_html_page(path)
    }

    fn determine_source(&self, path: &str) -> String {
        self.config.determine_source(path)
    }
}
149
150fn scan_markdown_files(dir: &Path, recursive: bool) -> Vec<PathBuf> {
151    let walker = if recursive {
152        WalkDir::new(dir).min_depth(1)
153    } else {
154        WalkDir::new(dir).min_depth(1).max_depth(1)
155    };
156
157    walker
158        .into_iter()
159        .filter_map(Result::ok)
160        .filter(|e| e.file_type().is_file())
161        .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
162        .map(|e| e.path().to_path_buf())
163        .collect()
164}
165
166fn parse_content_file(
167    file_path: &Path,
168    source_config: &ContentSourceConfigValidated,
169) -> Result<ParsedContent, ContentError> {
170    let markdown_text = std::fs::read_to_string(file_path).map_err(ContentError::Io)?;
171
172    let (metadata, body) = parse_frontmatter(&markdown_text)?;
173
174    validate_content_metadata(&metadata)?;
175
176    let published_at = parse_date(&metadata.published_at)?;
177
178    let category_id = metadata.category.as_ref().map_or_else(
179        || source_config.category_id.clone(),
180        |c| CategoryId::new(c.clone()),
181    );
182
183    let version_hash = compute_version_hash(&metadata.title, &body, &metadata.description);
184
185    Ok(ParsedContent {
186        slug: metadata.slug,
187        title: metadata.title,
188        description: metadata.description,
189        body,
190        author: metadata.author,
191        published_at,
192        keywords: metadata.keywords,
193        kind: metadata.kind,
194        image: metadata.image,
195        category_id,
196        source_id: source_config.source_id.clone(),
197        version_hash,
198        file_path: file_path.to_path_buf(),
199    })
200}
201
202fn parse_frontmatter(markdown: &str) -> Result<(ContentMetadata, String), ContentError> {
203    let parts: Vec<&str> = markdown.splitn(3, "---").collect();
204
205    if parts.len() < 3 {
206        return Err(ContentError::Parse(
207            "Invalid frontmatter format - missing '---' delimiters".to_string(),
208        ));
209    }
210
211    let metadata: ContentMetadata = serde_yaml::from_str(parts[1]).map_err(ContentError::Yaml)?;
212
213    Ok((metadata, parts[2].trim().to_string()))
214}
215
216fn parse_date(date_str: &str) -> Result<DateTime<Utc>, ContentError> {
217    chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d")
218        .map_err(|e| ContentError::Parse(format!("Invalid date '{}': {}", date_str, e)))?
219        .and_hms_opt(0, 0, 0)
220        .ok_or_else(|| ContentError::Parse("Failed to create datetime".to_string()))?
221        .and_local_timezone(Utc)
222        .single()
223        .ok_or_else(|| ContentError::Parse("Ambiguous timezone conversion".to_string()))
224}
225
226fn compute_version_hash(title: &str, body: &str, description: &str) -> String {
227    let mut hasher = Sha256::new();
228    hasher.update(title.as_bytes());
229    hasher.update(body.as_bytes());
230    hasher.update(description.as_bytes());
231    format!("{:x}", hasher.finalize())
232}