Skip to main content

systemprompt_sync/diff/
content.rs

1//! Compute the diff between content stored on disk (markdown + frontmatter)
2//! and in the database for one content source.
3
4use super::compute_content_hash;
5use crate::error::{SyncError, SyncResult};
6use crate::models::{ContentDiffItem, ContentDiffResult, DiffStatus, DiskContent};
7use std::collections::HashMap;
8use std::path::Path;
9use systemprompt_content::models::Content;
10use systemprompt_content::repository::ContentRepository;
11use systemprompt_database::DbPool;
12use systemprompt_identifiers::{LocaleCode, SourceId};
13use tracing::warn;
14use walkdir::WalkDir;
15
16#[derive(Debug)]
17pub struct ContentDiffCalculator {
18    content_repo: ContentRepository,
19}
20
21impl ContentDiffCalculator {
22    pub fn new(db: &DbPool) -> SyncResult<Self> {
23        Ok(Self {
24            content_repo: ContentRepository::new(db).map_err(SyncError::internal)?,
25        })
26    }
27
28    pub async fn calculate_diff(
29        &self,
30        source_id: &SourceId,
31        disk_path: &Path,
32        allowed_types: &[String],
33    ) -> SyncResult<ContentDiffResult> {
34        let db_content = self
35            .content_repo
36            .list_by_source(source_id, &LocaleCode::new("en"))
37            .await
38            .map_err(SyncError::internal)?;
39        let db_map: HashMap<String, Content> = db_content
40            .into_iter()
41            .map(|c| (c.slug.clone(), c))
42            .collect();
43
44        let disk_items = Self::scan_disk_content(disk_path, allowed_types);
45
46        let mut result = ContentDiffResult {
47            source_id: source_id.clone(),
48            ..Default::default()
49        };
50
51        for (slug, disk_item) in &disk_items {
52            let disk_hash = compute_content_hash(&disk_item.body, &disk_item.title);
53
54            match db_map.get(slug) {
55                None => {
56                    result.added.push(ContentDiffItem {
57                        slug: slug.clone(),
58                        source_id: source_id.clone(),
59                        status: DiffStatus::Added,
60                        disk_hash: Some(disk_hash),
61                        db_hash: None,
62                        disk_updated_at: None,
63                        db_updated_at: None,
64                        title: Some(disk_item.title.clone()),
65                    });
66                },
67                Some(db_item) => {
68                    if db_item.version_hash == disk_hash {
69                        result.unchanged += 1;
70                    } else {
71                        result.modified.push(ContentDiffItem {
72                            slug: slug.clone(),
73                            source_id: source_id.clone(),
74                            status: DiffStatus::Modified,
75                            disk_hash: Some(disk_hash),
76                            db_hash: Some(db_item.version_hash.clone()),
77                            disk_updated_at: None,
78                            db_updated_at: Some(db_item.updated_at),
79                            title: Some(disk_item.title.clone()),
80                        });
81                    }
82                },
83            }
84        }
85
86        for (slug, db_item) in &db_map {
87            if !disk_items.contains_key(slug) {
88                result.removed.push(ContentDiffItem {
89                    slug: slug.clone(),
90                    source_id: source_id.clone(),
91                    status: DiffStatus::Removed,
92                    disk_hash: None,
93                    db_hash: Some(db_item.version_hash.clone()),
94                    disk_updated_at: None,
95                    db_updated_at: Some(db_item.updated_at),
96                    title: Some(db_item.title.clone()),
97                });
98            }
99        }
100
101        Ok(result)
102    }
103
104    fn scan_disk_content(path: &Path, allowed_types: &[String]) -> HashMap<String, DiskContent> {
105        let mut items = HashMap::new();
106
107        if !path.exists() {
108            return items;
109        }
110
111        for entry in WalkDir::new(path)
112            .into_iter()
113            .filter_map(|e| match e {
114                Ok(entry) => Some(entry),
115                Err(err) => {
116                    tracing::warn!(error = %err, "Failed to read directory entry during sync");
117                    None
118                },
119            })
120            .filter(|e| e.file_type().is_file())
121            .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
122        {
123            let file_path = entry.path();
124            match parse_content_file(file_path, allowed_types) {
125                Ok(Some(content)) => {
126                    items.insert(content.slug.clone(), content);
127                },
128                Ok(None) => {},
129                Err(e) => {
130                    warn!("Failed to parse {}: {}", file_path.display(), e);
131                },
132            }
133        }
134
135        items
136    }
137}
138
139fn parse_content_file(path: &Path, allowed_types: &[String]) -> SyncResult<Option<DiskContent>> {
140    let content = std::fs::read_to_string(path)?;
141
142    let parts: Vec<&str> = content.splitn(3, "---").collect();
143    if parts.len() < 3 {
144        return Err(SyncError::invalid_input("Invalid frontmatter format"));
145    }
146
147    let frontmatter: serde_yaml::Value = serde_yaml::from_str(parts[1])?;
148    let body = parts[2].trim().to_owned();
149
150    let kind = frontmatter
151        .get("kind")
152        .and_then(|v| v.as_str())
153        .ok_or_else(|| SyncError::invalid_input("Missing kind in frontmatter"))?;
154
155    if !allowed_types.iter().any(|t| t == kind) {
156        return Ok(None);
157    }
158
159    let slug = frontmatter
160        .get("slug")
161        .and_then(|v| v.as_str())
162        .ok_or_else(|| SyncError::invalid_input("Missing slug in frontmatter"))?
163        .to_owned();
164
165    let title = frontmatter
166        .get("title")
167        .and_then(|v| v.as_str())
168        .ok_or_else(|| SyncError::invalid_input("Missing title in frontmatter"))?
169        .to_owned();
170
171    Ok(Some(DiskContent { slug, title, body }))
172}