Skip to main content

systemprompt_sync/diff/
content.rs

1//! Compute the diff between content stored on disk (markdown + frontmatter)
2//! and in the database for one content source.
3
4use super::compute_content_hash;
5use crate::error::{SyncError, SyncResult};
6use crate::models::{ContentDiffItem, ContentDiffResult, DiffStatus, DiskContent};
7use std::collections::HashMap;
8use std::path::Path;
9use systemprompt_content::models::Content;
10use systemprompt_content::repository::ContentRepository;
11use systemprompt_database::DbPool;
12use systemprompt_identifiers::SourceId;
13use tracing::warn;
14use walkdir::WalkDir;
15
16#[derive(Debug)]
17pub struct ContentDiffCalculator {
18    content_repo: ContentRepository,
19}
20
21impl ContentDiffCalculator {
22    pub fn new(db: &DbPool) -> SyncResult<Self> {
23        Ok(Self {
24            content_repo: ContentRepository::new(db).map_err(SyncError::internal)?,
25        })
26    }
27
28    pub async fn calculate_diff(
29        &self,
30        source_id: &SourceId,
31        disk_path: &Path,
32        allowed_types: &[String],
33    ) -> SyncResult<ContentDiffResult> {
34        let source_id_typed = source_id.clone();
35        let db_content = self
36            .content_repo
37            .list_by_source(&source_id_typed)
38            .await
39            .map_err(SyncError::internal)?;
40        let db_map: HashMap<String, Content> = db_content
41            .into_iter()
42            .map(|c| (c.slug.clone(), c))
43            .collect();
44
45        let disk_items = Self::scan_disk_content(disk_path, allowed_types);
46
47        let mut result = ContentDiffResult {
48            source_id: source_id_typed.clone(),
49            ..Default::default()
50        };
51
52        for (slug, disk_item) in &disk_items {
53            let disk_hash = compute_content_hash(&disk_item.body, &disk_item.title);
54
55            match db_map.get(slug) {
56                None => {
57                    result.added.push(ContentDiffItem {
58                        slug: slug.clone(),
59                        source_id: source_id_typed.clone(),
60                        status: DiffStatus::Added,
61                        disk_hash: Some(disk_hash),
62                        db_hash: None,
63                        disk_updated_at: None,
64                        db_updated_at: None,
65                        title: Some(disk_item.title.clone()),
66                    });
67                },
68                Some(db_item) => {
69                    if db_item.version_hash == disk_hash {
70                        result.unchanged += 1;
71                    } else {
72                        result.modified.push(ContentDiffItem {
73                            slug: slug.clone(),
74                            source_id: source_id_typed.clone(),
75                            status: DiffStatus::Modified,
76                            disk_hash: Some(disk_hash),
77                            db_hash: Some(db_item.version_hash.clone()),
78                            disk_updated_at: None,
79                            db_updated_at: Some(db_item.updated_at),
80                            title: Some(disk_item.title.clone()),
81                        });
82                    }
83                },
84            }
85        }
86
87        for (slug, db_item) in &db_map {
88            if !disk_items.contains_key(slug) {
89                result.removed.push(ContentDiffItem {
90                    slug: slug.clone(),
91                    source_id: source_id_typed.clone(),
92                    status: DiffStatus::Removed,
93                    disk_hash: None,
94                    db_hash: Some(db_item.version_hash.clone()),
95                    disk_updated_at: None,
96                    db_updated_at: Some(db_item.updated_at),
97                    title: Some(db_item.title.clone()),
98                });
99            }
100        }
101
102        Ok(result)
103    }
104
105    fn scan_disk_content(path: &Path, allowed_types: &[String]) -> HashMap<String, DiskContent> {
106        let mut items = HashMap::new();
107
108        if !path.exists() {
109            return items;
110        }
111
112        for entry in WalkDir::new(path)
113            .into_iter()
114            .filter_map(|e| match e {
115                Ok(entry) => Some(entry),
116                Err(err) => {
117                    tracing::warn!(error = %err, "Failed to read directory entry during sync");
118                    None
119                },
120            })
121            .filter(|e| e.file_type().is_file())
122            .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
123        {
124            let file_path = entry.path();
125            match parse_content_file(file_path, allowed_types) {
126                Ok(Some(content)) => {
127                    items.insert(content.slug.clone(), content);
128                },
129                Ok(None) => {},
130                Err(e) => {
131                    warn!("Failed to parse {}: {}", file_path.display(), e);
132                },
133            }
134        }
135
136        items
137    }
138}
139
140fn parse_content_file(path: &Path, allowed_types: &[String]) -> SyncResult<Option<DiskContent>> {
141    let content = std::fs::read_to_string(path)?;
142
143    let parts: Vec<&str> = content.splitn(3, "---").collect();
144    if parts.len() < 3 {
145        return Err(SyncError::invalid_input("Invalid frontmatter format"));
146    }
147
148    let frontmatter: serde_yaml::Value = serde_yaml::from_str(parts[1])?;
149    let body = parts[2].trim().to_string();
150
151    let kind = frontmatter
152        .get("kind")
153        .and_then(|v| v.as_str())
154        .ok_or_else(|| SyncError::invalid_input("Missing kind in frontmatter"))?;
155
156    if !allowed_types.iter().any(|t| t == kind) {
157        return Ok(None);
158    }
159
160    let slug = frontmatter
161        .get("slug")
162        .and_then(|v| v.as_str())
163        .ok_or_else(|| SyncError::invalid_input("Missing slug in frontmatter"))?
164        .to_string();
165
166    let title = frontmatter
167        .get("title")
168        .and_then(|v| v.as_str())
169        .ok_or_else(|| SyncError::invalid_input("Missing title in frontmatter"))?
170        .to_string();
171
172    Ok(Some(DiskContent { slug, title, body }))
173}