Skip to main content

systemprompt_sync/diff/
content.rs

1use super::compute_content_hash;
2use crate::models::{ContentDiffItem, ContentDiffResult, DiffStatus, DiskContent};
3use anyhow::{anyhow, Result};
4use std::collections::HashMap;
5use std::path::Path;
6use systemprompt_content::models::Content;
7use systemprompt_content::repository::ContentRepository;
8use systemprompt_database::DbPool;
9use systemprompt_identifiers::SourceId;
10use tracing::warn;
11use walkdir::WalkDir;
12
13#[derive(Debug)]
14pub struct ContentDiffCalculator {
15    content_repo: ContentRepository,
16}
17
18impl ContentDiffCalculator {
19    pub fn new(db: &DbPool) -> Result<Self> {
20        Ok(Self {
21            content_repo: ContentRepository::new(db)?,
22        })
23    }
24
25    pub async fn calculate_diff(
26        &self,
27        source_id: &str,
28        disk_path: &Path,
29        allowed_types: &[String],
30    ) -> Result<ContentDiffResult> {
31        let source_id_typed = SourceId::new(source_id);
32        let db_content = self.content_repo.list_by_source(&source_id_typed).await?;
33        let db_map: HashMap<String, Content> = db_content
34            .into_iter()
35            .map(|c| (c.slug.clone(), c))
36            .collect();
37
38        let disk_items = Self::scan_disk_content(disk_path, allowed_types);
39
40        let mut result = ContentDiffResult {
41            source_id: source_id.to_string(),
42            ..Default::default()
43        };
44
45        for (slug, disk_item) in &disk_items {
46            let disk_hash = compute_content_hash(&disk_item.body, &disk_item.title);
47
48            match db_map.get(slug) {
49                None => {
50                    result.added.push(ContentDiffItem {
51                        slug: slug.clone(),
52                        source_id: source_id.to_string(),
53                        status: DiffStatus::Added,
54                        disk_hash: Some(disk_hash),
55                        db_hash: None,
56                        disk_updated_at: None,
57                        db_updated_at: None,
58                        title: Some(disk_item.title.clone()),
59                    });
60                },
61                Some(db_item) => {
62                    if db_item.version_hash == disk_hash {
63                        result.unchanged += 1;
64                    } else {
65                        result.modified.push(ContentDiffItem {
66                            slug: slug.clone(),
67                            source_id: source_id.to_string(),
68                            status: DiffStatus::Modified,
69                            disk_hash: Some(disk_hash),
70                            db_hash: Some(db_item.version_hash.clone()),
71                            disk_updated_at: None,
72                            db_updated_at: Some(db_item.updated_at),
73                            title: Some(disk_item.title.clone()),
74                        });
75                    }
76                },
77            }
78        }
79
80        for (slug, db_item) in &db_map {
81            if !disk_items.contains_key(slug) {
82                result.removed.push(ContentDiffItem {
83                    slug: slug.clone(),
84                    source_id: source_id.to_string(),
85                    status: DiffStatus::Removed,
86                    disk_hash: None,
87                    db_hash: Some(db_item.version_hash.clone()),
88                    disk_updated_at: None,
89                    db_updated_at: Some(db_item.updated_at),
90                    title: Some(db_item.title.clone()),
91                });
92            }
93        }
94
95        Ok(result)
96    }
97
98    fn scan_disk_content(path: &Path, allowed_types: &[String]) -> HashMap<String, DiskContent> {
99        let mut items = HashMap::new();
100
101        if !path.exists() {
102            return items;
103        }
104
105        for entry in WalkDir::new(path)
106            .into_iter()
107            .filter_map(|e| {
108                e.map_err(|err| {
109                    tracing::warn!(error = %err, "Failed to read directory entry during sync");
110                    err
111                })
112                .ok()
113            })
114            .filter(|e| e.file_type().is_file())
115            .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
116        {
117            let file_path = entry.path();
118            match parse_content_file(file_path, allowed_types) {
119                Ok(Some(content)) => {
120                    items.insert(content.slug.clone(), content);
121                },
122                Ok(None) => {},
123                Err(e) => {
124                    warn!("Failed to parse {}: {}", file_path.display(), e);
125                },
126            }
127        }
128
129        items
130    }
131}
132
133fn parse_content_file(path: &Path, allowed_types: &[String]) -> Result<Option<DiskContent>> {
134    let content = std::fs::read_to_string(path)?;
135
136    let parts: Vec<&str> = content.splitn(3, "---").collect();
137    if parts.len() < 3 {
138        return Err(anyhow!("Invalid frontmatter format"));
139    }
140
141    let frontmatter: serde_yaml::Value = serde_yaml::from_str(parts[1])?;
142    let body = parts[2].trim().to_string();
143
144    let kind = frontmatter
145        .get("kind")
146        .and_then(|v| v.as_str())
147        .ok_or_else(|| anyhow!("Missing kind in frontmatter"))?;
148
149    if !allowed_types.iter().any(|t| t == kind) {
150        return Ok(None);
151    }
152
153    let slug = frontmatter
154        .get("slug")
155        .and_then(|v| v.as_str())
156        .ok_or_else(|| anyhow!("Missing slug in frontmatter"))?
157        .to_string();
158
159    let title = frontmatter
160        .get("title")
161        .and_then(|v| v.as_str())
162        .ok_or_else(|| anyhow!("Missing title in frontmatter"))?
163        .to_string();
164
165    Ok(Some(DiskContent { slug, title, body }))
166}