use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use crate::types::{DocumentId, Language, WorkspaceId};

11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct WorkspaceMetadata {
14 pub workspace_id: WorkspaceId,
16 pub root_path: PathBuf,
18 pub created_at: DateTime<Utc>,
20 pub updated_at: DateTime<Utc>,
22 pub document_count: usize,
24 pub chunk_count: usize,
26 pub total_bytes: u64,
28 pub embedding_dimension: usize,
30 pub index_version: u32,
32 pub document_states: HashMap<PathBuf, DocumentState>,
34 pub language_stats: HashMap<Language, LanguageInfo>,
36}
37
38impl WorkspaceMetadata {
39 pub fn new(workspace_id: WorkspaceId, root_path: PathBuf, embedding_dimension: usize) -> Self {
41 let now = Utc::now();
42 Self {
43 workspace_id,
44 root_path,
45 created_at: now,
46 updated_at: now,
47 document_count: 0,
48 chunk_count: 0,
49 total_bytes: 0,
50 embedding_dimension,
51 index_version: CURRENT_INDEX_VERSION,
52 document_states: HashMap::new(),
53 language_stats: HashMap::new(),
54 }
55 }
56
57 pub fn touch(&mut self) {
59 self.updated_at = Utc::now();
60 }
61
62 pub fn needs_reindex(&self, path: &PathBuf, content_hash: &str) -> bool {
64 match self.document_states.get(path) {
65 Some(state) => state.content_hash != content_hash,
66 None => true,
67 }
68 }
69
70 pub fn record_document(
72 &mut self,
73 path: PathBuf,
74 document_id: DocumentId,
75 content_hash: String,
76 size_bytes: u64,
77 language: Language,
78 chunk_count: usize,
79 ) {
80 self.document_states.insert(
81 path,
82 DocumentState {
83 document_id,
84 content_hash,
85 size_bytes,
86 language,
87 chunk_count,
88 indexed_at: Utc::now(),
89 },
90 );
91
92 let lang_info = self.language_stats.entry(language).or_insert(LanguageInfo {
94 file_count: 0,
95 chunk_count: 0,
96 total_bytes: 0,
97 });
98 lang_info.file_count += 1;
99 lang_info.chunk_count += chunk_count;
100 lang_info.total_bytes += size_bytes;
101
102 self.document_count = self.document_states.len();
103 self.chunk_count = self.document_states.values().map(|s| s.chunk_count).sum();
104 self.total_bytes = self.document_states.values().map(|s| s.size_bytes).sum();
105 self.touch();
106 }
107
108 pub fn remove_document(&mut self, path: &PathBuf) {
110 if let Some(state) = self.document_states.remove(path) {
111 if let Some(lang_info) = self.language_stats.get_mut(&state.language) {
113 lang_info.file_count = lang_info.file_count.saturating_sub(1);
114 lang_info.chunk_count = lang_info.chunk_count.saturating_sub(state.chunk_count);
115 lang_info.total_bytes = lang_info.total_bytes.saturating_sub(state.size_bytes);
116
117 if lang_info.file_count == 0 {
118 self.language_stats.remove(&state.language);
119 }
120 }
121
122 self.document_count = self.document_states.len();
123 self.chunk_count = self.document_states.values().map(|s| s.chunk_count).sum();
124 self.total_bytes = self.document_states.values().map(|s| s.size_bytes).sum();
125 self.touch();
126 }
127 }
128
129 pub fn find_deleted_documents(&self, existing_paths: &[PathBuf]) -> Vec<PathBuf> {
131 let existing_set: std::collections::HashSet<_> = existing_paths.iter().collect();
132 self.document_states
133 .keys()
134 .filter(|path| !existing_set.contains(path))
135 .cloned()
136 .collect()
137 }
138
139 pub fn is_compatible(&self) -> bool {
141 self.index_version == CURRENT_INDEX_VERSION
142 }
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct DocumentState {
148 pub document_id: DocumentId,
150 pub content_hash: String,
152 pub size_bytes: u64,
154 pub language: Language,
156 pub chunk_count: usize,
158 pub indexed_at: DateTime<Utc>,
160}
161
162#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct LanguageInfo {
165 pub file_count: usize,
167 pub chunk_count: usize,
169 pub total_bytes: u64,
171}
172
/// Index version stamped onto metadata created by `WorkspaceMetadata::new` and checked by
/// `WorkspaceMetadata::is_compatible`.
pub const CURRENT_INDEX_VERSION: u32 = 1;

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds empty metadata rooted at `/test` with an embedding dimension of 384.
    fn empty_metadata() -> WorkspaceMetadata {
        WorkspaceMetadata::new(WorkspaceId::new(), PathBuf::from("/test"), 384)
    }

    /// Records a Rust document at `path` with a fixed hash, size (1000 bytes) and chunk
    /// count (5).
    fn record_rust_file(meta: &mut WorkspaceMetadata, path: &str) {
        meta.record_document(
            PathBuf::from(path),
            DocumentId::new(),
            "hash123".to_string(),
            1000,
            Language::Rust,
            5,
        );
    }

    #[test]
    fn test_workspace_metadata() {
        let mut meta = empty_metadata();

        assert_eq!(meta.document_count, 0);
        assert!(meta.is_compatible());

        record_rust_file(&mut meta, "test.rs");

        assert_eq!(meta.document_count, 1);
        assert_eq!(meta.chunk_count, 5);
        assert_eq!(meta.total_bytes, 1000);

        // Same hash: up to date. Different hash or unknown path: needs indexing.
        assert!(!meta.needs_reindex(&PathBuf::from("test.rs"), "hash123"));
        assert!(meta.needs_reindex(&PathBuf::from("test.rs"), "hash456"));
        assert!(meta.needs_reindex(&PathBuf::from("other.rs"), "hash123"));
    }

    #[test]
    fn remove_document_resets_counters() {
        let mut meta = empty_metadata();
        let path = PathBuf::from("test.rs");
        record_rust_file(&mut meta, "test.rs");

        meta.remove_document(&path);

        assert_eq!(meta.document_count, 0);
        assert_eq!(meta.chunk_count, 0);
        assert_eq!(meta.total_bytes, 0);
        assert!(meta.document_states.is_empty());
        assert!(meta.language_stats.is_empty());
        assert!(meta.needs_reindex(&path, "hash123"));
    }

    #[test]
    fn find_deleted_documents_lists_missing_paths() {
        let mut meta = empty_metadata();
        record_rust_file(&mut meta, "kept.rs");
        record_rust_file(&mut meta, "gone.rs");

        let deleted = meta.find_deleted_documents(&[PathBuf::from("kept.rs")]);

        assert_eq!(deleted, vec![PathBuf::from("gone.rs")]);
    }
}