1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::fs;
4use std::path::{Path, PathBuf};
5
/// Largest entry file (in bytes) whose text is embedded in a snapshot.
///
/// Files above this size are still hashed and measured, but their content is
/// omitted and the encoding is reported as `"too-large"`.
const MAX_ENTRY_CONTENT_BYTES: usize = 256 << 10;
7
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
9pub struct CursorHistoryEntrySnapshot {
10 pub entry_id: String,
11 pub timestamp: i64,
12 #[serde(default, skip_serializing_if = "Option::is_none")]
13 pub content: Option<String>,
14 #[serde(default, skip_serializing_if = "Option::is_none")]
15 pub content_sha256: Option<String>,
16 #[serde(default, skip_serializing_if = "Option::is_none")]
17 pub content_encoding: Option<String>,
18 #[serde(default, skip_serializing_if = "Option::is_none")]
19 pub content_bytes: Option<usize>,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
23pub struct CursorHistoryWorkspaceSnapshot {
24 pub folder_key: String,
25 pub version: u32,
26 pub resource: String,
27 pub entries: Vec<CursorHistoryEntrySnapshot>,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
31pub struct CursorHistoryCollection {
32 #[serde(default)]
33 pub supported: bool,
34 #[serde(default)]
35 pub history_root: String,
36 #[serde(default)]
37 pub exists: bool,
38 #[serde(default)]
39 pub workspace_count: usize,
40 #[serde(default)]
41 pub entry_count: usize,
42 #[serde(default)]
43 pub collected_at: Option<DateTime<Utc>>,
44 #[serde(default)]
45 pub workspaces: Vec<CursorHistoryWorkspaceSnapshot>,
46 #[serde(default, skip_serializing_if = "Option::is_none")]
47 pub note: Option<String>,
48}
49
50#[derive(Debug, Deserialize)]
51struct CursorEntriesManifest {
52 version: u32,
53 resource: String,
54 entries: Vec<CursorEntriesManifestEntry>,
55}
56
57#[derive(Debug, Deserialize)]
58struct CursorEntriesManifestEntry {
59 id: String,
60 timestamp: i64,
61}
62
63pub fn default_cursor_history_root() -> Option<PathBuf> {
64 dirs::data_dir().map(|path| path.join("Cursor").join("User").join("History"))
65}
66
67pub fn collect_cursor_history(explicit_root: Option<&Path>) -> CursorHistoryCollection {
68 let root = explicit_root
69 .map(Path::to_path_buf)
70 .or_else(default_cursor_history_root);
71
72 if !cfg!(windows) && explicit_root.is_none() {
73 return CursorHistoryCollection {
74 supported: false,
75 history_root: root
76 .as_ref()
77 .map(|path| path.display().to_string())
78 .unwrap_or_default(),
79 exists: root.as_ref().map(|path| path.exists()).unwrap_or(false),
80 note: Some(
81 "Cursor local history ingestion is only implemented on Windows right now."
82 .to_string(),
83 ),
84 ..Default::default()
85 };
86 }
87
88 let Some(root) = root else {
89 return CursorHistoryCollection {
90 supported: true,
91 exists: false,
92 note: Some("Unable to resolve %APPDATA%\\Cursor\\User\\History.".to_string()),
93 ..Default::default()
94 };
95 };
96
97 if !root.exists() {
98 return CursorHistoryCollection {
99 supported: true,
100 history_root: root.display().to_string(),
101 exists: false,
102 note: Some("Cursor local history directory was not found.".to_string()),
103 ..Default::default()
104 };
105 }
106
107 let mut workspaces = Vec::new();
108 let mut entry_count = 0usize;
109
110 let Ok(entries) = fs::read_dir(&root) else {
111 return CursorHistoryCollection {
112 supported: true,
113 history_root: root.display().to_string(),
114 exists: true,
115 note: Some("Failed to read Cursor local history directory.".to_string()),
116 ..Default::default()
117 };
118 };
119
120 for entry in entries.flatten() {
121 let path = entry.path();
122 if !path.is_dir() {
123 continue;
124 }
125
126 let Some(folder_key) = path
127 .file_name()
128 .and_then(|value| value.to_str())
129 .map(str::to_string)
130 else {
131 continue;
132 };
133
134 let manifest_path = path.join("entries.json");
135 if !manifest_path.is_file() {
136 continue;
137 }
138
139 let Ok(content) = fs::read_to_string(&manifest_path) else {
140 continue;
141 };
142
143 let Ok(manifest) = serde_json::from_str::<CursorEntriesManifest>(&content) else {
144 continue;
145 };
146
147 let mut snapshots = Vec::new();
148 for manifest_entry in manifest.entries {
149 let entry_path = path.join(&manifest_entry.id);
150 let snapshot =
151 build_entry_snapshot(&entry_path, &manifest_entry.id, manifest_entry.timestamp);
152 snapshots.push(snapshot);
153 }
154
155 entry_count += snapshots.len();
156 workspaces.push(CursorHistoryWorkspaceSnapshot {
157 folder_key,
158 version: manifest.version,
159 resource: decode_cursor_resource_uri(&manifest.resource),
160 entries: snapshots,
161 });
162 }
163
164 workspaces.sort_by(|left, right| left.folder_key.cmp(&right.folder_key));
165
166 CursorHistoryCollection {
167 supported: true,
168 history_root: root.display().to_string(),
169 exists: true,
170 workspace_count: workspaces.len(),
171 entry_count,
172 collected_at: Some(Utc::now()),
173 workspaces,
174 note: None,
175 }
176}
177
178fn build_entry_snapshot(
179 entry_path: &Path,
180 entry_id: &str,
181 timestamp: i64,
182) -> CursorHistoryEntrySnapshot {
183 if !entry_path.is_file() {
184 return CursorHistoryEntrySnapshot {
185 entry_id: entry_id.to_string(),
186 timestamp,
187 content: None,
188 content_sha256: None,
189 content_encoding: None,
190 content_bytes: None,
191 };
192 }
193
194 let Ok(bytes) = fs::read(entry_path) else {
195 return CursorHistoryEntrySnapshot {
196 entry_id: entry_id.to_string(),
197 timestamp,
198 content: None,
199 content_sha256: None,
200 content_encoding: Some("unreadable".to_string()),
201 content_bytes: None,
202 };
203 };
204
205 let content_bytes = bytes.len();
206 let content_sha256 = Some(sha256_hex(&bytes));
207
208 if content_bytes > MAX_ENTRY_CONTENT_BYTES {
209 return CursorHistoryEntrySnapshot {
210 entry_id: entry_id.to_string(),
211 timestamp,
212 content: None,
213 content_sha256,
214 content_encoding: Some("too-large".to_string()),
215 content_bytes: Some(content_bytes),
216 };
217 }
218
219 match String::from_utf8(bytes) {
220 Ok(content) => CursorHistoryEntrySnapshot {
221 entry_id: entry_id.to_string(),
222 timestamp,
223 content: Some(content),
224 content_sha256,
225 content_encoding: Some("utf-8".to_string()),
226 content_bytes: Some(content_bytes),
227 },
228 Err(error) => CursorHistoryEntrySnapshot {
229 entry_id: entry_id.to_string(),
230 timestamp,
231 content: None,
232 content_sha256,
233 content_encoding: Some("binary".to_string()),
234 content_bytes: Some(error.as_bytes().len()),
235 },
236 }
237}
238
239pub fn decode_cursor_resource_uri(resource: &str) -> String {
240 let trimmed = resource.trim();
241 let without_scheme = trimmed
242 .strip_prefix("file://")
243 .or_else(|| trimmed.strip_prefix("file:"))
244 .unwrap_or(trimmed);
245
246 let decoded = percent_decode(without_scheme);
247 if cfg!(windows) || looks_like_windows_resource_path(&decoded) {
248 return decoded
249 .trim_start_matches("//")
250 .trim_start_matches('/')
251 .replace('/', "\\");
252 }
253
254 decoded
255}
256
/// Decodes `%XX` percent-escapes in `input`.
///
/// An escape is decoded only when `%` is followed by exactly two ASCII hex
/// digits (either case). Anything else, such as a trailing `%`, `%4`, `%zz`
/// or `%+5`, is copied through unchanged. Decoded bytes that do not form
/// valid UTF-8 are replaced with U+FFFD.
fn percent_decode(input: &str) -> String {
    /// Numeric value of a single ASCII hex digit, if it is one.
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    let bytes = input.as_bytes();
    let mut output = Vec::with_capacity(bytes.len());
    let mut index = 0;

    while index < bytes.len() {
        if bytes[index] == b'%' {
            // `get` yields `None` when fewer than two bytes follow the `%`.
            if let Some(&[high, low]) = bytes.get(index + 1..index + 3) {
                // Validate each digit explicitly: integer parsing would also
                // accept a leading `+`, turning `%+5` into byte 0x05.
                if let (Some(high), Some(low)) = (hex_value(high), hex_value(low)) {
                    output.push((high << 4) | low);
                    index += 3;
                    continue;
                }
            }
        }

        output.push(bytes[index]);
        index += 1;
    }

    String::from_utf8_lossy(&output).into_owned()
}
280
/// Reports whether `decoded`, ignoring any leading slashes, begins with a
/// drive designator: one ASCII letter followed by `:`.
fn looks_like_windows_resource_path(decoded: &str) -> bool {
    let candidate = decoded.trim_start_matches('/');
    matches!(
        candidate.as_bytes(),
        [drive, b':', ..] if drive.is_ascii_alphabetic()
    )
}
286
287fn sha256_hex(bytes: &[u8]) -> String {
288 use sha2::{Digest, Sha256};
289
290 let digest = Sha256::digest(bytes);
291 digest.iter().map(|byte| format!("{byte:02x}")).collect()
292}
293
#[cfg(test)]
mod tests {
    use super::{collect_cursor_history, decode_cursor_resource_uri};
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Creates a fresh scratch directory under the system temp dir. The
    /// nanosecond timestamp keeps names distinct between runs; callers are
    /// responsible for removing the directory.
    fn temp_dir(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let path = std::env::temp_dir().join(format!("xbp-cursor-history-{label}-{nanos}"));
        fs::create_dir_all(&path).expect("temp dir");
        path
    }

    #[test]
    fn decodes_cursor_file_resource_uri() {
        assert_eq!(
            decode_cursor_resource_uri(
                "file:///c%3A/Users/floris/Documents/GitHub/xbp/.github/workflows/publish-crates.yml"
            ),
            "c:\\Users\\floris\\Documents\\GitHub\\xbp\\.github\\workflows\\publish-crates.yml"
        );
    }

    #[test]
    fn collects_workspace_manifest_and_entry_content() {
        let root = temp_dir("history");
        let workspace = root.join("-2ac5da1");
        fs::create_dir_all(&workspace).expect("workspace dir");
        fs::write(
            workspace.join("entries.json"),
            r#"{
  "version": 1,
  "resource": "file:///c%3A/Users/floris/Documents/GitHub/athena/config.yaml",
  "entries": [{ "id": "wuKn.yaml", "timestamp": 1781529330927 }]
}"#,
        )
        .expect("entries.json");
        fs::write(workspace.join("wuKn.yaml"), "project: demo\n").expect("entry file");

        let collection = collect_cursor_history(Some(root.as_path()));

        // The collection owns all its data, so remove the scratch directory
        // before asserting; a failing assertion then cannot leak it.
        let _ = fs::remove_dir_all(&root);

        assert_eq!(collection.workspace_count, 1);
        assert_eq!(collection.entry_count, 1);
        assert_eq!(
            collection.workspaces[0].resource,
            "c:\\Users\\floris\\Documents\\GitHub\\athena\\config.yaml"
        );
        assert_eq!(
            collection.workspaces[0].entries[0].content.as_deref(),
            Some("project: demo\n")
        );
    }
}