Skip to main content

xbp_cli/codetime/
history.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::fs;
4use std::path::{Path, PathBuf};
5
/// Largest entry file (in bytes, 256 KiB) whose content is embedded in a snapshot.
///
/// Bigger files are still hashed and sized, but their content is omitted.
const MAX_ENTRY_CONTENT_BYTES: usize = 256 << 10;
7
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
9pub struct CursorHistoryEntrySnapshot {
10    pub entry_id: String,
11    pub timestamp: i64,
12    #[serde(default, skip_serializing_if = "Option::is_none")]
13    pub content: Option<String>,
14    #[serde(default, skip_serializing_if = "Option::is_none")]
15    pub content_sha256: Option<String>,
16    #[serde(default, skip_serializing_if = "Option::is_none")]
17    pub content_encoding: Option<String>,
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub content_bytes: Option<usize>,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
23pub struct CursorHistoryWorkspaceSnapshot {
24    pub folder_key: String,
25    pub version: u32,
26    pub resource: String,
27    pub entries: Vec<CursorHistoryEntrySnapshot>,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
31pub struct CursorHistoryCollection {
32    #[serde(default)]
33    pub supported: bool,
34    #[serde(default)]
35    pub history_root: String,
36    #[serde(default)]
37    pub exists: bool,
38    #[serde(default)]
39    pub workspace_count: usize,
40    #[serde(default)]
41    pub entry_count: usize,
42    #[serde(default)]
43    pub collected_at: Option<DateTime<Utc>>,
44    #[serde(default)]
45    pub workspaces: Vec<CursorHistoryWorkspaceSnapshot>,
46    #[serde(default, skip_serializing_if = "Option::is_none")]
47    pub note: Option<String>,
48}
49
50#[derive(Debug, Deserialize)]
51struct CursorEntriesManifest {
52    version: u32,
53    resource: String,
54    entries: Vec<CursorEntriesManifestEntry>,
55}
56
57#[derive(Debug, Deserialize)]
58struct CursorEntriesManifestEntry {
59    id: String,
60    timestamp: i64,
61}
62
63pub fn default_cursor_history_root() -> Option<PathBuf> {
64    dirs::data_dir().map(|path| path.join("Cursor").join("User").join("History"))
65}
66
67pub fn collect_cursor_history(explicit_root: Option<&Path>) -> CursorHistoryCollection {
68    let root = explicit_root
69        .map(Path::to_path_buf)
70        .or_else(default_cursor_history_root);
71
72    if !cfg!(windows) && explicit_root.is_none() {
73        return CursorHistoryCollection {
74            supported: false,
75            history_root: root
76                .as_ref()
77                .map(|path| path.display().to_string())
78                .unwrap_or_default(),
79            exists: root.as_ref().map(|path| path.exists()).unwrap_or(false),
80            note: Some(
81                "Cursor local history ingestion is only implemented on Windows right now."
82                    .to_string(),
83            ),
84            ..Default::default()
85        };
86    }
87
88    let Some(root) = root else {
89        return CursorHistoryCollection {
90            supported: true,
91            exists: false,
92            note: Some("Unable to resolve %APPDATA%\\Cursor\\User\\History.".to_string()),
93            ..Default::default()
94        };
95    };
96
97    if !root.exists() {
98        return CursorHistoryCollection {
99            supported: true,
100            history_root: root.display().to_string(),
101            exists: false,
102            note: Some("Cursor local history directory was not found.".to_string()),
103            ..Default::default()
104        };
105    }
106
107    let mut workspaces = Vec::new();
108    let mut entry_count = 0usize;
109
110    let Ok(entries) = fs::read_dir(&root) else {
111        return CursorHistoryCollection {
112            supported: true,
113            history_root: root.display().to_string(),
114            exists: true,
115            note: Some("Failed to read Cursor local history directory.".to_string()),
116            ..Default::default()
117        };
118    };
119
120    for entry in entries.flatten() {
121        let path = entry.path();
122        if !path.is_dir() {
123            continue;
124        }
125
126        let Some(folder_key) = path
127            .file_name()
128            .and_then(|value| value.to_str())
129            .map(str::to_string)
130        else {
131            continue;
132        };
133
134        let manifest_path = path.join("entries.json");
135        if !manifest_path.is_file() {
136            continue;
137        }
138
139        let Ok(content) = fs::read_to_string(&manifest_path) else {
140            continue;
141        };
142
143        let Ok(manifest) = serde_json::from_str::<CursorEntriesManifest>(&content) else {
144            continue;
145        };
146
147        let mut snapshots = Vec::new();
148        for manifest_entry in manifest.entries {
149            let entry_path = path.join(&manifest_entry.id);
150            let snapshot =
151                build_entry_snapshot(&entry_path, &manifest_entry.id, manifest_entry.timestamp);
152            snapshots.push(snapshot);
153        }
154
155        entry_count += snapshots.len();
156        workspaces.push(CursorHistoryWorkspaceSnapshot {
157            folder_key,
158            version: manifest.version,
159            resource: decode_cursor_resource_uri(&manifest.resource),
160            entries: snapshots,
161        });
162    }
163
164    workspaces.sort_by(|left, right| left.folder_key.cmp(&right.folder_key));
165
166    CursorHistoryCollection {
167        supported: true,
168        history_root: root.display().to_string(),
169        exists: true,
170        workspace_count: workspaces.len(),
171        entry_count,
172        collected_at: Some(Utc::now()),
173        workspaces,
174        note: None,
175    }
176}
177
178fn build_entry_snapshot(
179    entry_path: &Path,
180    entry_id: &str,
181    timestamp: i64,
182) -> CursorHistoryEntrySnapshot {
183    if !entry_path.is_file() {
184        return CursorHistoryEntrySnapshot {
185            entry_id: entry_id.to_string(),
186            timestamp,
187            content: None,
188            content_sha256: None,
189            content_encoding: None,
190            content_bytes: None,
191        };
192    }
193
194    let Ok(bytes) = fs::read(entry_path) else {
195        return CursorHistoryEntrySnapshot {
196            entry_id: entry_id.to_string(),
197            timestamp,
198            content: None,
199            content_sha256: None,
200            content_encoding: Some("unreadable".to_string()),
201            content_bytes: None,
202        };
203    };
204
205    let content_bytes = bytes.len();
206    let content_sha256 = Some(sha256_hex(&bytes));
207
208    if content_bytes > MAX_ENTRY_CONTENT_BYTES {
209        return CursorHistoryEntrySnapshot {
210            entry_id: entry_id.to_string(),
211            timestamp,
212            content: None,
213            content_sha256,
214            content_encoding: Some("too-large".to_string()),
215            content_bytes: Some(content_bytes),
216        };
217    }
218
219    match String::from_utf8(bytes) {
220        Ok(content) => CursorHistoryEntrySnapshot {
221            entry_id: entry_id.to_string(),
222            timestamp,
223            content: Some(content),
224            content_sha256,
225            content_encoding: Some("utf-8".to_string()),
226            content_bytes: Some(content_bytes),
227        },
228        Err(error) => CursorHistoryEntrySnapshot {
229            entry_id: entry_id.to_string(),
230            timestamp,
231            content: None,
232            content_sha256,
233            content_encoding: Some("binary".to_string()),
234            content_bytes: Some(error.as_bytes().len()),
235        },
236    }
237}
238
239pub fn decode_cursor_resource_uri(resource: &str) -> String {
240    let trimmed = resource.trim();
241    let without_scheme = trimmed
242        .strip_prefix("file://")
243        .or_else(|| trimmed.strip_prefix("file:"))
244        .unwrap_or(trimmed);
245
246    let decoded = percent_decode(without_scheme);
247    if cfg!(windows) || looks_like_windows_resource_path(&decoded) {
248        return decoded
249            .trim_start_matches("//")
250            .trim_start_matches('/')
251            .replace('/', "\\");
252    }
253
254    decoded
255}
256
/// Decodes `%XX` escapes in `input`.
///
/// An escape is decoded only when `%` is followed by two hexadecimal digits
/// (either case). Anything else, including a truncated escape at the end of
/// the input or a signed form such as `%+5`, is copied through unchanged.
/// Decoded bytes that do not form valid UTF-8 are replaced with U+FFFD.
fn percent_decode(input: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let mut rest = input.as_bytes();
    let mut output = Vec::with_capacity(rest.len());

    while let Some((&first, tail)) = rest.split_first() {
        if first == b'%' {
            if let [high, low, ..] = tail {
                // Each digit is checked explicitly: integer parsing would also
                // accept a leading `+`, turning `%+5` into byte 0x05.
                if let (Some(high), Some(low)) = (hex_value(*high), hex_value(*low)) {
                    output.push((high << 4) | low);
                    rest = &tail[2..];
                    continue;
                }
            }
        }

        output.push(first);
        rest = tail;
    }

    String::from_utf8_lossy(&output).into_owned()
}
280
/// Reports whether `decoded`, ignoring any leading slashes, begins with a
/// drive specifier: an ASCII letter immediately followed by `:`.
fn looks_like_windows_resource_path(decoded: &str) -> bool {
    let candidate = decoded.trim_start_matches('/');
    matches!(
        candidate.as_bytes(),
        [drive, b':', ..] if drive.is_ascii_alphabetic()
    )
}
286
287fn sha256_hex(bytes: &[u8]) -> String {
288    use sha2::{Digest, Sha256};
289
290    let digest = Sha256::digest(bytes);
291    digest.iter().map(|byte| format!("{byte:02x}")).collect()
292}
293
#[cfg(test)]
mod tests {
    use super::{collect_cursor_history, decode_cursor_resource_uri};
    use std::fs;
    use std::path::{Path, PathBuf};
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Scratch directory that is deleted again when the guard is dropped, so
    /// a test cleans up after itself even when an assertion fails.
    struct TempDir(PathBuf);

    impl TempDir {
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// Creates a fresh scratch directory under the system temp dir. The name
    /// combines the label, the process id and the current time in nanoseconds
    /// so concurrent test runs do not share a directory.
    fn temp_dir(label: &str) -> TempDir {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let pid = std::process::id();
        let path =
            std::env::temp_dir().join(format!("xbp-cursor-history-{label}-{pid}-{nanos}"));
        fs::create_dir_all(&path).expect("temp dir");
        TempDir(path)
    }

    #[test]
    fn decodes_cursor_file_resource_uri() {
        assert_eq!(
            decode_cursor_resource_uri(
                "file:///c%3A/Users/floris/Documents/GitHub/xbp/.github/workflows/publish-crates.yml"
            ),
            "c:\\Users\\floris\\Documents\\GitHub\\xbp\\.github\\workflows\\publish-crates.yml"
        );
    }

    #[test]
    fn collects_workspace_manifest_and_entry_content() {
        let root = temp_dir("history");
        let workspace = root.path().join("-2ac5da1");
        fs::create_dir_all(&workspace).expect("workspace dir");
        fs::write(
            workspace.join("entries.json"),
            r#"{
  "version": 1,
  "resource": "file:///c%3A/Users/floris/Documents/GitHub/athena/config.yaml",
  "entries": [{ "id": "wuKn.yaml", "timestamp": 1781529330927 }]
}"#,
        )
        .expect("entries.json");
        fs::write(workspace.join("wuKn.yaml"), "project: demo\n").expect("entry file");

        let collection = collect_cursor_history(Some(root.path()));
        assert_eq!(collection.workspace_count, 1);
        assert_eq!(collection.entry_count, 1);
        assert_eq!(
            collection.workspaces[0].resource,
            "c:\\Users\\floris\\Documents\\GitHub\\athena\\config.yaml"
        );
        assert_eq!(
            collection.workspaces[0].entries[0].content.as_deref(),
            Some("project: demo\n")
        );
    }
}
349}