use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};
/// Largest entry file, in bytes (256 KiB), whose text is embedded in a snapshot.
/// Larger files are reported by `build_entry_snapshot` as `too-large`, carrying
/// only their hash and size.
const MAX_ENTRY_CONTENT_BYTES: usize = 256 * 1024;
/// Snapshot of one local-history entry listed in a workspace folder's `entries.json`.
///
/// The optional fields are left out of serialized output when they are `None`
/// and default to `None` when absent during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CursorHistoryEntrySnapshot {
/// Entry `id` taken from the manifest; it is also the file name looked up
/// inside the workspace's history folder.
pub entry_id: String,
/// Entry `timestamp` copied verbatim from the manifest.
/// NOTE(review): unit is not established in this file; presumably epoch
/// milliseconds — confirm.
pub timestamp: i64,
/// File text; present only when the file was read, did not exceed
/// `MAX_ENTRY_CONTENT_BYTES`, and was valid UTF-8.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
/// Lowercase hex SHA-256 of the raw file bytes; present whenever the file was read.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub content_sha256: Option<String>,
/// Classification assigned by `build_entry_snapshot`: `utf-8`, `binary`,
/// `too-large` or `unreadable`; `None` when no regular file was located for
/// the entry.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub content_encoding: Option<String>,
/// Size of the file in bytes; present whenever the file was read.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub content_bytes: Option<usize>,
}
/// Snapshot of one folder under the history root, built from its `entries.json` manifest.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CursorHistoryWorkspaceSnapshot {
/// Name of the folder under the history root this snapshot was read from.
pub folder_key: String,
/// `version` value copied from the manifest.
pub version: u32,
/// Manifest `resource` after being passed through `decode_cursor_resource_uri`.
pub resource: String,
/// One snapshot per manifest entry, in manifest order.
pub entries: Vec<CursorHistoryEntrySnapshot>,
}
/// Result of scanning the Cursor local-history directory.
///
/// Every field falls back to its `Default` value when missing during
/// deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct CursorHistoryCollection {
/// `false` when `collect_cursor_history` ran on a non-Windows build without
/// an explicit root; `true` in every other result it produces.
#[serde(default)]
pub supported: bool,
/// Display form of the history root path; empty when no root could be resolved.
#[serde(default)]
pub history_root: String,
/// Whether the history root path existed on disk.
#[serde(default)]
pub exists: bool,
/// Number of items in `workspaces`.
#[serde(default)]
pub workspace_count: usize,
/// Total number of entry snapshots across all workspaces.
#[serde(default)]
pub entry_count: usize,
/// Time the scan finished; set only when the directory was actually scanned.
#[serde(default)]
pub collected_at: Option<DateTime<Utc>>,
/// Workspace snapshots, sorted by `folder_key`.
#[serde(default)]
pub workspaces: Vec<CursorHistoryWorkspaceSnapshot>,
/// Human-readable reason the scan was not performed; `None` after a
/// completed scan and omitted from serialized output in that case.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub note: Option<String>,
}
/// Deserialization target for a workspace folder's `entries.json`.
/// All three fields are required; a manifest missing any of them fails to parse.
#[derive(Debug, Deserialize)]
struct CursorEntriesManifest {
// Copied into `CursorHistoryWorkspaceSnapshot::version`.
version: u32,
// Resource string as stored in the manifest (e.g. a percent-encoded `file://` URI).
resource: String,
// History entries recorded for the resource.
entries: Vec<CursorEntriesManifestEntry>,
}
/// One element of the `entries` array in `entries.json`.
#[derive(Debug, Deserialize)]
struct CursorEntriesManifestEntry {
// Joined onto the workspace folder path to locate the entry's file.
id: String,
// Passed through unchanged into the entry snapshot.
timestamp: i64,
}
/// Returns `<data dir>/Cursor/User/History`, where `<data dir>` is whatever
/// `dirs::data_dir()` reports, or `None` when no data directory is available.
///
/// The path is only computed; nothing is checked on disk.
pub fn default_cursor_history_root() -> Option<PathBuf> {
    let data_dir = dirs::data_dir()?;
    let history = data_dir.join("Cursor").join("User").join("History");
    Some(history)
}
pub fn collect_cursor_history(explicit_root: Option<&Path>) -> CursorHistoryCollection {
let root = explicit_root
.map(Path::to_path_buf)
.or_else(default_cursor_history_root);
if !cfg!(windows) && explicit_root.is_none() {
return CursorHistoryCollection {
supported: false,
history_root: root
.as_ref()
.map(|path| path.display().to_string())
.unwrap_or_default(),
exists: root.as_ref().map(|path| path.exists()).unwrap_or(false),
note: Some(
"Cursor local history ingestion is only implemented on Windows right now."
.to_string(),
),
..Default::default()
};
}
let Some(root) = root else {
return CursorHistoryCollection {
supported: true,
exists: false,
note: Some("Unable to resolve %APPDATA%\\Cursor\\User\\History.".to_string()),
..Default::default()
};
};
if !root.exists() {
return CursorHistoryCollection {
supported: true,
history_root: root.display().to_string(),
exists: false,
note: Some("Cursor local history directory was not found.".to_string()),
..Default::default()
};
}
let mut workspaces = Vec::new();
let mut entry_count = 0usize;
let Ok(entries) = fs::read_dir(&root) else {
return CursorHistoryCollection {
supported: true,
history_root: root.display().to_string(),
exists: true,
note: Some("Failed to read Cursor local history directory.".to_string()),
..Default::default()
};
};
for entry in entries.flatten() {
let path = entry.path();
if !path.is_dir() {
continue;
}
let Some(folder_key) = path
.file_name()
.and_then(|value| value.to_str())
.map(str::to_string)
else {
continue;
};
let manifest_path = path.join("entries.json");
if !manifest_path.is_file() {
continue;
}
let Ok(content) = fs::read_to_string(&manifest_path) else {
continue;
};
let Ok(manifest) = serde_json::from_str::<CursorEntriesManifest>(&content) else {
continue;
};
let mut snapshots = Vec::new();
for manifest_entry in manifest.entries {
let entry_path = path.join(&manifest_entry.id);
let snapshot =
build_entry_snapshot(&entry_path, &manifest_entry.id, manifest_entry.timestamp);
snapshots.push(snapshot);
}
entry_count += snapshots.len();
workspaces.push(CursorHistoryWorkspaceSnapshot {
folder_key,
version: manifest.version,
resource: decode_cursor_resource_uri(&manifest.resource),
entries: snapshots,
});
}
workspaces.sort_by(|left, right| left.folder_key.cmp(&right.folder_key));
CursorHistoryCollection {
supported: true,
history_root: root.display().to_string(),
exists: true,
workspace_count: workspaces.len(),
entry_count,
collected_at: Some(Utc::now()),
workspaces,
note: None,
}
}
/// Reads one history entry file and describes it as a snapshot.
///
/// Outcomes, by `content_encoding`:
/// - `None`: `entry_path` is not a regular file; every optional field is `None`.
/// - `unreadable`: the file exists but could not be read.
/// - `too-large`: the file exceeds `MAX_ENTRY_CONTENT_BYTES`; hash and size only.
/// - `binary`: the bytes are not valid UTF-8; hash and size only.
/// - `utf-8`: hash, size and the full text.
fn build_entry_snapshot(
    entry_path: &Path,
    entry_id: &str,
    timestamp: i64,
) -> CursorHistoryEntrySnapshot {
    // Starting point shared by every outcome; later branches override fields.
    let blank = CursorHistoryEntrySnapshot {
        entry_id: entry_id.to_string(),
        timestamp,
        content: None,
        content_sha256: None,
        content_encoding: None,
        content_bytes: None,
    };

    if !entry_path.is_file() {
        return blank;
    }

    let raw = match fs::read(entry_path) {
        Ok(raw) => raw,
        Err(_) => {
            return CursorHistoryEntrySnapshot {
                content_encoding: Some("unreadable".to_string()),
                ..blank
            }
        }
    };

    // Hash and size are taken from the raw bytes before any text conversion.
    let size = raw.len();
    let digest = sha256_hex(&raw);

    let (content, encoding) = if size > MAX_ENTRY_CONTENT_BYTES {
        (None, "too-large")
    } else {
        match String::from_utf8(raw) {
            Ok(text) => (Some(text), "utf-8"),
            Err(_) => (None, "binary"),
        }
    };

    CursorHistoryEntrySnapshot {
        content,
        content_sha256: Some(digest),
        content_encoding: Some(encoding.to_string()),
        content_bytes: Some(size),
        ..blank
    }
}
/// Turns a manifest `resource` string into a plain file-system path string.
///
/// Surrounding whitespace and a leading `file://` (or `file:`) are removed and
/// percent-escapes are decoded. On Windows builds, or when the decoded text
/// starts with a drive letter, leading slashes are dropped and every `/`
/// becomes `\`. Otherwise the decoded text is returned unchanged.
pub fn decode_cursor_resource_uri(resource: &str) -> String {
    let uri = resource.trim();
    let path_part = match uri.strip_prefix("file://") {
        Some(rest) => rest,
        None => uri.strip_prefix("file:").unwrap_or(uri),
    };
    let decoded = percent_decode(path_part);

    let treat_as_windows = cfg!(windows) || looks_like_windows_resource_path(&decoded);
    if !treat_as_windows {
        return decoded;
    }

    decoded
        .trim_start_matches("//")
        .trim_start_matches('/')
        .replace('/', "\\")
}
/// Decodes `%XX` escapes in `input`, where `XX` is exactly two hexadecimal digits.
///
/// Anything that is not a well-formed escape (a lone `%`, a `%` followed by
/// fewer than two characters, or by non-hex characters such as `%+5`) is copied
/// through unchanged. Decoded bytes that do not form valid UTF-8 are replaced
/// with U+FFFD.
fn percent_decode(input: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes = input.as_bytes();
    let mut output = Vec::with_capacity(bytes.len());
    let mut index = 0;
    while index < bytes.len() {
        if let [b'%', high, low, ..] = &bytes[index..] {
            // Digits are validated explicitly: integer parsing would also
            // accept a leading sign, turning "%+5" into byte 0x05.
            if let (Some(high), Some(low)) = (hex_value(*high), hex_value(*low)) {
                output.push((high << 4) | low);
                index += 3;
                continue;
            }
        }
        output.push(bytes[index]);
        index += 1;
    }
    String::from_utf8_lossy(&output).into_owned()
}
/// Reports whether `decoded`, ignoring any leading `/` characters, starts with
/// a drive designator: one ASCII letter followed by `:`.
fn looks_like_windows_resource_path(decoded: &str) -> bool {
    let rest = decoded.trim_start_matches('/');
    matches!(rest.as_bytes(), [drive, b':', ..] if drive.is_ascii_alphabetic())
}
/// Returns the SHA-256 digest of `bytes` as a lowercase hexadecimal string
/// (two characters per digest byte).
fn sha256_hex(bytes: &[u8]) -> String {
    use sha2::{Digest, Sha256};

    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let digest = Sha256::digest(bytes);
    let mut hex = String::with_capacity(digest.len() * 2);
    for &byte in digest.iter() {
        // High nibble first, then low nibble.
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
#[cfg(test)]
mod tests {
    use super::{collect_cursor_history, decode_cursor_resource_uri};
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Creates a scratch directory under the system temp dir, named from
    /// `label` and the current time in nanoseconds. The caller removes it.
    fn temp_dir(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let path = std::env::temp_dir().join(format!("xbp-cursor-history-{label}-{nanos}"));
        fs::create_dir_all(&path).expect("temp dir");
        path
    }

    /// A percent-encoded `file:///c%3A/...` URI decodes to a backslash-separated path.
    #[test]
    fn decodes_cursor_file_resource_uri() {
        assert_eq!(
            decode_cursor_resource_uri(
                "file:///c%3A/Users/floris/Documents/GitHub/xbp/.github/workflows/publish-crates.yml"
            ),
            "c:\\Users\\floris\\Documents\\GitHub\\xbp\\.github\\workflows\\publish-crates.yml"
        );
    }

    /// A folder with a manifest and one entry file yields one workspace with that entry's text.
    #[test]
    fn collects_workspace_manifest_and_entry_content() {
        let root = temp_dir("history");
        let workspace = root.join("-2ac5da1");
        fs::create_dir_all(&workspace).expect("workspace dir");
        fs::write(
            workspace.join("entries.json"),
            r#"{
                "version": 1,
                "resource": "file:///c%3A/Users/floris/Documents/GitHub/athena/config.yaml",
                "entries": [{ "id": "wuKn.yaml", "timestamp": 1781529330927 }]
            }"#,
        )
        .expect("entries.json");
        fs::write(workspace.join("wuKn.yaml"), "project: demo\n").expect("entry file");

        let collection = collect_cursor_history(Some(root.as_path()));

        // The collection owns all its data, so the scratch directory can be
        // removed before asserting; a failed assertion then cannot leak it.
        let _ = fs::remove_dir_all(&root);

        assert_eq!(collection.workspace_count, 1);
        assert_eq!(collection.entry_count, 1);
        assert_eq!(
            collection.workspaces[0].resource,
            "c:\\Users\\floris\\Documents\\GitHub\\athena\\config.yaml"
        );
        assert_eq!(
            collection.workspaces[0].entries[0].content.as_deref(),
            Some("project: demo\n")
        );
    }
}