xbp 10.30.1

XBP is a zero-config build pack that can also interact with proxies, Kafka, sockets, and synthetic monitors.
Documentation
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};

/// Largest entry file, in bytes (256 KiB), whose content is inlined into a snapshot.
///
/// Files above this size are still hashed and measured, but their content is
/// omitted and their encoding is reported as `"too-large"`.
const MAX_ENTRY_CONTENT_BYTES: usize = 256 * 1024;

/// Snapshot of a single history entry listed in a workspace's `entries.json`.
///
/// All `content*` fields are optional and are left out of the serialized form
/// when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CursorHistoryEntrySnapshot {
    /// Entry id from the manifest; it is also the file name of the entry inside
    /// the workspace folder.
    pub entry_id: String,
    /// Timestamp copied verbatim from the manifest.
    /// NOTE(review): presumably Unix epoch milliseconds — confirm against Cursor's format.
    pub timestamp: i64,
    /// File content, present only when the file was read, is not larger than
    /// `MAX_ENTRY_CONTENT_BYTES`, and is valid UTF-8.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Lowercase hex SHA-256 of the raw file bytes, present whenever the file was read.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content_sha256: Option<String>,
    /// One of `"utf-8"`, `"binary"`, `"too-large"` or `"unreadable"`; `None` when
    /// the entry file does not exist.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content_encoding: Option<String>,
    /// Size of the entry file in bytes, present whenever the file was read.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content_bytes: Option<usize>,
}

/// Snapshot of one folder under the history root, built from its `entries.json`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CursorHistoryWorkspaceSnapshot {
    /// Name of the folder inside the history root.
    pub folder_key: String,
    /// `version` value copied from the manifest.
    pub version: u32,
    /// Manifest `resource` after decoding with `decode_cursor_resource_uri`.
    pub resource: String,
    /// One snapshot per manifest entry, in manifest order.
    pub entries: Vec<CursorHistoryEntrySnapshot>,
}

/// Result of a Cursor local-history collection run.
///
/// Every field falls back to its default when missing during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct CursorHistoryCollection {
    /// `false` only when collection was skipped because no explicit root was
    /// given on a non-Windows build.
    #[serde(default)]
    pub supported: bool,
    /// Display form of the history root that was used; empty when no root
    /// could be resolved.
    #[serde(default)]
    pub history_root: String,
    /// Whether the history root existed on disk when checked.
    #[serde(default)]
    pub exists: bool,
    /// Number of items in `workspaces`.
    #[serde(default)]
    pub workspace_count: usize,
    /// Total number of entries across all workspaces.
    #[serde(default)]
    pub entry_count: usize,
    /// Time at which a successful collection finished; `None` otherwise.
    #[serde(default)]
    pub collected_at: Option<DateTime<Utc>>,
    /// Collected workspaces, sorted by `folder_key`.
    #[serde(default)]
    pub workspaces: Vec<CursorHistoryWorkspaceSnapshot>,
    /// Human-readable explanation of why nothing was collected; `None` on success
    /// and then omitted from the serialized form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub note: Option<String>,
}

/// Deserialized form of a workspace folder's `entries.json` manifest.
#[derive(Debug, Deserialize)]
struct CursorEntriesManifest {
    /// Manifest format version as written in the file.
    version: u32,
    /// Resource URI the history belongs to (e.g. a percent-encoded `file://` URI).
    resource: String,
    /// History entries listed by the manifest.
    entries: Vec<CursorEntriesManifestEntry>,
}

/// One entry of an `entries.json` manifest.
#[derive(Debug, Deserialize)]
struct CursorEntriesManifestEntry {
    /// Entry id; joined onto the workspace folder path to locate the entry file.
    id: String,
    /// Timestamp of the entry, passed through to the snapshot unchanged.
    timestamp: i64,
}

/// Returns the default Cursor history directory, `<data dir>/Cursor/User/History`.
///
/// Yields `None` when `dirs::data_dir()` cannot determine the user's data directory.
pub fn default_cursor_history_root() -> Option<PathBuf> {
    let mut history_root = dirs::data_dir()?;
    history_root.extend(["Cursor", "User", "History"]);
    Some(history_root)
}

pub fn collect_cursor_history(explicit_root: Option<&Path>) -> CursorHistoryCollection {
    let root = explicit_root
        .map(Path::to_path_buf)
        .or_else(default_cursor_history_root);

    if !cfg!(windows) && explicit_root.is_none() {
        return CursorHistoryCollection {
            supported: false,
            history_root: root
                .as_ref()
                .map(|path| path.display().to_string())
                .unwrap_or_default(),
            exists: root.as_ref().map(|path| path.exists()).unwrap_or(false),
            note: Some(
                "Cursor local history ingestion is only implemented on Windows right now."
                    .to_string(),
            ),
            ..Default::default()
        };
    }

    let Some(root) = root else {
        return CursorHistoryCollection {
            supported: true,
            exists: false,
            note: Some("Unable to resolve %APPDATA%\\Cursor\\User\\History.".to_string()),
            ..Default::default()
        };
    };

    if !root.exists() {
        return CursorHistoryCollection {
            supported: true,
            history_root: root.display().to_string(),
            exists: false,
            note: Some("Cursor local history directory was not found.".to_string()),
            ..Default::default()
        };
    }

    let mut workspaces = Vec::new();
    let mut entry_count = 0usize;

    let Ok(entries) = fs::read_dir(&root) else {
        return CursorHistoryCollection {
            supported: true,
            history_root: root.display().to_string(),
            exists: true,
            note: Some("Failed to read Cursor local history directory.".to_string()),
            ..Default::default()
        };
    };

    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_dir() {
            continue;
        }

        let Some(folder_key) = path
            .file_name()
            .and_then(|value| value.to_str())
            .map(str::to_string)
        else {
            continue;
        };

        let manifest_path = path.join("entries.json");
        if !manifest_path.is_file() {
            continue;
        }

        let Ok(content) = fs::read_to_string(&manifest_path) else {
            continue;
        };

        let Ok(manifest) = serde_json::from_str::<CursorEntriesManifest>(&content) else {
            continue;
        };

        let mut snapshots = Vec::new();
        for manifest_entry in manifest.entries {
            let entry_path = path.join(&manifest_entry.id);
            let snapshot =
                build_entry_snapshot(&entry_path, &manifest_entry.id, manifest_entry.timestamp);
            snapshots.push(snapshot);
        }

        entry_count += snapshots.len();
        workspaces.push(CursorHistoryWorkspaceSnapshot {
            folder_key,
            version: manifest.version,
            resource: decode_cursor_resource_uri(&manifest.resource),
            entries: snapshots,
        });
    }

    workspaces.sort_by(|left, right| left.folder_key.cmp(&right.folder_key));

    CursorHistoryCollection {
        supported: true,
        history_root: root.display().to_string(),
        exists: true,
        workspace_count: workspaces.len(),
        entry_count,
        collected_at: Some(Utc::now()),
        workspaces,
        note: None,
    }
}

/// Builds the snapshot for a single history entry file.
///
/// The outcome is encoded in `content_encoding`:
/// - `None`: `entry_path` is not a regular file; nothing else is filled in.
/// - `"unreadable"`: the file exists but could not be read.
/// - `"too-large"`: the file exceeds `MAX_ENTRY_CONTENT_BYTES`; hash and size only.
/// - `"utf-8"`: content is included alongside hash and size.
/// - `"binary"`: the bytes are not valid UTF-8; hash and size only.
fn build_entry_snapshot(
    entry_path: &Path,
    entry_id: &str,
    timestamp: i64,
) -> CursorHistoryEntrySnapshot {
    // Template with nothing known about the content; each outcome below
    // overrides only the fields it has learned.
    let blank = CursorHistoryEntrySnapshot {
        entry_id: entry_id.to_string(),
        timestamp,
        content: None,
        content_sha256: None,
        content_encoding: None,
        content_bytes: None,
    };

    if !entry_path.is_file() {
        return blank;
    }

    let raw = match fs::read(entry_path) {
        Ok(raw) => raw,
        Err(_) => {
            return CursorHistoryEntrySnapshot {
                content_encoding: Some("unreadable".to_string()),
                ..blank
            };
        }
    };

    // Hash and size are recorded for every file that could be read, even when
    // its content ends up being left out.
    let size = raw.len();
    let measured = CursorHistoryEntrySnapshot {
        content_sha256: Some(sha256_hex(&raw)),
        content_bytes: Some(size),
        ..blank
    };

    if size > MAX_ENTRY_CONTENT_BYTES {
        return CursorHistoryEntrySnapshot {
            content_encoding: Some("too-large".to_string()),
            ..measured
        };
    }

    match String::from_utf8(raw) {
        Ok(text) => CursorHistoryEntrySnapshot {
            content: Some(text),
            content_encoding: Some("utf-8".to_string()),
            ..measured
        },
        Err(_) => CursorHistoryEntrySnapshot {
            content_encoding: Some("binary".to_string()),
            ..measured
        },
    }
}

/// Turns a manifest `resource` URI into a plain path string.
///
/// Surrounding whitespace and a leading `file://` (or `file:`) are removed and
/// percent escapes are decoded. On Windows builds, or when the decoded text
/// starts with a drive letter, leading slashes are dropped and the remaining
/// `/` separators become `\`. Otherwise the decoded text is returned as is.
pub fn decode_cursor_resource_uri(resource: &str) -> String {
    let resource = resource.trim();
    let body = match resource.strip_prefix("file://") {
        Some(rest) => rest,
        None => resource.strip_prefix("file:").unwrap_or(resource),
    };

    let decoded = percent_decode(body);
    let treat_as_windows = cfg!(windows) || looks_like_windows_resource_path(&decoded);
    if !treat_as_windows {
        return decoded;
    }

    decoded
        .trim_start_matches("//")
        .trim_start_matches('/')
        .replace('/', "\\")
}

/// Decodes `%XX` percent escapes in `input`.
///
/// An escape is decoded only when `%` is followed by exactly two ASCII hex
/// digits; anything else (a trailing `%`, `%2`, `%zz`, `%+5`, ...) is copied
/// through unchanged. Decoded bytes that do not form valid UTF-8 are replaced
/// with U+FFFD.
fn percent_decode(input: &str) -> String {
    /// Numeric value of one ASCII hex digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes = input.as_bytes();
    let mut output = Vec::with_capacity(bytes.len());
    let mut index = 0;

    while index < bytes.len() {
        if bytes[index] == b'%' {
            // Each digit is validated on its own: an integer parser such as
            // `u8::from_str_radix` would also accept a leading `+` sign and
            // wrongly decode `%+5` to byte 0x05.
            let high = bytes.get(index + 1).copied().and_then(hex_value);
            let low = bytes.get(index + 2).copied().and_then(hex_value);
            if let (Some(high), Some(low)) = (high, low) {
                output.push((high << 4) | low);
                index += 3;
                continue;
            }
        }

        output.push(bytes[index]);
        index += 1;
    }

    String::from_utf8_lossy(&output).into_owned()
}

/// Reports whether `decoded`, ignoring any leading slashes, starts with a
/// drive prefix: one ASCII letter followed by `:`.
fn looks_like_windows_resource_path(decoded: &str) -> bool {
    // Stripping every leading '/' covers both the "//" and "/" prefixes.
    let mut rest = decoded.trim_start_matches('/').bytes();
    match (rest.next(), rest.next()) {
        (Some(drive), Some(b':')) => drive.is_ascii_alphabetic(),
        _ => false,
    }
}

/// Returns the SHA-256 digest of `bytes` as a lowercase hexadecimal string.
fn sha256_hex(bytes: &[u8]) -> String {
    use sha2::{Digest, Sha256};

    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let digest = Sha256::digest(bytes);
    // A SHA-256 digest is 32 bytes, i.e. 64 hex characters.
    let mut hex = String::with_capacity(64);
    for &byte in digest.iter() {
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}

#[cfg(test)]
mod tests {
    use super::{collect_cursor_history, decode_cursor_resource_uri};
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Creates a fresh directory under the system temp dir; the current time in
    /// nanoseconds is part of the name to keep runs apart. The caller is
    /// responsible for removing it.
    fn temp_dir(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let path = std::env::temp_dir().join(format!("xbp-cursor-history-{label}-{nanos}"));
        fs::create_dir_all(&path).expect("temp dir");
        path
    }

    /// A percent-encoded `file:///` URI with a drive letter decodes to a
    /// backslash-separated Windows path on every platform.
    #[test]
    fn decodes_cursor_file_resource_uri() {
        assert_eq!(
            decode_cursor_resource_uri(
                "file:///c%3A/Users/floris/Documents/GitHub/xbp/.github/workflows/publish-crates.yml"
            ),
            "c:\\Users\\floris\\Documents\\GitHub\\xbp\\.github\\workflows\\publish-crates.yml"
        );
    }

    /// Collecting from an explicit root picks up the workspace manifest and
    /// inlines the UTF-8 content of the entry file.
    #[test]
    fn collects_workspace_manifest_and_entry_content() {
        let root = temp_dir("history");
        let workspace = root.join("-2ac5da1");
        fs::create_dir_all(&workspace).expect("workspace dir");
        fs::write(
            workspace.join("entries.json"),
            r#"{
  "version": 1,
  "resource": "file:///c%3A/Users/floris/Documents/GitHub/athena/config.yaml",
  "entries": [{ "id": "wuKn.yaml", "timestamp": 1781529330927 }]
}"#,
        )
        .expect("entries.json");
        fs::write(workspace.join("wuKn.yaml"), "project: demo\n").expect("entry file");

        let collection = collect_cursor_history(Some(root.as_path()));

        // The collection owns all of its data, so the fixture can be removed
        // before asserting; this way a failing assertion does not leave the
        // directory behind. Cleanup is best-effort.
        let _ = fs::remove_dir_all(&root);

        assert_eq!(collection.workspace_count, 1);
        assert_eq!(collection.entry_count, 1);
        assert_eq!(
            collection.workspaces[0].resource,
            "c:\\Users\\floris\\Documents\\GitHub\\athena\\config.yaml"
        );
        assert_eq!(
            collection.workspaces[0].entries[0].content.as_deref(),
            Some("project: demo\n")
        );
    }
}