Skip to main content

cgx_engine/docs/
incremental.rs

//! Per-file slice-hash tracking, persisted to `~/.cgx/<repo_id>/docs_state.json`.
//!
//! A "slice hash" is a stable digest of just the parts of the graph that drive
//! a single file's module note: the file path, community membership, the symbol
//! IDs and line ranges, the caller/callee/test counts, the complexity bucket, and
//! up to three owner names. If two runs produce the same slice hash for a file,
//! its note is treated as byte-identical and regeneration can be skipped.
7
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13
14use crate::graph::FileSummary;
15
16#[derive(Debug, Clone, Serialize, Deserialize, Default)]
17pub struct DocsState {
18    pub generated_at: String,
19    pub files: HashMap<String, FileEntry>,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct FileEntry {
24    pub slice_hash: String,
25    pub last_gen_at: String,
26}
27
28/// Compute the slice hash for a single file from its [`FileSummary`].
29pub fn slice_hash(summary: &FileSummary) -> String {
30    let mut hasher = Sha256::new();
31    hasher.update(summary.path.as_bytes());
32    hasher.update(summary.community.to_be_bytes());
33
34    let mut symbol_ids: Vec<(String, u32, u32)> = summary
35        .symbols
36        .iter()
37        .map(|n| (n.id.clone(), n.line_start, n.line_end))
38        .collect();
39    symbol_ids.sort();
40    for (id, ls, le) in &symbol_ids {
41        hasher.update(id.as_bytes());
42        hasher.update(ls.to_be_bytes());
43        hasher.update(le.to_be_bytes());
44    }
45
46    hasher.update(summary.callers.len().to_be_bytes());
47    hasher.update(summary.callees.len().to_be_bytes());
48    hasher.update(summary.tests.len().to_be_bytes());
49
50    // Bucket complexity into deciles so trivial drift doesn't invalidate the hash.
51    let complexity_bucket = (summary.complexity / 10.0).floor() as i64;
52    hasher.update(complexity_bucket.to_be_bytes());
53
54    let mut owner_ids: Vec<&String> = summary.owners.iter().map(|(n, _)| n).collect();
55    owner_ids.sort();
56    owner_ids.truncate(3);
57    for o in owner_ids {
58        hasher.update(o.as_bytes());
59    }
60
61    format!("{:x}", hasher.finalize())
62}
63
64/// Read the persisted state file, returning a default if missing.
65pub fn load_state(repo_id: &str) -> DocsState {
66    match state_path(repo_id) {
67        Some(p) => std::fs::read_to_string(&p)
68            .ok()
69            .and_then(|s| serde_json::from_str(&s).ok())
70            .unwrap_or_default(),
71        None => DocsState::default(),
72    }
73}
74
75/// Persist the state file (creates parent dirs as needed).
76pub fn save_state(repo_id: &str, state: &DocsState) -> anyhow::Result<()> {
77    let Some(path) = state_path(repo_id) else {
78        return Ok(());
79    };
80    if let Some(parent) = path.parent() {
81        std::fs::create_dir_all(parent)?;
82    }
83    let json = serde_json::to_string_pretty(state)?;
84    std::fs::write(&path, json)?;
85    Ok(())
86}
87
88fn state_path(repo_id: &str) -> Option<PathBuf> {
89    Some(
90        dirs::home_dir()?
91            .join(".cgx")
92            .join(repo_id)
93            .join("docs_state.json"),
94    )
95}
96
97/// Convenience: build a fresh entry for the current time.
98pub fn entry_now(slice_hash: String) -> FileEntry {
99    FileEntry {
100        slice_hash,
101        last_gen_at: chrono::Utc::now().to_rfc3339(),
102    }
103}
104
105/// Whether a file should be regenerated in incremental mode.
106pub fn needs_regen(state: &DocsState, file_path: &Path, new_hash: &str) -> bool {
107    match state.files.get(&file_path.to_string_lossy().to_string()) {
108        Some(entry) => entry.slice_hash != new_hash,
109        None => true,
110    }
111}