// lean_ctx/core/context_ir.rs

1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3
/// File name of the persisted IR store inside the lean-ctx data directory.
const STORE_FILENAME: &str = "context_ir_v1.json";

// Hard bounds: IR is an observability artifact; keep it small and safe.
/// Maximum number of items retained in the store; the oldest are pruned first.
const MAX_ITEMS: usize = 128;
/// Maximum number of characters kept from a single item's content excerpt.
const MAX_ITEM_CONTENT_CHARS: usize = 4096;
/// Maximum combined excerpt length, in characters, across all retained items.
const MAX_TOTAL_CONTENT_CHARS: usize = 65_536;
10
/// Root document of the context IR store (schema v1).
///
/// Serialized to and from JSON via serde. `items` is kept within the module's
/// size bounds by pruning the oldest entries as new ones are recorded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextIrV1 {
    /// Schema version stamp; `new` sets it to `CONTEXT_IR_V1_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// RFC 3339 UTC timestamp taken when the store was created.
    pub created_at: String,
    /// RFC 3339 UTC timestamp of the most recent `record` call.
    pub updated_at: String,
    /// Sequence number that will be assigned to the next recorded item.
    pub next_seq: u64,
    /// Running totals over every item ever recorded; pruning does not reduce them.
    pub totals: ContextIrTotalsV1,
    /// Retained items, oldest first.
    pub items: Vec<ContextIrItemV1>,
}
20
/// Cumulative counters maintained by `ContextIrV1::record`.
///
/// All counters are updated with saturating arithmetic, so they stop at
/// `u64::MAX` instead of wrapping.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContextIrTotalsV1 {
    /// Number of `record` calls made against the store.
    pub items_recorded: u64,
    /// Sum of the `input_tokens` of every recorded item.
    pub input_tokens: u64,
    /// Sum of the `output_tokens` of every recorded item.
    pub output_tokens: u64,
    /// Sum of per-item `input_tokens - output_tokens`, floored at zero per item.
    pub tokens_saved: u64,
}
28
/// Category of the source that produced an IR item.
///
/// Serialized as snake_case strings (`read`, `shell`, `search`, `provider`,
/// `other`). `Other` is the `Default` value.
// NOTE(review): the variants presumably mirror tool families (file reads,
// shell commands, searches, external providers) — confirm against callers.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextIrSourceKindV1 {
    Read,
    Shell,
    Search,
    Provider,
    #[default]
    Other,
}
39
/// Provenance of an IR item: which tool produced it and with which arguments.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContextIrSourceV1 {
    /// Category of the source.
    pub kind: ContextIrSourceKindV1,
    /// Tool name exactly as supplied by the caller of `record`.
    pub tool: String,
    /// Optional client name, stored as supplied by the caller.
    pub client_name: Option<String>,
    /// Optional agent identifier, stored as supplied by the caller.
    pub agent_id: Option<String>,
    /// Optional path argument; `record` stores it as given, without a redaction pass.
    pub path: Option<String>,
    /// Optional command argument; `record` passes it through `redact_text` before storing.
    pub command: Option<String>,
    /// Optional pattern argument; `record` passes it through `redact_text` before storing.
    pub pattern: Option<String>,
}
50
/// Safety metadata attached to every IR item.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContextIrSafetyV1 {
    /// True if redaction has been applied to any stored text fields.
    /// `ContextIrV1::record` always sets this to `true`.
    pub redacted: bool,
    /// Human hint for the boundary mode at the time of collection, if known.
    /// `record` fills it with the `Debug` rendering of the effective boundary
    /// mode for the active role.
    pub boundary_mode: Option<String>,
}
58
/// Integrity metadata for an IR item.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContextIrVerificationV1 {
    /// Hash of the stored (redacted, bounded) excerpt as produced by
    /// `crate::core::hasher::hash_str`; `None` when the excerpt is empty or
    /// whitespace-only.
    // NOTE(review): the field name implies MD5, but the algorithm is decided
    // by `hash_str` — confirm before relying on the digest format.
    pub content_md5: Option<String>,
}
63
/// One recorded observation in the IR store.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextIrItemV1 {
    /// Sequence number taken from the store's `next_seq` at record time.
    pub seq: u64,
    /// RFC 3339 UTC timestamp taken when the item was recorded.
    pub created_at: String,
    /// Tool and arguments that produced this item.
    pub source: ContextIrSourceV1,
    /// Input token count as reported by the caller.
    pub input_tokens: usize,
    /// Output token count as reported by the caller.
    pub output_tokens: usize,
    /// Reported duration, in microseconds.
    pub duration_us: u64,
    /// `output_tokens / input_tokens`, or `1.0` when `input_tokens` is zero.
    pub compression_ratio: f64,
    /// Redacted content excerpt, capped at `MAX_ITEM_CONTENT_CHARS` characters.
    pub content_excerpt: String,
    /// True when the excerpt had to be cut to fit the per-item cap.
    pub truncated: bool,
    /// Redaction / boundary metadata captured at record time.
    pub safety: ContextIrSafetyV1,
    /// Integrity metadata for `content_excerpt`.
    pub verification: ContextIrVerificationV1,
}
78
/// Arguments for `ContextIrV1::record`.
///
/// Text fields are borrowed for `'a`; `record` copies (and, where noted,
/// redacts) them into the stored item.
#[derive(Debug, Clone)]
pub struct RecordIrInput<'a> {
    /// Category of the source.
    pub kind: ContextIrSourceKindV1,
    /// Tool name; stored verbatim.
    pub tool: &'a str,
    /// Optional client name; stored verbatim.
    pub client_name: Option<String>,
    /// Optional agent identifier; stored verbatim.
    pub agent_id: Option<String>,
    /// Optional path argument; stored verbatim.
    pub path: Option<&'a str>,
    /// Optional command argument; redacted before it is stored.
    pub command: Option<&'a str>,
    /// Optional pattern argument; redacted before it is stored.
    pub pattern: Option<&'a str>,
    /// Input token count for this observation.
    pub input_tokens: usize,
    /// Output token count for this observation.
    pub output_tokens: usize,
    /// Duration of the observed operation; stored in microseconds.
    pub duration: std::time::Duration,
    /// Content excerpt; redacted and capped before it is stored.
    pub content_excerpt: &'a str,
}
93
94impl ContextIrV1 {
95    pub fn new() -> Self {
96        let now = chrono::Utc::now().to_rfc3339();
97        Self {
98            schema_version: crate::core::contracts::CONTEXT_IR_V1_SCHEMA_VERSION,
99            created_at: now.clone(),
100            updated_at: now,
101            next_seq: 1,
102            totals: ContextIrTotalsV1::default(),
103            items: Vec::new(),
104        }
105    }
106
107    pub fn record(&mut self, input: RecordIrInput<'_>) {
108        let now = chrono::Utc::now().to_rfc3339();
109
110        let (content_excerpt, truncated) = bound_and_redact_excerpt(input.content_excerpt);
111        let command = input.command.map(crate::core::redaction::redact_text);
112        let pattern = input.pattern.map(crate::core::redaction::redact_text);
113
114        let ratio = if input.input_tokens == 0 {
115            1.0
116        } else {
117            input.output_tokens as f64 / input.input_tokens as f64
118        };
119
120        let content_md5 = if content_excerpt.trim().is_empty() {
121            None
122        } else {
123            Some(crate::core::hasher::hash_str(&content_excerpt))
124        };
125
126        let item = ContextIrItemV1 {
127            seq: self.next_seq,
128            created_at: now.clone(),
129            source: ContextIrSourceV1 {
130                kind: input.kind,
131                tool: input.tool.to_string(),
132                client_name: input.client_name,
133                agent_id: input.agent_id,
134                path: input.path.map(std::string::ToString::to_string),
135                command,
136                pattern,
137            },
138            input_tokens: input.input_tokens,
139            output_tokens: input.output_tokens,
140            duration_us: input.duration.as_micros() as u64,
141            compression_ratio: ratio,
142            content_excerpt,
143            truncated,
144            safety: ContextIrSafetyV1 {
145                redacted: true,
146                boundary_mode: Some(format!(
147                    "{:?}",
148                    crate::core::io_boundary::boundary_mode_effective(
149                        &crate::core::roles::active_role()
150                    )
151                )),
152            },
153            verification: ContextIrVerificationV1 { content_md5 },
154        };
155
156        self.next_seq = self.next_seq.saturating_add(1);
157        self.updated_at = now;
158
159        self.totals.items_recorded = self.totals.items_recorded.saturating_add(1);
160        self.totals.input_tokens = self
161            .totals
162            .input_tokens
163            .saturating_add(item.input_tokens as u64);
164        self.totals.output_tokens = self
165            .totals
166            .output_tokens
167            .saturating_add(item.output_tokens as u64);
168        self.totals.tokens_saved = self
169            .totals
170            .tokens_saved
171            .saturating_add(item.input_tokens.saturating_sub(item.output_tokens) as u64);
172
173        self.items.push(item);
174        self.prune_in_place();
175    }
176
177    fn prune_in_place(&mut self) {
178        while self.items.len() > MAX_ITEMS
179            || total_content_chars(&self.items) > MAX_TOTAL_CONTENT_CHARS
180        {
181            if self.items.is_empty() {
182                break;
183            }
184            self.items.remove(0);
185        }
186    }
187
188    pub fn save(&self) {
189        if let Ok(dir) = crate::core::data_dir::lean_ctx_data_dir() {
190            let path = dir.join(STORE_FILENAME);
191            if let Ok(json) = serde_json::to_string_pretty(self) {
192                let json = crate::core::redaction::redact_text(&json);
193                let _ = std::fs::write(path, json);
194            }
195        }
196    }
197
198    pub fn load() -> Self {
199        crate::core::data_dir::lean_ctx_data_dir()
200            .ok()
201            .map(|d| d.join(STORE_FILENAME))
202            .and_then(|p| std::fs::read_to_string(p).ok())
203            .and_then(|s| serde_json::from_str(&s).ok())
204            .unwrap_or_default()
205    }
206}
207
/// The default store is a freshly created, empty one.
impl Default for ContextIrV1 {
    /// Delegates to `ContextIrV1::new`; `load` uses this as its fallback value.
    fn default() -> Self {
        Self::new()
    }
}
213
214pub fn write_project_context_ir(
215    project_root: &Path,
216    ir: &ContextIrV1,
217    filename: Option<&str>,
218) -> Result<PathBuf, String> {
219    let proofs_dir = project_root.join(".lean-ctx").join("proofs");
220    std::fs::create_dir_all(&proofs_dir).map_err(|e| e.to_string())?;
221
222    let ts = chrono::Utc::now().format("%Y-%m-%d_%H%M%S");
223    let name = filename.map_or_else(
224        || format!("context-ir-v1_{ts}.json"),
225        std::string::ToString::to_string,
226    );
227    let path = proofs_dir.join(name);
228
229    let json = serde_json::to_string_pretty(ir).map_err(|e| e.to_string())?;
230    let json = crate::core::redaction::redact_text(&json);
231    crate::config_io::write_atomic(&path, &json)?;
232    Ok(path)
233}
234
235fn bound_and_redact_excerpt(s: &str) -> (String, bool) {
236    let redacted = crate::core::redaction::redact_text(s);
237    let mut out = redacted;
238    let truncated = out.chars().count() > MAX_ITEM_CONTENT_CHARS;
239    if truncated {
240        out = out.chars().take(MAX_ITEM_CONTENT_CHARS).collect();
241    }
242    (out, truncated)
243}
244
245fn total_content_chars(items: &[ContextIrItemV1]) -> usize {
246    items
247        .iter()
248        .map(|i| i.content_excerpt.chars().count())
249        .sum()
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Recording more oversized items than the caps allow must leave the store
    /// within both the item-count and the total-character bounds.
    #[test]
    fn store_is_bounded() {
        let oversized = "x".repeat(MAX_ITEM_CONTENT_CHARS + 10);
        let rounds = MAX_ITEMS + 10;
        let mut store = ContextIrV1::new();

        for _ in 0..rounds {
            let input = RecordIrInput {
                kind: ContextIrSourceKindV1::Read,
                tool: "ctx_read",
                client_name: None,
                agent_id: None,
                path: Some("src/lib.rs"),
                command: None,
                pattern: None,
                input_tokens: 100,
                output_tokens: 10,
                duration: std::time::Duration::from_millis(1),
                content_excerpt: &oversized,
            };
            store.record(input);
        }

        assert!(store.items.len() <= MAX_ITEMS);
        assert!(total_content_chars(&store.items) <= MAX_TOTAL_CONTENT_CHARS);
    }

    /// An excerpt one character over the cap is cut and flagged as truncated.
    #[test]
    fn excerpt_is_truncated() {
        let too_long = "x".repeat(MAX_ITEM_CONTENT_CHARS + 1);
        let (excerpt, was_truncated) = bound_and_redact_excerpt(&too_long);
        assert!(was_truncated);
        assert!(excerpt.chars().count() <= MAX_ITEM_CONTENT_CHARS);
    }
}