Skip to main content

lean_ctx/core/
context_column.rs

1//! Context Column — the cortical column abstraction for data source pipelines.
2//!
3//! Each data source (filesystem, GitHub, Jira, DB, shell) is modeled as a
4//! neocortical column with four processing layers:
5//!
6//!   L4 (Input)      — raw data ingestion, normalization → ContentChunks
7//!   L2/3 (Predict)  — compression mode selection, predictive coding
8//!   L5 (Output)     — verification, budget check, quality gate
9//!   L6 (Feedback)   — top-down modulation from active task context
10//!
11//! Scientific basis: Mountcastle (Nature Rev Neurosci 2022) — every cortical
12//! column applies the same computational template to different input modalities.
13//!
//! The trait is synchronous: every layer returns a `Result`, so network-backed
//! columns (GitHub API, DB queries) can report failures through the same
//! interface as local columns (filesystem).
16
17use crate::core::content_chunk::ContentChunk;
18
/// Top-down (L6) modulation parameters handed to every layer of a column.
///
/// The derived `Default` produces a context with no task, zero pressure,
/// no token budget and no compression hint.
#[derive(Clone, Debug, Default)]
pub struct ColumnContext {
    /// Description of the task currently in progress; intended to modulate
    /// saliency scoring.
    pub task: Option<String>,
    /// Context pressure on a 0.0 (relaxed) to 1.0 (critical) scale.
    pub pressure: f64,
    /// Tokens still available in this delivery cycle; `None` means no budget
    /// is enforced.
    pub budget_tokens: Option<usize>,
    /// Compression mode suggested by the mode predictor, if any.
    pub compression_hint: Option<String>,
}
31
32/// Result of L4 (input layer) processing.
33#[derive(Debug, Clone)]
34pub struct ColumnInput {
35    pub chunks: Vec<ContentChunk>,
36    pub raw_token_count: usize,
37}
38
39/// Result of L2/3 (prediction/compression layer) processing.
40#[derive(Debug, Clone)]
41pub struct ColumnCompressed {
42    pub chunks: Vec<ContentChunk>,
43    pub compressed_token_count: usize,
44    pub compression_ratio: f64,
45    pub mode_used: String,
46}
47
48/// Result of L5 (output/verification layer) processing.
49#[derive(Debug, Clone)]
50pub struct ColumnOutput {
51    pub chunks: Vec<ContentChunk>,
52    pub token_count: usize,
53    pub budget_ok: bool,
54    pub quality_score: f64,
55    /// Cross-source hints discovered during processing.
56    pub hints: Vec<CrossSourceHint>,
57}
58
59/// A lateral connection hint to related data in other columns.
60#[derive(Debug, Clone, serde::Serialize)]
61pub struct CrossSourceHint {
62    pub source_column: String,
63    pub target_uri: String,
64    pub relation: String,
65    pub confidence: f64,
66    pub summary: String,
67}
68
69/// The cortical column trait — uniform processing pipeline for any data source.
70///
71/// Each implementation represents one "column" in the cortex: filesystem,
72/// GitHub, Jira, PostgreSQL, etc. All columns share the same interface
73/// but process different input modalities.
74pub trait ContextColumn: Send + Sync {
75    /// Unique column identifier (matches provider ID for external columns).
76    fn id(&self) -> &'static str;
77
78    /// Human-readable name for discovery/logging.
79    fn display_name(&self) -> &'static str;
80
81    /// Whether this column is currently operational.
82    fn is_active(&self) -> bool;
83
84    /// **L4 (Input Layer):** Ingest raw data and produce ContentChunks.
85    ///
86    /// For filesystem: read file, parse AST, extract chunks.
87    /// For GitHub: fetch API, parse JSON, normalize to chunks.
88    /// For DB: query schema/data, structure as chunks.
89    fn ingest(&self, query: &str, ctx: &ColumnContext) -> Result<ColumnInput, String>;
90
91    /// **L2/3 (Predictive Compression):** Compress chunks based on task context.
92    ///
93    /// Uses the mode predictor (Thompson Sampling) to select the optimal
94    /// compression mode, then applies it. The prediction compares expected
95    /// vs actual information content (predictive coding).
96    fn compress(
97        &self,
98        input: &ColumnInput,
99        ctx: &ColumnContext,
100    ) -> Result<ColumnCompressed, String> {
101        let mode = ctx.compression_hint.as_deref().unwrap_or("full");
102        let raw = input.raw_token_count;
103        let compressed = match mode {
104            "map" => (raw as f64 * 0.3) as usize,
105            "signatures" => (raw as f64 * 0.15) as usize,
106            "aggressive" => (raw as f64 * 0.1) as usize,
107            _ => raw,
108        };
109        Ok(ColumnCompressed {
110            chunks: input.chunks.clone(),
111            compressed_token_count: compressed.max(1),
112            compression_ratio: if raw > 0 {
113                1.0 - (compressed as f64 / raw as f64)
114            } else {
115                0.0
116            },
117            mode_used: mode.to_string(),
118        })
119    }
120
121    /// **L5 (Output + Verification):** Validate output, check budget, discover hints.
122    ///
123    /// Ensures the compressed output meets quality thresholds and stays
124    /// within the token budget. Also discovers cross-source hints by
125    /// checking chunk references against the graph index.
126    fn verify(
127        &self,
128        compressed: &ColumnCompressed,
129        ctx: &ColumnContext,
130    ) -> Result<ColumnOutput, String> {
131        let budget_ok = ctx
132            .budget_tokens
133            .is_none_or(|b| compressed.compressed_token_count <= b);
134
135        Ok(ColumnOutput {
136            chunks: compressed.chunks.clone(),
137            token_count: compressed.compressed_token_count,
138            budget_ok,
139            quality_score: if compressed.compression_ratio > 0.95 {
140                0.5
141            } else {
142                1.0
143            },
144            hints: Vec::new(),
145        })
146    }
147
148    /// Full pipeline: L4 → L2/3 → L5, with L6 context flowing top-down.
149    ///
150    /// Convenience method that chains all layers. Override individual
151    /// layers for custom behavior per column.
152    fn process(&self, query: &str, ctx: &ColumnContext) -> Result<ColumnOutput, String> {
153        let input = self.ingest(query, ctx)?;
154        let compressed = self.compress(&input, ctx)?;
155        self.verify(&compressed, ctx)
156    }
157}
158
159// ---------------------------------------------------------------------------
160// Filesystem Column (built-in, always active)
161// ---------------------------------------------------------------------------
162
/// Built-in column for local files.
///
/// A stateless unit type; all behavior lives in its `ContextColumn` impl.
pub struct FilesystemColumn;
165
166impl ContextColumn for FilesystemColumn {
167    fn id(&self) -> &'static str {
168        "filesystem"
169    }
170
171    fn display_name(&self) -> &'static str {
172        "Local Filesystem"
173    }
174
175    fn is_active(&self) -> bool {
176        true
177    }
178
179    fn ingest(&self, query: &str, _ctx: &ColumnContext) -> Result<ColumnInput, String> {
180        let path = std::path::Path::new(query);
181        if !path.exists() {
182            return Err(format!("File not found: {query}"));
183        }
184
185        let content = std::fs::read_to_string(path).map_err(|e| format!("Read error: {e}"))?;
186
187        let token_count = content.split_whitespace().count();
188        let chunk = ContentChunk::from(crate::core::bm25_index::CodeChunk {
189            file_path: query.to_string(),
190            symbol_name: path
191                .file_name()
192                .and_then(|n| n.to_str())
193                .unwrap_or(query)
194                .to_string(),
195            kind: crate::core::bm25_index::ChunkKind::Module,
196            start_line: 1,
197            end_line: content.lines().count(),
198            content,
199            tokens: Vec::new(),
200            token_count,
201        });
202
203        Ok(ColumnInput {
204            chunks: vec![chunk],
205            raw_token_count: token_count,
206        })
207    }
208}
209
210/// Provider-backed column — wraps any `ContextProvider` as a cortical column.
211pub struct ProviderColumn {
212    provider: std::sync::Arc<dyn crate::core::providers::ContextProvider>,
213}
214
215impl ProviderColumn {
216    pub fn new(provider: std::sync::Arc<dyn crate::core::providers::ContextProvider>) -> Self {
217        Self { provider }
218    }
219}
220
221impl ContextColumn for ProviderColumn {
222    fn id(&self) -> &'static str {
223        self.provider.id()
224    }
225
226    fn display_name(&self) -> &'static str {
227        self.provider.display_name()
228    }
229
230    fn is_active(&self) -> bool {
231        self.provider.is_available()
232    }
233
234    fn ingest(&self, query: &str, _ctx: &ColumnContext) -> Result<ColumnInput, String> {
235        let (action, params) = parse_column_query(query)?;
236
237        let result = self.provider.execute(&action, &params)?;
238        let chunks = crate::core::providers::registry::result_to_chunks(&result);
239
240        let raw_tokens: usize = chunks.iter().map(|c| c.token_count).sum();
241
242        Ok(ColumnInput {
243            chunks,
244            raw_token_count: raw_tokens,
245        })
246    }
247}
248
249/// Parse a column query string into action + params.
250/// Format: `action[?key=value&key=value]`
251/// Example: `issues?state=open&limit=10`
252fn parse_column_query(
253    query: &str,
254) -> Result<(String, crate::core::providers::ProviderParams), String> {
255    let (action, query_str) = query.split_once('?').unwrap_or((query, ""));
256
257    let mut params = crate::core::providers::ProviderParams::default();
258    for pair in query_str.split('&') {
259        if pair.is_empty() {
260            continue;
261        }
262        let (key, value) = pair
263            .split_once('=')
264            .ok_or_else(|| format!("Invalid query param: {pair}"))?;
265        match key {
266            "state" => params.state = Some(value.to_string()),
267            "limit" => {
268                params.limit = value.parse().ok();
269            }
270            "project" => params.project = Some(value.to_string()),
271            "query" | "q" => params.query = Some(value.to_string()),
272            "id" => params.id = Some(value.to_string()),
273            _ => {}
274        }
275    }
276
277    Ok((action.to_string(), params))
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in column reports itself active under its fixed id.
    #[test]
    fn filesystem_column_is_always_active() {
        assert!(FilesystemColumn.is_active());
        assert_eq!(FilesystemColumn.id(), "filesystem");
    }

    /// Ingesting a path that does not exist yields an error.
    #[test]
    fn filesystem_column_ingest_nonexistent_file() {
        let outcome = FilesystemColumn.ingest("/nonexistent/path.rs", &ColumnContext::default());
        assert!(outcome.is_err());
    }

    /// Ingesting this very source file yields at least one non-empty chunk.
    #[test]
    fn filesystem_column_ingest_real_file() {
        let input = FilesystemColumn
            .ingest(file!(), &ColumnContext::default())
            .expect("ingesting this source file should succeed");
        assert!(!input.chunks.is_empty());
        assert!(input.raw_token_count > 0);
    }

    /// The default compressor honors the hinted mode and reports a reduction.
    #[test]
    fn default_compress_preserves_chunks() {
        let ctx = ColumnContext {
            compression_hint: Some("map".to_string()),
            ..Default::default()
        };
        let input = ColumnInput {
            chunks: vec![],
            raw_token_count: 100,
        };

        let compressed = FilesystemColumn.compress(&input, &ctx).unwrap();

        assert_eq!(compressed.mode_used, "map");
        assert!(compressed.compression_ratio > 0.0);
    }

    /// `budget_ok` flips depending on whether the token count fits the budget.
    #[test]
    fn verify_respects_budget() {
        let compressed = ColumnCompressed {
            chunks: vec![],
            compressed_token_count: 500,
            compression_ratio: 0.5,
            mode_used: "full".into(),
        };
        let budget_ok_for = |budget: usize| {
            let ctx = ColumnContext {
                budget_tokens: Some(budget),
                ..Default::default()
            };
            FilesystemColumn.verify(&compressed, &ctx).unwrap().budget_ok
        };

        assert!(budget_ok_for(1000));
        assert!(!budget_ok_for(100));
    }

    /// Action and recognized params are extracted from a full query string.
    #[test]
    fn parse_column_query_basic() {
        let (action, params) = parse_column_query("issues?state=open&limit=10").unwrap();
        assert_eq!(action, "issues");
        assert_eq!(params.state.as_deref(), Some("open"));
        assert_eq!(params.limit, Some(10));
    }

    /// A bare action parses with all params left unset.
    #[test]
    fn parse_column_query_no_params() {
        let (action, params) = parse_column_query("issues").unwrap();
        assert_eq!(action, "issues");
        assert!(params.state.is_none());
    }

    /// The chained pipeline runs end to end on this source file.
    #[test]
    fn full_pipeline_works() {
        let output = FilesystemColumn
            .process(file!(), &ColumnContext::default())
            .expect("pipeline should succeed on this source file");
        assert!(output.token_count > 0);
        assert!(output.budget_ok);
    }
}