use crate::core::content_chunk::ContentChunk;
#[derive(Debug, Clone, Default)]
pub struct ColumnContext {
pub task: Option<String>,
pub pressure: f64,
pub budget_tokens: Option<usize>,
pub compression_hint: Option<String>,
}
#[derive(Debug, Clone)]
pub struct ColumnInput {
pub chunks: Vec<ContentChunk>,
pub raw_token_count: usize,
}
#[derive(Debug, Clone)]
pub struct ColumnCompressed {
pub chunks: Vec<ContentChunk>,
pub compressed_token_count: usize,
pub compression_ratio: f64,
pub mode_used: String,
}
#[derive(Debug, Clone)]
pub struct ColumnOutput {
pub chunks: Vec<ContentChunk>,
pub token_count: usize,
pub budget_ok: bool,
pub quality_score: f64,
pub hints: Vec<CrossSourceHint>,
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct CrossSourceHint {
pub source_column: String,
pub target_uri: String,
pub relation: String,
pub confidence: f64,
pub summary: String,
}
pub trait ContextColumn: Send + Sync {
fn id(&self) -> &'static str;
fn display_name(&self) -> &'static str;
fn is_active(&self) -> bool;
fn ingest(&self, query: &str, ctx: &ColumnContext) -> Result<ColumnInput, String>;
fn compress(
&self,
input: &ColumnInput,
ctx: &ColumnContext,
) -> Result<ColumnCompressed, String> {
let mode = ctx.compression_hint.as_deref().unwrap_or("full");
let raw = input.raw_token_count;
let compressed = match mode {
"map" => (raw as f64 * 0.3) as usize,
"signatures" => (raw as f64 * 0.15) as usize,
"aggressive" => (raw as f64 * 0.1) as usize,
_ => raw,
};
Ok(ColumnCompressed {
chunks: input.chunks.clone(),
compressed_token_count: compressed.max(1),
compression_ratio: if raw > 0 {
1.0 - (compressed as f64 / raw as f64)
} else {
0.0
},
mode_used: mode.to_string(),
})
}
fn verify(
&self,
compressed: &ColumnCompressed,
ctx: &ColumnContext,
) -> Result<ColumnOutput, String> {
let budget_ok = ctx
.budget_tokens
.is_none_or(|b| compressed.compressed_token_count <= b);
Ok(ColumnOutput {
chunks: compressed.chunks.clone(),
token_count: compressed.compressed_token_count,
budget_ok,
quality_score: if compressed.compression_ratio > 0.95 {
0.5
} else {
1.0
},
hints: Vec::new(),
})
}
fn process(&self, query: &str, ctx: &ColumnContext) -> Result<ColumnOutput, String> {
let input = self.ingest(query, ctx)?;
let compressed = self.compress(&input, ctx)?;
self.verify(&compressed, ctx)
}
}
pub struct FilesystemColumn;
impl ContextColumn for FilesystemColumn {
fn id(&self) -> &'static str {
"filesystem"
}
fn display_name(&self) -> &'static str {
"Local Filesystem"
}
fn is_active(&self) -> bool {
true
}
fn ingest(&self, query: &str, _ctx: &ColumnContext) -> Result<ColumnInput, String> {
let path = std::path::Path::new(query);
if !path.exists() {
return Err(format!("File not found: {query}"));
}
let content = std::fs::read_to_string(path).map_err(|e| format!("Read error: {e}"))?;
let token_count = content.split_whitespace().count();
let chunk = ContentChunk::from(crate::core::bm25_index::CodeChunk {
file_path: query.to_string(),
symbol_name: path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or(query)
.to_string(),
kind: crate::core::bm25_index::ChunkKind::Module,
start_line: 1,
end_line: content.lines().count(),
content,
tokens: Vec::new(),
token_count,
});
Ok(ColumnInput {
chunks: vec![chunk],
raw_token_count: token_count,
})
}
}
pub struct ProviderColumn {
provider: std::sync::Arc<dyn crate::core::providers::ContextProvider>,
}
impl ProviderColumn {
pub fn new(provider: std::sync::Arc<dyn crate::core::providers::ContextProvider>) -> Self {
Self { provider }
}
}
impl ContextColumn for ProviderColumn {
fn id(&self) -> &'static str {
self.provider.id()
}
fn display_name(&self) -> &'static str {
self.provider.display_name()
}
fn is_active(&self) -> bool {
self.provider.is_available()
}
fn ingest(&self, query: &str, _ctx: &ColumnContext) -> Result<ColumnInput, String> {
let (action, params) = parse_column_query(query)?;
let result = self.provider.execute(&action, ¶ms)?;
let chunks = crate::core::providers::registry::result_to_chunks(&result);
let raw_tokens: usize = chunks.iter().map(|c| c.token_count).sum();
Ok(ColumnInput {
chunks,
raw_token_count: raw_tokens,
})
}
}
fn parse_column_query(
query: &str,
) -> Result<(String, crate::core::providers::ProviderParams), String> {
let (action, query_str) = query.split_once('?').unwrap_or((query, ""));
let mut params = crate::core::providers::ProviderParams::default();
for pair in query_str.split('&') {
if pair.is_empty() {
continue;
}
let (key, value) = pair
.split_once('=')
.ok_or_else(|| format!("Invalid query param: {pair}"))?;
match key {
"state" => params.state = Some(value.to_string()),
"limit" => {
params.limit = value.parse().ok();
}
"project" => params.project = Some(value.to_string()),
"query" | "q" => params.query = Some(value.to_string()),
"id" => params.id = Some(value.to_string()),
_ => {}
}
}
Ok((action.to_string(), params))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn filesystem_column_is_always_active() {
let col = FilesystemColumn;
assert!(col.is_active());
assert_eq!(col.id(), "filesystem");
}
#[test]
fn filesystem_column_ingest_nonexistent_file() {
let col = FilesystemColumn;
let ctx = ColumnContext::default();
let result = col.ingest("/nonexistent/path.rs", &ctx);
assert!(result.is_err());
}
#[test]
fn filesystem_column_ingest_real_file() {
let col = FilesystemColumn;
let ctx = ColumnContext::default();
let result = col.ingest(file!(), &ctx);
assert!(result.is_ok());
let input = result.unwrap();
assert!(!input.chunks.is_empty());
assert!(input.raw_token_count > 0);
}
#[test]
fn default_compress_preserves_chunks() {
let col = FilesystemColumn;
let input = ColumnInput {
chunks: vec![],
raw_token_count: 100,
};
let ctx = ColumnContext {
compression_hint: Some("map".to_string()),
..Default::default()
};
let compressed = col.compress(&input, &ctx).unwrap();
assert_eq!(compressed.mode_used, "map");
assert!(compressed.compression_ratio > 0.0);
}
#[test]
fn verify_respects_budget() {
let col = FilesystemColumn;
let compressed = ColumnCompressed {
chunks: vec![],
compressed_token_count: 500,
compression_ratio: 0.5,
mode_used: "full".into(),
};
let ctx_ok = ColumnContext {
budget_tokens: Some(1000),
..Default::default()
};
assert!(col.verify(&compressed, &ctx_ok).unwrap().budget_ok);
let ctx_over = ColumnContext {
budget_tokens: Some(100),
..Default::default()
};
assert!(!col.verify(&compressed, &ctx_over).unwrap().budget_ok);
}
#[test]
fn parse_column_query_basic() {
let (action, params) = parse_column_query("issues?state=open&limit=10").unwrap();
assert_eq!(action, "issues");
assert_eq!(params.state.as_deref(), Some("open"));
assert_eq!(params.limit, Some(10));
}
#[test]
fn parse_column_query_no_params() {
let (action, params) = parse_column_query("issues").unwrap();
assert_eq!(action, "issues");
assert!(params.state.is_none());
}
#[test]
fn full_pipeline_works() {
let col = FilesystemColumn;
let ctx = ColumnContext::default();
let result = col.process(file!(), &ctx);
assert!(result.is_ok());
let output = result.unwrap();
assert!(output.token_count > 0);
assert!(output.budget_ok);
}
}