roder-context 0.1.1

Agentic software development tools and SDKs for Roder.
Documentation
use std::collections::HashSet;
use std::path::{Path, PathBuf};

use roder_api::context::{
    ContextBlock, ContextBlockKind, ContextPlan, ContextPlanner, ContextPlannerId, ContextQuery,
};
use roder_search::{DEFAULT_MAX_FILE_SIZE, SearchMode, SearchOptions, search_workspace};
use serde::Serialize;
use serde_json::json;

const MAX_CANDIDATES: usize = 5;
const MAX_FILES_SCANNED: usize = 2_000;
const MAX_CONTENT_BYTES: u64 = 8 * 1024;

#[derive(Debug, Clone)]
pub struct EntrypointContextPlanner {
    workspace: PathBuf,
}

impl EntrypointContextPlanner {
    pub fn new(workspace: impl Into<PathBuf>) -> Self {
        Self {
            workspace: workspace.into(),
        }
    }
}

#[async_trait::async_trait]
impl ContextPlanner for EntrypointContextPlanner {
    fn id(&self) -> ContextPlannerId {
        "entrypoint-context-planner".to_string()
    }

    async fn plan(
        &self,
        query: &ContextQuery,
        mut provider_blocks: Vec<ContextBlock>,
    ) -> anyhow::Result<ContextPlan> {
        let workspace = query
            .workspace
            .as_deref()
            .map(Path::new)
            .unwrap_or(&self.workspace);
        if !workspace.is_dir() {
            provider_blocks.sort_by_key(|block| std::cmp::Reverse(block.priority));
            return Ok(ContextPlan {
                blocks: provider_blocks,
            });
        }
        let candidates = discover_entrypoints(workspace, &query.prompt)?;
        if !candidates.is_empty() {
            provider_blocks.push(render_entrypoint_block(&candidates));
        }
        provider_blocks.sort_by_key(|block| std::cmp::Reverse(block.priority));
        Ok(ContextPlan {
            blocks: provider_blocks,
        })
    }
}

#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
struct EntrypointCandidate {
    path: String,
    score: i32,
    reasons: Vec<String>,
}

fn discover_entrypoints(root: &Path, prompt: &str) -> anyhow::Result<Vec<EntrypointCandidate>> {
    let tokens = prompt_tokens(prompt);
    if tokens.is_empty() {
        return Ok(Vec::new());
    }

    let changed = changed_paths(root);
    let search_hits = fresh_search_hits(root, &tokens);
    let mut candidates = Vec::new();
    let mut scanned = 0usize;
    visit_files(root, &mut |path| {
        if scanned >= MAX_FILES_SCANNED {
            return Ok(());
        }
        scanned += 1;
        if let Some(candidate) = score_file(root, path, &tokens, &changed, &search_hits)? {
            candidates.push(candidate);
        }
        Ok(())
    })?;
    candidates.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
    candidates.truncate(MAX_CANDIDATES);
    Ok(candidates)
}

fn score_file(
    root: &Path,
    path: &Path,
    tokens: &[String],
    changed: &HashSet<String>,
    search_hits: &HashSet<String>,
) -> anyhow::Result<Option<EntrypointCandidate>> {
    let rel = path
        .strip_prefix(root)
        .unwrap_or(path)
        .to_string_lossy()
        .replace('\\', "/");
    let rel_lower = rel.to_ascii_lowercase();
    let file_type_score = extension_score(path);
    let mut score = 0;
    let mut reasons = Vec::new();

    for token in tokens {
        if rel_lower.contains(token) {
            score += 8;
            reasons.push(format!("path matches `{token}`"));
        }
    }

    if changed.contains(&rel) {
        score += 10;
        reasons.push("recent git change".to_string());
    }

    if search_hits.contains(&rel) {
        score += 6;
        reasons.push("fresh search hit".to_string());
    }

    if likely_entrypoint_name(path) {
        score += 4;
        reasons.push("entrypoint-like filename".to_string());
    }

    if let Ok(metadata) = std::fs::metadata(path)
        && metadata.len() <= MAX_CONTENT_BYTES
        && let Ok(text) = std::fs::read_to_string(path)
    {
        let text = text.to_ascii_lowercase();
        for token in tokens {
            if text.contains(token) {
                score += 3;
                reasons.push(format!("bounded content matches `{token}`"));
            }
        }
    }

    Ok((score > 0).then_some(EntrypointCandidate {
        path: rel,
        score: score + file_type_score,
        reasons,
    }))
}

fn render_entrypoint_block(candidates: &[EntrypointCandidate]) -> ContextBlock {
    let mut text = String::from("Likely entry points:");
    for (index, candidate) in candidates.iter().enumerate() {
        let reason = candidate
            .reasons
            .first()
            .map(String::as_str)
            .unwrap_or("workspace evidence");
        text.push_str(&format!("\n{}. {} - {}", index + 1, candidate.path, reason));
    }

    ContextBlock {
        id: "entrypoint-context-planner".to_string(),
        kind: ContextBlockKind::EntrypointHint,
        text,
        priority: 90,
        token_estimate: None,
        metadata: json!({
            "planner": "entrypoint-context-planner",
            "candidate_count": candidates.len(),
            "candidates": candidates,
            "source": "fresh_filesystem_heuristics",
        }),
    }
}

fn visit_files(
    root: &Path,
    visitor: &mut dyn FnMut(&Path) -> anyhow::Result<()>,
) -> anyhow::Result<()> {
    if root.is_file() {
        return visitor(root);
    }
    for entry in std::fs::read_dir(root)? {
        let entry = entry?;
        let path = entry.path();
        let name = entry.file_name();
        let name = name.to_string_lossy();
        if entry.file_type()?.is_dir() {
            if matches!(name.as_ref(), ".git" | "target" | "node_modules" | ".roder") {
                continue;
            }
            visit_files(&path, visitor)?;
        } else {
            visitor(&path)?;
        }
    }
    Ok(())
}

fn prompt_tokens(prompt: &str) -> Vec<String> {
    let mut seen = HashSet::new();
    prompt
        .split(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-')
        .map(|token| token.trim().to_ascii_lowercase())
        .filter(|token| token.len() >= 3 && !STOP_WORDS.contains(&token.as_str()))
        .filter(|token| seen.insert(token.clone()))
        .collect()
}

fn extension_score(path: &Path) -> i32 {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some("rs" | "toml" | "md") => 2,
        Some("ts" | "tsx" | "js" | "jsx" | "py") => 1,
        _ => 0,
    }
}

fn likely_entrypoint_name(path: &Path) -> bool {
    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    matches!(
        name,
        "lib.rs" | "main.rs" | "mod.rs" | "runtime.rs" | "server.rs" | "index.ts" | "index.tsx"
    )
}

fn changed_paths(root: &Path) -> HashSet<String> {
    let Ok(output) = std::process::Command::new("git")
        .arg("-C")
        .arg(root)
        .arg("status")
        .arg("--short")
        .output()
    else {
        return HashSet::new();
    };
    if !output.status.success() {
        return HashSet::new();
    }
    String::from_utf8_lossy(&output.stdout)
        .lines()
        .filter_map(|line| line.get(3..))
        .map(|path| path.trim().replace('\\', "/"))
        .collect()
}

fn fresh_search_hits(root: &Path, tokens: &[String]) -> HashSet<String> {
    let mut hits = HashSet::new();
    for token in tokens.iter().take(6) {
        let mut options = SearchOptions::new(token.clone())
            .with_mode(SearchMode::Scan)
            .case_sensitive(false);
        options.max_file_size = DEFAULT_MAX_FILE_SIZE.min(MAX_CONTENT_BYTES);
        let Ok(results) = search_workspace(root, &options) else {
            continue;
        };
        for hit in results.matches.iter().take(100) {
            hits.insert(hit.path.to_string_lossy().replace('\\', "/"));
        }
    }
    hits
}

const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "with", "that", "this", "from", "into", "where", "when", "what", "why",
    "how", "need", "needs", "find", "file", "files", "code", "task", "work",
];

#[cfg(test)]
mod tests {
    use super::*;
    use roder_api::context::ContextQuery;

    #[tokio::test]
    async fn entrypoint_planner_puts_relevant_file_in_top_five() {
        let root = test_workspace("entrypoint-top-five");
        write(
            &root,
            "crates/roder-core/src/runtime.rs",
            "fn route_tools() {}\n",
        );
        write(
            &root,
            "crates/roder-tools/src/files.rs",
            "fn read_file() {}\n",
        );
        write(&root, "README.md", "Roder docs\n");
        let planner = EntrypointContextPlanner::new(root.clone());

        let plan = planner
            .plan(&query("debug runtime tool routing"), Vec::new())
            .await
            .unwrap();

        let block = plan.blocks.first().unwrap();
        assert!(block.text.contains("crates/roder-core/src/runtime.rs"));
        assert_eq!(block.kind, ContextBlockKind::EntrypointHint);
        assert!(block.text.lines().nth(1).unwrap().contains("runtime.rs"));
        assert!(block.metadata["candidate_count"].as_u64().unwrap() <= MAX_CANDIDATES as u64);

        let _ = std::fs::remove_dir_all(root);
    }

    #[tokio::test]
    async fn entrypoint_planner_keeps_output_bounded_for_large_files() {
        let root = test_workspace("entrypoint-bounded");
        write(&root, "src/runtime.rs", &"runtime ".repeat(4_000));
        let planner = EntrypointContextPlanner::new(root.clone());

        let plan = planner
            .plan(&query("runtime entrypoint"), Vec::new())
            .await
            .unwrap();
        let block = plan.blocks.first().unwrap();

        assert!(block.text.len() < 1_000);
        assert!(!block.text.contains(&"runtime ".repeat(100)));

        let _ = std::fs::remove_dir_all(root);
    }

    fn query(prompt: &str) -> ContextQuery {
        ContextQuery {
            thread_id: "thread-a".to_string(),
            turn_id: "turn-a".to_string(),
            prompt: prompt.to_string(),
            workspace: None,
            token_budget: None,
        }
    }

    fn write(root: &Path, path: &str, text: &str) {
        let path = root.join(path);
        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
        std::fs::write(path, text).unwrap();
    }

    fn test_workspace(name: &str) -> PathBuf {
        let stamp = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let path = std::env::temp_dir().join(format!("roder-context-{name}-{stamp}"));
        let _ = std::fs::remove_dir_all(&path);
        std::fs::create_dir_all(&path).unwrap();
        path
    }
}