agent-source-repository 0.1.0

use std::collections::BTreeSet;

use super::context_core::{bounded_chunk_range, context_selection_order};
use super::search_core::{reasons_for_result, round_score, search_results_from_snapshot};
use super::{
    exact_shard, open_store, search_index_state, shell_word, validate_context_budget,
    validate_repo_name, AsrError, AsrResult, ContextOutput, ContextRelated, ContextSelection,
    MAX_CONTEXT_BUDGET, MAX_CONTEXT_CANDIDATES, MAX_CONTEXT_RELATED, MAX_TOP_K,
};
use crate::{Chunk, SearchResult};

pub(crate) fn context(query: &str, repo_name: &str, budget: usize) -> AsrResult<ContextOutput> {
    validate_repo_name(repo_name)?;
    let query = query.trim();
    if query.is_empty() {
        return Err(AsrError::new(
            "invalid_query",
            "Context query must not be empty",
        ));
    }
    exact_shard::validate_asr_query(query)?;
    validate_context_budget(budget, MAX_CONTEXT_BUDGET)?;

    let (paths, store) = open_store()?;
    let snapshot = super::require_ready_index_snapshot(&paths, &store, repo_name)?;
    let mut candidates =
        search_results_from_snapshot(query, &snapshot, MAX_CONTEXT_CANDIDATES.min(MAX_TOP_K))?;
    let orientation_fallback = candidates.is_empty();
    if candidates.is_empty() {
        candidates =
            orientation_context_candidates(&snapshot.chunks, MAX_CONTEXT_CANDIDATES.min(MAX_TOP_K));
        if candidates.is_empty() {
            return Err(AsrError::new(
                "context_no_results",
                format!("No indexed context candidates matched query in repo: {repo_name}"),
            ));
        }
    }
    let mut selected = Vec::new();
    let mut related_paths = BTreeSet::new();
    let mut used = 0usize;
    let mut seen_ranges = BTreeSet::new();

    for result in context_selection_order(&candidates) {
        if used >= budget {
            break;
        }
        let key = (
            result.chunk.file_path.clone(),
            result.chunk.start_line,
            result.chunk.end_line,
        );
        if seen_ranges.contains(&key) {
            continue;
        }
        let remaining = budget.saturating_sub(used);
        let Some((start_line, end_line, estimated_tokens)) =
            bounded_chunk_range(&result.chunk, &result.match_lines, remaining)
        else {
            if related_paths.len() < MAX_CONTEXT_RELATED {
                related_paths.insert(result.chunk.file_path.clone());
            }
            continue;
        };
        used += estimated_tokens;
        seen_ranges.insert(key);
        selected.push(ContextSelection {
            path: result.chunk.file_path.clone(),
            start_line,
            end_line,
            language: result.chunk.language.clone(),
            estimated_tokens,
            score: round_score(result.score),
            reasons: if orientation_fallback {
                vec!["repo_orientation_fallback".to_string()]
            } else {
                reasons_for_result(query, result)
            },
            match_lines: result
                .match_lines
                .iter()
                .filter(|line| line.line >= start_line && line.line <= end_line)
                .cloned()
                .collect(),
        });
    }

    if selected.is_empty() {
        return Err(AsrError::new(
            "context_budget_too_small",
            "Context budget is too small to include any matched file range",
        ));
    }

    for result in &candidates {
        if related_paths.len() >= MAX_CONTEXT_RELATED {
            break;
        }
        if selected
            .iter()
            .any(|item: &ContextSelection| item.path == result.chunk.file_path)
        {
            continue;
        }
        related_paths.insert(result.chunk.file_path.clone());
    }

    let related = related_paths
        .into_iter()
        .filter(|path| !selected.iter().any(|item| item.path == *path))
        .take(MAX_CONTEXT_RELATED)
        .map(|path| ContextRelated {
            path,
            reason: if orientation_fallback {
                "orientation_candidate_not_selected".to_string()
            } else {
                "search_candidate_not_selected".to_string()
            },
        })
        .collect::<Vec<_>>();

    let next_commands = selected
        .iter()
        .take(5)
        .map(|item| {
            format!(
                "asr read {} {} --lines {}:{} --json",
                shell_word(&snapshot.repo.name),
                shell_word(&item.path),
                item.start_line,
                item.end_line
            )
        })
        .collect::<Vec<_>>();

    Ok(ContextOutput {
        query: query.to_string(),
        repo: snapshot.repo.name,
        budget,
        estimated_tokens: used,
        analysis_level: if orientation_fallback {
            "indexed_orientation_budget_pack".to_string()
        } else {
            "indexed_search_budget_pack".to_string()
        },
        selected,
        related,
        next_commands,
        index_state: search_index_state(
            snapshot.state,
            false,
            snapshot.exact_shard.summary.clone(),
        ),
    })
}

/// Picks up to `limit` chunks, at most one per file, to orient a reader when search found nothing.
///
/// Chunks are ordered by [`orientation_path_rank`] of their file path, then by path, start line
/// and end line, so the chunk kept for each file is the one that sorts first for that file.
/// The result at zero-based position `i` receives the score `1 / (i + 1)` and carries no match
/// lines.
fn orientation_context_candidates(chunks: &[Chunk], limit: usize) -> Vec<SearchResult> {
    // Rank every path once up front: the ranking allocates, and a comparator that calls it
    // would repeat that work on both sides of every comparison.
    let mut ranked = chunks
        .iter()
        .map(|chunk| (orientation_path_rank(&chunk.file_path), chunk))
        .collect::<Vec<_>>();
    ranked.sort_by(|(left_rank, left), (right_rank, right)| {
        left_rank
            .cmp(right_rank)
            .then_with(|| left.file_path.cmp(&right.file_path))
            .then_with(|| left.start_line.cmp(&right.start_line))
            .then_with(|| left.end_line.cmp(&right.end_line))
    });

    // Borrowed paths are enough for de-duplication; only the surviving chunks get cloned.
    let mut seen_files = BTreeSet::new();
    let mut results = Vec::new();
    for (_, chunk) in ranked {
        if results.len() >= limit {
            break;
        }
        if !seen_files.insert(&chunk.file_path) {
            continue;
        }
        let position = results.len() + 1;
        results.push(SearchResult {
            chunk: chunk.clone(),
            score: 1.0 / (position as f64),
            match_lines: Vec::new(),
        });
    }
    results
}

/// Ranks a file path for repository orientation; lower values sort first.
///
/// Backslashes are treated as `/` and matching is ASCII case-insensitive.
///
/// - `0`: the path contains a `/bin/` segment, or the file is `main.rs` / `lib.rs`
/// - `1`: a well-known manifest or readme file name
/// - `2`, `3`, `4`: the path starts with `src/`, `tests/`, `scripts/` respectively
/// - `5`: everything else
fn orientation_path_rank(path: &str) -> u8 {
    const ENTRY_POINT_FILES: [&str; 2] = ["main.rs", "lib.rs"];
    const MANIFEST_FILES: [&str; 5] = [
        "cargo.toml",
        "package.json",
        "pyproject.toml",
        "go.mod",
        "readme.md",
    ];
    const DIRECTORY_TIERS: [(&str, u8); 3] = [("src/", 2), ("tests/", 3), ("scripts/", 4)];
    const FALLBACK_TIER: u8 = 5;

    let unified = path.replace('\\', "/").to_ascii_lowercase();
    // Everything after the last separator, or the whole path when there is none.
    let base_name = match unified.rfind('/') {
        Some(slash) => &unified[slash + 1..],
        None => unified.as_str(),
    };

    if ENTRY_POINT_FILES.contains(&base_name) || unified.contains("/bin/") {
        0
    } else if MANIFEST_FILES.contains(&base_name) {
        1
    } else {
        DIRECTORY_TIERS
            .iter()
            .find(|(prefix, _)| unified.starts_with(*prefix))
            .map_or(FALLBACK_TIER, |&(_, tier)| tier)
    }
}