#![cfg_attr(coverage_nightly, coverage(off))]
use super::helpers::*;
use super::types::*;
use crate::services::semantic::chunk_code;
use ignore::WalkBuilder;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
/// Returns `true` when `content` contains a file-level
/// `#![cfg_attr(coverage_nightly, coverage(off))]` inner attribute.
///
/// The check is line-based: a line matches when, after removing all
/// whitespace, it starts with `#!` and contains
/// `cfg_attr(coverage_nightly,coverage(off))`. Whitespace between tokens is
/// not significant in an attribute, so every spacing of the attribute is
/// recognised rather than only the two most common ones.
///
/// Outer attributes (`#[...]`, which apply to a single item) and commented-out
/// attributes do not match.
fn has_coverage_off(content: &str) -> bool {
    const MARKER: &str = "cfg_attr(coverage_nightly,coverage(off))";

    content.lines().any(|line| {
        let candidate = line.trim_start();
        // Cheap pre-filter: only attribute-looking lines pay for normalisation.
        if !candidate.starts_with('#') {
            return false;
        }
        let compact: String = candidate.chars().filter(|c| !c.is_whitespace()).collect();
        compact.starts_with("#!") && compact.contains(MARKER)
    })
}
/// Reads the `coverage_off_files` entry from the `metadata` table and decodes
/// it from JSON into a set of strings.
///
/// This is best-effort: if the row cannot be read, or its value is not valid
/// JSON for a string set, an empty set is returned instead of an error.
fn load_coverage_off_files(conn: &rusqlite::Connection) -> HashSet<String> {
    let stored = conn.query_row(
        "SELECT value FROM metadata WHERE key = 'coverage_off_files'",
        [],
        |row| row.get::<_, String>(0),
    );

    stored
        .ok()
        .and_then(|raw| serde_json::from_str(&raw).ok())
        .unwrap_or_default()
}
impl AgentContextIndex {
/// Builds a fresh in-memory index for the project rooted at `project_path`.
///
/// The path is canonicalized, the tree is walked with `ignore::WalkBuilder`,
/// and every regular file with a detected language is read, checksummed and
/// split into chunks via `chunk_code`. Chunks of type Function, Struct, Enum,
/// Trait or TypeAlias that are not classified as tests become `FunctionEntry`
/// records. Lookup indices, the call graph, graph metrics, name frequencies
/// and the manifest are then derived from those records.
///
/// Files that cannot be opened or read into a `String` are skipped. Files
/// whose chunking fails are skipped too and are not counted in `file_count`,
/// although their checksum (and coverage-off marker, if present) has already
/// been recorded at that point.
///
/// Progress is reported on stderr every 500 files.
///
/// # Errors
///
/// Returns `Err` with an "Invalid project path" message when `project_path`
/// cannot be canonicalized. No other failure is propagated from this function.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn build(project_path: &Path) -> Result<Self, String> {
let project_root = project_path
.canonicalize()
.map_err(|e| format!("Invalid project path: {e}"))?;
// The capacities below are pre-sizing hints only; the collections still grow as needed.
let mut functions = Vec::with_capacity(20_000);
let mut file_count = 0;
let mut languages_seen = HashMap::new();
let mut file_checksums: HashMap<String, String> = HashMap::with_capacity(4_000);
let mut coverage_off_files = HashSet::new();
// One read buffer reused for every file; it is cleared before each read.
let mut read_buf = String::with_capacity(32 * 1024);
// NOTE(review): the result is bound to an unused `_`-prefixed name; presumably
// kept for side effects or future use — confirm.
let _compile_commands = load_compile_commands(&project_root);
// Walk with hidden-file, .gitignore and global-gitignore filtering enabled,
// pruning every entry whose path `is_ignored_dir` rejects. Walk errors are dropped.
for entry in WalkBuilder::new(&project_root)
.hidden(true)
.git_ignore(true)
.git_global(true)
.filter_entry(|e| !is_ignored_dir(e.path()))
.build()
.filter_map(|e| e.ok())
{
let path = entry.path();
if !path.is_file() {
continue;
}
// Files with no detected language are not indexed.
let language = match detect_language(path) {
Some(lang) => lang,
None => continue,
};
read_buf.clear();
// Read the whole file into the shared buffer; any open/read error skips the file.
let content = match std::fs::File::open(path).and_then(|mut f| {
use std::io::Read;
f.read_to_string(&mut read_buf)
}) {
Ok(_) => read_buf.as_str(),
Err(_) => continue, };
// Path relative to the project root (the full path if stripping fails),
// lossily converted to a `String`.
let relative_path = path
.strip_prefix(&project_root)
.unwrap_or(path)
.to_string_lossy()
.to_string();
// Checksum and coverage-off marker are recorded before chunking, so they are
// kept even when chunking fails below.
let checksum = compute_file_sha256(content);
file_checksums.insert(relative_path.clone(), checksum);
if has_coverage_off(content) {
coverage_off_files.insert(relative_path.clone());
}
// A chunking failure skips the file without counting it in `file_count`.
let chunks = match chunk_code(content, language) {
Ok(c) => c,
Err(_) => continue, };
// `languages_seen` counts files per language, keyed by the language's Debug rendering.
let lang_str = format!("{language:?}");
*languages_seen.entry(lang_str.clone()).or_insert(0) += 1;
for mut chunk in chunks {
use crate::services::semantic::ChunkType;
// Only these chunk kinds are indexed; every other chunk type is skipped.
let definition_type = match &chunk.chunk_type {
ChunkType::Function => DefinitionType::Function,
ChunkType::Struct => DefinitionType::Struct,
ChunkType::Enum => DefinitionType::Enum,
ChunkType::Trait => DefinitionType::Trait,
ChunkType::TypeAlias => DefinitionType::TypeAlias,
_ => continue, };
if is_test_chunk(&chunk.chunk_name, &relative_path) {
continue;
}
// Metrics, signature and doc comment are derived while `chunk` still owns its
// name and content; the `mem::take` calls below then move those strings out
// without cloning, so this ordering must be preserved.
let quality = extract_quality_metrics(&chunk, content);
// The signature is the first line of the chunk's content, or the chunk name
// when the content has no lines.
let signature = chunk
.content
.lines()
.next()
.unwrap_or(&chunk.chunk_name)
.to_string();
let doc_comment = extract_doc_comment(content, chunk.start_line);
// Note: this `entry` shadows the directory-walk `entry` for the rest of the loop body.
let entry = FunctionEntry {
file_path: relative_path.clone(),
function_name: std::mem::take(&mut chunk.chunk_name),
signature,
definition_type,
doc_comment,
source: std::mem::take(&mut chunk.content),
start_line: chunk.start_line,
end_line: chunk.end_line,
language: lang_str.clone(),
quality,
checksum: std::mem::take(&mut chunk.content_checksum),
// The remaining fields start at zero/empty in this pass.
// NOTE(review): presumably filled in by later passes — confirm.
commit_count: 0,
churn_score: 0.0,
clone_count: 0,
pattern_diversity: 0.0,
fault_annotations: Vec::new(),
linked_definition: None,
};
functions.push(entry);
}
file_count += 1;
// Progress line on stderr every 500 files, rewritten in place via `\r`.
if file_count % 500 == 0 {
eprint!("\r Indexing... {} files", file_count);
}
}
// Final summary line, printed only when at least one progress line was shown.
if file_count >= 500 {
eprintln!("\r Indexed {} files", file_count);
}
// Derived structures, all computed from the collected entries.
let indices = build_indices(&functions);
let (calls, called_by) = build_call_graph(&functions, &indices.name_index);
let graph_metrics = compute_graph_metrics(functions.len(), &calls, &called_by);
let name_frequency = compute_name_frequency(&indices.name_index, functions.len());
// These two passes mutate the entries in place after the indices and call graph exist.
populate_cached_annotations(&mut functions, &indices.file_index, &project_root);
link_declarations_to_definitions(&mut functions);
// Mean TDG score across all entries; 0.0 for an empty index.
let avg_tdg = if !functions.is_empty() {
functions.iter().map(|f| f.quality.tdg_score).sum::<f32>() / functions.len() as f32
} else {
0.0
};
let manifest = IndexManifest {
version: "1.4.0".to_string(), built_at: chrono::Utc::now().to_rfc3339(),
project_root: project_root.to_string_lossy().to_string(),
function_count: functions.len(),
file_count,
// Collected from HashMap keys, so the order of languages is unspecified.
languages: languages_seen.keys().cloned().collect(),
avg_tdg_score: avg_tdg,
file_checksums,
last_incremental_changes: 0, };
// Lower-cased copy of the corpus, computed once here.
let corpus_lower: Vec<String> = indices.corpus.iter().map(|d| d.to_lowercase()).collect();
Ok(Self {
functions,
name_index: indices.name_index,
file_index: indices.file_index,
corpus: indices.corpus,
corpus_lower,
name_frequency,
calls,
called_by,
graph_metrics,
project_root,
manifest,
// A freshly built index has no database path set.
db_path: None, coverage_off_files,
})
}
/// Summarises the index: entry counts per language and per TDG grade, the
/// mean complexity, and a rough size estimate in bytes.
///
/// The mean complexity is `0.0` for an empty index.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn stats(&self) -> IndexStats {
    let entry_count = self.functions.len();
    let mut by_language: HashMap<String, usize> = HashMap::new();
    let mut by_grade: HashMap<String, usize> = HashMap::new();
    let mut complexity_sum: u32 = 0;

    for entry in self.functions.iter() {
        *by_language.entry(entry.language.clone()).or_insert(0) += 1;
        *by_grade.entry(entry.quality.tdg_grade.clone()).or_insert(0) += 1;
        complexity_sum += entry.quality.complexity;
    }

    let avg_complexity = if entry_count == 0 {
        0.0
    } else {
        complexity_sum as f32 / entry_count as f32
    };

    // Size estimate: the container value itself, the inline size of every
    // entry, and a flat 64 bytes per key in each lookup index. Heap data
    // owned by the entries is not counted.
    let container_bytes = std::mem::size_of_val(&self.functions);
    let entries_bytes = entry_count * std::mem::size_of::<FunctionEntry>();
    let name_index_bytes = self.name_index.len() * 64;
    let file_index_bytes = self.file_index.len() * 64;
    let index_size_bytes =
        (container_bytes + entries_bytes + name_index_bytes + file_index_bytes) as u64;

    IndexStats {
        total_functions: entry_count,
        by_language,
        by_grade,
        avg_complexity,
        index_size_bytes,
    }
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn manifest(&self) -> &IndexManifest {
&self.manifest
}
/// Returns every indexed entry registered under exactly `name` in the name
/// index, or an empty vector when the name is not present.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn get_by_name(&self, name: &str) -> Vec<&FunctionEntry> {
    match self.name_index.get(name) {
        Some(positions) => positions
            .iter()
            .map(|&pos| &self.functions[pos])
            .collect(),
        None => Vec::new(),
    }
}
/// Returns every indexed entry registered under `file_path` in the file
/// index, or an empty vector when that key is not present.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn get_by_file(&self, file_path: &str) -> Vec<&FunctionEntry> {
    if let Some(positions) = self.file_index.get(file_path) {
        let mut found = Vec::new();
        found.extend(positions.iter().map(|&pos| &self.functions[pos]));
        found
    } else {
        Vec::new()
    }
}
/// Borrows all indexed entries as a slice, in storage order.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn all_functions(&self) -> &[FunctionEntry] {
    self.functions.as_slice()
}
/// Borrows the corpus strings stored in this index as a slice.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn corpus(&self) -> &[String] {
    &self.corpus[..]
}
/// Borrows the project root path stored in this index.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn project_root(&self) -> &Path {
    self.project_root.as_path()
}
}
// The files below are textually spliced into this module by `include!`, so
// their items share this file's imports and the definitions above.
include!("build_workspace.rs");
include!("build_persistence.rs");
include!("build_incremental.rs");
include!("build_accessors.rs");
include!("build_helpers.rs");