use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use color_eyre::eyre::{ContextCompat, Result, WrapErr, ensure};
use knowdit_kg_model::{ExtractedFunction, ExtractedSemantic, db::project as project_model};
use sea_orm::{
ActiveValue::Set, ConnectionTrait, Database, DatabaseBackend, DatabaseConnection, EntityTrait,
QueryOrder, Schema, TransactionTrait,
};
use crate::cg::{
CallGraph, Contract, FileChunk, Function, FunctionCall, Interface, location_from_db,
location_to_db,
};
pub use crate::db::code_gen::CodeGenStatus;
pub use crate::db::harness_run::RunKind;
use crate::db::{
code_gen as code_gen_model, contract as contract_model,
contract_functions as contract_functions_model, contract_inherit as contract_inherit_model,
contract_variable as contract_variable_model, function as function_model,
function_call as function_call_model, function_state_variable as function_state_variable_model,
harness_run as harness_run_model, historical_finding as historical_finding_model,
historical_semantic as historical_semantic_model,
historical_semantic_finding_link as historical_semantic_finding_link_model,
interface as interface_model, interface_functions as interface_functions_model,
line_coverage as line_coverage_model, project_semantic as project_semantic_model,
project_semantic_function as project_semantic_function_model, reflection as reflection_model,
semantic_matched as semantic_matched_model, specification as specification_model,
state_variable as state_variable_model,
};
use crate::storage::{
ContractInherit, ContractVariable, FunctionStateVariable, StateVariable, StorageDotOptions,
StorageGraph, dot_escape,
};
/// Handle to a project database: the open connection together with the URL
/// (and, when opened through `open_sqlite`, the file path) it was opened with.
#[derive(Clone)]
pub struct RepoDatabase {
// Connection returned by `Database::connect`; used by the query methods below.
db: DatabaseConnection,
// Connection URL exactly as it was passed to `connect`.
url: String,
// SQLite file path; `None` when the handle was created by `open_url`.
path: Option<PathBuf>,
}
impl RepoDatabase {
/// Picks the SQLite file location for a repository.
///
/// Returns `override_path` when one is supplied; otherwise falls back to
/// `knowdit.sqlite3` directly under `repo_root`.
pub fn default_sqlite_path(repo_root: &Path, override_path: Option<PathBuf>) -> PathBuf {
    match override_path {
        Some(explicit) => explicit,
        None => repo_root.join("knowdit.sqlite3"),
    }
}
/// Builds the `sqlite://…?mode=rwc` connection URL for a database file.
///
/// The path is rendered with `Path::display` and inserted verbatim; no URL
/// escaping is applied.
pub fn sqlite_url_for(path: &Path) -> String {
    let rendered_path = path.display().to_string();
    let mut url = String::with_capacity("sqlite://".len() + rendered_path.len() + "?mode=rwc".len());
    url.push_str("sqlite://");
    url.push_str(&rendered_path);
    url.push_str("?mode=rwc");
    url
}
/// Opens a connection to `url`.
///
/// When the backend is SQLite, two pragmas are issued on the new connection,
/// in this order: `journal_mode=WAL` and `foreign_keys=ON`.
///
/// # Errors
/// Fails if the connection cannot be established or either pragma fails.
async fn connect(url: &str) -> Result<DatabaseConnection> {
    let connection = Database::connect(url)
        .await
        .wrap_err_with(|| format!("failed to connect to project database {url}"))?;
    if connection.get_database_backend() != DatabaseBackend::Sqlite {
        return Ok(connection);
    }
    // (statement, error context) pairs, executed in order.
    let sqlite_pragmas = [
        (
            "PRAGMA journal_mode=WAL;",
            "failed to enable SQLite WAL mode for project database",
        ),
        (
            "PRAGMA foreign_keys=ON;",
            "failed to enable SQLite foreign keys for project database",
        ),
    ];
    for (statement, failure_context) in sqlite_pragmas {
        connection
            .execute_unprepared(statement)
            .await
            .wrap_err(failure_context)?;
    }
    Ok(connection)
}
/// Opens a database from an arbitrary connection URL.
///
/// The resulting handle has no file path (`path()` returns `None`).
///
/// # Errors
/// Propagates any failure from `connect`.
pub async fn open_url(url: impl Into<String>) -> Result<Self> {
    let url: String = url.into();
    match Self::connect(&url).await {
        Ok(db) => Ok(Self {
            db,
            url,
            path: None,
        }),
        Err(report) => Err(report),
    }
}
/// Opens the SQLite database stored at `path`.
///
/// The URL is derived with `sqlite_url_for`, and `path` is remembered on the
/// returned handle.
///
/// # Errors
/// Propagates any failure from `connect`.
pub async fn open_sqlite(path: PathBuf) -> Result<Self> {
    let url = Self::sqlite_url_for(&path);
    match Self::connect(&url).await {
        Ok(db) => Ok(Self {
            db,
            url,
            path: Some(path),
        }),
        Err(report) => Err(report),
    }
}
/// Returns a shared reference to the underlying database connection.
pub fn connection(&self) -> &DatabaseConnection {
&self.db
}
/// Returns the connection URL this handle was opened with, unredacted.
pub fn url(&self) -> &str {
    self.url.as_str()
}
/// Returns the SQLite file path, or `None` if the handle has no stored path.
pub fn path(&self) -> Option<&Path> {
    self.path.as_ref().map(|stored| stored.as_path())
}
/// Returns the connection URL with the username and password removed.
///
/// If the stored URL cannot be parsed, a fixed placeholder string is returned
/// instead. Failures while clearing the username or password are ignored.
pub fn redacted_url(&self) -> String {
    match url::Url::parse(&self.url) {
        Err(_) => "<unparseable database url>".to_string(),
        Ok(mut sanitized) => {
            let has_username = !sanitized.username().is_empty();
            if has_username {
                let _ = sanitized.set_username("");
            }
            let has_password = sanitized.password().is_some();
            if has_password {
                let _ = sanitized.set_password(None);
            }
            sanitized.to_string()
        }
    }
}
/// Creates the project database tables, one `CREATE TABLE` per entity.
///
/// Every statement is marked `IF NOT EXISTS` before execution, and the
/// statements run in the order they are listed below.
///
/// # Errors
/// Returns an error as soon as any table creation statement fails.
pub async fn init_schema(&self) -> Result<()> {
// Build statements for the backend of the open connection.
let schema = Schema::new(self.db.get_database_backend());
// NOTE(review): the order below may matter if entities declare foreign keys
// to each other — confirm before reordering.
let tables = vec![
schema.create_table_from_entity(project_semantic_model::Entity),
schema.create_table_from_entity(project_semantic_function_model::Entity),
schema.create_table_from_entity(contract_model::Entity),
schema.create_table_from_entity(interface_model::Entity),
schema.create_table_from_entity(function_model::Entity),
schema.create_table_from_entity(contract_functions_model::Entity),
schema.create_table_from_entity(interface_functions_model::Entity),
schema.create_table_from_entity(function_call_model::Entity),
schema.create_table_from_entity(state_variable_model::Entity),
schema.create_table_from_entity(contract_inherit_model::Entity),
schema.create_table_from_entity(contract_variable_model::Entity),
schema.create_table_from_entity(function_state_variable_model::Entity),
schema.create_table_from_entity(historical_semantic_model::Entity),
schema.create_table_from_entity(historical_finding_model::Entity),
schema.create_table_from_entity(historical_semantic_finding_link_model::Entity),
schema.create_table_from_entity(semantic_matched_model::Entity),
schema.create_table_from_entity(specification_model::Entity),
schema.create_table_from_entity(code_gen_model::Entity),
schema.create_table_from_entity(harness_run_model::Entity),
schema.create_table_from_entity(reflection_model::Entity),
schema.create_table_from_entity(line_coverage_model::Entity),
schema.create_table_from_entity(project_model::Entity),
];
for mut table in tables {
// Skip tables that already exist instead of failing.
table.if_not_exists();
self.db
.execute(&table)
.await
.wrap_err("failed to create project database schema")?;
}
Ok(())
}
/// Verifies that this database belongs to `project_name`, claiming it if empty.
///
/// * More than one project row: error.
/// * Exactly one row: succeeds only if its name equals `project_name`.
/// * No rows: inserts a new row named `project_name` with status `"pending"`.
///
/// # Errors
/// Fails on a query/insert error, on multiple project rows, or on a name mismatch.
pub async fn ensure_project(&self, project_name: &str) -> Result<()> {
    let existing = project_model::Entity::find()
        .order_by_asc(project_model::Column::Id)
        .all(&self.db)
        .await
        .wrap_err("failed to load project identity rows from project database")?;
    let row_count = existing.len();
    ensure!(
        row_count <= 1,
        "project database contains multiple projects ({}): {}; use one project database per project",
        row_count,
        existing
            .iter()
            .map(|row| format!("{}:{}", row.id, row.name))
            .collect::<Vec<_>>()
            .join(", ")
    );
    match existing.first() {
        Some(owner) => {
            ensure!(
                owner.name == project_name,
                "project database belongs to project '{}' (id={}) but current project is '{}'; use a different --database-url or clear the project database",
                owner.name,
                owner.id,
                project_name
            );
            Ok(())
        }
        None => {
            // Empty database: record this project as its owner.
            let identity = project_model::ActiveModel {
                name: Set(project_name.to_string()),
                status: Set("pending".to_string()),
                ..Default::default()
            };
            project_model::Entity::insert(identity)
                .exec(&self.db)
                .await
                .wrap_err_with(|| {
                    format!("failed to write project identity '{project_name}' to project database")
                })?;
            Ok(())
        }
    }
}
/// Loads all project semantics together with their attached functions.
///
/// Semantics are returned in ascending id order; each semantic's functions
/// follow the `(semantic_id, id)` ordering of the function rows.
///
/// # Errors
/// Fails on a query error or when a function row references a semantic id
/// that has no row.
pub async fn load_project_semantics(&self) -> Result<Vec<ExtractedSemantic>> {
    let semantic_models = project_semantic_model::Entity::find()
        .order_by_asc(project_semantic_model::Column::Id)
        .all(&self.db)
        .await
        .wrap_err("failed to load project_semantic rows")?;
    let function_models = project_semantic_function_model::Entity::find()
        .order_by_asc(project_semantic_function_model::Column::SemanticId)
        .order_by_asc(project_semantic_function_model::Column::Id)
        .all(&self.db)
        .await
        .wrap_err("failed to load project_semantic_function rows")?;

    // Index semantics by id so function rows can be attached to their owner.
    let mut by_id = BTreeMap::new();
    for model in semantic_models {
        by_id.insert(
            model.id,
            ExtractedSemantic {
                name: model.name,
                category: model.category,
                definition: model.definition,
                description: model.description,
                functions: Vec::new(),
            },
        );
    }

    for model in function_models {
        let owner = by_id.get_mut(&model.semantic_id).wrap_err_with(|| {
            format!(
                "project_semantic_function row {} references missing project semantic {}",
                model.id, model.semantic_id
            )
        })?;
        let attached = ExtractedFunction {
            name: model.name,
            contract: model.contract,
            signature: model.signature,
        };
        owner.functions.push(attached);
    }

    let ordered: Vec<ExtractedSemantic> = by_id.into_values().collect();
    Ok(ordered)
}
pub async fn replace_project_semantics(&self, semantics: &[ExtractedSemantic]) -> Result<()> {
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin project semantic write transaction")?;
project_semantic_function_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear project_semantic_function rows")?;
project_semantic_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear project_semantic rows")?;
let mut next_function_id: i32 = 1;
for (index, semantic) in semantics.iter().enumerate() {
let semantic_id = (index + 1) as i32;
project_semantic_model::Entity::insert(project_semantic_model::ActiveModel {
id: Set(semantic_id),
name: Set(semantic.name.clone()),
category: Set(semantic.category),
definition: Set(semantic.definition.clone()),
description: Set(semantic.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert project semantic {}", semantic_id))?;
for function in &semantic.functions {
let function_id = next_function_id;
next_function_id += 1;
project_semantic_function_model::Entity::insert(
project_semantic_function_model::ActiveModel {
id: Set(function_id),
semantic_id: Set(semantic_id),
name: Set(function.name.clone()),
contract: Set(function.contract.clone()),
signature: Set(function.signature.clone()),
},
)
.exec(&txn)
.await
.wrap_err_with(|| {
format!("failed to insert project semantic function {}", function_id)
})?;
}
}
txn.commit()
.await
.wrap_err("failed to commit project semantic write transaction")?;
Ok(())
}
/// Rebuilds the in-memory `CallGraph` from the database.
///
/// Loads contracts, interfaces, functions, function calls and the two link
/// tables, attaches each call to its calling function, then copies each
/// function into every contract/interface that links to it. Functions inside
/// a contract or interface end up sorted by `(start_line, start_column, id)`.
///
/// # Errors
/// Fails on any query error, when `location_from_db` rejects a stored
/// location, or when a call/link row references a missing function, contract
/// or interface.
pub async fn load_call_graph(&self) -> Result<CallGraph> {
let db = &self.db;
let contract_rows = contract_model::Entity::find()
.order_by_asc(contract_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load contracts from database")?;
let interface_rows = interface_model::Entity::find()
.order_by_asc(interface_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load interfaces from database")?;
let function_rows = function_model::Entity::find()
.order_by_asc(function_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load functions from database")?;
let call_rows = function_call_model::Entity::find()
.order_by_asc(function_call_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load function calls from database")?;
let contract_function_rows = contract_functions_model::Entity::find()
.order_by_asc(contract_functions_model::Column::ContractId)
.order_by_asc(contract_functions_model::Column::FunctionId)
.all(db)
.await
.wrap_err("failed to load contract/function links from database")?;
let interface_function_rows = interface_functions_model::Entity::find()
.order_by_asc(interface_functions_model::Column::InterfaceId)
.order_by_asc(interface_functions_model::Column::FunctionId)
.all(db)
.await
.wrap_err("failed to load interface/function links from database")?;
// Contracts keyed by id; their function lists are filled in further down.
let mut contracts = BTreeMap::new();
for row in contract_rows {
let loc = location_from_db(
"contract",
row.id,
row.start_line,
row.start_column,
row.end_line,
row.end_column,
)?;
contracts.insert(
row.id,
Contract {
id: row.id,
name: row.name,
relative_file_path: PathBuf::from(row.relative_file_path),
chunk: FileChunk {
loc,
content: row.content,
},
functions: Vec::new(),
description: row.description,
},
);
}
// Interfaces keyed by id, built the same way as contracts.
let mut interfaces = BTreeMap::new();
for row in interface_rows {
let loc = location_from_db(
"interface",
row.id,
row.start_line,
row.start_column,
row.end_line,
row.end_column,
)?;
interfaces.insert(
row.id,
Interface {
id: row.id,
name: row.name,
relative_file_path: PathBuf::from(row.relative_file_path),
chunk: FileChunk {
loc,
content: row.content,
},
functions: Vec::new(),
description: row.description,
},
);
}
// Functions keyed by id; calls are attached before they are copied into owners.
let mut functions = BTreeMap::new();
for row in function_rows {
let loc = location_from_db(
"function",
row.id,
row.start_line,
row.start_column,
row.end_line,
row.end_column,
)?;
functions.insert(
row.id,
Function {
id: row.id,
name: row.name,
args: row.args,
relative_file_path: PathBuf::from(row.relative_file_path),
loc,
content: row.content,
calls: Vec::new(),
description: row.description,
},
);
}
// Attach each call to its caller (lhs); the callee (rhs) only has to exist.
for row in call_rows {
ensure!(
functions.contains_key(&row.rhs_id),
"function_call row {} references missing rhs function {}",
row.id,
row.rhs_id
);
let call = FunctionCall {
id: row.id,
from_id: row.lhs_id,
to_id: row.rhs_id,
description: row.description,
};
let function = functions.get_mut(&row.lhs_id).wrap_err_with(|| {
format!(
"function_call row {} references missing lhs function {}",
row.id, row.lhs_id
)
})?;
function.calls.push(call);
}
// Each link row gives the contract its own clone of the function (calls included).
for row in contract_function_rows {
let function = functions.get(&row.function_id).cloned().wrap_err_with(|| {
format!(
"contract_functions row {} references missing function {}",
row.id, row.function_id
)
})?;
let contract = contracts.get_mut(&row.contract_id).wrap_err_with(|| {
format!(
"contract_functions row {} references missing contract {}",
row.id, row.contract_id
)
})?;
contract.functions.push(function);
}
// Same cloning scheme for interface links.
for row in interface_function_rows {
let function = functions.get(&row.function_id).cloned().wrap_err_with(|| {
format!(
"interface_functions row {} references missing function {}",
row.id, row.function_id
)
})?;
let interface = interfaces.get_mut(&row.interface_id).wrap_err_with(|| {
format!(
"interface_functions row {} references missing interface {}",
row.id, row.interface_id
)
})?;
interface.functions.push(function);
}
// Order functions by source position, with the id as tie-breaker.
for contract in contracts.values_mut() {
contract.functions.sort_by_key(|function| {
(
function.loc.start_line,
function.loc.start_column,
function.id,
)
});
}
for interface in interfaces.values_mut() {
interface.functions.sort_by_key(|function| {
(
function.loc.start_line,
function.loc.start_column,
function.id,
)
});
}
Ok(CallGraph {
contracts,
interfaces,
})
}
pub async fn write_call_graph(&self, call_graph: &CallGraph) -> Result<()> {
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin callgraph write transaction")?;
function_call_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear function_call rows")?;
contract_functions_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear contract_functions rows")?;
interface_functions_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear interface_functions rows")?;
function_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear function rows")?;
contract_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear contract rows")?;
interface_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear interface rows")?;
let mut inserted_function_ids = BTreeSet::new();
let mut contract_function_id = 1;
let mut interface_function_id = 1;
for contract in call_graph.contracts.values() {
let (start_line, start_column, end_line, end_column) =
location_to_db("contract", contract.id, &contract.chunk.loc)?;
contract_model::Entity::insert(contract_model::ActiveModel {
id: Set(contract.id),
name: Set(contract.name.clone()),
relative_file_path: Set(contract.relative_file_path.to_string_lossy().to_string()),
start_line: Set(start_line),
start_column: Set(start_column),
end_line: Set(end_line),
end_column: Set(end_column),
content: Set(contract.chunk.content.clone()),
description: Set(contract.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert contract {}", contract.id))?;
for function in &contract.functions {
if inserted_function_ids.insert(function.id) {
let (start_line, start_column, end_line, end_column) =
location_to_db("function", function.id, &function.loc)?;
function_model::Entity::insert(function_model::ActiveModel {
id: Set(function.id),
name: Set(function.name.clone()),
args: Set(function.args.clone()),
relative_file_path: Set(function
.relative_file_path
.to_string_lossy()
.to_string()),
start_line: Set(start_line),
start_column: Set(start_column),
end_line: Set(end_line),
end_column: Set(end_column),
content: Set(function.content.clone()),
description: Set(function.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert function {}", function.id))?;
}
contract_functions_model::Entity::insert(contract_functions_model::ActiveModel {
id: Set(contract_function_id),
contract_id: Set(contract.id),
function_id: Set(function.id),
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to link contract {} to function {}",
contract.id, function.id
)
})?;
contract_function_id += 1;
}
}
for interface in call_graph.interfaces.values() {
let (start_line, start_column, end_line, end_column) =
location_to_db("interface", interface.id, &interface.chunk.loc)?;
interface_model::Entity::insert(interface_model::ActiveModel {
id: Set(interface.id),
name: Set(interface.name.clone()),
relative_file_path: Set(interface.relative_file_path.to_string_lossy().to_string()),
start_line: Set(start_line),
start_column: Set(start_column),
end_line: Set(end_line),
end_column: Set(end_column),
content: Set(interface.chunk.content.clone()),
description: Set(interface.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert interface {}", interface.id))?;
for function in &interface.functions {
if inserted_function_ids.insert(function.id) {
let (start_line, start_column, end_line, end_column) =
location_to_db("function", function.id, &function.loc)?;
function_model::Entity::insert(function_model::ActiveModel {
id: Set(function.id),
name: Set(function.name.clone()),
args: Set(function.args.clone()),
relative_file_path: Set(function
.relative_file_path
.to_string_lossy()
.to_string()),
start_line: Set(start_line),
start_column: Set(start_column),
end_line: Set(end_line),
end_column: Set(end_column),
content: Set(function.content.clone()),
description: Set(function.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert function {}", function.id))?;
}
interface_functions_model::Entity::insert(interface_functions_model::ActiveModel {
id: Set(interface_function_id),
interface_id: Set(interface.id),
function_id: Set(function.id),
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to link interface {} to function {}",
interface.id, function.id
)
})?;
interface_function_id += 1;
}
}
for contract in call_graph.contracts.values() {
for function in &contract.functions {
for call in &function.calls {
function_call_model::Entity::insert(function_call_model::ActiveModel {
id: Set(call.id),
lhs_id: Set(call.from_id),
rhs_id: Set(call.to_id),
description: Set(call.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert function_call {}", call.id))?;
}
}
}
for interface in call_graph.interfaces.values() {
for function in &interface.functions {
for call in &function.calls {
function_call_model::Entity::insert(function_call_model::ActiveModel {
id: Set(call.id),
lhs_id: Set(call.from_id),
rhs_id: Set(call.to_id),
description: Set(call.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert function_call {}", call.id))?;
}
}
}
txn.commit()
.await
.wrap_err("failed to commit callgraph write transaction")?;
Ok(())
}
pub async fn load_storage_graph(&self) -> Result<StorageGraph> {
let db = &self.db;
let mut graph = StorageGraph::default();
let svs = state_variable_model::Entity::find()
.order_by_asc(state_variable_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load state_variable rows")?;
for sv in svs {
graph.state_variables.insert(
sv.id,
StateVariable {
id: sv.id,
name: sv.name,
type_name: sv.type_name,
relative_file_path: PathBuf::from(sv.relative_file_path),
loc: crate::cg::FileLocation {
start_line: sv.start_line as usize,
start_column: sv.start_column as usize,
end_line: sv.end_line as usize,
end_column: sv.end_column as usize,
},
content: sv.content,
},
);
}
let cvs = contract_variable_model::Entity::find()
.order_by_asc(contract_variable_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load contract_variable rows")?;
for cv in cvs {
graph.contract_variables.push(ContractVariable {
contract_id: cv.contract_id,
state_variable_id: cv.state_variable_id,
description: cv.description,
});
}
let inherits = contract_inherit_model::Entity::find()
.all(db)
.await
.wrap_err("failed to load contract_inherit rows")?;
for inh in inherits {
graph.contract_inherits.push(ContractInherit {
contract_id: inh.contract_id,
inherited_id: inh.inherited_id,
});
}
let fsvs = function_state_variable_model::Entity::find()
.order_by_asc(function_state_variable_model::Column::Id)
.all(db)
.await
.wrap_err("failed to load function_state_variable rows")?;
for fsv in fsvs {
graph.function_state_variables.push(FunctionStateVariable {
function_id: fsv.function_id,
state_variable_id: fsv.state_variable_id,
is_write: fsv.is_write,
description: fsv.description,
});
}
Ok(graph)
}
pub async fn write_storage_graph(&self, storage: &StorageGraph) -> Result<()> {
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin storage write transaction")?;
function_state_variable_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear function_state_variable rows")?;
contract_variable_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear contract_variable rows")?;
contract_inherit_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear contract_inherit rows")?;
state_variable_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear state_variable rows")?;
for sv in storage.state_variables.values() {
state_variable_model::Entity::insert(state_variable_model::ActiveModel {
id: Set(sv.id),
name: Set(sv.name.clone()),
type_name: Set(sv.type_name.clone()),
relative_file_path: Set(sv.relative_file_path.to_string_lossy().to_string()),
start_line: Set(sv.loc.start_line as i32),
start_column: Set(sv.loc.start_column as i32),
end_line: Set(sv.loc.end_line as i32),
end_column: Set(sv.loc.end_column as i32),
content: Set(sv.content.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert state_variable {}", sv.id))?;
}
for inh in &storage.contract_inherits {
contract_inherit_model::Entity::insert(contract_inherit_model::ActiveModel {
contract_id: Set(inh.contract_id),
inherited_id: Set(inh.inherited_id),
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert contract_inherit ({}, {})",
inh.contract_id, inh.inherited_id
)
})?;
}
let mut next_cv_id = 1i32;
for cv in &storage.contract_variables {
contract_variable_model::Entity::insert(contract_variable_model::ActiveModel {
id: Set(next_cv_id),
contract_id: Set(cv.contract_id),
state_variable_id: Set(cv.state_variable_id),
description: Set(cv.description.clone()),
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert contract_variable contract={} sv={}",
cv.contract_id, cv.state_variable_id
)
})?;
next_cv_id += 1;
}
let mut next_fsv_id = 1i32;
for fsv in &storage.function_state_variables {
function_state_variable_model::Entity::insert(
function_state_variable_model::ActiveModel {
id: Set(next_fsv_id),
function_id: Set(fsv.function_id),
state_variable_id: Set(fsv.state_variable_id),
is_write: Set(fsv.is_write),
description: Set(fsv.description.clone()),
},
)
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert function_state_variable function={} sv={}",
fsv.function_id, fsv.state_variable_id
)
})?;
next_fsv_id += 1;
}
txn.commit()
.await
.wrap_err("failed to commit storage write transaction")?;
Ok(())
}
/// Renders `storage` as a Graphviz DOT digraph named `StorageRW`.
///
/// State variables become box nodes labelled `owner.name\ntype`, functions
/// that access at least one state variable become ellipse nodes, and each
/// access becomes an edge labelled `W` (write) or `R` (read). Contract,
/// interface and function names are looked up in the database; unknown names
/// are rendered as `?`. State variables with no access are skipped unless
/// `options.include_isolated_state_variables` is set.
///
/// # Errors
/// Fails if loading contracts, interfaces or functions from the database fails.
pub async fn export_storage_dot(
    &self,
    storage: &StorageGraph,
    options: StorageDotOptions,
) -> Result<String> {
    let conn = &self.db;
    let contract_rows = contract_model::Entity::find()
        .order_by_asc(contract_model::Column::Id)
        .all(conn)
        .await
        .wrap_err("failed to load contracts for storage DOT export")?;
    let interface_rows = interface_model::Entity::find()
        .order_by_asc(interface_model::Column::Id)
        .all(conn)
        .await
        .wrap_err("failed to load interfaces for storage DOT export")?;
    let function_rows = function_model::Entity::find()
        .order_by_asc(function_model::Column::Id)
        .all(conn)
        .await
        .wrap_err("failed to load functions for storage DOT export")?;

    // Contracts first, interfaces second: on an id collision the interface name wins.
    let owner_names: HashMap<i32, String> = contract_rows
        .iter()
        .map(|row| (row.id, row.name.clone()))
        .chain(interface_rows.iter().map(|row| (row.id, row.name.clone())))
        .collect();
    let function_labels: HashMap<i32, String> = function_rows
        .iter()
        .map(|row| (row.id, row.name.clone()))
        .collect();

    // The first contract listed for a state variable is treated as its owner.
    let mut first_owner: HashMap<i32, i32> = HashMap::new();
    for link in &storage.contract_variables {
        first_owner
            .entry(link.state_variable_id)
            .or_insert(link.contract_id);
    }

    // Ids that take part in at least one access edge.
    let mut referenced_vars: BTreeSet<i32> = BTreeSet::new();
    let mut referencing_fns: BTreeSet<i32> = BTreeSet::new();
    for access in &storage.function_state_variables {
        referenced_vars.insert(access.state_variable_id);
        referencing_fns.insert(access.function_id);
    }

    let mut out = String::new();
    writeln!(out, "digraph StorageRW {{").unwrap();
    writeln!(out, " rankdir=LR;").unwrap();
    writeln!(out, " node [fontname=Helvetica];").unwrap();
    writeln!(out, " edge [fontname=Helvetica, fontsize=10];").unwrap();

    for var in storage.state_variables.values() {
        let is_shown =
            options.include_isolated_state_variables || referenced_vars.contains(&var.id);
        if !is_shown {
            continue;
        }
        let owner_name = match first_owner
            .get(&var.id)
            .and_then(|contract_id| owner_names.get(contract_id))
        {
            Some(name) => name.as_str(),
            None => "?",
        };
        let label = format!("{}.{}\\n{}", owner_name, var.name, var.type_name);
        writeln!(
            out,
            " sv{} [shape=box, style=\"filled\", fillcolor=\"#fff2cc\", label=\"{}\"];",
            var.id,
            dot_escape(&label)
        )
        .unwrap();
    }

    for function_id in &referencing_fns {
        let label = match function_labels.get(function_id) {
            Some(name) => name.as_str(),
            None => "?",
        };
        writeln!(
            out,
            " fn{} [shape=ellipse, label=\"{}\"];",
            function_id,
            dot_escape(label)
        )
        .unwrap();
    }

    for access in &storage.function_state_variables {
        match access.is_write {
            true => writeln!(
                out,
                " fn{} -> sv{} [color=\"#cc0000\", label=\"W\"];",
                access.function_id, access.state_variable_id
            )
            .unwrap(),
            false => writeln!(
                out,
                " fn{} -> sv{} [color=\"#1f4faf\", style=dashed, label=\"R\"];",
                access.function_id, access.state_variable_id
            )
            .unwrap(),
        }
    }

    writeln!(out, "}}").unwrap();
    Ok(out)
}
}
/// A historical semantic node paired with the audit findings linked to it.
#[derive(Debug, Clone)]
pub struct HistoricalSemanticRecord {
// The semantic node itself.
pub semantic: knowdit_kg_model::db::semantic_node::Model,
// Findings linked to `semantic`; a finding may also appear under other records.
pub findings: Vec<knowdit_kg_model::db::audit_finding::Model>,
}
/// Result of semantic matching: the historical records involved plus the
/// list of matched id pairs.
#[derive(Debug, Clone, Default)]
pub struct SemanticMatchSet {
// Historical semantics with their findings.
pub historicals: Vec<HistoricalSemanticRecord>,
// `(extract_id, historical_id)` pairs, stored in and loaded from `semantic_matched`.
pub matches: Vec<(i32, i32)>,
}
impl RepoDatabase {
pub async fn write_semantic_match_results(&self, set: &SemanticMatchSet) -> Result<()> {
use std::collections::BTreeSet;
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin semantic-match write transaction")?;
semantic_matched_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear semantic_matched rows")?;
historical_semantic_finding_link_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear historical_semantic_finding_link rows")?;
historical_finding_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear historical_finding rows")?;
historical_semantic_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear historical_semantic rows")?;
let mut inserted_finding_ids = BTreeSet::new();
for record in &set.historicals {
let mirror: historical_semantic_model::Model = record.semantic.clone().into();
historical_semantic_model::Entity::insert(historical_semantic_model::ActiveModel {
id: Set(mirror.id),
name: Set(mirror.name),
definition: Set(mirror.definition),
description: Set(mirror.description),
category: Set(mirror.category),
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert historical_semantic {}",
record.semantic.id
)
})?;
for finding in &record.findings {
if inserted_finding_ids.insert(finding.id) {
let mirror: historical_finding_model::Model = finding.clone().into();
historical_finding_model::Entity::insert(
historical_finding_model::ActiveModel {
id: Set(mirror.id),
title: Set(mirror.title),
severity: Set(mirror.severity),
root_cause: Set(mirror.root_cause),
description: Set(mirror.description),
patterns: Set(mirror.patterns),
exploits: Set(mirror.exploits),
},
)
.exec(&txn)
.await
.wrap_err_with(|| {
format!("failed to insert historical_finding {}", finding.id)
})?;
}
historical_semantic_finding_link_model::Entity::insert(
historical_semantic_finding_link_model::ActiveModel {
historical_semantic_id: Set(record.semantic.id),
historical_finding_id: Set(finding.id),
},
)
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to link historical_semantic {} to historical_finding {}",
record.semantic.id, finding.id
)
})?;
}
}
for (extract_id, historical_id) in &set.matches {
semantic_matched_model::Entity::insert(semantic_matched_model::ActiveModel {
extract_id: Set(*extract_id),
historical_id: Set(*historical_id),
..Default::default()
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert semantic_matched extract={} historical={}",
extract_id, historical_id
)
})?;
}
txn.commit()
.await
.wrap_err("failed to commit semantic-match write transaction")?;
Ok(())
}
pub async fn load_semantic_match_results(&self) -> Result<SemanticMatchSet> {
use std::collections::BTreeMap;
let semantic_rows = historical_semantic_model::Entity::find()
.order_by_asc(historical_semantic_model::Column::Id)
.all(&self.db)
.await
.wrap_err("failed to load historical_semantic rows")?;
let finding_rows = historical_finding_model::Entity::find()
.order_by_asc(historical_finding_model::Column::Id)
.all(&self.db)
.await
.wrap_err("failed to load historical_finding rows")?;
let link_rows = historical_semantic_finding_link_model::Entity::find()
.all(&self.db)
.await
.wrap_err("failed to load historical_semantic_finding_link rows")?;
let match_rows = semantic_matched_model::Entity::find()
.order_by_asc(semantic_matched_model::Column::Id)
.all(&self.db)
.await
.wrap_err("failed to load semantic_matched rows")?;
let findings_by_id: BTreeMap<i32, knowdit_kg_model::db::audit_finding::Model> =
finding_rows
.into_iter()
.map(|row| (row.id, row.into()))
.collect();
let mut findings_for_semantic: BTreeMap<i32, Vec<i32>> = BTreeMap::new();
for link in link_rows {
findings_for_semantic
.entry(link.historical_semantic_id)
.or_default()
.push(link.historical_finding_id);
}
let historicals = semantic_rows
.into_iter()
.map(|row| {
let semantic_id = row.id;
let semantic: knowdit_kg_model::db::semantic_node::Model = row.into();
let findings = findings_for_semantic
.remove(&semantic_id)
.unwrap_or_default()
.into_iter()
.filter_map(|id| findings_by_id.get(&id).cloned())
.collect();
HistoricalSemanticRecord { semantic, findings }
})
.collect();
let matches = match_rows
.into_iter()
.map(|row| (row.extract_id, row.historical_id))
.collect();
Ok(SemanticMatchSet {
historicals,
matches,
})
}
}
/// Input record for writing one `specification` row (the row id is not set
/// by the writer).
#[derive(Debug, Clone)]
pub struct SpecificationRecord {
// Stored in the `semantic_id` column.
pub semantic_id: i32,
// Stored in the `finding_id` column.
pub finding_id: i32,
// Stored as-is in the `specification` column; presumably JSON text — not
// validated by the writers in this file.
pub specification_json: String,
}
/// A `specification` row as read back from the database, including its row id.
#[derive(Debug, Clone)]
pub struct LoadedSpecification {
// Row id of the specification.
pub id: i32,
// Value of the `semantic_id` column.
pub semantic_id: i32,
// Value of the `finding_id` column.
pub finding_id: i32,
// Contents of the `specification` column, returned unchanged.
pub specification_json: String,
}
impl RepoDatabase {
pub async fn write_specifications(&self, records: &[SpecificationRecord]) -> Result<()> {
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin specification write transaction")?;
specification_model::Entity::delete_many()
.exec(&txn)
.await
.wrap_err("failed to clear specification rows")?;
for record in records {
specification_model::Entity::insert(specification_model::ActiveModel {
semantic_id: Set(record.semantic_id),
finding_id: Set(record.finding_id),
specification: Set(record.specification_json.clone()),
..Default::default()
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert specification semantic={} finding={}",
record.semantic_id, record.finding_id
)
})?;
}
txn.commit()
.await
.wrap_err("failed to commit specification write transaction")?;
Ok(())
}
pub async fn append_specifications(&self, records: &[SpecificationRecord]) -> Result<()> {
if records.is_empty() {
return Ok(());
}
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin specification append transaction")?;
for record in records {
specification_model::Entity::insert(specification_model::ActiveModel {
semantic_id: Set(record.semantic_id),
finding_id: Set(record.finding_id),
specification: Set(record.specification_json.clone()),
..Default::default()
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert specification semantic={} finding={}",
record.semantic_id, record.finding_id
)
})?;
}
txn.commit()
.await
.wrap_err("failed to commit specification append transaction")?;
Ok(())
}
/// Deletes every row from the `specification` table.
///
/// # Errors
/// Fails if the delete statement fails.
pub async fn clear_specifications(&self) -> Result<()> {
    let outcome = specification_model::Entity::delete_many()
        .exec(&self.db)
        .await;
    outcome
        .map(|_| ())
        .wrap_err("failed to clear specification rows")
}
/// Returns the set of `(semantic_id, finding_id)` pairs that have at least
/// one stored specification.
///
/// # Errors
/// Fails if loading the specification rows fails.
pub async fn loaded_specification_pairs(
    &self,
) -> Result<std::collections::HashSet<(i32, i32)>> {
    let rows = specification_model::Entity::find()
        .all(&self.db)
        .await
        .wrap_err("failed to load specification rows for pair index")?;
    let mut pairs = std::collections::HashSet::new();
    for row in rows {
        pairs.insert((row.semantic_id, row.finding_id));
    }
    Ok(pairs)
}
/// Loads every specification row, ordered by ascending row id.
///
/// # Errors
/// Returns an error if the rows cannot be loaded.
pub async fn load_specifications(&self) -> Result<Vec<LoadedSpecification>> {
    let models = specification_model::Entity::find()
        .order_by_asc(specification_model::Column::Id)
        .all(&self.db)
        .await
        .wrap_err("failed to load specification rows")?;

    let mut loaded = Vec::with_capacity(models.len());
    for model in models {
        loaded.push(LoadedSpecification {
            id: model.id,
            semantic_id: model.semantic_id,
            finding_id: model.finding_id,
            // The `specification` column is exposed under the `_json` name.
            specification_json: model.specification,
        });
    }
    Ok(loaded)
}
}
/// Input data for one harness run, written through `harness_run_model` by
/// `RepoDatabase::write_code_gen_with_runs` as a child of a code_gen row.
#[derive(Debug, Clone)]
pub struct HarnessRunRecord {
/// Kind of run; stored unchanged in the `kind` column.
pub kind: RunKind,
/// Optional seed; stored unchanged in the `seed` column.
pub seed: Option<i64>,
/// Stored unchanged in the `runs` column.
/// NOTE(review): presumably the number of fuzz runs requested; confirm.
pub runs: i64,
/// Argument list; serialized to a JSON string before it is stored.
/// NOTE(review): presumably the arguments passed to `forge`; confirm.
pub forge_args: Vec<String>,
/// Stored unchanged in the `exit_code` column.
pub exit_code: i32,
/// Stored unchanged in the `stdout` column.
pub stdout: String,
/// Stored unchanged in the `stderr` column.
pub stderr: String,
/// Stored unchanged in the `duration_ms` column (milliseconds, per the name).
pub duration_ms: i64,
/// Stored unchanged in the `violated` column.
/// NOTE(review): presumably true when the run found a violation; confirm.
pub violated: bool,
/// Optional JSON value, serialized into the `sequence_json` column; the
/// writer's error message calls it the counter-example sequence.
pub sequence: Option<serde_json::Value>,
}
/// Input data for one code_gen row and its harness runs, as consumed by
/// `RepoDatabase::write_code_gen_with_runs`.
#[derive(Debug, Clone)]
pub struct CodeGenRecord {
/// Stored in the `spec_id` column.
/// NOTE(review): presumably the id of the specification row this harness was generated for; confirm.
pub spec_id: i32,
/// Stored unchanged in the `harness_relative_path` column.
pub harness_relative_path: String,
/// Stored unchanged in the `harness_source` column.
pub harness_source: String,
/// Stored unchanged in the `status` column.
pub status: CodeGenStatus,
/// Stored unchanged in the `final_reason` column.
pub final_reason: String,
/// Stored unchanged in the `agent_steps` column.
pub agent_steps: i32,
/// Harness runs to insert as children of this row, in order. Per-run coverage
/// passed to the writer is matched to these by position.
pub runs: Vec<HarnessRunRecord>,
}
/// One line-coverage data point for a harness run, written through
/// `line_coverage_model` by `RepoDatabase::write_code_gen_with_runs`.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so that entries can
/// be compared directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CoverageEntry {
    /// Stored unchanged in the `relative_contract_path` column.
    pub relative_contract_path: String,
    /// Stored unchanged in the `line_number` column.
    pub line_number: i32,
    /// Stored unchanged in the `hit_count` column.
    pub hit_count: i64,
}
/// A code_gen row as returned by `RepoDatabase::load_code_gens`. Harness runs
/// and coverage are not included.
#[derive(Debug, Clone)]
pub struct LoadedCodeGen {
/// Identifier of the stored code_gen row.
pub id: i32,
/// Value of the row's `spec_id` column.
pub spec_id: i32,
/// Value of the row's `harness_relative_path` column.
pub harness_relative_path: String,
/// Value of the row's `harness_source` column.
pub harness_source: String,
/// Value of the row's `status` column.
pub status: CodeGenStatus,
/// Value of the row's `final_reason` column.
pub final_reason: String,
/// Value of the row's `agent_steps` column.
pub agent_steps: i32,
}
impl RepoDatabase {
pub async fn write_code_gen_with_runs(
&self,
record: &CodeGenRecord,
coverage_per_run: &[Vec<CoverageEntry>],
) -> Result<(i32, Vec<i32>)> {
ensure!(
coverage_per_run.is_empty() || coverage_per_run.len() == record.runs.len(),
"coverage_per_run must match runs.len() (got {} vs {})",
coverage_per_run.len(),
record.runs.len()
);
let txn = self
.db
.begin()
.await
.wrap_err("failed to begin code_gen+runs transaction")?;
let inserted = code_gen_model::Entity::insert(code_gen_model::ActiveModel {
spec_id: Set(record.spec_id),
harness_relative_path: Set(record.harness_relative_path.clone()),
harness_source: Set(record.harness_source.clone()),
status: Set(record.status),
final_reason: Set(record.final_reason.clone()),
agent_steps: Set(record.agent_steps),
..Default::default()
})
.exec(&txn)
.await
.wrap_err_with(|| format!("failed to insert code_gen for spec={}", record.spec_id))?;
let code_id = inserted.last_insert_id;
let mut run_ids = Vec::with_capacity(record.runs.len());
for (idx, run) in record.runs.iter().enumerate() {
let forge_args_json = serde_json::to_string(&run.forge_args)
.wrap_err("failed to JSON-serialize forge_args")?;
let sequence_json = run
.sequence
.as_ref()
.map(serde_json::to_string)
.transpose()
.wrap_err("failed to JSON-serialize counter-example sequence")?;
let run_inserted = harness_run_model::Entity::insert(harness_run_model::ActiveModel {
code_id: Set(code_id),
kind: Set(run.kind),
seed: Set(run.seed),
runs: Set(run.runs),
forge_args: Set(forge_args_json),
exit_code: Set(run.exit_code),
stdout: Set(run.stdout.clone()),
stderr: Set(run.stderr.clone()),
duration_ms: Set(run.duration_ms),
violated: Set(run.violated),
sequence_json: Set(sequence_json),
..Default::default()
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!("failed to insert harness_run #{idx} for code_gen={code_id}")
})?;
let run_id = run_inserted.last_insert_id;
run_ids.push(run_id);
if let Some(entries) = coverage_per_run.get(idx) {
for entry in entries {
line_coverage_model::Entity::insert(line_coverage_model::ActiveModel {
run_id: Set(run_id),
relative_contract_path: Set(entry.relative_contract_path.clone()),
line_number: Set(entry.line_number),
hit_count: Set(entry.hit_count),
..Default::default()
})
.exec(&txn)
.await
.wrap_err_with(|| {
format!(
"failed to insert line_coverage for run_id={run_id} path={}",
entry.relative_contract_path
)
})?;
}
}
}
txn.commit()
.await
.wrap_err("failed to commit code_gen+runs transaction")?;
Ok((code_id, run_ids))
}
/// Returns the `spec_id`s of all code_gen rows whose status reports
/// `counts_as_resumable_skip()`.
///
/// All code_gen rows are loaded and filtered in memory; a `spec_id` that
/// appears on several qualifying rows is returned once.
///
/// # Errors
/// Returns an error if the rows cannot be loaded.
pub async fn loaded_completed_code_gen_spec_ids(
    &self,
) -> Result<std::collections::HashSet<i32>> {
    let models = code_gen_model::Entity::find()
        .all(&self.db)
        .await
        .wrap_err("failed to load code_gen rows for resume index")?;

    let mut spec_ids = std::collections::HashSet::new();
    for model in &models {
        if model.status.counts_as_resumable_skip() {
            spec_ids.insert(model.spec_id);
        }
    }
    Ok(spec_ids)
}
/// Deletes every row of the line_coverage, harness_run and code_gen tables in
/// one transaction.
///
/// The tables are cleared in that order: line_coverage rows carry a `run_id`
/// and harness_run rows carry a `code_id`.
/// NOTE(review): presumably children are removed before parents to satisfy
/// foreign keys; confirm against the schema.
///
/// # Errors
/// Returns an error if the transaction cannot be started or committed, or if
/// any of the deletes fails. Every failure returns before the commit is reached.
pub async fn clear_fuzz_tables(&self) -> Result<()> {
    let tx = self
        .db
        .begin()
        .await
        .wrap_err("failed to begin clear_fuzz_tables transaction")?;

    let coverage_cleared = line_coverage_model::Entity::delete_many().exec(&tx).await;
    coverage_cleared.wrap_err("failed to clear line_coverage")?;

    let runs_cleared = harness_run_model::Entity::delete_many().exec(&tx).await;
    runs_cleared.wrap_err("failed to clear harness_run")?;

    let code_gens_cleared = code_gen_model::Entity::delete_many().exec(&tx).await;
    code_gens_cleared.wrap_err("failed to clear code_gen")?;

    tx.commit()
        .await
        .wrap_err("failed to commit clear_fuzz_tables")
}
/// Loads every code_gen row, ordered by ascending row id.
///
/// Only the code_gen columns are returned; harness runs and coverage are not
/// loaded here.
///
/// # Errors
/// Returns an error if the rows cannot be loaded.
pub async fn load_code_gens(&self) -> Result<Vec<LoadedCodeGen>> {
    let models = code_gen_model::Entity::find()
        .order_by_asc(code_gen_model::Column::Id)
        .all(&self.db)
        .await
        .wrap_err("failed to load code_gen rows")?;

    let mut loaded = Vec::with_capacity(models.len());
    for model in models {
        loaded.push(LoadedCodeGen {
            id: model.id,
            spec_id: model.spec_id,
            harness_relative_path: model.harness_relative_path,
            harness_source: model.harness_source,
            status: model.status,
            final_reason: model.final_reason,
            agent_steps: model.agent_steps,
        });
    }
    Ok(loaded)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cg::{FileLocation, FunctionCall};
use std::time::{SystemTime, UNIX_EPOCH};
/// Test fixture pairing an open database with the path of its backing file.
/// The `Drop` impl removes that file and its `-shm`/`-wal` siblings.
struct TempDb {
// Database handle the tests operate on.
repo: RepoDatabase,
// Location of the SQLite file on disk; used only for cleanup on drop.
path: PathBuf,
}
impl Drop for TempDb {
    /// Best-effort removal of the database file and its `sqlite3-shm` /
    /// `sqlite3-wal` siblings. Failures (for example a file that was never
    /// created) are deliberately ignored.
    fn drop(&mut self) {
        let siblings =
            ["sqlite3-shm", "sqlite3-wal"].map(|extension| self.path.with_extension(extension));

        let _ = std::fs::remove_file(&self.path);
        for sibling in siblings {
            let _ = std::fs::remove_file(sibling);
        }
    }
}
/// Opens a fresh SQLite-backed `RepoDatabase` in the system temp directory
/// and wraps it in a `TempDb` so the file is removed when the test ends.
///
/// The file name combines the process id, the current time in nanoseconds and
/// a process-wide sequence number. The sequence number is what guarantees
/// uniqueness: tests run on parallel threads of the same process and, on
/// platforms with a coarse clock, can read the same timestamp, which would
/// otherwise make two tests share (and delete) one database file.
///
/// The schema is not initialized here; each test calls `init_schema` itself.
///
/// # Panics
/// Panics if the system clock is before the Unix epoch or if the database
/// cannot be opened.
async fn temp_db() -> TempDb {
    // Incremented once per call; never repeats within a process.
    static NEXT_DB_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let unique = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock should be after unix epoch")
        .as_nanos();
    let seq = NEXT_DB_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    // The name keeps the `.sqlite3` extension that the `Drop` cleanup of
    // `TempDb` relies on when deriving the `-shm` / `-wal` sibling paths.
    let path = std::env::temp_dir().join(format!(
        "knowdit-repo-db-test-{}-{unique}-{seq}.sqlite3",
        std::process::id()
    ));
    let repo = RepoDatabase::open_sqlite(path.clone())
        .await
        .expect("test repo database should connect");
    TempDb { repo, path }
}
/// `ensure_project` on an empty project table must record exactly one project
/// carrying the given name.
#[tokio::test]
async fn ensure_project_records_current_project_when_empty() {
    let temp = temp_db().await;
    let repo = &temp.repo;

    repo.init_schema().await.expect("schema should initialize");
    repo.ensure_project("current-project")
        .await
        .expect("empty project database should accept current project");

    let stored = project_model::Entity::find()
        .all(repo.connection())
        .await
        .expect("project rows should load");

    assert_eq!(stored.len(), 1);
    let only = &stored[0];
    assert_eq!(only.name, "current-project");
}
/// `ensure_project` must fail with a descriptive error when the project table
/// already holds more than one project.
#[tokio::test]
async fn ensure_project_rejects_multiple_projects() {
    let temp = temp_db().await;
    let repo = &temp.repo;
    repo.init_schema().await.expect("schema should initialize");

    // Seed two projects directly, bypassing `ensure_project`.
    let seeded = ["one", "two"].map(|name| project_model::ActiveModel {
        name: Set(name.to_string()),
        status: Set("completed".to_string()),
        ..Default::default()
    });
    project_model::Entity::insert_many(seeded)
        .exec(repo.connection())
        .await
        .expect("test projects should insert");

    let outcome = repo.ensure_project("one").await;
    let message = outcome
        .expect_err("multiple projects should be rejected")
        .to_string();
    assert!(message.contains("project database contains multiple projects"));
}
/// Builds a single-line `FileLocation`: the end line equals `start_line`.
fn loc(start_line: usize, start_column: usize, end_column: usize) -> FileLocation {
    let end_line = start_line;
    FileLocation {
        start_line,
        start_column,
        end_line,
        end_column,
    }
}
/// Builds a `FileChunk` holding `content` at a single-line location produced
/// by `loc`.
fn chunk(
    content: &str,
    start_line: usize,
    start_column: usize,
    end_column: usize,
) -> FileChunk {
    let span = loc(start_line, start_column, end_column);
    FileChunk {
        loc: span,
        content: content.to_owned(),
    }
}
/// Round-trips a small call graph (one contract with two functions and one
/// call edge, plus one interface) and one project semantic through the
/// database, then checks what is read back.
#[tokio::test]
async fn writes_and_reads_call_graph_database() {
    let temp = temp_db().await;
    let repo = &temp.repo;
    repo.init_schema().await.expect("schema should initialize");

    // --- Fixture: contract `Vault` with `deposit` -> `account`. ---
    let vault_path = PathBuf::from("src/Vault.sol");
    let deposit = Function {
        id: 1,
        name: "deposit".to_string(),
        args: "uint256 amount".to_string(),
        relative_file_path: vault_path.clone(),
        loc: loc(2, 4, 40),
        content: Some("function deposit(uint256 amount) {}".to_string()),
        calls: vec![FunctionCall {
            id: 1,
            from_id: 1,
            to_id: 2,
            description: Some("updates accounting".to_string()),
        }],
        description: Some("deposit entrypoint".to_string()),
    };
    let account = Function {
        id: 2,
        name: "account".to_string(),
        args: "uint256 amount".to_string(),
        relative_file_path: vault_path.clone(),
        loc: loc(3, 4, 40),
        content: Some("function account(uint256 amount) {}".to_string()),
        calls: Vec::new(),
        description: Some("accounting helper".to_string()),
    };
    let vault = Contract {
        id: 1,
        name: "Vault".to_string(),
        relative_file_path: vault_path,
        chunk: chunk("contract Vault {}", 1, 0, 8),
        functions: vec![deposit, account],
        description: Some("Vault contract".to_string()),
    };

    // --- Fixture: interface `IERC20` with a body-less `transfer`. ---
    let token_path = PathBuf::from("src/IERC20.sol");
    let transfer = Function {
        id: 10,
        name: "transfer".to_string(),
        args: "address to, uint256 amount".to_string(),
        relative_file_path: token_path.clone(),
        loc: loc(2, 4, 70),
        content: None,
        calls: Vec::new(),
        description: Some("interface declaration".to_string()),
    };
    let erc20 = Interface {
        id: 10,
        name: "IERC20".to_string(),
        relative_file_path: token_path,
        chunk: chunk("interface IERC20 {}", 1, 0, 19),
        functions: vec![transfer],
        description: Some("IERC20 interface".to_string()),
    };

    let call_graph = CallGraph {
        contracts: BTreeMap::from([(1, vault)]),
        interfaces: BTreeMap::from([(10, erc20)]),
    };

    // --- Fixture: one project semantic pointing at `transfer`. ---
    let semantic = ExtractedSemantic {
        name: "Token Transfer".to_string(),
        category: knowdit_kg_model::category::DeFiCategory::Services,
        definition: "Moves ERC20 balances between accounts".to_string(),
        description: "Tracks project-specific token transfer semantics".to_string(),
        functions: vec![ExtractedFunction {
            name: "transfer".to_string(),
            contract: "src/IERC20.sol".to_string(),
            signature: Some("transfer(address,uint256)".to_string()),
        }],
    };

    // --- Write, then read everything back. ---
    repo.write_call_graph(&call_graph)
        .await
        .expect("callgraph should write");
    repo.replace_project_semantics(&[semantic])
        .await
        .expect("project semantics should write");

    let restored = repo
        .load_call_graph()
        .await
        .expect("callgraph should read back");
    let restored_semantics = repo
        .load_project_semantics()
        .await
        .expect("project semantics should read back");

    // --- Contract and its call edge. ---
    let contract = restored
        .contracts
        .get(&1)
        .expect("contract should be restored");
    assert_eq!(contract.relative_file_path, PathBuf::from("src/Vault.sol"));
    assert_eq!(contract.functions.len(), 2);
    let first_fn = &contract.functions[0];
    assert_eq!(first_fn.calls.len(), 1);
    let first_call = &first_fn.calls[0];
    assert_eq!(first_call.to_id, 2);
    assert_eq!(
        first_call.description.as_deref(),
        Some("updates accounting")
    );

    // --- Interface; a declaration without a body must stay `None`. ---
    let interface = restored
        .interfaces
        .get(&10)
        .expect("interface should be restored");
    assert_eq!(interface.name, "IERC20");
    assert_eq!(interface.functions.len(), 1);
    assert_eq!(interface.functions[0].content, None);

    // --- Project semantics. ---
    assert_eq!(restored_semantics.len(), 1);
    let restored_functions = &restored_semantics[0].functions;
    assert_eq!(restored_functions.len(), 1);
    assert_eq!(restored_functions[0].name, "transfer");
}
}