use crate::chunks::{
SymbolRow, build_commit_chunk, build_markdown_chunk, build_symbol_chunk,
split_markdown_sections,
};
use crate::config::EmbeddingsConfig;
use crate::embedder::{Embedder, encode_vector};
use crate::git_staleness::compute_staleness_batch;
use crate::store;
use crate::vec_ext::VecConnection;
use gix::bstr::ByteSlice as _;
use libsql::Connection;
use std::collections::HashMap;
use std::path::Path;
use std::time::Instant;
use tracing::{info, warn};
/// Counters accumulated across an embedding-population run.
///
/// Returned by every `populate_*` entry point in this module so callers can
/// report progress and decide whether failures occurred.
#[derive(Debug, Default)]
pub struct PopulateStats {
    /// Symbol chunks successfully embedded and stored.
    pub symbols_embedded: usize,
    /// Symbols skipped. NOTE(review): never incremented in this file —
    /// presumably maintained by a caller or sibling module; confirm.
    pub symbols_skipped: usize,
    /// Markdown document sections embedded (`source_type = "doc"`).
    pub docs_embedded: usize,
    /// Commit messages embedded (`source_type = "commit"`).
    pub commits_embedded: usize,
    /// `.normalize/context` sections embedded (`source_type = "context"`).
    pub contexts_embedded: usize,
    /// Failures of any kind: unreadable files, failed embedding batches
    /// (counted once per chunk in the batch), or failed storage writes.
    pub errors: usize,
}
/// Number of chunks sent to the embedding model per batch call.
const EMBED_BATCH_SIZE: usize = 64;
/// Default cap on how many recent commits `populate_commit_messages` walks.
pub const DEFAULT_MAX_COMMITS: usize = 500;
/// Embed symbol chunks for the whole repository (full rebuild) or for a set
/// of changed files (incremental), writing vectors via `store::upsert_embedding`.
///
/// When `changed_paths` is `None` this is a full rebuild: embedding tables are
/// dropped up front and a VACUUM runs at the end. When it is `Some`, only
/// symbols in those files are re-embedded, after their stale rows are deleted.
///
/// * `head_commit` — commit hash recorded alongside each embedding.
/// * `repo_root` — when present, enables per-file git staleness scoring.
/// * `db_path` — when present, used to open an optional sqlite-vec side
///   connection; `None` (or a failed open) disables vec-table writes.
///
/// # Errors
/// Fails on schema setup, model load, symbol loading, or stale-row deletion.
/// Individual embedding/storage failures are counted in `PopulateStats::errors`
/// instead of aborting the run.
pub async fn populate_embeddings(
    conn: &Connection,
    config: &EmbeddingsConfig,
    changed_paths: Option<&[String]>,
    head_commit: Option<&str>,
    repo_root: Option<&std::path::Path>,
    db_path: Option<&Path>,
) -> anyhow::Result<PopulateStats> {
    let started = Instant::now();
    // `None` means "re-embed everything".
    let is_full_rebuild = changed_paths.is_none();
    let vec_conn: Option<VecConnection> = db_path.and_then(VecConnection::open);
    if is_full_rebuild {
        store::drop_embedding_tables(conn, vec_conn.as_ref()).await?;
    }
    store::ensure_schema(conn).await?;
    eprintln!("Loading embedding model {}...", config.model);
    let mut embedder = Embedder::load(&config.model, None)?;
    info!(model = %config.model, dims = embedder.dimensions, "Embedding model loaded");
    eprintln!(
        "Loaded model {} ({} dimensions)",
        config.model, embedder.dimensions
    );
    // Vec schema depends on the model's output dimensionality.
    store::ensure_vec_schema(conn, embedder.dimensions, vec_conn.as_ref()).await;
    let mut stats = PopulateStats::default();
    let symbols = load_symbols(conn, changed_paths).await?;
    if symbols.is_empty() {
        eprintln!("No symbols to embed.");
        return Ok(stats);
    }
    let total = symbols.len();
    eprintln!("Embedding {total} symbols...");
    // Incremental mode: clear stale rows for the changed files first.
    if let Some(paths) = changed_paths {
        for path in paths {
            store::delete_embeddings_for_path(conn, path, vec_conn.as_ref()).await?;
        }
    }
    // Context maps used to enrich each symbol chunk before embedding.
    let co_change_map = load_co_change_map(conn).await?;
    eprintln!("Loading callers...");
    let all_callers = load_all_callers(conn).await;
    eprintln!("Loading callees...");
    let all_callees = load_all_callees(conn).await;
    eprintln!("Loading doc comments...");
    let all_docs = load_all_doc_comments(conn).await;
    let file_paths: Vec<&str> = symbols.iter().map(|s| s.file.as_str()).collect();
    // Per-file staleness score; defaults to an empty map (=> 0.0 per symbol)
    // when no repo root is available.
    let staleness_map: HashMap<String, f64> = if let Some(root) = repo_root {
        compute_staleness_batch(root, &file_paths)
    } else {
        HashMap::new()
    };
    // Accumulate chunks and flush every EMBED_BATCH_SIZE symbols.
    let mut batch_symbols: Vec<SymbolRow> = Vec::new();
    let mut batch_texts: Vec<String> = Vec::new();
    let mut batch_staleness: Vec<f64> = Vec::new();
    let mut done = 0usize;
    for symbol in symbols {
        // Callers are keyed by name only; callees and docs by (name, file).
        let callers = all_callers.get(&symbol.name).cloned().unwrap_or_default();
        let callees = all_callees
            .get(&(symbol.name.clone(), symbol.file.clone()))
            .cloned()
            .unwrap_or_default();
        let co_changes = co_change_map.get(&symbol.file).cloned().unwrap_or_default();
        let doc = all_docs
            .get(&(symbol.name.clone(), symbol.file.clone()))
            .cloned();
        let chunk_text =
            build_symbol_chunk(&symbol, doc.as_deref(), &callers, &callees, &co_changes);
        let staleness = *staleness_map.get(&symbol.file).unwrap_or(&0.0);
        batch_symbols.push(symbol);
        batch_texts.push(chunk_text);
        batch_staleness.push(staleness);
        if batch_texts.len() >= EMBED_BATCH_SIZE {
            flush_batch(
                conn,
                &mut embedder,
                &batch_symbols,
                &batch_texts,
                &batch_staleness,
                head_commit,
                &config.model,
                &mut stats,
                vec_conn.as_ref(),
            )
            .await;
            done += batch_symbols.len();
            eprintln!("Embedded {done}/{total} symbols");
            batch_symbols.clear();
            batch_texts.clear();
            batch_staleness.clear();
        }
    }
    // Flush the final partial batch, if any.
    if !batch_texts.is_empty() {
        flush_batch(
            conn,
            &mut embedder,
            &batch_symbols,
            &batch_texts,
            &batch_staleness,
            head_commit,
            &config.model,
            &mut stats,
            vec_conn.as_ref(),
        )
        .await;
        done += batch_symbols.len();
        eprintln!("Embedded {done}/{total} symbols");
    }
    // Full rebuilds dropped whole tables above, so reclaim the space.
    if is_full_rebuild {
        eprintln!("Running VACUUM to reclaim space...");
        store::vacuum(conn).await;
    }
    let elapsed = started.elapsed().as_secs_f64();
    eprintln!("Embedding complete. {total} symbols in {elapsed:.1}s");
    info!(
        embedded = stats.symbols_embedded,
        errors = stats.errors,
        elapsed_secs = elapsed,
        "Embedding population complete"
    );
    Ok(stats)
}
/// Re-embed only the given changed paths: their symbols, plus any changed
/// markdown files (routed to "doc" or "context" embeddings by location).
///
/// Old embeddings for each path are deleted first (best-effort; failures are
/// logged, not fatal). Markdown handling requires `repo_root` to read the
/// files; without it only symbol re-embedding happens.
///
/// # Errors
/// Fails on schema setup, model load, or symbol loading. Deletion, embedding,
/// and storage failures are logged and tallied in `PopulateStats` instead.
pub async fn populate_incremental_for_paths(
    conn: &Connection,
    config: &EmbeddingsConfig,
    changed_paths: &[String],
    head_commit: Option<&str>,
    repo_root: Option<&Path>,
    db_path: Option<&Path>,
) -> anyhow::Result<PopulateStats> {
    if changed_paths.is_empty() {
        return Ok(PopulateStats::default());
    }
    let vec_conn: Option<VecConnection> = db_path.and_then(VecConnection::open);
    store::ensure_schema(conn).await?;
    let mut embedder = Embedder::load(&config.model, None)?;
    store::ensure_vec_schema(conn, embedder.dimensions, vec_conn.as_ref()).await;
    let mut stats = PopulateStats::default();
    // Best-effort cleanup of stale rows for every changed path.
    for path in changed_paths {
        if let Err(e) = store::delete_embeddings_for_path(conn, path, vec_conn.as_ref()).await {
            warn!(path, error = %e, "Failed to delete old embeddings for changed path");
        }
    }
    let symbols = load_symbols(conn, Some(changed_paths)).await?;
    if !symbols.is_empty() {
        // Same enrichment pipeline as the full `populate_embeddings` run.
        let co_change_map = load_co_change_map(conn).await?;
        let all_callers = load_all_callers(conn).await;
        let all_callees = load_all_callees(conn).await;
        let all_docs = load_all_doc_comments(conn).await;
        let file_paths: Vec<&str> = symbols.iter().map(|s| s.file.as_str()).collect();
        let staleness_map: HashMap<String, f64> = if let Some(root) = repo_root {
            compute_staleness_batch(root, &file_paths)
        } else {
            HashMap::new()
        };
        let mut batch_symbols: Vec<SymbolRow> = Vec::new();
        let mut batch_texts: Vec<String> = Vec::new();
        let mut batch_staleness: Vec<f64> = Vec::new();
        for symbol in symbols {
            let callers = all_callers.get(&symbol.name).cloned().unwrap_or_default();
            let callees = all_callees
                .get(&(symbol.name.clone(), symbol.file.clone()))
                .cloned()
                .unwrap_or_default();
            let co_changes = co_change_map.get(&symbol.file).cloned().unwrap_or_default();
            let doc = all_docs
                .get(&(symbol.name.clone(), symbol.file.clone()))
                .cloned();
            let chunk_text =
                build_symbol_chunk(&symbol, doc.as_deref(), &callers, &callees, &co_changes);
            let staleness = *staleness_map.get(&symbol.file).unwrap_or(&0.0);
            batch_symbols.push(symbol);
            batch_texts.push(chunk_text);
            batch_staleness.push(staleness);
            if batch_texts.len() >= EMBED_BATCH_SIZE {
                flush_batch(
                    conn,
                    &mut embedder,
                    &batch_symbols,
                    &batch_texts,
                    &batch_staleness,
                    head_commit,
                    &config.model,
                    &mut stats,
                    vec_conn.as_ref(),
                )
                .await;
                batch_symbols.clear();
                batch_texts.clear();
                batch_staleness.clear();
            }
        }
        // Flush the final partial batch, if any.
        if !batch_texts.is_empty() {
            flush_batch(
                conn,
                &mut embedder,
                &batch_symbols,
                &batch_texts,
                &batch_staleness,
                head_commit,
                &config.model,
                &mut stats,
                vec_conn.as_ref(),
            )
            .await;
        }
    }
    // Re-embed changed markdown files (needs the repo root to read them).
    if let Some(root) = repo_root {
        for path in changed_paths {
            if path.ends_with(".md") {
                let abs_path = root.join(path);
                if let Ok(content) = std::fs::read_to_string(&abs_path) {
                    let sections = split_markdown_sections(&content);
                    let mut md_texts: Vec<String> = Vec::new();
                    let mut md_ids: Vec<i64> = Vec::new();
                    for (i, (breadcrumb, body)) in sections.iter().enumerate() {
                        // Skip empty sections but keep the original index as id.
                        if body.trim().is_empty() {
                            continue;
                        }
                        md_texts.push(build_markdown_chunk(path, breadcrumb, body));
                        md_ids.push(i as i64);
                    }
                    if !md_texts.is_empty() {
                        // Files under .normalize/context/ (either path
                        // separator) are stored as "context" embeddings.
                        let is_context = path.contains(".normalize/context/")
                            || path.contains(".normalize\\context\\");
                        if is_context {
                            flush_context_batch(
                                conn,
                                &mut embedder,
                                path,
                                &md_texts,
                                &md_ids,
                                head_commit,
                                &config.model,
                                &mut stats,
                                vec_conn.as_ref(),
                            )
                            .await;
                        } else {
                            flush_doc_batch(
                                conn,
                                &mut embedder,
                                path,
                                &md_texts,
                                &md_ids,
                                head_commit,
                                &config.model,
                                &mut stats,
                                vec_conn.as_ref(),
                            )
                            .await;
                        }
                    }
                }
            }
        }
    }
    info!(
        symbols = stats.symbols_embedded,
        docs = stats.docs_embedded,
        contexts = stats.contexts_embedded,
        paths = changed_paths.len(),
        "Incremental re-embedding complete"
    );
    Ok(stats)
}
/// Embed key markdown documents: `SUMMARY.md`, `CLAUDE.md`, `README.md` at the
/// repo root plus everything under `docs/`, section by section.
///
/// Each file's old embeddings are deleted first (best-effort), then its
/// sections are chunked and stored as `source_type = "doc"` rows keyed by the
/// path relative to `repo_root`.
///
/// # Errors
/// Fails on schema setup or model load; unreadable files and storage failures
/// are logged and counted in `PopulateStats::errors` instead.
pub async fn populate_markdown_docs(
    conn: &Connection,
    config: &EmbeddingsConfig,
    repo_root: &Path,
    head_commit: Option<&str>,
    db_path: Option<&Path>,
) -> anyhow::Result<PopulateStats> {
    let vec_conn: Option<VecConnection> = db_path.and_then(VecConnection::open);
    store::ensure_schema(conn).await?;
    let mut embedder = Embedder::load(&config.model, None)?;
    store::ensure_vec_schema(conn, embedder.dimensions, vec_conn.as_ref()).await;
    let mut stats = PopulateStats::default();
    // Well-known root-level docs first, then docs/ recursively.
    let mut md_files: Vec<std::path::PathBuf> = Vec::new();
    for name in &["SUMMARY.md", "CLAUDE.md", "README.md"] {
        let p = repo_root.join(name);
        if p.exists() {
            md_files.push(p);
        }
    }
    let docs_dir = repo_root.join("docs");
    if docs_dir.is_dir() {
        collect_md_files(&docs_dir, &mut md_files);
    }
    if md_files.is_empty() {
        return Ok(stats);
    }
    eprintln!("Embedding {} markdown document(s)...", md_files.len());
    for abs_path in &md_files {
        // Store paths relative to the repo root for stable keys.
        let rel_path = abs_path
            .strip_prefix(repo_root)
            .unwrap_or(abs_path)
            .to_string_lossy()
            .into_owned();
        if let Err(e) = store::delete_embeddings_for_path(conn, &rel_path, vec_conn.as_ref()).await
        {
            warn!(path = %rel_path, error = %e, "Failed to delete old doc embeddings");
        }
        let content = match std::fs::read_to_string(abs_path) {
            Ok(c) => c,
            Err(e) => {
                warn!(path = %rel_path, error = %e, "Could not read markdown file");
                stats.errors += 1;
                continue;
            }
        };
        let sections = split_markdown_sections(&content);
        let mut texts: Vec<String> = Vec::new();
        let mut section_ids: Vec<i64> = Vec::new();
        for (i, (breadcrumb, body)) in sections.iter().enumerate() {
            // Skip empty sections; ids keep the original section index.
            if body.trim().is_empty() {
                continue;
            }
            texts.push(build_markdown_chunk(&rel_path, breadcrumb, body));
            section_ids.push(i as i64);
        }
        if texts.is_empty() {
            continue;
        }
        // Fix: was the mojibake `§ion_ids` (corrupted `&section_ids`),
        // which does not compile.
        flush_doc_batch(
            conn,
            &mut embedder,
            &rel_path,
            &texts,
            &section_ids,
            head_commit,
            &config.model,
            &mut stats,
            vec_conn.as_ref(),
        )
        .await;
    }
    info!(
        docs = stats.docs_embedded,
        files = md_files.len(),
        "Markdown doc embedding complete"
    );
    Ok(stats)
}
/// Recursively gather every `.md` file under `dir` into `out`.
///
/// Entries are visited in file-name order so the result is deterministic;
/// directories that cannot be read are silently skipped.
fn collect_md_files(dir: &Path, out: &mut Vec<std::path::PathBuf>) {
    let reader = match std::fs::read_dir(dir) {
        Ok(iter) => iter,
        Err(_) => return,
    };
    let mut children: Vec<_> = reader.flatten().collect();
    children.sort_by_key(|child| child.file_name());
    for child in children {
        let child_path = child.path();
        if child_path.is_dir() {
            // Depth-first into subdirectories.
            collect_md_files(&child_path, out);
        } else if child_path.extension().is_some_and(|ext| ext == "md") {
            out.push(child_path);
        }
    }
}
/// Embed every markdown file under `.normalize/context/` as "context" blocks,
/// section by section, keyed by path relative to `repo_root`.
///
/// Mirrors `populate_markdown_docs` but stores rows with
/// `source_type = "context"` via `flush_context_batch`.
///
/// # Errors
/// Fails on schema setup or model load; unreadable files and storage failures
/// are logged and counted in `PopulateStats::errors` instead.
pub async fn populate_context_blocks(
    conn: &Connection,
    config: &EmbeddingsConfig,
    repo_root: &Path,
    head_commit: Option<&str>,
    db_path: Option<&Path>,
) -> anyhow::Result<PopulateStats> {
    let vec_conn: Option<VecConnection> = db_path.and_then(VecConnection::open);
    store::ensure_schema(conn).await?;
    let mut embedder = Embedder::load(&config.model, None)?;
    store::ensure_vec_schema(conn, embedder.dimensions, vec_conn.as_ref()).await;
    let mut stats = PopulateStats::default();
    let context_dir = repo_root.join(".normalize").join("context");
    if !context_dir.is_dir() {
        return Ok(stats);
    }
    let mut md_files: Vec<std::path::PathBuf> = Vec::new();
    collect_md_files(&context_dir, &mut md_files);
    if md_files.is_empty() {
        return Ok(stats);
    }
    eprintln!("Embedding {} context block(s)...", md_files.len());
    for abs_path in &md_files {
        // Store paths relative to the repo root for stable keys.
        let rel_path = abs_path
            .strip_prefix(repo_root)
            .unwrap_or(abs_path)
            .to_string_lossy()
            .into_owned();
        if let Err(e) = store::delete_embeddings_for_path(conn, &rel_path, vec_conn.as_ref()).await
        {
            warn!(path = %rel_path, error = %e, "Failed to delete old context embeddings");
        }
        let content = match std::fs::read_to_string(abs_path) {
            Ok(c) => c,
            Err(e) => {
                warn!(path = %rel_path, error = %e, "Could not read context file");
                stats.errors += 1;
                continue;
            }
        };
        let sections = split_markdown_sections(&content);
        let mut texts: Vec<String> = Vec::new();
        let mut section_ids: Vec<i64> = Vec::new();
        for (i, (breadcrumb, body)) in sections.iter().enumerate() {
            // Skip empty sections; ids keep the original section index.
            if body.trim().is_empty() {
                continue;
            }
            texts.push(build_markdown_chunk(&rel_path, breadcrumb, body));
            section_ids.push(i as i64);
        }
        if texts.is_empty() {
            continue;
        }
        // Fix: was the mojibake `§ion_ids` (corrupted `&section_ids`),
        // which does not compile.
        flush_context_batch(
            conn,
            &mut embedder,
            &rel_path,
            &texts,
            &section_ids,
            head_commit,
            &config.model,
            &mut stats,
            vec_conn.as_ref(),
        )
        .await;
    }
    info!(
        contexts = stats.contexts_embedded,
        files = md_files.len(),
        "Context block embedding complete"
    );
    Ok(stats)
}
/// Embed recent commit messages from the repository at `repo_root` that have
/// not yet been embedded for this model, walking at most `max_commits`.
///
/// Already-embedded commits are detected by their short hash, which is stored
/// in the `source_path` column of rows with `source_type = 'commit'`.
///
/// # Errors
/// Fails only on schema setup or model load; per-batch embedding and storage
/// failures are logged and counted in `PopulateStats::errors`.
pub async fn populate_commit_messages(
    conn: &Connection,
    config: &EmbeddingsConfig,
    repo_root: &Path,
    head_commit: Option<&str>,
    db_path: Option<&Path>,
    max_commits: usize,
) -> anyhow::Result<PopulateStats> {
    let vec_conn: Option<VecConnection> = db_path.and_then(VecConnection::open);
    store::ensure_schema(conn).await?;
    let mut embedder = Embedder::load(&config.model, None)?;
    store::ensure_vec_schema(conn, embedder.dimensions, vec_conn.as_ref()).await;
    let mut stats = PopulateStats::default();
    let embedded_hashes = load_embedded_commit_hashes(conn, &config.model).await;
    let commits = load_recent_commits(repo_root, max_commits);
    if commits.is_empty() {
        return Ok(stats);
    }
    // Keep only commits that have no stored embedding yet.
    let new_commits: Vec<CommitInfo> = commits
        .into_iter()
        .filter(|c| !embedded_hashes.contains(c.hash.as_str()))
        .collect();
    if new_commits.is_empty() {
        return Ok(stats);
    }
    eprintln!("Embedding {} new commit message(s)...", new_commits.len());
    // Build parallel chunk-text / hash vectors, then batch them together.
    let mut texts: Vec<String> = Vec::new();
    let mut hashes: Vec<String> = Vec::new();
    for commit in &new_commits {
        let chunk = build_commit_chunk(&commit.hash, &commit.date, &commit.subject, &commit.body);
        texts.push(chunk);
        hashes.push(commit.hash.clone());
    }
    for (chunk_texts, chunk_hashes) in texts
        .chunks(EMBED_BATCH_SIZE)
        .zip(hashes.chunks(EMBED_BATCH_SIZE))
    {
        let text_refs: Vec<&str> = chunk_texts.iter().map(String::as_str).collect();
        match embedder.embed_batch(&text_refs) {
            Ok(vectors) => {
                // One transaction per batch; a failed BEGIN is logged and the
                // writes then proceed in autocommit mode.
                if let Err(e) = conn.execute("BEGIN", ()).await {
                    warn!(error = %e, "Failed to BEGIN transaction for commit batch");
                }
                for (i, (text, vec)) in chunk_texts.iter().zip(vectors.iter()).enumerate() {
                    let hash = &chunk_hashes[i];
                    let blob = encode_vector(vec);
                    match store::upsert_embedding(
                        conn,
                        "commit",
                        hash,
                        None,
                        &config.model,
                        head_commit,
                        0.0,
                        text,
                        &blob,
                        vec_conn.as_ref(),
                    )
                    .await
                    {
                        Ok(()) => stats.commits_embedded += 1,
                        Err(e) => {
                            warn!(hash, error = %e, "Failed to store commit embedding");
                            stats.errors += 1;
                        }
                    }
                }
                if let Err(e) = conn.execute("COMMIT", ()).await {
                    warn!(error = %e, "Failed to COMMIT transaction for commit batch");
                }
            }
            Err(e) => {
                // Whole-batch failure: count every chunk in it as an error.
                warn!(error = %e, "Commit embedding batch failed");
                stats.errors += chunk_texts.len();
            }
        }
    }
    info!(
        commits = stats.commits_embedded,
        "Commit message embedding complete"
    );
    Ok(stats)
}
/// Minimal commit metadata used to build commit-message chunks.
struct CommitInfo {
    // Abbreviated (up to 12 hex chars) commit hash.
    hash: String,
    // Commit date formatted as `YYYY-MM-DD` (UTC).
    date: String,
    // First line of the commit message, trimmed.
    subject: String,
    // Remainder of the message after the subject, trimmed.
    body: String,
}
/// Walk the git history reachable from HEAD at `root`, newest first, and
/// return up to `max_commits` commits as `CommitInfo` records.
///
/// Returns an empty list when `root` is not inside a repository or the walk
/// cannot start; individual undecodable or subject-less commits are skipped.
fn load_recent_commits(root: &Path, max_commits: usize) -> Vec<CommitInfo> {
    let repo = match gix::discover(root) {
        Ok(r) => r.into_sync().to_thread_local(),
        Err(_) => return Vec::new(),
    };
    let head_id = match repo.head_id() {
        Ok(id) => id,
        Err(_) => return Vec::new(),
    };
    let walk = match head_id
        .ancestors()
        .sorting(gix::revision::walk::Sorting::ByCommitTime(
            gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
        ))
        .all()
    {
        Ok(w) => w,
        Err(_) => return Vec::new(),
    };
    let mut commits = Vec::new();
    for info in walk.take(max_commits) {
        let Ok(info) = info else { continue };
        let Ok(commit_obj) = info.object() else {
            continue;
        };
        let Ok(commit) = commit_obj.decode() else {
            continue;
        };
        let hash = info.id().to_string();
        // Abbreviate to 12 hex chars; the guard only matters for
        // unexpectedly short ids.
        let short_hash = if hash.len() >= 12 {
            hash[..12].to_string()
        } else {
            hash.clone()
        };
        let timestamp = commit.time().map(|t| t.seconds).unwrap_or(0);
        let date = epoch_to_date(timestamp);
        let full_message = commit.message.to_str_lossy().into_owned();
        let msg_ref = commit.message();
        let subject = msg_ref.summary().to_str_lossy().trim().to_string();
        if subject.is_empty() {
            continue;
        }
        // Body = everything after the subject line. Fix: use strip_prefix
        // (removes the subject at most once) instead of trim_start_matches,
        // which repeatedly stripped a body that happened to begin with the
        // subject text again. trim_start() first because the subject was
        // trimmed above while the raw message may carry leading whitespace.
        let body = full_message
            .trim_start()
            .strip_prefix(subject.as_str())
            .unwrap_or(&full_message)
            .trim()
            .to_string();
        commits.push(CommitInfo {
            hash: short_hash,
            date,
            subject,
            body,
        });
    }
    commits
}
/// Format a Unix timestamp (seconds) as a `YYYY-MM-DD` date string (UTC).
///
/// Negative timestamps are clamped to the epoch, yielding "1970-01-01".
/// Implements Howard Hinnant's civil-from-days algorithm, shifted so that
/// each 400-year era begins on March 1st.
fn epoch_to_date(secs: i64) -> String {
    // Whole days since 1970-01-01, clamped at the epoch.
    let days = secs.max(0) / 86_400;
    // Shift so that day 0 is 0000-03-01 (start of an era).
    let shifted = days + 719_468;
    let era = if shifted >= 0 { shifted } else { shifted - 146_096 } / 146_097;
    let day_of_era = shifted - era * 146_097;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month counted from March (0 = March ... 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;
    let month = if march_month < 10 {
        march_month + 3
    } else {
        march_month - 9
    };
    // January and February belong to the following civil year.
    let mut year = year_of_era + era * 400;
    if month <= 2 {
        year += 1;
    }
    format!("{year:04}-{month:02}-{day:02}")
}
/// Return the set of commit hashes that already have an embedding stored for
/// `model`, so `populate_commit_messages` can skip them.
///
/// Query or row errors yield an empty (or partial) set rather than an error.
async fn load_embedded_commit_hashes(
    conn: &Connection,
    model: &str,
) -> std::collections::HashSet<String> {
    let mut hashes = std::collections::HashSet::new();
    let query = conn
        .query(
            "SELECT source_path FROM embeddings WHERE source_type = 'commit' AND model = ?1",
            [model],
        )
        .await;
    let mut rows = match query {
        Ok(rows) => rows,
        Err(_) => return hashes,
    };
    while let Ok(Some(row)) = rows.next().await {
        if let Ok(hash) = row.get::<String>(0) {
            hashes.insert(hash);
        }
    }
    hashes
}
/// Embed one markdown file's section texts and store them as `doc` rows.
///
/// All sections are embedded in a single batch call, then written inside one
/// BEGIN/COMMIT transaction. Failures are tallied in `stats` and logged, never
/// propagated.
#[allow(clippy::too_many_arguments)]
async fn flush_doc_batch(
    conn: &Connection,
    embedder: &mut Embedder,
    rel_path: &str,
    texts: &[String],
    section_ids: &[i64],
    head_commit: Option<&str>,
    model_name: &str,
    stats: &mut PopulateStats,
    vec_conn: Option<&VecConnection>,
) {
    let text_refs: Vec<&str> = texts.iter().map(String::as_str).collect();
    match embedder.embed_batch(&text_refs) {
        Ok(vectors) => {
            // A failed BEGIN is logged; the writes then run in autocommit.
            if let Err(e) = conn.execute("BEGIN", ()).await {
                warn!(error = %e, "Failed to BEGIN transaction for doc batch");
            }
            for (i, (text, vec)) in texts.iter().zip(vectors.iter()).enumerate() {
                let blob = encode_vector(vec);
                // Fall back to the positional index if section_ids is short.
                let sid = section_ids.get(i).copied().unwrap_or(i as i64);
                match store::upsert_embedding(
                    conn,
                    "doc",
                    rel_path,
                    Some(sid),
                    model_name,
                    head_commit,
                    0.0,
                    text,
                    &blob,
                    vec_conn,
                )
                .await
                {
                    Ok(()) => stats.docs_embedded += 1,
                    Err(e) => {
                        warn!(path = %rel_path, section = i, error = %e, "Failed to store doc embedding");
                        stats.errors += 1;
                    }
                }
            }
            if let Err(e) = conn.execute("COMMIT", ()).await {
                warn!(error = %e, "Failed to COMMIT transaction for doc batch");
            }
        }
        Err(e) => {
            // Whole-batch failure: count every section as an error.
            warn!(error = %e, path = %rel_path, "Doc embedding batch failed");
            stats.errors += texts.len();
        }
    }
}
/// Embed one context file's section texts and store them as `context` rows.
///
/// Identical flow to `flush_doc_batch` but writes `source_type = "context"`
/// and counts into `stats.contexts_embedded`. Failures are tallied in `stats`
/// and logged, never propagated.
#[allow(clippy::too_many_arguments)]
async fn flush_context_batch(
    conn: &Connection,
    embedder: &mut Embedder,
    rel_path: &str,
    texts: &[String],
    section_ids: &[i64],
    head_commit: Option<&str>,
    model_name: &str,
    stats: &mut PopulateStats,
    vec_conn: Option<&VecConnection>,
) {
    let text_refs: Vec<&str> = texts.iter().map(String::as_str).collect();
    match embedder.embed_batch(&text_refs) {
        Ok(vectors) => {
            // A failed BEGIN is logged; the writes then run in autocommit.
            if let Err(e) = conn.execute("BEGIN", ()).await {
                warn!(error = %e, "Failed to BEGIN transaction for context batch");
            }
            for (i, (text, vec)) in texts.iter().zip(vectors.iter()).enumerate() {
                let blob = encode_vector(vec);
                // Fall back to the positional index if section_ids is short.
                let sid = section_ids.get(i).copied().unwrap_or(i as i64);
                match store::upsert_embedding(
                    conn,
                    "context",
                    rel_path,
                    Some(sid),
                    model_name,
                    head_commit,
                    0.0,
                    text,
                    &blob,
                    vec_conn,
                )
                .await
                {
                    Ok(()) => stats.contexts_embedded += 1,
                    Err(e) => {
                        warn!(path = %rel_path, section = i, error = %e, "Failed to store context embedding");
                        stats.errors += 1;
                    }
                }
            }
            if let Err(e) = conn.execute("COMMIT", ()).await {
                warn!(error = %e, "Failed to COMMIT transaction for context batch");
            }
        }
        Err(e) => {
            // Whole-batch failure: count every section as an error.
            warn!(error = %e, path = %rel_path, "Context embedding batch failed");
            stats.errors += texts.len();
        }
    }
}
/// Embed a batch of symbol chunk texts and store them as `symbol` rows.
///
/// `symbols`, `texts`, and `staleness` are parallel slices built by the
/// callers. Writes happen inside one BEGIN/COMMIT transaction. Failures are
/// tallied in `stats` and logged, never propagated.
#[allow(clippy::too_many_arguments)]
async fn flush_batch(
    conn: &Connection,
    embedder: &mut Embedder,
    symbols: &[SymbolRow],
    texts: &[String],
    staleness: &[f64],
    head_commit: Option<&str>,
    model_name: &str,
    stats: &mut PopulateStats,
    vec_conn: Option<&VecConnection>,
) {
    let text_refs: Vec<&str> = texts.iter().map(String::as_str).collect();
    match embedder.embed_batch(&text_refs) {
        Ok(vectors) => {
            // A failed BEGIN is logged; the writes then run in autocommit.
            if let Err(e) = conn.execute("BEGIN", ()).await {
                warn!(error = %e, "Failed to BEGIN transaction for batch");
            }
            for (idx, (sym, (text, vec))) in symbols
                .iter()
                .zip(texts.iter().zip(vectors.iter()))
                .enumerate()
            {
                let blob = encode_vector(vec);
                // Staleness defaults to 0.0 if the parallel slice is short.
                let sym_staleness = staleness.get(idx).copied().unwrap_or(0.0) as f32;
                match store::upsert_embedding(
                    conn,
                    "symbol",
                    &sym.file,
                    Some(sym.rowid),
                    model_name,
                    head_commit,
                    sym_staleness,
                    text,
                    &blob,
                    vec_conn,
                )
                .await
                {
                    Ok(()) => stats.symbols_embedded += 1,
                    Err(e) => {
                        warn!(symbol = %sym.name, file = %sym.file, error = %e, "Failed to store embedding");
                        stats.errors += 1;
                    }
                }
            }
            if let Err(e) = conn.execute("COMMIT", ()).await {
                warn!(error = %e, "Failed to COMMIT transaction for batch");
            }
        }
        Err(e) => {
            // Whole-batch failure: count every symbol as an error.
            warn!(error = %e, batch_size = symbols.len(), "Embedding batch failed");
            stats.errors += symbols.len();
        }
    }
}
/// Load symbol rows from the `symbols` table, optionally restricted to the
/// given set of file paths (used for incremental re-embedding).
///
/// # Errors
/// Propagates query and row-decoding errors from the database.
async fn load_symbols(
    conn: &Connection,
    changed_paths: Option<&[String]>,
) -> anyhow::Result<Vec<SymbolRow>> {
    let mut rows = conn
        .query(
            "SELECT rowid, file, name, kind, start_line, end_line, parent FROM symbols",
            (),
        )
        .await?;
    // When filtering, membership tests go through a HashSet of borrowed paths.
    let wanted: Option<std::collections::HashSet<&str>> =
        changed_paths.map(|paths| paths.iter().map(String::as_str).collect());
    let mut out = Vec::new();
    while let Some(row) = rows.next().await? {
        let file: String = row.get(1)?;
        if let Some(set) = wanted.as_ref() {
            if !set.contains(file.as_str()) {
                continue;
            }
        }
        out.push(SymbolRow {
            rowid: row.get(0)?,
            file,
            name: row.get(2)?,
            kind: row.get(3)?,
            start_line: row.get(4)?,
            end_line: row.get(5)?,
            parent: row.get(6)?,
        });
    }
    Ok(out)
}
/// Map each callee name to up to 10 of its most frequent caller symbols,
/// derived from the `calls` table.
///
/// Query or row errors produce an empty (or partial) map instead of failing.
async fn load_all_callers(conn: &Connection) -> HashMap<String, Vec<String>> {
    let mut callers_by_callee: HashMap<String, Vec<String>> = HashMap::new();
    let query = conn
        .query(
            "SELECT callee_name, caller_symbol, COUNT(*) as cnt \
             FROM calls \
             GROUP BY callee_name, caller_symbol \
             ORDER BY callee_name, cnt DESC",
            (),
        )
        .await;
    let Ok(mut rows) = query else {
        return callers_by_callee;
    };
    while let Ok(Some(row)) = rows.next().await {
        let callee = match row.get::<String>(0) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let caller = match row.get::<String>(1) {
            Ok(v) => v,
            Err(_) => continue,
        };
        // Rows arrive ordered by call count, so the first 10 are the top 10.
        let bucket = callers_by_callee.entry(callee).or_default();
        if bucket.len() < 10 {
            bucket.push(caller);
        }
    }
    callers_by_callee
}
/// Map each (caller symbol, caller file) pair to up to 10 names it calls,
/// derived from the `calls` table.
///
/// Query or row errors produce an empty (or partial) map instead of failing.
async fn load_all_callees(conn: &Connection) -> HashMap<(String, String), Vec<String>> {
    let mut callees_by_caller: HashMap<(String, String), Vec<String>> = HashMap::new();
    let query = conn
        .query(
            "SELECT caller_symbol, caller_file, callee_name FROM calls ORDER BY caller_symbol, caller_file",
            (),
        )
        .await;
    let Ok(mut rows) = query else {
        return callees_by_caller;
    };
    while let Ok(Some(row)) = rows.next().await {
        let caller_sym = match row.get::<String>(0) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let caller_file = match row.get::<String>(1) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let callee = match row.get::<String>(2) {
            Ok(v) => v,
            Err(_) => continue,
        };
        // Cap each caller's list at the first 10 callees seen.
        let bucket = callees_by_caller.entry((caller_sym, caller_file)).or_default();
        if bucket.len() < 10 {
            bucket.push(callee);
        }
    }
    callees_by_caller
}
/// Collect `doc:`-prefixed attributes from `symbol_attributes` into a map
/// keyed by (symbol name, file); multiple doc attributes for one symbol are
/// joined with newlines.
///
/// Query or row errors produce an empty (or partial) map instead of failing.
async fn load_all_doc_comments(conn: &Connection) -> HashMap<(String, String), String> {
    let mut docs: HashMap<(String, String), String> = HashMap::new();
    let query = conn
        .query(
            "SELECT name, file, attribute FROM symbol_attributes WHERE attribute LIKE 'doc:%' ORDER BY name, file",
            (),
        )
        .await;
    let Ok(mut rows) = query else {
        return docs;
    };
    while let Ok(Some(row)) = rows.next().await {
        let name = match row.get::<String>(0) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let file = match row.get::<String>(1) {
            Ok(v) => v,
            Err(_) => continue,
        };
        let attr = match row.get::<String>(2) {
            Ok(v) => v,
            Err(_) => continue,
        };
        if let Some(text) = attr.strip_prefix("doc:") {
            let entry = docs.entry((name, file)).or_default();
            // Separate successive doc attributes with a newline.
            if !entry.is_empty() {
                entry.push('\n');
            }
            entry.push_str(text);
        }
    }
    docs
}
/// Build a bidirectional map of co-changed files from `co_change_edges`,
/// with each file's partners ordered most-frequent first.
///
/// A failed query (e.g. the table does not exist yet) yields an empty map;
/// row-decoding errors propagate.
async fn load_co_change_map(
    conn: &Connection,
) -> anyhow::Result<std::collections::HashMap<String, Vec<String>>> {
    let mut co_changes: std::collections::HashMap<String, Vec<String>> =
        std::collections::HashMap::new();
    let Ok(mut rows) = conn
        .query(
            "SELECT file_a, file_b FROM co_change_edges ORDER BY count DESC",
            (),
        )
        .await
    else {
        // Treat a missing/unreadable table as "no co-change data".
        return Ok(co_changes);
    };
    while let Some(row) = rows.next().await? {
        let file_a: String = row.get(0)?;
        let file_b: String = row.get(1)?;
        // Record the edge in both directions.
        co_changes
            .entry(file_a.clone())
            .or_default()
            .push(file_b.clone());
        co_changes.entry(file_b).or_default().push(file_a);
    }
    Ok(co_changes)
}