use std::path::Path;
use anyhow::{Context, Result};
use rusqlite::Connection;
use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
use patina::embeddings::create_embedder;
use super::super::{ScryOptions, ScryResult};
use super::enrichment::{enrich_results, SearchResults};
/// Resolves the SQLite database path and the projections directory for a search.
///
/// Returns `(db_path, embeddings_dir)`.
///
/// * When `options.repo` is set, the database path comes from
///   `crate::commands::repo::get_db_path`, and the embeddings directory is
///   derived from it by replacing `patina.db` with
///   `embeddings/e5-base-v2/projections` (the model name is fixed in this branch).
/// * Otherwise the project-local paths under `.patina/local/data` are used,
///   with the model name supplied by [`get_embedding_model`].
///
/// # Errors
///
/// Propagates any failure from `get_db_path` when a repo name is given.
pub fn get_paths(options: &ScryOptions) -> Result<(String, String)> {
    match &options.repo {
        Some(repo_name) => {
            let db_path = crate::commands::repo::get_db_path(repo_name)?;
            let projections =
                db_path.replace("patina.db", "embeddings/e5-base-v2/projections");
            Ok((db_path, projections))
        }
        None => {
            let model_name = get_embedding_model();
            let db_path = ".patina/local/data/patina.db".to_string();
            let projections = format!(
                ".patina/local/data/embeddings/{}/projections",
                model_name
            );
            Ok((db_path, projections))
        }
    }
}
pub fn get_embedding_model() -> String {
patina::project::load(std::path::Path::new("."))
.ok()
.map(|c| c.embeddings.model)
.unwrap_or_else(|| "e5-base-v2".to_string())
}
/// Runs a vector search for a free-text `query` and returns enriched results.
///
/// Steps, in order:
/// 1. Resolve paths via [`get_paths`] and pick the dimension: `options.dimension`
///    if set, otherwise [`detect_best_dimension`].
/// 2. Fail early if `<embeddings_dir>/<dimension>.usearch` does not exist.
/// 3. Embed the query; if `<embeddings_dir>/<dimension>.safetensors` exists,
///    pass the embedding through that projection, otherwise use it as is.
/// 4. Load the index, search for `options.limit` neighbours, and hand the
///    matches to `enrich_results` together with `options.min_score`.
///
/// Progress messages are printed to stdout.
///
/// # Errors
///
/// Returns an error if the index file is missing, or if embedding, projection
/// loading, index creation/loading, the search, opening the database, or
/// enrichment fails.
pub fn scry_text(query: &str, options: &ScryOptions) -> Result<Vec<ScryResult>> {
    let (db_path, embeddings_dir) = get_paths(options)?;

    // An explicitly requested dimension wins over auto-detection.
    let dimension = match options.dimension {
        Some(ref requested) => requested.as_str(),
        None => detect_best_dimension(&embeddings_dir),
    };

    let index_path = format!("{}/{}.usearch", embeddings_dir, dimension);
    let index_file = Path::new(&index_path);
    if !index_file.exists() {
        anyhow::bail!(
            "Semantic index not found: {}\n\
             Run 'patina oxidize' to build the knowledge domain index.\n\
             For keyword search, use 'patina assay search <query>' instead.",
            index_path
        );
    }

    println!("Embedding query...");
    let mut embedder = create_embedder()?;
    let raw_embedding = embedder.embed_query(query)?;

    // Apply the per-dimension projection only when one has been saved alongside the index.
    let projection_path = format!("{}/{}.safetensors", embeddings_dir, dimension);
    let projection_file = Path::new(&projection_path);
    let search_vector = if projection_file.exists() {
        use crate::commands::oxidize::trainer::Projection;
        let projection = Projection::load_safetensors(projection_file)?;
        projection.forward(&raw_embedding)
    } else {
        raw_embedding
    };

    println!("Searching {} index...", dimension);
    let index_options = IndexOptions {
        dimensions: 256,
        metric: MetricKind::Cos,
        quantization: ScalarKind::F32,
        ..Default::default()
    };
    let index = Index::new(&index_options).context("Failed to create index")?;
    index
        .load(&index_path)
        .with_context(|| format!("Failed to load index: {}", index_path))?;

    let neighbours = index
        .search(&search_vector, options.limit)
        .context("Vector search failed")?;
    let raw_results = SearchResults {
        keys: neighbours.keys,
        distances: neighbours.distances,
    };

    let conn = Connection::open(&db_path)
        .with_context(|| format!("Failed to open database: {}", db_path))?;
    Ok(enrich_results(
        &conn,
        &raw_results,
        dimension,
        options.min_score,
    )?)
}
/// Finds files whose index vectors are nearest to the vector stored for `file_path`.
///
/// The list of known files is read from the `co_changes` table (distinct
/// `file_a` and `file_b` values, sorted); a file's position in that list is
/// used as its key in the `<dimension>.usearch` index. The dimension is
/// `options.dimension`, defaulting to `"temporal"`.
///
/// `file_path` is resolved against the list by exact match first; only if
/// there is no exact match does it fall back to the first entry where one
/// path is a suffix of the other. This prevents e.g. `a/mod.rs` from
/// shadowing an exact `mod.rs` entry merely because it sorts earlier.
///
/// Results exclude the query file itself, anything scoring below
/// `options.min_score` (score = `1.0 - distance`), and keys outside the file
/// list. At most `options.limit` results are returned, best score first.
///
/// Progress messages are printed to stdout.
///
/// # Errors
///
/// Returns an error if the index file is missing, the database cannot be
/// opened or queried, `file_path` matches no known file, the index cannot be
/// loaded, the index holds no vector for the file, or the search fails.
pub fn scry_file(file_path: &str, options: &ScryOptions) -> Result<Vec<ScryResult>> {
    // Vector width used for both the index options and the lookup buffer.
    const VECTOR_DIM: usize = 256;

    let (db_path, embeddings_dir) = get_paths(options)?;
    let dimension = options.dimension.as_deref().unwrap_or("temporal");

    let index_path = format!("{}/{}.usearch", embeddings_dir, dimension);
    if !Path::new(&index_path).exists() {
        anyhow::bail!(
            "Index not found: {}. Run 'patina oxidize' first.",
            index_path
        );
    }

    let conn = Connection::open(&db_path)
        .with_context(|| format!("Failed to open database: {}", db_path))?;

    // The sorted file list defines the file -> index-key mapping.
    let files: Vec<String> = {
        let mut stmt = conn.prepare(
            "SELECT DISTINCT file_a FROM co_changes\n\
             UNION\n\
             SELECT DISTINCT file_b FROM co_changes\n\
             ORDER BY 1",
        )?;
        let mut rows = stmt.query([])?;
        let mut files = Vec::new();
        while let Some(row) = rows.next()? {
            files.push(row.get(0)?);
        }
        files
    };

    // Exact match takes priority; suffix matching is only a fallback.
    let file_index = files
        .iter()
        .position(|f| f == file_path)
        .or_else(|| {
            files
                .iter()
                .position(|f| f.ends_with(file_path) || file_path.ends_with(f.as_str()))
        })
        .ok_or_else(|| anyhow::anyhow!("File '{}' not found in {} index", file_path, dimension))?;
    println!("Found file at index {} in {} index", file_index, dimension);

    let index_options = IndexOptions {
        dimensions: VECTOR_DIM,
        metric: MetricKind::Cos,
        quantization: ScalarKind::F32,
        ..Default::default()
    };
    let index = Index::new(&index_options).with_context(|| "Failed to create index")?;
    index
        .load(&index_path)
        .with_context(|| format!("Failed to load index: {}", index_path))?;

    let mut file_vector = vec![0.0_f32; VECTOR_DIM];
    let found = index
        .get(file_index as u64, &mut file_vector)
        .with_context(|| format!("Failed to get vector for file index {}", file_index))?;
    // `get` reports how many vectors it copied out; zero means the key is not
    // in the index and the buffer is still all zeros, which must not be searched.
    if found == 0 {
        anyhow::bail!(
            "No vector stored for file '{}' (index {}) in {} index",
            file_path,
            file_index,
            dimension
        );
    }

    println!("Searching for neighbors...");
    // Ask for one extra neighbour because the query file is expected to match itself.
    let matches = index
        .search(&file_vector, options.limit.saturating_add(1))
        .with_context(|| "Vector search failed")?;

    let mut results: Vec<ScryResult> = matches
        .keys
        .iter()
        .zip(matches.distances.iter())
        .filter_map(|(&key, &distance)| {
            let key_index = usize::try_from(key).ok()?;
            if key_index == file_index {
                return None;
            }
            let score = 1.0 - distance;
            if score < options.min_score {
                return None;
            }
            // Keys beyond the file list cannot be mapped back to a path; skip them.
            let related_file = files.get(key_index)?;
            Some(ScryResult {
                id: key as i64,
                event_type: "file.cochange".to_string(),
                source_id: related_file.clone(),
                timestamp: String::new(),
                content: format!("Co-changes with: {}", file_path),
                score,
            })
        })
        // Applying the cap here also makes `limit == 0` yield no results.
        .take(options.limit)
        .collect();

    results.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    Ok(results)
}
/// Finds content whose index vectors are nearest to the vector stored for a belief.
///
/// The belief's `rowid` is read from the `beliefs` table and its vector is
/// looked up in the index under the key `BELIEF_ID_OFFSET + rowid`. The index
/// used is the one chosen by [`detect_best_dimension`]. Matches are enriched
/// via `enrich_results` (with `options.min_score`), then:
///
/// * the belief itself (`belief.surface` with the same `source_id`) and any
///   `pattern.*` result whose `source_id` contains `belief_id` are removed;
/// * if `options.content_type` is set, results are filtered to that type
///   (`code`, `commits`, `sessions`, `patterns`, `beliefs`; any other value
///   keeps everything);
/// * the list is truncated to `options.limit`.
///
/// A progress message is printed to stdout.
///
/// # Errors
///
/// Returns an error if the database cannot be opened, the belief row lookup
/// fails, the index file is missing or cannot be loaded, the index holds no
/// vector for the belief, or the search or enrichment fails.
pub fn scry_belief(belief_id: &str, options: &ScryOptions) -> Result<Vec<ScryResult>> {
    // Added to a belief's rowid to form the key used for the index lookup below.
    const BELIEF_ID_OFFSET: i64 = 4_000_000_000;
    // Vector width used for both the index options and the lookup buffer.
    const VECTOR_DIM: usize = 256;

    let (db_path, embeddings_dir) = get_paths(options)?;
    let conn = Connection::open(&db_path)
        .with_context(|| format!("Failed to open database: {}", db_path))?;

    let rowid: i64 = conn
        .query_row(
            "SELECT rowid FROM beliefs WHERE id = ?",
            [belief_id],
            |row| row.get(0),
        )
        .with_context(|| format!("Belief '{}' not found in database", belief_id))?;
    let belief_index = (BELIEF_ID_OFFSET + rowid) as u64;

    let dimension = detect_best_dimension(&embeddings_dir);
    let index_path = format!("{}/{}.usearch", embeddings_dir, dimension);
    if !Path::new(&index_path).exists() {
        anyhow::bail!(
            "Semantic index not found: {}. Run 'patina oxidize' first.",
            index_path
        );
    }

    let index_options = IndexOptions {
        dimensions: VECTOR_DIM,
        metric: MetricKind::Cos,
        quantization: ScalarKind::F32,
        ..Default::default()
    };
    let index = Index::new(&index_options).with_context(|| "Failed to create index")?;
    index
        .load(&index_path)
        .with_context(|| format!("Failed to load index: {}", index_path))?;

    let mut belief_vector = vec![0.0_f32; VECTOR_DIM];
    let found = index
        .get(belief_index, &mut belief_vector)
        .with_context(|| {
            format!(
                "Failed to get vector for belief '{}' (index {})",
                belief_id, belief_index
            )
        })?;
    // `get` reports how many vectors it copied out; zero means the belief is
    // not in the index and the buffer is still all zeros, which must not be searched.
    if found == 0 {
        anyhow::bail!(
            "No vector stored for belief '{}' (index {}) in {} index. \
             Run 'patina oxidize' to rebuild it.",
            belief_id,
            belief_index,
            dimension
        );
    }

    println!("Searching for neighbors of belief '{}'...", belief_id);

    // Over-fetch: a couple of extra hits cover the entries dropped below as
    // "self", and a type filter needs a wider pool to still fill `limit`.
    let search_limit = if options.content_type.is_some() {
        options.limit.saturating_mul(5).saturating_add(2)
    } else {
        options.limit.saturating_add(2)
    };
    let matches = index
        .search(&belief_vector, search_limit)
        .with_context(|| "Vector search failed")?;
    let results = SearchResults {
        keys: matches.keys,
        distances: matches.distances,
    };

    let mut enriched = enrich_results(&conn, &results, dimension, options.min_score)?;

    // Drop the query belief itself and pattern entries that refer to it.
    enriched.retain(|r| {
        let is_self = r.event_type == "belief.surface" && r.source_id == belief_id;
        let is_own_pattern =
            r.event_type.starts_with("pattern.") && r.source_id.contains(belief_id);
        !(is_self || is_own_pattern)
    });

    if let Some(ref type_filter) = options.content_type {
        enriched.retain(|r| match type_filter.as_str() {
            "code" => r.event_type.starts_with("code."),
            "commits" => r.event_type == "git.commit",
            "sessions" => r.event_type.starts_with("session."),
            "patterns" => r.event_type.starts_with("pattern."),
            "beliefs" => r.event_type == "belief.surface",
            // Unrecognised filter values do not restrict the results.
            _ => true,
        });
    }

    enriched.truncate(options.limit);
    Ok(enriched)
}
/// Convenience alias for [`scry_text`]: forwards `query` and `options`
/// unchanged and returns its result, including any error.
pub fn scry(query: &str, options: &ScryOptions) -> Result<Vec<ScryResult>> {
scry_text(query, options)
}
/// Picks the dimension whose index file exists in `embeddings_dir`.
///
/// Candidates are probed in preference order — `knowledge`, `semantic`,
/// `dependency`, `temporal` — by checking for `<embeddings_dir>/<name>.usearch`.
/// The first one found is returned. If none exists, `"knowledge"` is returned
/// anyway, so callers must still check that the index file is present.
pub fn detect_best_dimension(embeddings_dir: &str) -> &'static str {
    // Highest-priority dimension first.
    const PREFERENCE: [&str; 4] = ["knowledge", "semantic", "dependency", "temporal"];

    PREFERENCE
        .iter()
        .copied()
        .find(|name| Path::new(&format!("{}/{}.usearch", embeddings_dir, name)).exists())
        .unwrap_or("knowledge")
}