pub mod beliefs;
pub mod commits;
pub mod dependency;
pub mod pairs;
pub mod recipe;
pub mod temporal;
pub mod trainer;
use anyhow::{Context, Result};
use beliefs::generate_belief_pairs;
use commits::generate_commit_pairs;
use dependency::generate_dependency_pairs;
use pairs::TrainingPair;
use recipe::{OxidizeRecipe, ProjectionConfig};
use temporal::generate_temporal_pairs;
use trainer::Projection;
/// Builds every domain listed in the oxidize recipe.
///
/// Loads the recipe, prints a summary of its projections, then processes the
/// projections in name order. Domains named `knowledge`, `semantic`, or
/// `sessions` are indexed straight from raw embeddings (an existing
/// `.safetensors` file for such a domain is deleted first). Every other domain
/// is trained, saved as `.safetensors`, and indexed through the trained
/// projection.
///
/// # Errors
///
/// Propagates any failure from recipe loading, filesystem operations,
/// embedder creation, training, weight saving, or index building.
pub fn oxidize() -> Result<()> {
    // Domains that are indexed from raw embeddings rather than a trained projection.
    fn uses_raw_embeddings(domain: &str) -> bool {
        matches!(domain, "knowledge" | "semantic" | "sessions")
    }

    println!("๐งช Oxidize - Build embeddings and projections");

    let recipe = OxidizeRecipe::load()?;
    let model_name = recipe.get_model_name()?;
    println!("โ
Recipe loaded: {}", model_name);
    println!(" Projections: {}", recipe.projections.len());

    // Summary pass: one line per projection, in the map's own iteration order.
    for (name, config) in &recipe.projections {
        let input_dim = config.input_dim(&recipe)?;
        if uses_raw_embeddings(name.as_str()) {
            println!(" - {}: {}d raw E5 (no projection)", name, input_dim);
        } else {
            println!(
                " - {}: {}โ{}โ{} ({} epochs)",
                name,
                input_dim,
                config.hidden_dim(),
                config.output_dim(),
                config.epochs
            );
        }
    }

    let db_path = ".patina/local/data/patina.db";
    let output_dir = format!(".patina/local/data/embeddings/{}/projections", model_name);
    std::fs::create_dir_all(&output_dir)?;

    let mut embedder = patina::embeddings::create_embedder()?;

    // Build pass: sorted by name so the processing order is stable.
    let mut ordered: Vec<_> = recipe.projections.iter().collect();
    ordered.sort_by(|lhs, rhs| lhs.0.cmp(rhs.0));

    let divider = "=".repeat(60);
    for (name, config) in ordered {
        println!("\n{}", divider);
        let weights_path = format!("{}/{}.safetensors", output_dir, name);

        if uses_raw_embeddings(name.as_str()) {
            println!("๐ฎ Building {} index (raw E5, no projection)...", name);
            println!("{}", divider);

            // A weights file for a raw domain is a leftover; remove it.
            if std::path::Path::new(&weights_path).exists() {
                std::fs::remove_file(&weights_path)?;
                println!(" ๐๏ธ Deleted stale projection: {}", weights_path);
            }

            let raw_dim = config.input_dim(&recipe)?;
            println!("\n๐ Building USearch index ({}d raw E5)...", raw_dim);
            build_projection_index(name, db_path, &mut embedder, None, raw_dim, &output_dir)?;
        } else {
            println!("๐ Training {} projection...", name);
            println!("{}", divider);
            let trained = train_projection(name, config, &recipe, db_path, &mut embedder)?;

            println!("\n๐พ Saving projection weights...");
            trained.save_safetensors(std::path::Path::new(&weights_path))?;
            println!(" Saved to: {}", weights_path);

            println!("\n๐ Building USearch index...");
            build_projection_index(
                name,
                db_path,
                &mut embedder,
                Some(&trained),
                config.output_dim(),
                &output_dir,
            )?;
        }

        println!("\nโ
{} complete!", name);
    }

    println!("\n{}", divider);
    println!("โ
All domains built!");
    println!(" Output: {}", output_dir);
    Ok(())
}
pub fn oxidize_for_repo(repo_name: &str) -> Result<()> {
use std::os::unix::fs::symlink;
let repo_path = crate::commands::repo::get_path(repo_name)?;
println!("๐งช Oxidize - Building embeddings for {}\n", repo_name);
println!(" Path: {}", repo_path.display());
let original_dir = std::env::current_dir()?;
let resources_path = original_dir.join("resources");
std::env::set_current_dir(&repo_path)?;
let config_path = repo_path.join(".patina/config.toml");
if config_path.exists() {
let config_content = std::fs::read_to_string(&config_path)?;
if !config_content.contains("[embeddings]") {
println!(" Adding embeddings config...");
let updated = format!("{}\n[embeddings]\nmodel = \"e5-base-v2\"\n", config_content);
std::fs::write(&config_path, updated)?;
}
}
let recipe_path = repo_path.join(".patina/oxidize.yaml");
if !recipe_path.exists() {
println!(" Creating oxidize.yaml recipe...\n");
let recipe_content = r#"# Oxidize Recipe for reference repo
version: 1
embedding_model: e5-base-v2
projections:
dependency:
layers: [768, 1024, 256]
epochs: 10
batch_size: 32
temporal:
layers: [768, 1024, 256]
epochs: 10
batch_size: 32
knowledge:
layers: [768, 1024, 256]
epochs: 10
batch_size: 32
"#;
std::fs::write(&recipe_path, recipe_content)?;
}
let repo_resources = repo_path.join("resources");
if !repo_resources.exists() && resources_path.exists() {
println!(" Linking model resources...\n");
symlink(&resources_path, &repo_resources).context("Failed to create resources symlink")?;
}
let result = oxidize();
if repo_resources.is_symlink() {
let _ = std::fs::remove_file(&repo_resources);
}
std::env::set_current_dir(original_dir)?;
result
}
/// Trains the projection for one domain and returns it.
///
/// Training pairs are chosen by `name`:
/// - `knowledge` / `semantic`: commit pairs plus belief pairs (belief pairs
///   are best-effort; a failure is printed and skipped), sorted by anchor.
/// - `sessions`: commit pairs.
/// - `temporal`: temporal pairs.
/// - `dependency`: dependency pairs.
///
/// Each pair's anchor, positive, and negative texts are embedded with
/// `embedder`, and a [`Projection`] sized from `config` is trained on the
/// resulting triplets for `config.epochs` epochs at a learning rate of 0.001.
///
/// # Errors
///
/// Fails for an unknown `name`, or when pair generation (other than belief
/// pairs), embedding, dimension lookup, or training fails.
fn train_projection(
    name: &str,
    config: &ProjectionConfig,
    recipe: &OxidizeRecipe,
    db_path: &str,
    embedder: &mut Box<dyn patina::embeddings::EmbeddingEngine>,
) -> Result<Projection> {
    let pairs: Vec<TrainingPair> = match name {
        "knowledge" | "semantic" => {
            println!(" Strategy: commit pairs + belief-pattern co-references");
            let mut combined = generate_commit_pairs(db_path)?;
            match generate_belief_pairs(db_path) {
                Err(err) => {
                    println!(" โ ๏ธ Belief pairs skipped: {}", err);
                }
                Ok(extra) => {
                    println!(" Adding {} belief co-reference pairs", extra.len());
                    combined.extend(extra);
                }
            }
            combined.sort_by(|lhs, rhs| lhs.anchor.cmp(&rhs.anchor));
            combined
        }
        "sessions" => {
            println!(" Strategy: commit-based pairs (shared training signal)");
            generate_commit_pairs(db_path)?
        }
        "temporal" => {
            println!(" Strategy: files that co-change are related");
            generate_temporal_pairs(db_path)?
        }
        "dependency" => {
            println!(" Strategy: functions that call each other are related");
            generate_dependency_pairs(db_path)?
        }
        _ => {
            anyhow::bail!(
                "Unknown projection type: {}. Supported: knowledge, sessions, semantic, temporal, dependency",
                name
            );
        }
    };
    println!(" Generated {} training pairs", pairs.len());

    println!("\n๐ฎ Generating embeddings...");
    let triplet_count = pairs.len();
    let mut anchors = Vec::with_capacity(triplet_count);
    let mut positives = Vec::with_capacity(triplet_count);
    let mut negatives = Vec::with_capacity(triplet_count);
    // Embed in anchor, positive, negative order for each pair.
    for triplet in pairs.iter() {
        let anchor_vec = embedder.embed_passage(&triplet.anchor)?;
        anchors.push(anchor_vec);
        let positive_vec = embedder.embed_passage(&triplet.positive)?;
        positives.push(positive_vec);
        let negative_vec = embedder.embed_passage(&triplet.negative)?;
        negatives.push(negative_vec);
    }
    println!(" Embedded {} triplets", anchors.len());

    let input_dim = config.input_dim(recipe)?;
    let hidden_dim = config.hidden_dim();
    let output_dim = config.output_dim();
    println!(
        "\n๐ง Training MLP: {}โ{}โ{}...",
        input_dim, hidden_dim, output_dim
    );

    let mut projection = Projection::new(input_dim, hidden_dim, output_dim);
    let learning_rate = 0.001;
    // The per-epoch losses returned by `train` are not used here.
    let _epoch_losses = projection.train(
        &anchors,
        &positives,
        &negatives,
        config.epochs,
        learning_rate,
    )?;
    println!(" Training complete!");

    Ok(projection)
}
/// Builds and saves the USearch index for one domain.
///
/// Opens the SQLite database at `db_path`, loads the `(id, text)` corpus that
/// belongs to `projection_name`, embeds every text, optionally passes the
/// embedding through `projection`, and adds the vector to a cosine-metric,
/// f32 index with `index_dim` dimensions. The index is written to
/// `{output_dir}/{projection_name}.usearch`.
///
/// Returns `Ok(())` without writing anything when the domain has no corpus
/// query or the corpus is empty.
///
/// # Errors
///
/// Fails when the database cannot be opened, a corpus query fails, an
/// embedding cannot be generated, or the index cannot be created, reserved,
/// filled, or saved.
fn build_projection_index(
    projection_name: &str,
    db_path: &str,
    embedder: &mut Box<dyn patina::embeddings::EmbeddingEngine>,
    projection: Option<&Projection>,
    index_dim: usize,
    output_dir: &str,
) -> Result<()> {
    use rusqlite::Connection;
    use usearch::{Index, IndexOptions, MetricKind, ScalarKind};

    let conn = Connection::open(db_path)
        .with_context(|| format!("Failed to open database: {}", db_path))?;

    // Pick the corpus for this domain; unknown domains are skipped, not failed.
    let corpus: Vec<(i64, String)> = match projection_name {
        "knowledge" | "semantic" => query_knowledge_corpus(&conn)?,
        "sessions" => query_session_corpus(&conn)?,
        "temporal" => query_file_events(&conn)?,
        "dependency" => dependency::query_function_events(&conn)?,
        _ => {
            println!(" โ ๏ธ No index builder for {} - skipping", projection_name);
            return Ok(());
        }
    };

    let item_count = corpus.len();
    println!(" Found {} items to index", item_count);
    if item_count == 0 {
        println!(" โ ๏ธ No items found - skipping index build");
        return Ok(());
    }

    let options = IndexOptions {
        dimensions: index_dim,
        metric: MetricKind::Cos,
        quantization: ScalarKind::F32,
        ..Default::default()
    };
    let index = Index::new(&options).context("Failed to create USearch index")?;
    index
        .reserve(item_count)
        .context("Failed to reserve index capacity")?;

    let mode = match projection {
        Some(_) => "projecting",
        None => "raw",
    };
    println!(" Embedding vectors ({} mode)...", mode);

    for (item_id, text) in corpus.iter() {
        let embedding = embedder
            .embed_passage(text)
            .context("Failed to generate embedding")?;
        // With a projection the stored vector is its output; otherwise the raw embedding.
        let vector = if let Some(proj) = projection {
            proj.forward(&embedding)
        } else {
            embedding
        };
        index
            .add(*item_id as u64, &vector)
            .context("Failed to add vector to index")?;
    }

    let index_path = format!("{}/{}.usearch", output_dir, projection_name);
    index
        .save(&index_path)
        .context("Failed to save USearch index")?;

    println!(" โ
Index built: {} vectors", item_count);
    println!(" Saved to: {}", index_path);
    Ok(())
}
/// Collects the knowledge corpus as `(id, description)` pairs.
///
/// Three sources are concatenated, each with its own id offset added to the
/// row's `rowid`:
/// - patterns (offset 2,000,000,000), only when a `patterns` table exists;
/// - commits (offset 3,000,000,000), filtered by the SQL below on message
///   length, keywords, and (when `commit_files` exists) files touched;
/// - active beliefs (offset 4,000,000,000), only when a `beliefs` table
///   exists, enriched with `belief_fts` content when that table exists.
///
/// Pattern content is cut to `MAX_CONTENT_CHARS` characters. For beliefs the
/// whole description is kept within `MAX_CONTENT_CHARS` characters by
/// trimming the appended body. A one-line summary of the counts is printed.
///
/// # Errors
///
/// Returns any error from preparing or stepping the queries or from reading a
/// column. A failed table-existence probe is treated as "table absent".
pub(crate) fn query_knowledge_corpus(conn: &rusqlite::Connection) -> Result<Vec<(i64, String)>> {
    /// Reports whether `table` is listed as a table in `sqlite_master`.
    /// Probe errors are deliberately mapped to `false` (best-effort check).
    fn table_exists(conn: &rusqlite::Connection, table: &str) -> bool {
        conn.query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
            [table],
            |row| row.get::<_, i64>(0),
        )
        .map(|count| count > 0)
        .unwrap_or(false)
    }

    const PATTERN_ID_OFFSET: i64 = 2_000_000_000;
    const COMMIT_ID_OFFSET: i64 = 3_000_000_000;
    const BELIEF_ID_OFFSET: i64 = 4_000_000_000;
    const MAX_CONTENT_CHARS: usize = 1500;

    let mut events = Vec::new();

    // --- Patterns -----------------------------------------------------------
    if table_exists(conn, "patterns") {
        let mut stmt = conn.prepare(
            "SELECT p.rowid, p.id, p.title, p.purpose, f.content, p.tags, p.file_path
FROM patterns p
LEFT JOIN pattern_fts f ON p.id = f.id",
        )?;
        let mut rows = stmt.query([])?;
        while let Some(row) = rows.next()? {
            let rowid: i64 = row.get(0)?;
            let id: String = row.get(1)?;
            let title: String = row.get(2)?;
            let purpose: Option<String> = row.get(3)?;
            let content: Option<String> = row.get(4)?;
            let tags: Option<String> = row.get(5)?;
            let file_path: String = row.get(6)?;

            let mut desc = format!("Pattern: {} - {}", title, id);
            if let Some(p) = purpose {
                desc.push_str(&format!(". Purpose: {}", p));
            }
            if let Some(t) = tags {
                if !t.is_empty() {
                    desc.push_str(&format!(". Tags: {}", t));
                }
            }
            if let Some(c) = content {
                let content_preview: String = c.chars().take(MAX_CONTENT_CHARS).collect();
                desc.push_str(&format!(". Content: {}", content_preview));
            }
            desc.push_str(&format!(". File: {}", file_path));
            events.push((PATTERN_ID_OFFSET + rowid, desc));
        }
    }
    let pattern_count = events.len();

    // --- Commits ------------------------------------------------------------
    // The files-touched criterion is only available when `commit_files` exists.
    let commit_query = if table_exists(conn, "commit_files") {
        "SELECT c.rowid, c.sha, c.message FROM commits c
WHERE c.message IS NOT NULL AND length(c.message) > 30
AND (
length(c.message) > 75
OR c.message LIKE '%belief%'
OR c.message LIKE 'release%'
OR (SELECT COUNT(*) FROM commit_files cf WHERE cf.sha = c.sha) > 5
)
ORDER BY c.rowid"
    } else {
        "SELECT rowid, sha, message FROM commits
WHERE message IS NOT NULL AND length(message) > 30
AND (
length(message) > 75
OR message LIKE '%belief%'
OR message LIKE 'release%'
)
ORDER BY rowid"
    };
    let mut stmt = conn.prepare(commit_query)?;
    let mut rows = stmt.query([])?;
    while let Some(row) = rows.next()? {
        let rowid: i64 = row.get(0)?;
        let sha: String = row.get(1)?;
        let message: String = row.get(2)?;
        // `get` avoids a panic if the value is shorter than 7 bytes or byte 7
        // is not a character boundary; in both cases the full value is used.
        let short_sha = sha.get(..7).unwrap_or(&sha);
        let desc = format!("Commit {}: {}", short_sha, message);
        events.push((COMMIT_ID_OFFSET + rowid, desc));
    }
    let commit_count = events.len() - pattern_count;

    // --- Beliefs ------------------------------------------------------------
    if table_exists(conn, "beliefs") {
        let belief_query = if table_exists(conn, "belief_fts") {
            "SELECT b.rowid, b.id, b.statement, b.persona, b.facets,
b.confidence, b.entrenchment, bf.content
FROM beliefs b
LEFT JOIN belief_fts bf ON b.id = bf.id
WHERE b.status = 'active'"
        } else {
            "SELECT rowid, id, statement, persona, facets,
confidence, entrenchment, NULL as content
FROM beliefs
WHERE status = 'active'"
        };
        let mut stmt = conn.prepare(belief_query)?;
        let mut rows = stmt.query([])?;
        while let Some(row) = rows.next()? {
            let rowid: i64 = row.get(0)?;
            let id: String = row.get(1)?;
            let statement: String = row.get(2)?;
            let persona: String = row.get(3)?;
            let facets: Option<String> = row.get(4)?;
            let confidence: f64 = row.get(5)?;
            let entrenchment: String = row.get(6)?;
            let fts_content: Option<String> = row.get(7)?;

            let mut desc = format!("Belief: {} - {}", id, statement);
            desc.push_str(&format!(". Persona: {}", persona));
            if let Some(f) = &facets {
                if !f.is_empty() {
                    desc.push_str(&format!(". Facets: {}", f));
                }
            }
            desc.push_str(&format!(
                ". Confidence: {:.2}, Entrenchment: {}",
                confidence, entrenchment
            ));
            if let Some(content) = fts_content {
                let body = strip_frontmatter(&content);
                if !body.is_empty() {
                    // Measure the budget in characters, the same unit used by
                    // `take` below, so multi-byte text is not over-trimmed.
                    let remaining = MAX_CONTENT_CHARS.saturating_sub(desc.chars().count());
                    // Skip the body when too little room is left to be useful.
                    if remaining > 50 {
                        let preview: String = body.chars().take(remaining).collect();
                        desc.push_str(&format!(". {}", preview));
                    }
                }
            }
            events.push((BELIEF_ID_OFFSET + rowid, desc));
        }
    }
    let belief_count = events.len() - pattern_count - commit_count;

    println!(
        " Knowledge corpus: {} patterns + {} commits + {} beliefs = {} items",
        pattern_count,
        commit_count,
        belief_count,
        events.len()
    );
    Ok(events)
}
/// Returns `content` without a leading `---` … `---` frontmatter section.
///
/// When `content` starts with `---` and a later line begins with `---`, the
/// text after that closing marker is returned with leading whitespace
/// trimmed. In every other case (no opening marker, or no closing marker)
/// `content` is returned unchanged.
fn strip_frontmatter(content: &str) -> &str {
    let after_closing_marker = content
        .strip_prefix("---")
        .and_then(|rest| rest.split_once("\n---"));
    match after_closing_marker {
        Some((_frontmatter, body)) => body.trim_start(),
        None => content,
    }
}
/// Collects the session corpus as `(seq, description)` pairs.
///
/// Reads `session.decision`, `session.pattern`, `session.work`, and
/// `session.context` rows from `eventlog` whose `$.content` is longer than 50
/// characters. Rows with the same source, event type, and content are grouped,
/// keeping the smallest `seq`. Each description has the form
/// `Session {source_id} ({type without "session." prefix}): {content}`.
///
/// A summary line with per-type counts is printed. The counts are kept in a
/// `BTreeMap` so the summary lists the types in the same (alphabetical) order
/// on every run.
///
/// # Errors
///
/// Returns any error from preparing or stepping the query or from reading a
/// column.
fn query_session_corpus(conn: &rusqlite::Connection) -> Result<Vec<(i64, String)>> {
    let mut stmt = conn.prepare(
        "SELECT MIN(seq) as seq, source_id, event_type,
json_extract(data, '$.content') as content
FROM eventlog
WHERE event_type IN ('session.decision', 'session.pattern',
'session.work', 'session.context')
AND length(json_extract(data, '$.content')) > 50
GROUP BY source_id, event_type, json_extract(data, '$.content')
ORDER BY seq",
    )?;

    let mut events = Vec::new();
    // Ordered map: a hash map would make the printed summary order vary per run.
    let mut type_counts: std::collections::BTreeMap<String, usize> =
        std::collections::BTreeMap::new();

    let mut rows = stmt.query([])?;
    while let Some(row) = rows.next()? {
        let seq: i64 = row.get(0)?;
        let source_id: String = row.get(1)?;
        let event_type: String = row.get(2)?;
        let content: String = row.get(3)?;

        let type_label = event_type.strip_prefix("session.").unwrap_or(&event_type);
        let desc = format!("Session {} ({}): {}", source_id, type_label, content);
        events.push((seq, desc));
        *type_counts.entry(type_label.to_string()).or_insert(0) += 1;
    }

    let type_summary: Vec<String> = type_counts
        .iter()
        .map(|(label, count)| format!("{} {}", count, label))
        .collect();
    println!(
        " Session corpus: {} items ({})",
        events.len(),
        type_summary.join(" + ")
    );
    Ok(events)
}
/// Collects the temporal corpus as `(id, text)` pairs.
///
/// Selects every distinct path that appears as `file_a` or `file_b` in
/// `co_changes`, sorted by path, and converts each one to text with
/// `temporal::file_to_text`. The id is the zero-based position of the path in
/// that sorted list.
///
/// # Errors
///
/// Returns any error from preparing or stepping the query or from reading the
/// path column.
fn query_file_events(conn: &rusqlite::Connection) -> Result<Vec<(i64, String)>> {
    let mut stmt = conn.prepare(
        "SELECT DISTINCT file_a FROM co_changes
UNION
SELECT DISTINCT file_b FROM co_changes
ORDER BY 1",
    )?;
    let mut rows = stmt.query([])?;

    let mut events: Vec<(i64, String)> = Vec::new();
    while let Some(row) = rows.next()? {
        let path: String = row.get(0)?;
        // The next id equals the number of entries collected so far.
        let next_id = events.len() as i64;
        events.push((next_id, temporal::file_to_text(&path)));
    }
    Ok(events)
}