mod verification;
use anyhow::Result;
use regex::Regex;
use rusqlite::Connection;
use serde_json::json;
use std::path::Path;
use std::time::Instant;
use super::database;
use super::ScrapeStats;
const BELIEFS_DIR: &str = "layer/surface/epistemic/beliefs";
/// One belief as read from a markdown file, plus the metrics and
/// verification results attached to it during a scrape.
#[derive(Debug)]
struct ParsedBelief {
    /// Frontmatter `id:` when present, otherwise the file stem.
    id: String,
    /// One-line statement as returned by `extract_statement`.
    statement: String,
    /// Frontmatter `persona:`; `parse_belief_file` defaults it to "architect".
    persona: String,
    /// Entries of the inline frontmatter list `facets: [a, b]`.
    facets: Vec<String>,
    /// Frontmatter `score:` value; `parse_belief_file` defaults it to 0.5.
    confidence: f64,
    /// Frontmatter `entrenchment:`; defaults to "medium".
    entrenchment: String,
    /// Frontmatter `status:`; defaults to "active".
    status: String,
    /// Frontmatter `extracted:` value, if any.
    extracted: Option<String>,
    /// Frontmatter `revised:` value, if any.
    revised: Option<String>,
    /// Full text of the belief file (frontmatter included).
    content: String,
    /// Path the belief was read from, lossily converted to a string.
    file_path: String,
    /// Use/truth/grounding counters for this belief.
    metrics: BeliefMetrics,
    /// Verification queries parsed out of `content`.
    verification_queries: Vec<verification::VerificationQuery>,
    /// Aggregated verification results; default until `run` executes the queries.
    verification: verification::VerificationAggregates,
}
/// Counters attached to a belief. Everything starts at zero / `false`.
#[derive(Debug, Default)]
struct BeliefMetrics {
    // --- use metrics ---
    /// Number of other beliefs whose content mentions this belief's id.
    cited_by_beliefs: i32,
    /// Number of session files whose content mentions this belief's id.
    cited_by_sessions: i32,
    /// Bullet items found under a `## Applied-In` section.
    applied_in: i32,

    // --- truth metrics ---
    /// Bullet items found under a `## Evidence` section.
    evidence_count: i32,
    /// Evidence bullets for which at least one reference could be resolved.
    evidence_verified: i32,
    /// `## Attacked-By` bullets that contain `status: defeated`.
    defeated_attacks: i32,
    /// Evidence wikilinks classified as external by `is_external_source`.
    external_sources: i32,

    /// Set to `true` by `parse_belief_file` for every parsed file.
    endorsed: bool,

    // --- semantic grounding ---
    // These fields mirror the `grounding_*` columns of the `beliefs` table.
    // NOTE(review): no code visible in this file assigns them on the struct;
    // the columns are written directly by `compute_belief_grounding`.
    grounding_score: f32,
    grounding_code_count: i32,
    grounding_commit_count: i32,
    grounding_session_count: i32,
    grounding_forge_count: i32,
}
fn create_materialized_views(conn: &Connection) -> Result<()> {
conn.execute_batch(
r#"
-- Beliefs view (materialized from belief.* events)
CREATE TABLE IF NOT EXISTS beliefs (
id TEXT PRIMARY KEY,
statement TEXT,
persona TEXT,
facets TEXT,
confidence REAL,
entrenchment TEXT,
status TEXT,
extracted TEXT,
revised TEXT,
file_path TEXT,
-- E4: Computed use/truth metrics
cited_by_beliefs INTEGER DEFAULT 0,
cited_by_sessions INTEGER DEFAULT 0,
applied_in INTEGER DEFAULT 0,
evidence_count INTEGER DEFAULT 0,
evidence_verified INTEGER DEFAULT 0,
defeated_attacks INTEGER DEFAULT 0,
external_sources INTEGER DEFAULT 0,
endorsed INTEGER DEFAULT 0,
-- E4.6a: Semantic grounding metrics
grounding_score REAL DEFAULT 0.0,
grounding_code_count INTEGER DEFAULT 0,
grounding_commit_count INTEGER DEFAULT 0,
grounding_session_count INTEGER DEFAULT 0,
grounding_forge_count INTEGER DEFAULT 0
);
-- FTS5 for belief content search
CREATE VIRTUAL TABLE IF NOT EXISTS belief_fts USING fts5(
id,
statement,
facets,
content,
tokenize='porter unicode61'
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_beliefs_persona ON beliefs(persona);
CREATE INDEX IF NOT EXISTS idx_beliefs_status ON beliefs(status);
CREATE INDEX IF NOT EXISTS idx_beliefs_entrenchment ON beliefs(entrenchment);
-- E4.6a-fix: Multi-hop code grounding (belief → commit → file → function)
CREATE TABLE IF NOT EXISTS belief_code_reach (
belief_id TEXT NOT NULL,
file_path TEXT NOT NULL,
reach_score REAL,
commit_count INTEGER,
function_count INTEGER,
hop_path TEXT,
PRIMARY KEY (belief_id, file_path)
);
CREATE INDEX IF NOT EXISTS idx_belief_code_reach_file ON belief_code_reach(file_path);
"#,
)?;
let columns_to_add = [
("cited_by_beliefs", "INTEGER DEFAULT 0"),
("cited_by_sessions", "INTEGER DEFAULT 0"),
("applied_in", "INTEGER DEFAULT 0"),
("evidence_count", "INTEGER DEFAULT 0"),
("evidence_verified", "INTEGER DEFAULT 0"),
("defeated_attacks", "INTEGER DEFAULT 0"),
("external_sources", "INTEGER DEFAULT 0"),
("endorsed", "INTEGER DEFAULT 0"),
("grounding_score", "REAL DEFAULT 0.0"),
("grounding_code_count", "INTEGER DEFAULT 0"),
("grounding_commit_count", "INTEGER DEFAULT 0"),
("grounding_session_count", "INTEGER DEFAULT 0"),
("grounding_forge_count", "INTEGER DEFAULT 0"),
];
for (col_name, col_type) in &columns_to_add {
let sql = format!("ALTER TABLE beliefs ADD COLUMN {} {}", col_name, col_type);
let _ = conn.execute(&sql, []);
}
verification::create_tables(conn)?;
Ok(())
}
/// Returns the trimmed value of the first `key: value` line in `frontmatter`.
///
/// The key must start its line. Only spaces and tabs are skipped after the
/// colon, so a key with an empty value yields `None` instead of capturing the
/// following line.
fn frontmatter_field(frontmatter: &str, key: &str) -> Option<String> {
    let pattern = format!(r"^{}:[ \t]*(.+)$", regex::escape(key));
    let re = regex::RegexBuilder::new(&pattern)
        .multi_line(true)
        .build()
        .ok()?;
    let value = re.captures(frontmatter)?.get(1)?.as_str().trim();
    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}

/// Reads a belief markdown file and parses its frontmatter, statement,
/// section metrics and verification blocks.
///
/// Frontmatter is the text between a leading `---` and the next line that
/// starts with `---`. Recognised keys: `id`, `persona`, `facets` (inline
/// list), `score` (indented, e.g. under `confidence:`), `entrenchment`,
/// `status`, `extracted`, `revised`. Missing keys keep these defaults:
/// id = file stem (or "unknown"), persona = "architect", confidence = 0.5,
/// entrenchment = "medium", status = "active".
///
/// # Errors
/// Returns the I/O error if the file cannot be read as UTF-8 text.
fn parse_belief_file(path: &Path) -> Result<ParsedBelief> {
    let content = std::fs::read_to_string(path)?;
    let file_path = path.to_string_lossy().to_string();

    let mut id = path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("unknown")
        .to_string();
    let mut persona = "architect".to_string();
    let mut facets = Vec::new();
    let mut confidence = 0.5;
    let mut entrenchment = "medium".to_string();
    let mut status = "active".to_string();
    let mut extracted = None;
    let mut revised = None;

    if let Some(after_start) = content.strip_prefix("---") {
        // The closing delimiter must begin a line; searching for a bare
        // "---" would cut the frontmatter short at a value containing it.
        if let Some(end) = after_start.find("\n---") {
            let frontmatter = &after_start[..end];

            if let Some(value) = frontmatter_field(frontmatter, "id") {
                id = value;
            }
            if let Some(value) = frontmatter_field(frontmatter, "persona") {
                persona = value;
            }
            if let Some(cap) = Regex::new(r"facets:\s*\[([^\]]+)\]")
                .ok()
                .and_then(|re| re.captures(frontmatter))
            {
                facets = cap[1]
                    .split(',')
                    .map(|s| s.trim().trim_matches(|c| c == '"' || c == '\'').to_string())
                    .filter(|s| !s.is_empty())
                    .collect();
            }
            // `score:` is expected to be indented (nested under another key).
            if let Some(cap) = regex::RegexBuilder::new(r"^\s+score:\s*([\d.]+)")
                .multi_line(true)
                .build()
                .ok()
                .and_then(|re| re.captures(frontmatter))
            {
                confidence = cap[1].trim().parse().unwrap_or(0.5);
            }
            if let Some(value) = frontmatter_field(frontmatter, "entrenchment") {
                entrenchment = value;
            }
            if let Some(value) = frontmatter_field(frontmatter, "status") {
                status = value;
            }
            extracted = frontmatter_field(frontmatter, "extracted");
            revised = frontmatter_field(frontmatter, "revised");
        }
    }

    let statement = extract_statement(&content, &id);
    let mut metrics = extract_file_metrics(&content);
    // Every belief that exists as a file is recorded as endorsed.
    metrics.endorsed = true;
    let verification_queries = verification::parse_verification_blocks(&content);

    Ok(ParsedBelief {
        id,
        statement,
        persona,
        facets,
        confidence,
        entrenchment,
        status,
        extracted,
        revised,
        content,
        file_path,
        metrics,
        verification_queries,
        verification: verification::VerificationAggregates::default(),
    })
}
fn extract_statement(content: &str, id: &str) -> String {
let heading_pattern = format!(r"^#\s+{}\s*$", regex::escape(id));
let heading_re = Regex::new(&heading_pattern).ok();
let mut found_heading = false;
for line in content.lines() {
if found_heading {
let trimmed = line.trim();
if !trimmed.is_empty() {
return trimmed.to_string();
}
}
if let Some(ref re) = heading_re {
if re.is_match(line) {
found_heading = true;
}
}
}
id.replace('-', " ")
}
/// Counts bullet items per section of a belief file.
///
/// Bullets are lines starting (after trimming) with `- ` or `* `. The current
/// section is the most recent line starting with `## `:
/// - `## Evidence…`    → every bullet increments `evidence_count`
/// - `## Applied-In…`  → every bullet increments `applied_in`
/// - `## Attacked-By…` → bullets containing `status: defeated` increment
///   `defeated_attacks`
/// All other fields keep their default values.
fn extract_file_metrics(content: &str) -> BeliefMetrics {
    let mut tally = BeliefMetrics::default();
    let mut section = "";

    for raw_line in content.lines() {
        let line = raw_line.trim();

        if line.starts_with("## ") {
            section = line;
        } else if line.starts_with("- ") || line.starts_with("* ") {
            if section.starts_with("## Evidence") {
                tally.evidence_count += 1;
            } else if section.starts_with("## Applied-In") {
                tally.applied_in += 1;
            } else if section.starts_with("## Attacked-By") && line.contains("status: defeated") {
                tally.defeated_attacks += 1;
            }
        }
    }

    tally
}
/// Checks the bullets of the first `## Evidence` section against the project.
///
/// Returns `(verified, external)`:
/// - `verified` counts `- ` bullets with at least one resolvable reference: a
///   `[[wikilink]]` accepted by `try_verify_link`, a wikilink classified as
///   external, or a bare `YYYYMMDD-HHMMSS` session id whose file exists under
///   `layer/sessions`.
/// - `external` counts wikilinks that failed `try_verify_link` but matched
///   `is_external_source`.
///
/// Scanning stops at the first `## ` heading after the section that is not
/// itself an `## Evidence` heading.
fn verify_evidence_section(content: &str, project_root: &Path) -> (i32, i32) {
    let wikilink_re = Regex::new(r"\[\[([^\]]+)\]\]").unwrap();
    let bare_session_re = Regex::new(r"(?:^|\s)(session-)?(\d{8}-\d{6})[\s:,]").unwrap();
    let session_id_re = Regex::new(r"\b(\d{8}-\d{6})\b").unwrap();

    let session_file_exists = |session_id: &str| {
        project_root
            .join("layer/sessions")
            .join(format!("{}.md", session_id))
            .exists()
    };

    let evidence_bullets = content
        .lines()
        .map(str::trim)
        .skip_while(|line| !line.starts_with("## Evidence"))
        .skip(1)
        // A repeated "## Evidence" heading keeps the section open.
        .take_while(|line| !line.starts_with("## ") || line.starts_with("## Evidence"))
        .filter(|line| line.starts_with("- "));

    let mut verified = 0;
    let mut external = 0;

    for bullet in evidence_bullets {
        let mut resolved = false;

        // Every wikilink is inspected (no early exit) so that all external
        // sources on the line are counted.
        for cap in wikilink_re.captures_iter(bullet) {
            let link = &cap[1];
            if try_verify_link(link, project_root) {
                resolved = true;
            } else if is_external_source(link) {
                external += 1;
                resolved = true;
            }
        }

        // Fall back to bare session ids only when no wikilink resolved.
        let resolved = resolved
            || bare_session_re
                .captures_iter(bullet)
                .any(|cap| session_file_exists(&cap[2]))
            || session_id_re
                .captures_iter(bullet)
                .any(|cap| session_file_exists(&cap[1]));

        if resolved {
            verified += 1;
        }
    }

    (verified, external)
}
/// Decides whether a wikilink target can be resolved inside the project.
///
/// Resolution rules, tried in order:
/// - `session-<id>`: `layer/sessions/<id>.md` exists, or `<id>` is exactly
///   8 bytes long and some `.md` file in `layer/sessions` starts with it.
/// - `commit-<hash>`: `git rev-parse --verify <hash>^{commit}` succeeds when
///   run in `project_root` (failure to launch git counts as unresolved).
/// - `spec-…` / `spec/…`: always accepted without any check.
/// - anything else: a belief file `<link>.md` exists, or `link` is an
///   existing path relative to `project_root`.
fn try_verify_link(link: &str, project_root: &Path) -> bool {
    if let Some(session_id) = link.strip_prefix("session-") {
        let sessions_dir = project_root.join("layer/sessions");
        if sessions_dir.join(format!("{}.md", session_id)).exists() {
            return true;
        }
        // Only an 8-character id is treated as a prefix of session file names.
        if session_id.len() != 8 {
            return false;
        }
        return match std::fs::read_dir(&sessions_dir) {
            Ok(entries) => entries.filter_map(|e| e.ok()).any(|entry| {
                entry
                    .file_name()
                    .to_str()
                    .map_or(false, |name| name.starts_with(session_id) && name.ends_with(".md"))
            }),
            Err(_) => false,
        };
    }

    if let Some(hash) = link.strip_prefix("commit-") {
        return std::process::Command::new("git")
            .args(["rev-parse", "--verify", &format!("{}^{{commit}}", hash)])
            .current_dir(project_root)
            .output()
            .map(|out| out.status.success())
            .unwrap_or(false);
    }

    if link.starts_with("spec-") || link.starts_with("spec/") {
        return true;
    }

    let as_belief = project_root
        .join("layer/surface/epistemic/beliefs")
        .join(format!("{}.md", link));
    as_belief.exists() || project_root.join(link).exists()
}
/// Heuristically classifies a link as an external source.
///
/// A link is external when its lowercase form contains any of the markers
/// `paper`, `helland`, `blog`, `rfc` or `doi` (plain substring match).
fn is_external_source(link: &str) -> bool {
    const EXTERNAL_MARKERS: [&str; 5] = ["paper", "helland", "blog", "rfc", "doi"];

    let normalized = link.to_lowercase();
    EXTERNAL_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
/// Fills in the cross-reference metrics of every belief.
///
/// For each belief this sets:
/// - `cited_by_sessions`: number of `.md` files in `layer/sessions` whose text
///   contains the belief id,
/// - `cited_by_beliefs`: number of beliefs with a different id whose content
///   contains the belief id,
/// - `evidence_verified`: verified count from `verify_evidence_section`,
/// - `external_sources`: incremented by the external count from the same call.
///
/// Matching is plain substring search on the id. Unreadable session files and
/// a missing sessions directory are skipped silently (best effort).
///
/// All values are computed in read-only passes and written back afterwards,
/// so no belief content has to be cloned. Counts are kept per belief, which
/// means two files that happen to share an id no longer inflate each other's
/// session count.
fn cross_reference_beliefs(beliefs: &mut [ParsedBelief], project_root: &Path) {
    let sessions_dir = project_root.join("layer/sessions");

    // Pass 1: session citations, stored at the same index as the belief.
    let mut session_citations = vec![0_i32; beliefs.len()];
    if let Ok(entries) = std::fs::read_dir(&sessions_dir) {
        for entry in entries.filter_map(|e| e.ok()) {
            let path = entry.path();
            if !path.extension().map(|ext| ext == "md").unwrap_or(false) {
                continue;
            }
            if let Ok(session_content) = std::fs::read_to_string(&path) {
                for (count, belief) in session_citations.iter_mut().zip(beliefs.iter()) {
                    if session_content.contains(belief.id.as_str()) {
                        *count += 1;
                    }
                }
            }
        }
    }

    // Pass 2: belief-to-belief citations and evidence verification.
    let computed: Vec<(i32, i32, i32)> = beliefs
        .iter()
        .map(|belief| {
            let citing = beliefs
                .iter()
                .filter(|other| {
                    other.id != belief.id && other.content.contains(belief.id.as_str())
                })
                .count();
            let cited_by_beliefs = i32::try_from(citing).unwrap_or(i32::MAX);
            let (verified, external) = verify_evidence_section(&belief.content, project_root);
            (cited_by_beliefs, verified, external)
        })
        .collect();

    // Pass 3: write the results back.
    for ((belief, cited_by_sessions), (cited_by_beliefs, verified, external)) in beliefs
        .iter_mut()
        .zip(session_citations)
        .zip(computed)
    {
        belief.metrics.cited_by_beliefs = cited_by_beliefs;
        belief.metrics.cited_by_sessions = cited_by_sessions;
        belief.metrics.evidence_verified = verified;
        belief.metrics.external_sources += external;
    }
}
/// Splits a dash-separated belief id into lowercase search keywords.
///
/// Words shorter than three bytes and stop words are dropped. Stop words are
/// compared after lowercasing, so `The` and `FIRST` are filtered just like
/// `the` and `first`.
///
/// Returns the remaining words in their original order (duplicates kept).
fn extract_belief_keywords(belief_id: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "is", "the", "a", "an", "in", "on", "of", "for", "to", "and", "or", "not", "no", "over",
        "vs", "with", "from", "by", "be", "at", "as", "do", "if", "its", "own", "first", "early",
        "often", "before", "after", "always", "never", "every", "many", "requires", "needs",
    ];

    belief_id
        .split('-')
        .filter(|word| word.len() >= 3)
        .map(|word| word.to_lowercase())
        // Filter on the lowercased form; the stop list is all lowercase.
        .filter(|word| !STOP_WORDS.contains(&word.as_str()))
        .collect()
}
/// Reports whether `path` ends in one of the recognised source-code
/// extensions. The comparison is case-sensitive and looks only at the text
/// from the last `.` to the end of the path.
fn is_source_code(path: &str) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        ".rs", ".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".c", ".cpp", ".h", ".java", ".rb",
        ".sh", ".sql", ".swift", ".kt", ".zig",
    ];

    // Every listed extension has exactly one dot (the leading one), so a path
    // ends with an extension exactly when its final dot-suffix equals it.
    match path.rfind('.') {
        Some(dot) => SOURCE_EXTENSIONS.contains(&&path[dot..]),
        None => false,
    }
}
// Computes semantic grounding for every row of `beliefs` and rebuilds
// `belief_code_reach`.
//
// For each belief, its vector is looked up in the usearch index and the
// nearest neighbours are classified by key range (see the *_ID_OFFSET
// constants). Neighbours that pass MIN_SCORE feed the grounding score (mean
// neighbour score) and the per-kind counts written to the `grounding_*`
// columns. Commit neighbours are then expanded via `commit_files` into
// source files ("reach"), with a keyword fallback against `function_facts`.
//
// Returns Ok(()) without touching the database when the index file does not
// exist. Errors from loading the index or from the `?`-propagated SQL
// statements abort the computation.
fn compute_belief_grounding(conn: &Connection) -> Result<()> {
use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
let model = crate::commands::scry::internal::search::get_embedding_model();
// Path is relative to the current working directory.
let index_path = format!(
".patina/local/data/embeddings/{}/projections/semantic.usearch",
model
);
if !Path::new(&index_path).exists() {
return Ok(());
}
// NOTE(review): dimensions/metric/quantization presumably have to match how
// the index file was built — confirm against the code that writes it.
let index_options = IndexOptions {
dimensions: 256,
metric: MetricKind::Cos,
quantization: ScalarKind::F32,
..Default::default()
};
let index = Index::new(&index_options)?;
index.load(&index_path)?;
// Key ranges as used by the classification below:
//   key <  CODE_ID_OFFSET                      -> counted as session
//   CODE_ID_OFFSET    .. PATTERN_ID_OFFSET     -> scored only, no dedicated counter
//   PATTERN_ID_OFFSET .. COMMIT_ID_OFFSET      -> skipped
//   COMMIT_ID_OFFSET  .. BELIEF_ID_OFFSET      -> counted as commit
//   BELIEF_ID_OFFSET  .. FORGE_ID_OFFSET       -> skipped (beliefs, including self)
//   key >= FORGE_ID_OFFSET                     -> counted as forge
const BELIEF_ID_OFFSET: i64 = 4_000_000_000;
const FORGE_ID_OFFSET: i64 = 5_000_000_000;
const CODE_ID_OFFSET: i64 = 1_000_000_000;
const PATTERN_ID_OFFSET: i64 = 2_000_000_000;
const COMMIT_ID_OFFSET: i64 = 3_000_000_000;
const SEARCH_LIMIT: usize = 20;
// Neighbours with score (1 - distance) below this are ignored.
const MIN_SCORE: f32 = 0.85;
// The reach table is rebuilt from scratch on every run.
conn.execute("DELETE FROM belief_code_reach", [])?;
let mut stmt = conn.prepare("SELECT rowid, id FROM beliefs")?;
let beliefs: Vec<(i64, String)> = stmt
.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
.filter_map(|r| r.ok())
.collect();
let total = beliefs.len();
let mut grounded = 0;
let mut total_reach_files = 0u32;
let mut total_source_files = 0u32;
let mut total_lexical_fallbacks = 0u32;
for (rowid, belief_id) in &beliefs {
// A belief's index key is its SQLite rowid shifted into the belief range.
let belief_key = (BELIEF_ID_OFFSET + rowid) as u64;
let mut vector = vec![0.0_f32; 256];
if index.get(belief_key, &mut vector).is_err() {
continue;
}
// A near-zero vector is treated as "no embedding available"; such beliefs
// are skipped and their grounding columns are left untouched.
let magnitude: f32 = vector.iter().map(|v| v * v).sum::<f32>().sqrt();
if magnitude < 0.001 {
continue;
}
// NOTE(review): the "+ 2" is presumably headroom for results that get
// filtered out (e.g. the belief itself) — confirm intent.
let matches = match index.search(&vector, SEARCH_LIMIT + 2) {
Ok(m) => m,
Err(_) => continue,
};
let mut commit_count = 0i32;
let mut session_count = 0i32;
let mut forge_count = 0i32;
let mut total_score: f32 = 0.0;
let mut total_count = 0i32;
let mut commit_neighbors: Vec<(String, f32)> = Vec::new();
for i in 0..matches.keys.len() {
let key = matches.keys[i] as i64;
// Convert distance to a similarity-style score.
let score = 1.0 - matches.distances[i];
if key == BELIEF_ID_OFFSET + rowid {
continue;
}
if (PATTERN_ID_OFFSET..COMMIT_ID_OFFSET).contains(&key) {
continue;
}
// NOTE(review): the `key != BELIEF_ID_OFFSET + rowid` part is always true
// here because the self key was already skipped above.
if (BELIEF_ID_OFFSET..FORGE_ID_OFFSET).contains(&key) && key != BELIEF_ID_OFFSET + rowid
{
continue;
}
if score < MIN_SCORE {
continue;
}
if key >= FORGE_ID_OFFSET {
forge_count += 1;
total_score += score;
total_count += 1;
} else if (COMMIT_ID_OFFSET..BELIEF_ID_OFFSET).contains(&key) {
commit_count += 1;
let commit_rowid = key - COMMIT_ID_OFFSET;
// A commit whose rowid cannot be resolved still counts toward the score
// and commit_count; it just contributes no reach files.
if let Ok(sha) = conn.query_row(
"SELECT sha FROM commits WHERE rowid = ?1",
[commit_rowid],
|row| row.get::<_, String>(0),
) {
commit_neighbors.push((sha, score));
}
total_score += score;
total_count += 1;
} else if key < CODE_ID_OFFSET {
session_count += 1;
total_score += score;
total_count += 1;
} else {
// Remaining range (CODE_ID_OFFSET..PATTERN_ID_OFFSET): contributes to the
// score but has no counter of its own.
total_score += score;
total_count += 1;
}
}
// file path -> (best neighbour score, shas or tags that led to the file)
let mut file_reach: std::collections::HashMap<String, (f32, Vec<String>)> =
std::collections::HashMap::new();
for (sha, score) in &commit_neighbors {
let mut file_stmt =
conn.prepare_cached("SELECT file_path FROM commit_files WHERE sha = ?1")?;
let files: Vec<String> = file_stmt
.query_map([sha], |row| row.get::<_, String>(0))?
.filter_map(|r| r.ok())
.collect();
for file_path in files {
if !is_source_code(&file_path) {
continue;
}
let entry = file_reach
.entry(file_path)
.or_insert_with(|| (0.0_f32, Vec::new()));
if *score > entry.0 {
entry.0 = *score;
}
entry.1.push(sha.clone());
}
}
// Lexical fallback: commit neighbours exist but none touched a source file.
// Match keywords from the belief id against file paths in `function_facts`.
if file_reach.is_empty() && !commit_neighbors.is_empty() {
let keywords = extract_belief_keywords(belief_id);
if !keywords.is_empty() {
let best_commit_score = commit_neighbors
.iter()
.map(|(_, s)| *s)
.fold(0.0_f32, f32::max);
let hop_tag = "lexical-fallback";
for keyword in &keywords {
let pattern = format!("%{}%", keyword);
let mut kw_stmt = conn.prepare_cached(
"SELECT DISTINCT file FROM function_facts WHERE file LIKE ?1",
)?;
let files: Vec<String> = kw_stmt
.query_map([&pattern], |row| row.get::<_, String>(0))?
.filter_map(|r| r.ok())
.filter(|f| is_source_code(f))
.collect();
for file_path in files {
let entry = file_reach
.entry(file_path)
.or_insert_with(|| (0.0_f32, Vec::new()));
if best_commit_score > entry.0 {
entry.0 = best_commit_score;
}
if !entry.1.contains(&hop_tag.to_string()) {
entry.1.push(hop_tag.to_string());
}
}
}
if !file_reach.is_empty() {
total_lexical_fallbacks += 1;
}
}
}
let source_file_count = file_reach.len() as i32;
for (file_path, (reach_score, shas)) in &file_reach {
let function_count: i32 = conn
.query_row(
"SELECT COUNT(*) FROM function_facts WHERE file = ?1",
[file_path],
|row| row.get(0),
)
.unwrap_or(0);
// Each entry is rendered as "commit:" + its first 7 bytes.
// NOTE(review): lexical-fallback entries go through the same formatting, so
// they are stored as "commit:lexical" with commit_count = 1.
let hop_path = shas
.iter()
.map(|s| format!("commit:{}", &s[..7.min(s.len())]))
.collect::<Vec<_>>()
.join(",");
conn.execute(
"INSERT OR REPLACE INTO belief_code_reach (belief_id, file_path, reach_score, commit_count, function_count, hop_path)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
rusqlite::params![
belief_id,
file_path,
reach_score,
shas.len() as i32,
function_count,
hop_path,
],
)?;
}
// NOTE(review): both totals grow by file_reach.len(), so the "precision"
// printed below is 100 whenever any reach file exists.
total_reach_files += file_reach.len() as u32;
total_source_files += source_file_count as u32;
// Grounding score is the mean score of all accepted neighbours.
let grounding_score = if total_count > 0 {
total_score / total_count as f32
} else {
0.0
};
conn.execute(
"UPDATE beliefs SET grounding_score = ?1, grounding_code_count = ?2, grounding_commit_count = ?3, grounding_session_count = ?4, grounding_forge_count = ?5 WHERE id = ?6",
rusqlite::params![grounding_score, source_file_count, commit_count, session_count, forge_count, belief_id],
)?;
if total_count > 0 {
grounded += 1;
}
}
let precision = if total_reach_files > 0 {
(total_source_files as f64 / total_reach_files as f64 * 100.0) as u32
} else {
0
};
println!(
" Computed grounding for {} beliefs ({} grounded, {} reach files, {} source, {}% precision, {} lexical fallbacks)",
total, grounded, total_reach_files, total_source_files, precision, total_lexical_fallbacks
);
Ok(())
}
/// Replaces everything stored for one belief.
///
/// Existing rows for the belief id are deleted from `beliefs`, `belief_fts`
/// and the `belief.surface` entries of `eventlog`; then a fresh event, a
/// `beliefs` row and an FTS row are inserted.
///
/// The event timestamp is `revised` if set, else `extracted`, else the fixed
/// fallback "2026-01-01".
///
/// # Errors
/// Returns the first failing database operation. The steps are not wrapped in
/// a transaction here, so earlier steps stay applied when a later one fails.
fn insert_belief(conn: &Connection, belief: &ParsedBelief) -> Result<()> {
    let m = &belief.metrics;
    let v = &belief.verification;
    let facets_csv = belief.facets.join(", ");

    let occurred_at = match (&belief.revised, &belief.extracted) {
        (Some(revised), _) => revised.as_str(),
        (None, Some(extracted)) => extracted.as_str(),
        (None, None) => "2026-01-01",
    };

    let payload = json!({
        "statement": &belief.statement,
        "persona": &belief.persona,
        "facets": &belief.facets,
        "confidence": belief.confidence,
        "entrenchment": &belief.entrenchment,
        "status": &belief.status,
        "content": &belief.content,
        "metrics": {
            "use": {
                "cited_by_beliefs": m.cited_by_beliefs,
                "cited_by_sessions": m.cited_by_sessions,
                "applied_in": m.applied_in,
            },
            "truth": {
                "evidence_count": m.evidence_count,
                "evidence_verified": m.evidence_verified,
                "defeated_attacks": m.defeated_attacks,
                "external_sources": m.external_sources,
            },
            "endorsed": m.endorsed,
            "grounding": {
                "score": m.grounding_score,
                "code": m.grounding_code_count,
                "commits": m.grounding_commit_count,
                "sessions": m.grounding_session_count,
                "forge": m.grounding_forge_count,
            },
        },
    });

    // Clear out whatever is currently stored for this id.
    let cleanup_statements = [
        "DELETE FROM beliefs WHERE id = ?1",
        "DELETE FROM belief_fts WHERE id = ?1",
        "DELETE FROM eventlog WHERE source_id = ?1 AND event_type = 'belief.surface'",
    ];
    for sql in cleanup_statements.iter() {
        conn.execute(sql, [&belief.id])?;
    }

    database::insert_event(
        conn,
        "belief.surface",
        occurred_at,
        &belief.id,
        Some(&belief.file_path),
        &payload.to_string(),
    )?;

    conn.execute(
        "INSERT INTO beliefs (id, statement, persona, facets, confidence, entrenchment, status, extracted, revised, file_path,\n\
         cited_by_beliefs, cited_by_sessions, applied_in, evidence_count, evidence_verified, defeated_attacks, external_sources, endorsed,\n\
         verification_total, verification_passed, verification_failed, verification_errored,\n\
         grounding_score, grounding_code_count, grounding_commit_count, grounding_session_count, grounding_forge_count)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)",
        rusqlite::params![
            &belief.id,
            &belief.statement,
            &belief.persona,
            &facets_csv,
            belief.confidence,
            &belief.entrenchment,
            &belief.status,
            &belief.extracted,
            &belief.revised,
            &belief.file_path,
            m.cited_by_beliefs,
            m.cited_by_sessions,
            m.applied_in,
            m.evidence_count,
            m.evidence_verified,
            m.defeated_attacks,
            m.external_sources,
            m.endorsed as i32,
            v.total,
            v.passed,
            v.failed,
            v.errored,
            m.grounding_score,
            m.grounding_code_count,
            m.grounding_commit_count,
            m.grounding_session_count,
            m.grounding_forge_count,
        ],
    )?;

    conn.execute(
        "INSERT INTO belief_fts (id, statement, facets, content)\n\
         VALUES (?1, ?2, ?3, ?4)",
        rusqlite::params![&belief.id, &belief.statement, &facets_csv, &belief.content],
    )?;

    Ok(())
}
pub fn run(full: bool) -> Result<ScrapeStats> {
let start = Instant::now();
let db_path = Path::new(database::PATINA_DB);
let beliefs_path = Path::new(BELIEFS_DIR);
if !beliefs_path.exists() {
println!(" No beliefs directory found ({})", BELIEFS_DIR);
return Ok(ScrapeStats {
items_processed: 0,
time_elapsed: start.elapsed(),
database_size_kb: 0,
});
}
let conn = database::initialize(db_path)?;
create_materialized_views(&conn)?;
let processed: std::collections::HashSet<String> = if full {
std::collections::HashSet::new()
} else {
let mut stmt = conn.prepare("SELECT id FROM beliefs")?;
let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
rows.filter_map(|r| r.ok()).collect()
};
if full {
println!(" Full belief scrape...");
} else {
println!(
" Incremental belief scrape ({} already processed)...",
processed.len()
);
}
let mut belief_files = Vec::new();
if let Ok(entries) = std::fs::read_dir(beliefs_path) {
for entry in entries.filter_map(|e| e.ok()) {
let path = entry.path();
if path.extension().map(|ext| ext == "md").unwrap_or(false) {
belief_files.push(path);
}
}
}
belief_files.sort();
let mut processed_count = 0;
let mut skipped = 0;
let mut current_file_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut all_beliefs: Vec<ParsedBelief> = Vec::new();
for path in &belief_files {
match parse_belief_file(path) {
Ok(belief) => {
current_file_ids.insert(belief.id.clone());
all_beliefs.push(belief);
}
Err(e) => {
eprintln!(" Warning: failed to parse {}: {}", path.display(), e);
}
}
}
let project_root = Path::new(".");
cross_reference_beliefs(&mut all_beliefs, project_root);
let data_freshness = if full { "full" } else { "incremental" };
let mut verified_count = 0;
for belief in &mut all_beliefs {
if !belief.verification_queries.is_empty() {
let (_results, aggregates) = verification::run_verification_queries(
&conn,
&belief.id,
&belief.verification_queries,
data_freshness,
);
belief.verification = aggregates;
verified_count += 1;
}
}
if verified_count > 0 {
println!(" Ran verification queries for {} beliefs", verified_count);
for belief in &all_beliefs {
if !belief.verification_queries.is_empty() {
let _ = conn.execute(
"UPDATE beliefs SET verification_total = ?1, verification_passed = ?2, verification_failed = ?3, verification_errored = ?4 WHERE id = ?5",
rusqlite::params![
belief.verification.total,
belief.verification.passed,
belief.verification.failed,
belief.verification.errored,
belief.id,
],
);
}
}
}
for belief in &all_beliefs {
if !full && processed.contains(&belief.id) {
skipped += 1;
continue;
}
if let Err(e) = insert_belief(&conn, belief) {
eprintln!(" Warning: failed to insert belief {}: {}", belief.id, e);
} else {
processed_count += 1;
}
}
println!(
" Processed {} beliefs ({} skipped)",
processed_count, skipped
);
if let Err(e) = compute_belief_grounding(&conn) {
eprintln!(" Warning: grounding computation failed: {}", e);
}
let file_ids = current_file_ids;
let mut stmt = conn.prepare("SELECT id FROM beliefs")?;
let db_ids: Vec<String> = stmt
.query_map([], |row| row.get::<_, String>(0))?
.filter_map(|r| r.ok())
.collect();
let mut pruned = 0;
for db_id in &db_ids {
if !file_ids.contains(db_id) {
conn.execute("DELETE FROM beliefs WHERE id = ?1", [db_id])?;
conn.execute("DELETE FROM belief_fts WHERE id = ?1", [db_id])?;
conn.execute(
"DELETE FROM eventlog WHERE source_id = ?1 AND event_type = 'belief.surface'",
[db_id],
)?;
pruned += 1;
}
}
if pruned > 0 {
println!(" Pruned {} stale beliefs", pruned);
}
let elapsed = start.elapsed();
let db_size = std::fs::metadata(db_path)
.map(|m| m.len() / 1024)
.unwrap_or(0);
Ok(ScrapeStats {
items_processed: processed_count,
time_elapsed: elapsed,
database_size_kb: db_size,
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The statement is the first non-blank line after the `# <id>` heading.
    #[test]
    fn test_extract_statement() {
        let content = r#"---
type: belief
id: test-belief
---
# test-belief
This is the one-sentence statement.
## Statement
Expanded explanation here.
"#;
        let statement = extract_statement(content, "test-belief");
        assert_eq!(statement, "This is the one-sentence statement.");
    }

    /// Frontmatter keys are parsed into the corresponding `ParsedBelief` fields.
    #[test]
    fn test_parse_belief_frontmatter() {
        // `score:` must be indented: the parser only recognises it as a
        // nested key (regex `^\s+score:`), here nested under `confidence:`.
        let content = r#"---
type: belief
id: sync-first
persona: architect
facets: [rust, architecture]
confidence:
  score: 0.88
entrenchment: high
status: active
extracted: 2025-08-04
revised: 2026-01-16
---
# sync-first
Prefer synchronous code.
"#;
        let temp_dir = tempfile::tempdir().unwrap();
        let file_path = temp_dir.path().join("sync-first.md");
        std::fs::write(&file_path, content).unwrap();

        let belief = parse_belief_file(&file_path).unwrap();

        assert_eq!(belief.id, "sync-first");
        assert_eq!(belief.persona, "architect");
        assert_eq!(belief.facets, vec!["rust", "architecture"]);
        assert!((belief.confidence - 0.88).abs() < 0.01);
        assert_eq!(belief.entrenchment, "high");
        assert_eq!(belief.status, "active");
        assert_eq!(belief.statement, "Prefer synchronous code.");
    }
}