use anyhow::Result;
use clap::{CommandFactory, Parser, Subcommand};
use ed25519_dalek::SigningKey;
use faculties::schemas::gauge::{WIKI_BRANCH_NAME, wiki};
use hifitime::Epoch;
use rand_core::OsRng;
use std::collections::HashMap;
use std::path::PathBuf;
use triblespace::core::metadata;
use triblespace::core::repo::Workspace;
use triblespace::prelude::*;
// Shorthand for a repository backed by a Blake3-hashed pile.
// NOTE(review): not referenced anywhere in the visible part of this file.
type Repo = Repository<Pile<valueschemas::Blake3>>;
// Timestamp representation used by `latest_versions`: the lower bound of a
// `created_at` interval, expressed as total TAI nanoseconds.
type Lower = i128;
// Command-line arguments of the `gauge` tool, parsed through clap's derive API.
// Plain `//` comments are used on purpose: clap derive turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser)]
#[command(
name = "gauge",
about = "Research quality gauge — reads wiki tag metadata"
)]
struct Cli {
// Path of the pile to open; given via `--pile` or the `PILE` environment variable.
#[arg(long, env = "PILE")]
pile: PathBuf,
// Optional hex-encoded branch id (`--branch-id`). When absent, `main` falls
// back to the branch named by `WIKI_BRANCH_NAME`.
#[arg(long)]
branch_id: Option<String>,
// Report to run. `main` prints the help text when no subcommand is given.
#[command(subcommand)]
command: Option<Commands>,
}
// Reports offered by the tool; `main` dispatches each variant to the matching
// `cmd_*` function. Plain `//` comments are used because clap derive would turn
// `///` doc comments into help text.
#[derive(Subcommand)]
enum Commands {
// Overall counts and ratios (`cmd_health`).
Health,
// Number of latest versions carrying each tag (`cmd_tags`).
Tags,
// Titles of fragments tagged "published" or "refuted" (`cmd_quality`).
Quality,
// Fragments with the most incoming links (`cmd_hubs`).
Hubs {
// How many entries to print (`-t` / `--top`, default 15).
#[arg(short, long, default_value = "15")]
top: usize,
},
// Fragments tagged "refuted" or "audit-warning" and the fragments linking to them (`cmd_risk`).
Risk,
// Per-month tag counts (`cmd_drift`).
Drift,
// Fragments whose latest version has no outgoing links (`cmd_orphans`).
Orphans {
// How many entries to print (`-t` / `--top`, default 20).
#[arg(short, long, default_value = "20")]
top: usize,
// Print only hex-encoded fragment ids, one per line.
#[arg(long)]
ids: bool,
},
}
/// Entry point: parses the command line, opens the pile, checks out the
/// selected branch and runs the requested report.
///
/// When no subcommand is given the help text is printed and the pile is never
/// opened.
///
/// # Errors
/// Fails when the pile cannot be opened or restored, when `--branch-id` is not
/// valid hex, has the wrong length ("bad branch id") or is rejected by
/// `Id::new` ("nil branch id"), or when ensuring, pulling or checking out the
/// branch fails. Errors returned by the report itself are propagated as well.
fn main() -> Result<()> {
    let cli = Cli::parse();
    // Bind the subcommand once so the dispatch below needs no `unwrap`.
    let Some(command) = cli.command else {
        Cli::command().print_help()?;
        return Ok(());
    };

    let mut pile = Pile::open(&cli.pile)?;
    pile.restore()?;
    // A fresh signing key is generated on every run.
    let mut repo = Repository::new(pile, SigningKey::generate(&mut OsRng), TribleSet::new())?;

    let bid = match &cli.branch_id {
        Some(hex_str) => {
            let raw = hex::decode(hex_str)?;
            Id::new(
                raw.try_into()
                    .map_err(|_| anyhow::anyhow!("bad branch id"))?,
            )
            .ok_or_else(|| anyhow::anyhow!("nil branch id"))?
        }
        // NOTE(review): judging by its name, `ensure_branch` may create the
        // wiki branch when it is missing — confirm against the repository API.
        None => repo
            .ensure_branch(WIKI_BRANCH_NAME, None)
            .map_err(|e| anyhow::anyhow!("ensure wiki branch: {e:?}"))?,
    };

    let mut ws = repo.pull(bid).map_err(|e| anyhow::anyhow!("pull: {e:?}"))?;
    let space = ws
        .checkout(..)
        .map_err(|e| anyhow::anyhow!("checkout: {e:?}"))?;

    match command {
        Commands::Health => cmd_health(&space, &mut ws),
        Commands::Tags => cmd_tags(&space, &mut ws),
        Commands::Quality => cmd_quality(&space, &mut ws),
        Commands::Hubs { top } => cmd_hubs(&space, &mut ws, top),
        Commands::Risk => cmd_risk(&space, &mut ws),
        Commands::Drift => cmd_drift(&space, &mut ws),
        Commands::Orphans { top, ids } => cmd_orphans(&space, &mut ws, top, ids),
    }
}
/// Selects, for every wiki fragment, the version with the greatest creation time.
///
/// Each entity carrying both `wiki::fragment` and `metadata::created_at` is
/// treated as a version of that fragment. Only the lower bound of the
/// `created_at` interval is compared, converted to total TAI nanoseconds.
///
/// Returns a map from fragment id to `(version id, timestamp)`. When two
/// versions share a timestamp, the one yielded first by the query is kept.
fn latest_versions(space: &TribleSet) -> HashMap<Id, (Id, Lower)> {
    let mut newest: HashMap<Id, (Id, Lower)> = HashMap::new();
    for (version, fragment_id, (start, _end)) in find!(
        (ver: Id, frag_id: Id, stamp: (Epoch, Epoch)),
        pattern!(space, [{
            ?ver @
            wiki::fragment: ?frag_id,
            metadata::created_at: ?stamp,
        }])
    ) {
        let nanos = start.to_tai_duration().total_nanoseconds();
        match newest.get_mut(&fragment_id) {
            // Replace only on a strictly newer timestamp.
            Some(slot) => {
                if nanos > slot.1 {
                    *slot = (version, nanos);
                }
            }
            None => {
                newest.insert(fragment_id, (version, nanos));
            }
        }
    }
    newest
}
/// Returns every id attached to the entity `vid` through `metadata::tag`.
fn tags_of(space: &TribleSet, vid: Id) -> Vec<Id> {
    let mut tag_ids: Vec<Id> = Vec::new();
    tag_ids.extend(find!(
        label: Id,
        pattern!(space, [{ &vid @ metadata::tag: ?label }])
    ));
    tag_ids
}
/// Resolves a tag id to a display name.
///
/// Takes the first `metadata::name` handle found for `tag_id` and reads the
/// referenced string blob through `ws`. When the tag has no name, or the blob
/// cannot be read, the `Debug` rendering of the id is returned instead.
fn tag_name(
    space: &TribleSet,
    ws: &mut Workspace<Pile<valueschemas::Blake3>>,
    tag_id: Id,
) -> String {
    // Only the first match is used, so take it straight from the iterator
    // instead of collecting every result into a temporary `Vec`.
    find!(
        h: Value<valueschemas::Handle<valueschemas::Blake3, blobschemas::LongString>>,
        pattern!(space, [{ &tag_id @ metadata::name: ?h }])
    )
    .next()
    .and_then(|h| ws.get::<View<str>, _>(h).ok())
    .map(|v| {
        let s: &str = v.as_ref();
        s.to_string()
    })
    .unwrap_or_else(|| format!("{tag_id:?}"))
}
/// Prints the overall health report: version, link and orphan counts, per-tag
/// counts for a fixed set of tag names, and a few derived ratios.
///
/// Only the latest version of each fragment (see `latest_versions`) is
/// counted. An "orphan" here is a latest version with no outgoing
/// `wiki::links_to` edges.
///
/// Always returns `Ok(())`; the `Result` keeps the signature uniform with the
/// other `cmd_*` functions.
fn cmd_health(space: &TribleSet, ws: &mut Workspace<Pile<valueschemas::Blake3>>) -> Result<()> {
    let latest = latest_versions(space);
    let total = latest.len();

    // Count per tag id first so each distinct tag's name is resolved only once
    // instead of once per occurrence.
    let mut per_tag: HashMap<Id, usize> = HashMap::new();
    let mut orphan_count = 0usize;
    let mut link_count = 0usize;
    for (vid, _ts) in latest.values() {
        for tag_id in tags_of(space, *vid) {
            *per_tag.entry(tag_id).or_insert(0) += 1;
        }
        let outgoing = find!(
            target: Id,
            pattern!(space, [{ vid @ wiki::links_to: ?target }])
        )
        .count();
        if outgoing == 0 {
            orphan_count += 1;
        }
        link_count += outgoing;
    }

    // Distinct tag ids that resolve to the same name are summed together.
    let mut tag_counts: HashMap<String, usize> = HashMap::new();
    for (tag_id, n) in per_tag {
        *tag_counts.entry(tag_name(space, ws, tag_id)).or_insert(0) += n;
    }
    let count_of = |name: &str| tag_counts.get(name).copied().unwrap_or(0);

    let published = count_of("published");
    let refuted = count_of("refuted");
    let preprint = count_of("preprint");
    let hypothesis = count_of("hypothesis");
    let evidence = count_of("evidence");
    let review = count_of("review");
    let synthesis = count_of("synthesis");
    let prediction = count_of("prediction");
    let finding = count_of("finding");
    let audit_warning = count_of("audit-warning");

    // An empty wiki would otherwise divide by zero and print `NaN`.
    let (links_per_version, orphan_pct) = if total == 0 {
        (0.0, 0.0)
    } else {
        (
            link_count as f64 / total as f64,
            100.0 * orphan_count as f64 / total as f64,
        )
    };

    println!("=== GAUGE: Research Health ===");
    println!();
    println!("Versions: {total}");
    println!(
        "Links: {link_count} ({:.1} per version)",
        links_per_version
    );
    println!(
        "Orphans: {orphan_count} ({:.0}%)",
        orphan_pct
    );
    println!();
    println!("--- Epistemic Status ---");
    println!(" Published: {published:>4}");
    println!(" Refuted: {refuted:>4}");
    println!(" Preprint: {preprint:>4}");
    println!(" Audit-warning: {audit_warning:>4}");
    println!();
    println!("--- Content Type ---");
    println!(" Synthesis: {synthesis:>4}");
    println!(" Hypothesis: {hypothesis:>4}");
    println!(" Evidence: {evidence:>4}");
    println!(" Finding: {finding:>4}");
    println!(" Review: {review:>4}");
    println!(" Prediction: {prediction:>4}");
    println!();
    println!("--- Ratios ---");
    if published + refuted > 0 {
        println!(
            " Survival rate: {:.0}% ({published} published / {} tested)",
            100.0 * published as f64 / (published + refuted) as f64,
            published + refuted
        );
    }
    if synthesis > 0 {
        println!(
            " Theory grounding: {:.1}% ({published} published / {synthesis} synthesis)",
            100.0 * published as f64 / synthesis as f64
        );
    }
    if hypothesis > 0 {
        let tested = evidence + finding;
        println!(
            " Hypothesis coverage: {tested} evidence+findings / {hypothesis} hypotheses ({:.0}%)",
            100.0 * tested as f64 / hypothesis as f64
        );
    }
    if prediction > 0 {
        // NOTE(review): `refuted` is the overall refuted count, not the number
        // of refuted predictions — confirm this is the intended figure.
        println!(" Predictions: {prediction} made ({refuted} refuted, track outcomes!)");
    }
    if review > 0 {
        // `max(1)` keeps the ratio finite when nothing is published yet.
        println!(
            " Review density: {:.1} reviews per published finding",
            review as f64 / published.max(1) as f64
        );
    }
    println!();
    Ok(())
}
fn cmd_tags(space: &TribleSet, ws: &mut Workspace<Pile<valueschemas::Blake3>>) -> Result<()> {
let latest = latest_versions(space);
let mut tag_counts: HashMap<String, usize> = HashMap::new();
for (_frag, (vid, _ts)) in &latest {
let tags = tags_of(space, *vid);
for tag_id in &tags {
let name = tag_name(space, ws, *tag_id);
*tag_counts.entry(name).or_insert(0) += 1;
}
}
let mut sorted: Vec<_> = tag_counts.into_iter().collect();
sorted.sort_by(|a, b| b.1.cmp(&a.1));
println!("=== GAUGE: Tag Counts ===");
println!();
for (name, count) in sorted {
println!(" {name:<25} {count:>4}");
}
println!();
Ok(())
}
/// Lists the titles of fragments whose latest version is tagged "published"
/// or "refuted", followed by the published share of the two groups combined.
///
/// Titles are cut to 60 characters and each list is sorted alphabetically so
/// the report is identical between runs. A fragment carrying both tags appears
/// in both lists. Fragments without a readable title are shown as "untitled".
///
/// Always returns `Ok(())`.
fn cmd_quality(space: &TribleSet, ws: &mut Workspace<Pile<valueschemas::Blake3>>) -> Result<()> {
    let latest = latest_versions(space);
    let mut published_frags: Vec<String> = Vec::new();
    let mut refuted_frags: Vec<String> = Vec::new();

    for (vid, _ts) in latest.values() {
        let mut is_published = false;
        let mut is_refuted = false;
        for tag_id in tags_of(space, *vid) {
            match tag_name(space, ws, tag_id).as_str() {
                "published" => is_published = true,
                "refuted" => is_refuted = true,
                _ => {}
            }
        }
        // The title is only needed for fragments that end up in a list.
        if !(is_published || is_refuted) {
            continue;
        }

        let title: String = find!(
            h: Value<valueschemas::Handle<valueschemas::Blake3, blobschemas::LongString>>,
            pattern!(space, [{ vid @ wiki::title: ?h }])
        )
        .next()
        .and_then(|h| ws.get::<View<str>, _>(h).ok())
        .map(|v| {
            let s: &str = v.as_ref();
            s.to_string()
        })
        .unwrap_or_else(|| "untitled".to_string());
        let short_title: String = title.chars().take(60).collect();

        if is_published {
            published_frags.push(short_title.clone());
        }
        if is_refuted {
            refuted_frags.push(short_title);
        }
    }

    // `latest` is a `HashMap`; sort so the listing order is stable.
    published_frags.sort();
    refuted_frags.sort();

    println!("=== GAUGE: Quality Assessment ===");
    println!();
    println!("PUBLISHED ({}):", published_frags.len());
    for t in &published_frags {
        println!(" + {t}");
    }
    println!();
    println!("REFUTED ({}):", refuted_frags.len());
    for t in &refuted_frags {
        println!(" - {t}");
    }
    println!();
    let total = published_frags.len() + refuted_frags.len();
    if total > 0 {
        println!(
            "Survival: {}/{} ({:.0}%)",
            published_frags.len(),
            total,
            100.0 * published_frags.len() as f64 / total as f64
        );
    }
    println!();
    Ok(())
}
/// Prints the `top` most linked-to fragments.
///
/// Incoming links are counted from the latest version of every fragment. A
/// link target that is itself a latest version id is folded onto its fragment
/// id; any other target is counted under the id it was stored with.
/// NOTE(review): links that point at an older version id are therefore not
/// folded onto their fragment — confirm whether that is intended.
///
/// Entries with equal link counts are ordered by id bytes so the ranking is
/// the same on every run. Always returns `Ok(())`.
fn cmd_hubs(
    space: &TribleSet,
    ws: &mut Workspace<Pile<valueschemas::Blake3>>,
    top: usize,
) -> Result<()> {
    let latest = latest_versions(space);
    let vid_to_frag: HashMap<Id, Id> = latest
        .iter()
        .map(|(frag, (vid, _))| (*vid, *frag))
        .collect();

    let mut incoming: HashMap<Id, usize> = HashMap::new();
    for (vid, _ts) in latest.values() {
        for target in find!(
            target: Id,
            pattern!(space, [{ vid @ wiki::links_to: ?target }])
        ) {
            let canonical = vid_to_frag.get(&target).copied().unwrap_or(target);
            *incoming.entry(canonical).or_insert(0) += 1;
        }
    }

    let mut sorted: Vec<(Id, usize)> = incoming.into_iter().collect();
    // Count descending; id bytes ascending break ties deterministically.
    sorted.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0[..].cmp(&b.0[..])));

    println!("=== GAUGE: Knowledge Hubs (most-linked fragments) ===");
    println!();
    for (id, count) in sorted.into_iter().take(top) {
        // `latest` already maps fragment -> latest version; ids that are not a
        // known fragment are looked up as-is.
        let lookup_vid = latest.get(&id).map(|(vid, _)| *vid).unwrap_or(id);
        let title: String = find!(
            h: Value<valueschemas::Handle<valueschemas::Blake3, blobschemas::LongString>>,
            pattern!(space, [{ &lookup_vid @ wiki::title: ?h }])
        )
        .next()
        .and_then(|h| ws.get::<View<str>, _>(h).ok())
        .map(|v| {
            let s: &str = v.as_ref();
            s.to_string()
        })
        .unwrap_or_else(|| format!("(unknown {:X?})", &id[..4]));
        let short: String = title.chars().take(65).collect();
        println!(" {count:>3} links <- {short}");
    }
    println!();
    Ok(())
}
/// Prints fragments tagged "refuted" or "audit-warning" and the other
/// fragments whose latest version links to any of them.
///
/// Link targets are matched by fragment id; a target that is a latest version
/// id is first mapped back to its fragment. Flagged fragments themselves are
/// not reported as contaminated. Both listings are sorted (flagged sources by
/// title; contaminated fragments by number of flagged citations, then title)
/// so the report is the same on every run.
///
/// Prints a single line and returns early when nothing is flagged. Always
/// returns `Ok(())`.
fn cmd_risk(space: &TribleSet, ws: &mut Workspace<Pile<valueschemas::Blake3>>) -> Result<()> {
    let latest = latest_versions(space);

    // Fragment id -> (title, risk tags) for every flagged fragment.
    let mut flagged: HashMap<Id, (String, Vec<String>)> = HashMap::new();
    for (frag, (vid, _ts)) in &latest {
        let risk_tags: Vec<String> = tags_of(space, *vid)
            .into_iter()
            .map(|t| tag_name(space, ws, t))
            .filter(|t| t == "refuted" || t == "audit-warning")
            .collect();
        if risk_tags.is_empty() {
            continue;
        }
        let title: String = find!(
            h: Value<valueschemas::Handle<valueschemas::Blake3, blobschemas::LongString>>,
            pattern!(space, [{ vid @ wiki::title: ?h }])
        )
        .next()
        .and_then(|h| ws.get::<View<str>, _>(h).ok())
        .map(|v| {
            let s: &str = v.as_ref();
            s.to_string()
        })
        .unwrap_or_else(|| "untitled".to_string());
        flagged.insert(*frag, (title, risk_tags));
    }

    if flagged.is_empty() {
        println!("No audit-warned or refuted fragments found.");
        return Ok(());
    }

    let vid_to_frag: HashMap<Id, Id> = latest
        .iter()
        .map(|(frag, (vid, _))| (*vid, *frag))
        .collect();

    println!("=== GAUGE: Risk Scan — Fragments Citing Flagged Sources ===");
    println!();
    println!("Flagged sources ({}):", flagged.len());
    // Only the values are needed here; sort by title for a stable listing.
    let mut flagged_rows: Vec<&(String, Vec<String>)> = flagged.values().collect();
    flagged_rows.sort_by(|a, b| a.0.cmp(&b.0));
    for (title, tags) in flagged_rows {
        let short: String = title.chars().take(55).collect();
        println!(" [{tags}] {short}", tags = tags.join(", "));
    }
    println!();

    // (title, shortened titles of the flagged sources it links to)
    let mut contaminated: Vec<(String, Vec<String>)> = Vec::new();
    for (frag, (vid, _ts)) in &latest {
        if flagged.contains_key(frag) {
            continue;
        }
        let mut cited_flagged: Vec<String> = Vec::new();
        for target in find!(
            target: Id,
            pattern!(space, [{ vid @ wiki::links_to: ?target }])
        ) {
            let canonical = vid_to_frag.get(&target).copied().unwrap_or(target);
            if let Some((flagged_title, _)) = flagged.get(&canonical) {
                cited_flagged.push(flagged_title.chars().take(30).collect());
            }
        }
        if cited_flagged.is_empty() {
            continue;
        }
        let title: String = find!(
            h: Value<valueschemas::Handle<valueschemas::Blake3, blobschemas::LongString>>,
            pattern!(space, [{ vid @ wiki::title: ?h }])
        )
        .next()
        .and_then(|h| ws.get::<View<str>, _>(h).ok())
        .map(|v| {
            let s: &str = v.as_ref();
            s.to_string()
        })
        .unwrap_or_else(|| "untitled".to_string());
        contaminated.push((title, cited_flagged));
    }

    // Most flagged citations first; title ascending breaks ties.
    contaminated.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(&b.0)));
    println!(
        "Potentially contaminated fragments ({}):",
        contaminated.len()
    );
    for (title, sources) in &contaminated {
        let short: String = title.chars().take(55).collect();
        println!(" {short}");
        for src in sources {
            println!(" cites -> {src}");
        }
    }
    println!();
    Ok(())
}
/// Lists fragments whose latest version has no outgoing `wiki::links_to` edges.
///
/// Orphans are sorted by title, with fragment id bytes as tie-break so that
/// `top` always selects the same entries. With `ids_only` set, only the
/// hex-encoded fragment ids of the first `top` orphans are printed, one per
/// line. Otherwise a report is printed with the total, the orphan percentage
/// (0% when there are no fragments at all) and, per orphan, its title cut to
/// 60 characters plus up to three tags other than "version", "typst" and
/// "markdown".
///
/// Always returns `Ok(())`.
fn cmd_orphans(
    space: &TribleSet,
    ws: &mut Workspace<Pile<valueschemas::Blake3>>,
    top: usize,
    ids_only: bool,
) -> Result<()> {
    let latest = latest_versions(space);
    let mut orphans: Vec<(Id, String, Vec<String>)> = Vec::new();
    for (frag, (vid, _ts)) in &latest {
        // One match is enough to know the version is not an orphan.
        let has_links = find!(
            target: Id,
            pattern!(space, [{ vid @ wiki::links_to: ?target }])
        )
        .next()
        .is_some();
        if has_links {
            continue;
        }
        let title: String = find!(
            h: Value<valueschemas::Handle<valueschemas::Blake3, blobschemas::LongString>>,
            pattern!(space, [{ vid @ wiki::title: ?h }])
        )
        .next()
        .and_then(|h| ws.get::<View<str>, _>(h).ok())
        .map(|v| {
            let s: &str = v.as_ref();
            s.to_string()
        })
        .unwrap_or_else(|| "untitled".to_string());
        let tag_names: Vec<String> = tags_of(space, *vid)
            .into_iter()
            .map(|t| tag_name(space, ws, t))
            .collect();
        orphans.push((*frag, title, tag_names));
    }
    // Title ascending; id bytes break ties (e.g. several "untitled" entries).
    orphans.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0[..].cmp(&b.0[..])));

    if ids_only {
        for (frag, _, _) in orphans.iter().take(top) {
            println!("{}", hex::encode(**frag));
        }
        return Ok(());
    }

    // Avoid printing `NaN%` when there are no fragments.
    let orphan_pct = if latest.is_empty() {
        0.0
    } else {
        100.0 * orphans.len() as f64 / latest.len() as f64
    };

    println!("=== GAUGE: Orphan Fragments (no outgoing links) ===");
    println!();
    println!(
        "Total orphans: {} / {} ({:.0}%)",
        orphans.len(),
        latest.len(),
        orphan_pct
    );
    println!();
    for (_, title, tags) in orphans.iter().take(top) {
        let short: String = title.chars().take(60).collect();
        let tag_str: String = tags
            .iter()
            .filter(|t| *t != "version" && *t != "typst" && *t != "markdown")
            .take(3)
            .cloned()
            .collect::<Vec<_>>()
            .join(", ");
        println!(" {short}");
        if !tag_str.is_empty() {
            println!(" [{tag_str}]");
        }
    }
    if orphans.len() > top {
        println!(" ... and {} more", orphans.len() - top);
    }
    println!();
    Ok(())
}
/// Prints a per-month table of how many latest fragment versions were created
/// and how many of them carry selected tags.
///
/// Months are derived from the timestamp stored by `latest_versions` (TAI
/// nanoseconds) converted to a UTC calendar date. Versions dated outside
/// 2020..=2030 are skipped.
///
/// Always returns `Ok(())`.
fn cmd_drift(space: &TribleSet, ws: &mut Workspace<Pile<valueschemas::Blake3>>) -> Result<()> {
    let latest = latest_versions(space);
    // "YYYY-MM" -> counter name -> count; a `BTreeMap` keeps months in order.
    let mut buckets: std::collections::BTreeMap<String, HashMap<String, usize>> =
        std::collections::BTreeMap::new();

    for (vid, ts_ns) in latest.values() {
        // Rebuild the epoch from the full i128 nanosecond count; narrowing it
        // to u64 with `as` would silently wrap for out-of-range values.
        let epoch =
            Epoch::from_tai_duration(hifitime::Duration::from_total_nanoseconds(*ts_ns));
        let (year, month, _, _, _, _, _) = epoch.to_gregorian_utc();
        if !(2020..=2030).contains(&year) {
            continue;
        }
        let bucket = format!("{year:04}-{month:02}");
        let entry = buckets.entry(bucket).or_default();
        *entry.entry("total".to_string()).or_insert(0) += 1;
        for tag_id in tags_of(space, *vid) {
            let name = tag_name(space, ws, tag_id);
            // Only a subset of these counters is shown in the table below.
            match name.as_str() {
                "published" | "refuted" | "preprint" | "hypothesis" | "evidence" | "review"
                | "synthesis" | "finding" | "prediction" | "audit-warning" | "experiment" => {
                    *entry.entry(name).or_insert(0) += 1;
                }
                _ => {}
            }
        }
    }

    println!("=== GAUGE: Research Drift Over Time ===");
    println!();
    println!(
        "{:<10} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5}",
        "Month", "Total", "Synth", "Evid", "Hypo", "Rev", "Pub", "Ref", "Pred"
    );
    println!("{}", "-".repeat(75));
    for (month, counts) in &buckets {
        let count_of = |name: &str| counts.get(name).copied().unwrap_or(0);
        let total = count_of("total");
        let synth = count_of("synthesis");
        let evid = count_of("evidence");
        let hypo = count_of("hypothesis");
        let rev = count_of("review");
        let publ = count_of("published");
        let refut = count_of("refuted");
        let pred = count_of("prediction");
        println!("{month:<10} {total:>5} {synth:>5} {evid:>5} {hypo:>5} {rev:>5} {publ:>5} {refut:>5} {pred:>5}");
    }
    println!();
    Ok(())
}