use std::collections::HashSet;
use std::io::Write;
use ruve::database::Database;
use ruve::storage::retrieve_record;
/// Vertical distance, in scene units, between two consecutive HNSW layers.
/// Used both for the `y` coordinate of every node instance and as the `LS`
/// value substituted into the HTML template.
const LAYER_SPACING: f64 = 10.0;
/// Maximum number of nodes drawn; graphs with more nodes are sampled down to
/// at most this many before the page is generated.
const HARD_MAX_NODES: usize = 2_000;
/// Self-contained HTML page used as the output template.
///
/// The page loads three.js 0.160.0 from jsDelivr through an import map and
/// renders the graph with an `InstancedMesh` for nodes and one `LineSegments`
/// object per layer for edges. `main` fills in the `__UPPER_CASE__`
/// placeholders by plain string replacement: `__KEY__`, `__N_NODES__`,
/// `__SAMPLED_NOTE__`, `__TOTAL_EDGES__`, `__N_LAYERS__`, `__DATA_JS__`,
/// `__LAYER_SPACING__`, `__NODE_RADIUS__`, `__SPHERE_W__`, `__SPHERE_H__`,
/// `__EDGE_OPACITY_0__` and `__EDGE_OPACITY_N__`.
///
/// Record ids and text shown in the node-info panel are HTML-escaped by the
/// embedded `escapeHtml` helper before being assigned to `innerHTML`.
const HTML: &str = r#"<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>RuVe HNSW – __KEY__</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { background: #080810; overflow: hidden; font-family: 'Courier New', monospace; color: #ddd; }
#info {
position: fixed; top: 12px; left: 12px;
background: rgba(0,0,0,.72); border: 1px solid #2a2a3a; border-radius: 6px;
padding: 10px 14px; font-size: 12px; line-height: 2; pointer-events: none;
}
#info b { color: #7af; font-size: 13px; }
#legend {
position: fixed; top: 12px; right: 12px;
background: rgba(0,0,0,.72); border: 1px solid #2a2a3a; border-radius: 6px;
padding: 10px 14px; font-size: 12px; min-width: 140px;
}
#legend b { color: #7af; }
#layer-list label { display: block; cursor: pointer; margin: 4px 0; }
#layer-list input { cursor: pointer; margin-right: 5px; }
.dot { display: inline-block; width: 9px; height: 9px; border-radius: 50%; margin-right: 5px; vertical-align: middle; }
#node-info {
display: none;
position: fixed; bottom: 16px; left: 16px;
background: rgba(0,0,0,.88); border: 1px solid #444; border-radius: 6px;
padding: 10px 14px; font-size: 12px; line-height: 1.9;
min-width: 220px; max-width: 360px;
}
#node-info b { color: #fa8; font-size: 13px; }
#node-info .id { color: #777; font-size: 10px; word-break: break-all; }
#node-info .txt { color: #dfd; margin-top: 5px; padding-top: 5px; border-top: 1px solid #333;
white-space: pre-wrap; word-break: break-word; line-height: 1.5; }
#node-info .hint { color: #555; font-size: 11px; margin-top: 6px; }
</style>
</head>
<body>
<div id="info">
<b>RuVe HNSW – __KEY__</b><br>
Nodes: __N_NODES____SAMPLED_NOTE__<br>
Edges: __TOTAL_EDGES__<br>
Layers: __N_LAYERS__<br>
<br><span style="color:#555">Drag · Scroll · Right-drag pan<br>Click node to inspect</span>
</div>
<div id="legend"><b>Layers</b><br><div id="layer-list"></div></div>
<div id="node-info"></div>
<script type="importmap">
{"imports":{"three":"https://cdn.jsdelivr.net/npm/three@0.160.0/build/three.module.js","three/addons/":"https://cdn.jsdelivr.net/npm/three@0.160.0/examples/jsm/"}}
</script>
<script type="module">
import * as THREE from 'three';
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
__DATA_JS__
// ── Scene setup ────────────────────────────────────────────────────────────
const renderer = new THREE.WebGLRenderer({ antialias: true });
renderer.setPixelRatio(devicePixelRatio);
renderer.setSize(innerWidth, innerHeight);
renderer.setClearColor(0x080810);
document.body.appendChild(renderer.domElement);
const scene = new THREE.Scene();
scene.fog = new THREE.FogExp2(0x080810, 0.007);
const LS = __LAYER_SPACING__;
const nLayers = DATA.highestLayer + 1;
const mid = DATA.highestLayer * LS * 0.5;
const camera = new THREE.PerspectiveCamera(60, innerWidth / innerHeight, 0.1, 1200);
camera.position.set(0, mid + 20, 70);
const controls = new OrbitControls(camera, renderer.domElement);
controls.enableDamping = true;
controls.dampingFactor = 0.07;
controls.target.set(0, mid * 0.4, 0);
window.addEventListener('resize', () => {
camera.aspect = innerWidth / innerHeight;
camera.updateProjectionMatrix();
renderer.setSize(innerWidth, innerHeight);
});
// ── Colour helpers ─────────────────────────────────────────────────────────
function layerColor(l) {
const t = nLayers <= 1 ? 0 : l / (nLayers - 1);
return new THREE.Color().setHSL(0.65 - t * 0.65, 0.85, 0.55); // blue → red
}
function dimColor(c) {
return new THREE.Color(c.r * 0.12, c.g * 0.12, c.b * 0.12);
}
// ── Pre-computed lookups ───────────────────────────────────────────────────
// nodeToInsts[nodeRank] → [instanceIdx, ...] (one per layer the node exists in)
// nodeLayerToInst[nodeRank][l] → instanceIdx (or -1)
// adjacency[layer] → Map<nodeRank, Set<nodeRank>>
const nodeToInsts = DATA.nodes.map(() => []);
DATA.instances.forEach((inst, i) => nodeToInsts[inst.ni].push(i));
const nodeLayerToInst = DATA.nodes.map(() => new Array(nLayers).fill(-1));
DATA.instances.forEach((inst, i) => { nodeLayerToInst[inst.ni][inst.l] = i; });
const adjacency = DATA.edges.map(flat => {
const adj = new Map();
for (let i = 0; i < flat.length; i += 2) {
const a = flat[i], b = flat[i + 1];
if (!adj.has(a)) adj.set(a, new Set());
if (!adj.has(b)) adj.set(b, new Set());
adj.get(a).add(b);
adj.get(b).add(a);
}
return adj;
});
// ── Nodes — InstancedMesh (one instance per DATA.instances entry) ───────────
const dummy = new THREE.Object3D();
const iMesh = new THREE.InstancedMesh(
new THREE.SphereGeometry(__NODE_RADIUS__, __SPHERE_W__, __SPHERE_H__),
new THREE.MeshPhongMaterial(),
DATA.instances.length
);
iMesh.frustumCulled = false;
DATA.instances.forEach((inst, i) => {
const nd = DATA.nodes[inst.ni];
dummy.position.set(inst.x, inst.y, inst.z);
dummy.scale.setScalar(0.7 + inst.l / nLayers * 1.0);
dummy.updateMatrix();
iMesh.setMatrixAt(i, dummy.matrix);
iMesh.setColorAt(i, layerColor(inst.l));
});
iMesh.instanceMatrix.needsUpdate = true;
if (iMesh.instanceColor) iMesh.instanceColor.needsUpdate = true;
scene.add(iMesh);
// Store base colors and matrices so we can restore them on deselect
const baseColors = new Float32Array(DATA.instances.length * 3);
const baseMatrices = new Float32Array(DATA.instances.length * 16);
DATA.instances.forEach((inst, i) => {
layerColor(inst.l).toArray(baseColors, i * 3);
iMesh.getMatrixAt(i, dummy.matrix);
dummy.matrix.toArray(baseMatrices, i * 16);
});
// ── Lights ─────────────────────────────────────────────────────────────────
scene.add(new THREE.AmbientLight(0xffffff, 0.45));
const sun = new THREE.DirectionalLight(0xffffff, 1.1);
sun.position.set(30, 60, 20);
scene.add(sun);
// ── Edge lines per layer ────────────────────────────────────────────────────
const layerMeshes = DATA.edges.map((flat, layer) => {
const pos = [];
for (let i = 0; i < flat.length; i += 2) {
const ia = nodeLayerToInst[flat[i]][layer];
const ib = nodeLayerToInst[flat[i + 1]][layer];
if (ia === -1 || ib === -1) continue;
const a = DATA.instances[ia], b = DATA.instances[ib];
pos.push(a.x, a.y, a.z, b.x, b.y, b.z);
}
const geo = new THREE.BufferGeometry();
geo.setAttribute('position', new THREE.Float32BufferAttribute(pos, 3));
const lines = new THREE.LineSegments(geo, new THREE.LineBasicMaterial({
color: layerColor(layer),
opacity: layer === 0 ? __EDGE_OPACITY_0__ : __EDGE_OPACITY_N__,
transparent: true,
}));
scene.add(lines);
return lines;
});
// ── Layer planes ────────────────────────────────────────────────────────────
for (let l = 0; l <= DATA.highestLayer; l++) {
const m = new THREE.Mesh(
new THREE.CircleGeometry(26, 80),
new THREE.MeshBasicMaterial({ color: layerColor(l), opacity: 0.022, transparent: true, side: THREE.DoubleSide })
);
m.rotation.x = -Math.PI / 2;
m.position.y = l * LS;
scene.add(m);
}
// ── Selection highlight objects ─────────────────────────────────────────────
// Vertical connector — Line drawn through the selected node's layer stack
const connectorLine = new THREE.Line(
new THREE.BufferGeometry(),
new THREE.LineBasicMaterial({ color: 0xffffff, opacity: 0.9, transparent: true })
);
connectorLine.frustumCulled = false;
connectorLine.visible = false;
scene.add(connectorLine);
// Neighbour edges — LineSegments rebuilt on each selection
const neighborLines = new THREE.LineSegments(
new THREE.BufferGeometry(),
new THREE.LineBasicMaterial({ color: 0xff7733, opacity: 0.85, transparent: true })
);
neighborLines.frustumCulled = false;
neighborLines.visible = false;
scene.add(neighborLines);
// ── Selection state ─────────────────────────────────────────────────────────
let selectedRank = -1;
const nodeInfo = document.getElementById('node-info');
function resetAll() {
DATA.instances.forEach((_, i) => {
iMesh.setColorAt(i, new THREE.Color().fromArray(baseColors, i * 3));
dummy.matrix.fromArray(baseMatrices, i * 16);
iMesh.setMatrixAt(i, dummy.matrix);
});
iMesh.instanceColor.needsUpdate = true;
iMesh.instanceMatrix.needsUpdate = true;
}
// Escape record-supplied text before it is interpolated into innerHTML.
// Assigning to textContent of a detached element and reading innerHTML back
// lets the browser serialise markup-significant characters as entities.
function escapeHtml(s) {
const holder = document.createElement('div');
holder.textContent = String(s);
return holder.innerHTML;
}
function selectNode(rank) {
if (rank === selectedRank) {
selectedRank = -1;
resetAll();
connectorLine.visible = false;
neighborLines.visible = false;
nodeInfo.style.display = 'none';
return;
}
selectedRank = rank;
const nd = DATA.nodes[rank];
// Collect all neighbour node ranks (union across every layer this node lives in)
const neighborRanks = new Set();
for (let l = 0; l <= nd.ml; l++) {
const adj = adjacency[l].get(rank);
if (adj) for (const nb of adj) neighborRanks.add(nb);
}
// Recolour + resize instances
DATA.instances.forEach((inst, i) => {
if (inst.ni === rank) {
iMesh.setColorAt(i, new THREE.Color(1, 1, 1)); // selected: white
// Scale up across all layers
dummy.position.set(inst.x, inst.y, inst.z);
dummy.scale.setScalar(2.5);
dummy.updateMatrix();
iMesh.setMatrixAt(i, dummy.matrix);
} else {
iMesh.setColorAt(i, neighborRanks.has(inst.ni)
? new THREE.Color(1, 0.5, 0.1) // neighbour: orange
: dimColor(layerColor(inst.l))); // background: dimmed
dummy.matrix.fromArray(baseMatrices, i * 16);
iMesh.setMatrixAt(i, dummy.matrix);
}
});
iMesh.instanceColor.needsUpdate = true;
iMesh.instanceMatrix.needsUpdate = true;
// Vertical connector through all of this node's layer instances
const connPts = nodeToInsts[rank]
.slice()
.sort((a, b) => DATA.instances[a].l - DATA.instances[b].l)
.flatMap(i => { const v = DATA.instances[i]; return [v.x, v.y, v.z]; });
connectorLine.geometry.setAttribute(
'position', new THREE.Float32BufferAttribute(connPts, 3)
);
connectorLine.geometry.computeBoundingSphere();
connectorLine.visible = connPts.length >= 6; // need at least 2 points
// Neighbour connection lines (at each layer this node exists in)
const nbPts = [];
for (let l = 0; l <= nd.ml; l++) {
const selI = nodeLayerToInst[rank][l];
if (selI === -1) continue;
const sv = DATA.instances[selI];
const adj = adjacency[l].get(rank);
if (!adj) continue;
for (const nb of adj) {
const nbI = nodeLayerToInst[nb][l];
if (nbI === -1) continue;
const nv = DATA.instances[nbI];
nbPts.push(sv.x, sv.y, sv.z, nv.x, nv.y, nv.z);
}
}
neighborLines.geometry.setAttribute(
'position', new THREE.Float32BufferAttribute(nbPts, 3)
);
neighborLines.geometry.computeBoundingSphere();
neighborLines.visible = nbPts.length > 0;
// Info panel
let layerRows = '';
for (let l = 0; l <= nd.ml; l++) {
const cnt = adjacency[l].get(rank)?.size ?? 0;
const dot = `<span class="dot" style="background:#${layerColor(l).getHexString()}"></span>`;
layerRows += `${dot}Layer ${l}: ${cnt} neighbour${cnt !== 1 ? 's' : ''}<br>`;
}
// nd.txt and nd.id come from stored records, so they are escaped before
// being placed into markup.
const txtBlock = nd.txt
? `<div class="txt">${escapeHtml(nd.txt)}</div>`
: '';
nodeInfo.innerHTML =
`<b>Node #${rank}</b>${nd.e ? ' ★ entry' : ''}<br>` +
`<span class="id">${escapeHtml(nd.id)}</span><br>` +
`Max layer: ${nd.ml}<br>` +
layerRows +
txtBlock +
`<span class="hint">Click again or click empty space to deselect</span>`;
nodeInfo.style.display = 'block';
}
// ── Raycasting ──────────────────────────────────────────────────────────────
const raycaster = new THREE.Raycaster();
raycaster.params.Points = { threshold: 0.3 };
const mouse = new THREE.Vector2();
renderer.domElement.addEventListener('click', e => {
mouse.x = (e.clientX / innerWidth) * 2 - 1;
mouse.y = -(e.clientY / innerHeight) * 2 + 1;
raycaster.setFromCamera(mouse, camera);
const hits = raycaster.intersectObject(iMesh);
if (hits.length > 0) {
selectNode(DATA.instances[hits[0].instanceId].ni);
} else if (selectedRank !== -1) {
// click empty space → deselect
selectedRank = -1;
resetAll();
connectorLine.visible = false;
neighborLines.visible = false;
nodeInfo.style.display = 'none';
}
});
renderer.domElement.addEventListener('mousemove', e => {
mouse.x = (e.clientX / innerWidth) * 2 - 1;
mouse.y = -(e.clientY / innerHeight) * 2 + 1;
raycaster.setFromCamera(mouse, camera);
const hits = raycaster.intersectObject(iMesh);
renderer.domElement.style.cursor = hits.length > 0 ? 'pointer' : '';
});
// ── Legend / layer toggles ──────────────────────────────────────────────────
const list = document.getElementById('layer-list');
for (let l = 0; l <= DATA.highestLayer; l++) {
const label = document.createElement('label');
const cb = document.createElement('input');
cb.type = 'checkbox'; cb.checked = true;
const idx = l;
cb.addEventListener('change', () => { layerMeshes[idx].visible = cb.checked; });
const dot = document.createElement('span');
dot.className = 'dot';
dot.style.background = '#' + layerColor(l).getHexString();
label.append(cb, dot, 'Layer ' + l + (l === DATA.highestLayer ? ' ★' : ''));
list.appendChild(label);
}
// ── Render loop ─────────────────────────────────────────────────────────────
(function animate() {
requestAnimationFrame(animate);
controls.update();
renderer.render(scene, camera);
})();
</script>
</body>
</html>"#;
/// Loads an HNSW database and writes a standalone HTML page visualising it.
///
/// Command line: `visualize [scenario|path] [--out file.html]`.
///
/// * A known scenario name (`xxs`, `xs`, `small`, `medium`, `large`,
///   `highdim`) resolves to `/tmp/ruve_bench_<name>`; anything else is used
///   as the database directory itself.
/// * `--out` selects the output file (default `hnsw_graph.html`).
///
/// Graphs with more than [`HARD_MAX_NODES`] nodes are sampled: nodes that
/// reach above layer 0 and the entry point are taken first, then evenly
/// strided layer-0 nodes fill the remaining budget.
///
/// Exits with status 1 when the database contains no nodes, and panics if the
/// graph file's metadata cannot be read or the output file cannot be written.
fn main() {
    let args: Vec<String> = std::env::args().skip(1).collect();
    if args.iter().any(|a| a == "--help" || a == "-h") {
        println!("Usage: visualize [scenario|path] [--out file.html]");
        println!(" scenario xxs | xs | small | medium | large | highdim (default: xxs)");
        println!(" path directory of a CLI database, e.g. ./data");
        println!(" --out output path (default: hnsw_graph.html)");
        return;
    }
    // The value that follows `--out` must not be mistaken for the positional
    // scenario/path argument (e.g. `visualize --out graph.html`).
    let target = positional_target(&args).unwrap_or("xxs");
    let out_path = flag_value(&args, "--out").unwrap_or("hnsw_graph.html");
    const SCENARIO_KEYS: &[&str] = &["xxs", "xs", "small", "medium", "large", "highdim"];
    let (dir, key) = if SCENARIO_KEYS.contains(&target) {
        (format!("/tmp/ruve_bench_{target}"), target.to_string())
    } else {
        // Use the last path component as the display label.
        let label = std::path::Path::new(target)
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or(target)
            .to_string();
        (target.to_string(), label)
    };

    eprint!("Loading '{key}'...");
    std::io::stderr().flush().ok();
    // Two file-name variants are accepted for the BM25 index and the graph file.
    let bm25_path = pick(&dir, &["bm25.json", "bm25_index.json"]);
    let graph_path = pick(&dir, &["graph.bin", "hnsw_graph.bin"]);
    let db = Database::new(
        &format!("{dir}/data.bin"),
        &format!("{dir}/index.json"),
        &bm25_path,
        &format!("{dir}/hnsw.json"),
        &graph_path,
    );
    eprintln!(" done");

    let n = db.hnsw.node_offsets.len();
    if n == 0 {
        eprintln!("No nodes in '{key}'. Run first:");
        eprintln!(" cargo run --release --bin benchmark -- {key}");
        std::process::exit(1);
    }
    let highest = db.hnsw.highest_layer;
    eprintln!("{n} nodes · {} layers", highest + 1);

    // A node's highest layer is derived from how many fixed-size slots fit
    // between its offset and the next node's offset (or the end of the file).
    // NOTE(review): the slot size presumably is a 4-byte count plus
    // `max_neighbors_per_document` 4-byte ids — confirm against the graph writer.
    let graph_size = std::fs::metadata(&graph_path)
        .expect("graph.bin not found")
        .len();
    let slot = 4 + db.hnsw.max_neighbors_per_document * 4;
    let node_max_layer: Vec<usize> = (0..n).map(|i| {
        let start = db.hnsw.node_offsets[i];
        let end = if i + 1 < n { db.hnsw.node_offsets[i + 1] } else { graph_size };
        let num_slots = ((end - start) as usize) / slot;
        num_slots.saturating_sub(1)
    }).collect();

    // Choose which nodes to draw.
    let sampled: Vec<usize> = if n <= HARD_MAX_NODES {
        (0..n).collect()
    } else {
        eprintln!("Graph has {n} nodes; capping display at {HARD_MAX_NODES} (use xxs/xs for full view)");
        // Every node that reaches above layer 0 is kept first...
        let mut s: Vec<usize> = (0..n)
            .filter(|&i| node_max_layer[i] > 0)
            .collect();
        // ...together with the entry point...
        if let Some(ep) = db.hnsw.entry_point {
            let ep = ep as usize;
            if !s.contains(&ep) { s.push(ep); }
        }
        // ...and the remaining budget is filled with evenly strided layer-0 nodes.
        let remaining = HARD_MAX_NODES.saturating_sub(s.len());
        if remaining > 0 {
            let layer0: Vec<usize> = (0..n)
                .filter(|i| node_max_layer[*i] == 0)
                .collect();
            let step = (layer0.len() / remaining).max(1);
            s.extend(layer0.into_iter().step_by(step).take(remaining));
        }
        s.sort_unstable();
        s.dedup();
        s.truncate(HARD_MAX_NODES);
        s
    };
    let sampled_set: HashSet<usize> = sampled.iter().cloned().collect();
    let n_sampled = sampled.len();

    // Lay the nodes out on a golden-angle spiral in the XZ plane; the disc
    // radius grows with the number of nodes drawn.
    let base_radius = match n_sampled {
        0..=50 => 8.0,
        51..=150 => 13.0,
        151..=500 => 18.0,
        _ => 24.0,
    };
    let golden_angle = std::f64::consts::PI * (3.0 - 5.0_f64.sqrt());
    let xz: Vec<(f64, f64)> = (0..n_sampled).map(|rank| {
        let r = (rank as f64 / n_sampled as f64).sqrt() * base_radius;
        let theta = rank as f64 * golden_angle;
        (r * theta.cos(), r * theta.sin())
    }).collect();

    // Original node index → position ("rank") within `sampled`.
    let node_to_rank: std::collections::HashMap<usize, usize> =
        sampled.iter().enumerate().map(|(r, &idx)| (idx, r)).collect();

    // One JS object per drawn node: max layer, entry-point flag, base
    // position, record id and record text.
    let nodes_js: String = sampled.iter().zip(xz.iter()).map(|(&idx, &(bx, bz))| {
        let ml = node_max_layer[idx];
        let ep = db.hnsw.entry_point.map_or(false, |ep| ep as usize == idx);
        let uid = &db.hnsw.index_to_id[idx];
        let (rec_id, txt) = if let Some(&offset) = db.index.get(uid) {
            let rec = retrieve_record(offset, &db.data_path);
            let t = rec.metadata.unwrap_or_default();
            (rec.id, t)
        } else {
            (uid.clone(), String::new())
        };
        // Both values end up inside double-quoted JS string literals in an
        // inline <script>, so they need full literal escaping, not just quotes.
        let txt_escaped = escape_js_string(&txt);
        let id_escaped = escape_js_string(&rec_id);
        format!("{{\"ml\":{ml},\"e\":{ep},\"bx\":{bx:.3},\"bz\":{bz:.3},\"id\":\"{id_escaped}\",\"txt\":\"{txt_escaped}\"}}")
    }).collect::<Vec<_>>().join(",");

    // One instance per (node, layer) pair for every layer up to the node's max layer.
    let mut instances_json = Vec::new();
    for (rank, &idx) in sampled.iter().enumerate() {
        let ml = node_max_layer[idx];
        let (bx, bz) = xz[rank];
        for layer in 0..=ml {
            let y = layer as f64 * LAYER_SPACING;
            instances_json.push(format!(
                "{{\"ni\":{rank},\"l\":{layer},\"x\":{bx:.3},\"y\":{y:.3},\"z\":{bz:.3}}}"
            ));
        }
    }
    let instances_js = instances_json.join(",");

    eprint!("Reading edges...");
    std::io::stderr().flush().ok();
    // Per layer, a flat list of rank pairs [a0, b0, a1, b1, ...].
    let mut layer_edges: Vec<Vec<u32>> = vec![Vec::new(); highest + 1];
    let mut total_edges = 0usize;
    for &from in &sampled {
        let max_l = node_max_layer[from].min(highest);
        for layer in 0..=max_l {
            for to in db.hnsw.get_neighbors(from as u32, layer) {
                let to = to as usize;
                // Only links listed on the lower-indexed endpoint are kept, so a
                // mutual link is emitted once.
                // NOTE(review): a link stored only on the higher-indexed node is dropped.
                if from < to && sampled_set.contains(&to) {
                    let rf = node_to_rank[&from] as u32;
                    let rt = node_to_rank[&to] as u32;
                    layer_edges[layer].push(rf);
                    layer_edges[layer].push(rt);
                    total_edges += 1;
                }
            }
        }
    }
    eprintln!(" {total_edges} edges");

    let edges_js: String = layer_edges.iter().map(|flat| {
        format!("[{}]", flat.iter().map(|v| v.to_string()).collect::<Vec<_>>().join(","))
    }).collect::<Vec<_>>().join(",");
    let data_js = format!(
        "const DATA={{nodes:[{nodes_js}],instances:[{instances_js}],edges:[{edges_js}],\
highestLayer:{highest},nNodes:{n},nSampled:{n_sampled}}};"
    );
    let sampled_note = if n_sampled < n {
        format!(" (showing {n_sampled})")
    } else {
        String::new()
    };

    // Rendering detail is reduced as the number of drawn nodes grows.
    let (node_radius, sphere_w, sphere_h, edge_op0, edge_opn) = match n_sampled {
        0..=50 => (0.70, 14, 10, 0.65, 0.95),
        51..=150 => (0.45, 12, 8, 0.45, 0.85),
        151..=500 => (0.30, 10, 7, 0.28, 0.70),
        _ => (0.22, 8, 6, 0.13, 0.55),
    };

    // Free-form values (`key`, then the data blob) are substituted last so
    // that record text which happens to contain a placeholder such as
    // `__NODE_RADIUS__` is not rewritten by a later replace.
    let html = HTML
        .replace("__N_NODES__", &n.to_string())
        .replace("__SAMPLED_NOTE__", &sampled_note)
        .replace("__TOTAL_EDGES__", &total_edges.to_string())
        .replace("__N_LAYERS__", &(highest + 1).to_string())
        .replace("__LAYER_SPACING__", &LAYER_SPACING.to_string())
        .replace("__NODE_RADIUS__", &node_radius.to_string())
        .replace("__SPHERE_W__", &sphere_w.to_string())
        .replace("__SPHERE_H__", &sphere_h.to_string())
        .replace("__EDGE_OPACITY_0__", &edge_op0.to_string())
        .replace("__EDGE_OPACITY_N__", &edge_opn.to_string())
        .replace("__KEY__", &key)
        .replace("__DATA_JS__", &data_js);
    std::fs::write(out_path, html).expect("failed to write HTML");
    println!("Written: {out_path}");

    #[cfg(target_os = "macos")]
    { let _ = std::process::Command::new("open").arg(out_path).spawn(); }
    #[cfg(not(target_os = "macos"))]
    {
        // The file is already written; a failed path lookup should not abort.
        match std::fs::canonicalize(out_path) {
            Ok(abs) => println!("Open: file://{}", abs.display()),
            Err(_) => println!("Open: {out_path}"),
        }
    }
}

/// Returns the first positional argument, skipping the value that belongs to `--out`.
fn positional_target(args: &[String]) -> Option<&str> {
    let mut rest = args.iter();
    while let Some(arg) = rest.next() {
        if arg == "--out" {
            // The next argument is the output path, not a target.
            rest.next();
        } else if !arg.starts_with('-') {
            return Some(arg.as_str());
        }
    }
    None
}

/// Escapes `raw` for use inside a double-quoted JS string literal embedded in an inline `<script>`.
fn escape_js_string(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len() + 8);
    for ch in raw.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Written as an escape so the text can never form `</script>` or
            // `<!--` and terminate or confuse the surrounding script element.
            '<' => out.push_str("\\u003c"),
            '\u{2028}' => out.push_str("\\u2028"),
            '\u{2029}' => out.push_str("\\u2029"),
            c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
            c => out.push(c),
        }
    }
    out
}
/// Looks up the value supplied for a command-line flag.
///
/// Returns the argument that directly follows the first occurrence of `flag`
/// in `args`, or `None` when the flag is absent or is the last argument.
fn flag_value<'a>(args: &'a [String], flag: &str) -> Option<&'a str> {
    let mut remaining = args.iter();
    // `find` consumes everything up to and including the flag itself, leaving
    // the iterator positioned on the flag's value.
    remaining.find(|candidate| candidate.as_str() == flag)?;
    remaining.next().map(String::as_str)
}
/// Chooses which of several alternative file names to use inside `dir`.
///
/// Candidates are tried in order and the first `"{dir}/{name}"` path that
/// exists on disk is returned. When none exists, the path built from the
/// first candidate is returned.
///
/// # Panics
///
/// Panics if `candidates` is empty.
fn pick(dir: &str, candidates: &[&str]) -> String {
    for name in candidates {
        let candidate_path = format!("{dir}/{name}");
        if std::path::Path::new(&candidate_path).exists() {
            return candidate_path;
        }
    }
    // Nothing on disk matched: fall back to the preferred (first) name.
    format!("{dir}/{}", candidates[0])
}