use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
use crate::index::hasher;
/// Package name of the shared core crate. A member's dependency on this
/// package is inspected for its enabled `features` when the model is built.
const CORE_PACKAGE: &str = "gobby-core";
/// A workspace member crate discovered from its `Cargo.toml`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
pub struct Crate {
/// Package name taken from the manifest's `[package].name`.
pub name: String,
/// Member path exactly as listed in the workspace manifest's `members` array.
pub path: String,
/// True when the manifest declares a non-empty `[[bin]]` array or the
/// member directory contains `src/main.rs`.
pub is_binary: bool,
/// True when the manifest has a `lib` key or the member directory contains
/// `src/lib.rs`.
pub is_lib: bool,
}
/// A directed dependency between two workspace members: `from` depends on `to`.
///
/// The derived `Ord` (by `from`, then `to`) is what `build_system_model` uses
/// to sort and de-duplicate the edge list.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
pub struct Edge {
/// Package name of the depending crate.
pub from: String,
/// Package name of the workspace member being depended on.
pub to: String,
}
/// Kinds of service or toolchain boundary that the model can record.
///
/// Variant declaration order matters: the derived `Ord` is the key order of
/// the `BTreeMap` used to group boundaries, and therefore the order of the
/// resulting service list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
pub enum ServiceKind {
/// Reported for the core `postgres` feature.
Postgres,
/// Reported for the core `falkor` feature.
Falkor,
/// Reported for the core `qdrant` feature.
Qdrant,
/// Reported for the core `ai` feature.
EmbeddingApi,
/// Reported for the core `ai` feature and whenever `crates/gcore/Cargo.toml` exists.
Daemon,
/// Reported when a binary-only member lives at `crates/ghook`.
GhookInbox,
/// Reported for crates that depend on `tree-sitter`.
TreeSitter,
/// Reported for crates with a `documents` feature or a known PDF dependency.
DocumentToolchain,
/// Reported when the `crates/gwiki` member contains `src/media.rs`.
MediaToolchain,
}
impl ServiceKind {
pub(crate) fn kind_slug(self) -> &'static str {
match self {
Self::Postgres => "postgres",
Self::Falkor => "falkor",
Self::Qdrant => "qdrant",
Self::EmbeddingApi => "embedding_api",
Self::Daemon => "daemon",
Self::GhookInbox => "ghook_inbox",
Self::TreeSitter => "tree_sitter",
Self::DocumentToolchain => "document_toolchain",
Self::MediaToolchain => "media_toolchain",
}
}
}
/// One service or toolchain boundary together with the reasons it is present.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
pub struct ServiceBoundary {
/// Human-readable label for the boundary (derived from `kind`).
pub name: String,
/// Which kind of boundary this is.
pub kind: ServiceKind,
/// Provenance strings explaining what pulls the boundary in, e.g.
/// `"<crate> (feature: <feature>)"`; collected from a sorted set, so the
/// entries are unique and in ascending order.
pub pulled_in_by: Vec<String>,
}
/// Runtime modes listed in a `SystemModel`.
///
/// `build_system_model` always records both variants, in declaration order.
// NOTE(review): the precise operational meaning of each mode is not visible
// in this file; the variant docs below are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum RuntimeMode {
/// Presumably: running without a daemon.
Standalone,
/// Presumably: running attached to the daemon.
DaemonAttached,
}
/// Snapshot of the workspace structure produced by `build_system_model`.
///
/// Field order is part of the serialized form that `digest` hashes.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
pub struct SystemModel {
/// Workspace member crates, sorted by package name.
pub crates: Vec<Crate>,
/// Dependency edges between workspace members, sorted and de-duplicated.
pub edges: Vec<Edge>,
/// Service and toolchain boundaries detected in the workspace.
pub services: Vec<ServiceBoundary>,
/// Runtime modes recorded for the model.
pub runtime_modes: Vec<RuntimeMode>,
/// For each member that depends on the core package, the features it
/// enables on that dependency.
pub features_by_crate: BTreeMap<String, Vec<String>>,
/// Human-readable notes about manifests that could not be read or parsed;
/// empty when the whole workspace was processed cleanly.
pub notes: Vec<String>,
}
impl SystemModel {
    /// Returns a content hash of the model's JSON serialization.
    ///
    /// If serialization fails, the hash of an empty byte buffer is returned
    /// instead of propagating the error.
    pub(crate) fn digest(&self) -> String {
        let json_bytes: Vec<u8> = match serde_json::to_vec(self) {
            Ok(bytes) => bytes,
            Err(_) => Vec::new(),
        };
        hasher::content_hash(&json_bytes)
    }
}
/// Builds a [`SystemModel`] for the Cargo workspace rooted at `repo_root`.
///
/// The workspace manifest is read for its member list, then each member's
/// `Cargo.toml` is parsed to discover the crate name, its binary/library
/// shape, its dependencies on other members, and the features it enables on
/// the core package ([`CORE_PACKAGE`]).
///
/// This function never fails: a member whose manifest cannot be read, cannot
/// be parsed, or lacks `[package].name` is skipped and a human-readable
/// explanation is appended to `SystemModel::notes`, yielding a partial model.
///
/// The returned `crates` are sorted by name and `edges` are sorted and
/// de-duplicated, so the result is deterministic for a given workspace.
pub fn build_system_model(repo_root: &Path) -> SystemModel {
    let mut notes = Vec::new();
    let members = workspace_members(repo_root, &mut notes);

    let mut crates: Vec<Crate> = Vec::new();
    let mut manifests: Vec<(String, toml::Value)> = Vec::new();
    for member in &members {
        let crate_dir = repo_root.join(member);
        let manifest_path = crate_dir.join("Cargo.toml");
        let raw = match std::fs::read_to_string(&manifest_path) {
            Ok(raw) => raw,
            Err(err) => {
                notes.push(format!(
                    "skipped member `{member}`: cannot read {}: {err}",
                    manifest_path.display()
                ));
                continue;
            }
        };
        let manifest: toml::Value = match toml::from_str::<toml::Value>(&raw) {
            Ok(value) => value,
            Err(err) => {
                notes.push(format!(
                    "skipped member `{member}`: malformed {}: {err}",
                    manifest_path.display()
                ));
                continue;
            }
        };
        let Some(name) = package_name(&manifest) else {
            notes.push(format!(
                "skipped member `{member}`: manifest has no [package].name"
            ));
            continue;
        };
        // A target counts as present when it is declared explicitly or when
        // the conventional source file exists on disk.
        let is_binary =
            has_table_array(&manifest, "bin") || crate_dir.join("src/main.rs").is_file();
        let is_lib = manifest.get("lib").is_some() || crate_dir.join("src/lib.rs").is_file();
        crates.push(Crate {
            name: name.clone(),
            path: member.clone(),
            is_binary,
            is_lib,
        });
        manifests.push((name, manifest));
    }

    let member_names: BTreeSet<String> = crates.iter().map(|c| c.name.clone()).collect();
    let mut edges: Vec<Edge> = Vec::new();
    let mut features_by_crate: BTreeMap<String, Vec<String>> = BTreeMap::new();
    let mut dep_names_by_crate: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    let mut feature_keys_by_crate: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for (name, manifest) in &manifests {
        for (dep_name, dep_value) in dependency_entries(manifest) {
            // Only dependencies on *other* workspace members become edges.
            if dep_name != *name && member_names.contains(&dep_name) {
                edges.push(Edge {
                    from: name.clone(),
                    to: dep_name.clone(),
                });
            }
            if dep_name == CORE_PACKAGE {
                // The core package may be declared in more than one dependency
                // table (e.g. `[dependencies]` and `[dev-dependencies]`).
                // Union the feature lists so a later declaration cannot erase
                // features enabled by an earlier one.
                features_by_crate
                    .entry(name.clone())
                    .or_default()
                    .extend(dependency_features(&dep_value));
            }
            dep_names_by_crate
                .entry(name.clone())
                .or_default()
                .insert(dep_name);
        }
        let feature_keys = feature_table_keys(manifest);
        if !feature_keys.is_empty() {
            feature_keys_by_crate.insert(name.clone(), feature_keys);
        }
    }
    // Merging can introduce duplicates and break ordering; normalize once.
    for feats in features_by_crate.values_mut() {
        feats.sort();
        feats.dedup();
    }

    edges.sort();
    edges.dedup();
    crates.sort_by(|a, b| a.name.cmp(&b.name));

    let services = service_boundaries(
        &features_by_crate,
        &dep_names_by_crate,
        &feature_keys_by_crate,
        &crates,
        repo_root,
    );

    SystemModel {
        crates,
        edges,
        services,
        runtime_modes: vec![RuntimeMode::Standalone, RuntimeMode::DaemonAttached],
        features_by_crate,
        notes,
    }
}
/// Reads the member paths from `[workspace].members` in `repo_root/Cargo.toml`.
///
/// Returns the member strings exactly as written in the manifest. Any problem
/// is reported by appending a message to `notes` rather than by failing:
///
/// * an unreadable or malformed workspace manifest, or one without a
///   `[workspace].members` array, yields an empty list;
/// * an individual entry that is not a string is skipped with a note, so it
///   does not vanish silently.
fn workspace_members(repo_root: &Path, notes: &mut Vec<String>) -> Vec<String> {
    let root_manifest = repo_root.join("Cargo.toml");
    let raw = match std::fs::read_to_string(&root_manifest) {
        Ok(raw) => raw,
        Err(err) => {
            notes.push(format!(
                "cannot read workspace manifest {}: {err}",
                root_manifest.display()
            ));
            return Vec::new();
        }
    };
    let value: toml::Value = match toml::from_str::<toml::Value>(&raw) {
        Ok(value) => value,
        Err(err) => {
            notes.push(format!(
                "malformed workspace manifest {}: {err}",
                root_manifest.display()
            ));
            return Vec::new();
        }
    };
    let members = value
        .get("workspace")
        .and_then(|w| w.get("members"))
        .and_then(|m| m.as_array());
    let Some(members) = members else {
        notes.push("workspace manifest has no [workspace].members array".to_string());
        return Vec::new();
    };

    let mut paths = Vec::with_capacity(members.len());
    for (index, entry) in members.iter().enumerate() {
        match entry.as_str() {
            Some(path) => paths.push(path.to_string()),
            // Keep going with the usable entries, but leave a trace like every
            // other degradation in this module does.
            None => notes.push(format!(
                "ignored [workspace].members entry at index {index}: expected a string, found {}",
                entry.type_str()
            )),
        }
    }
    paths
}
/// Returns the manifest's `[package].name`, or `None` when the `package`
/// table or its `name` key is missing or `name` is not a string.
fn package_name(manifest: &toml::Value) -> Option<String> {
    let package = manifest.get("package")?;
    let name = package.get("name")?.as_str()?;
    Some(name.to_string())
}
/// Reports whether the top-level `key` of `manifest` is an array with at
/// least one element (e.g. a `[[bin]]` array of tables).
fn has_table_array(manifest: &toml::Value, key: &str) -> bool {
    match manifest.get(key).and_then(|value| value.as_array()) {
        Some(entries) => !entries.is_empty(),
        None => false,
    }
}
/// Collects every dependency declared in `manifest` as `(package, spec)` pairs.
///
/// The `dependencies`, `dev-dependencies` and `build-dependencies` tables are
/// scanned at the top level first, then inside every platform-specific
/// `[target.<cfg>]` table, so target-gated dependencies are not missed.
///
/// The returned name is the real package name: for a renamed dependency
/// (`alias = { package = "real-name", .. }`) it is the `package` value, not
/// the alias key. The spec value is returned unchanged. A package declared in
/// several tables appears once per declaration.
fn dependency_entries(manifest: &toml::Value) -> Vec<(String, toml::Value)> {
    const TABLES: [&str; 3] = ["dependencies", "dev-dependencies", "build-dependencies"];

    // Top-level tables first, then each `[target.<cfg>]` scope.
    let mut scopes: Vec<&toml::Value> = vec![manifest];
    if let Some(targets) = manifest.get("target").and_then(|t| t.as_table()) {
        scopes.extend(targets.values());
    }

    let mut out = Vec::new();
    for scope in scopes {
        for table in TABLES {
            let Some(deps) = scope.get(table).and_then(|t| t.as_table()) else {
                continue;
            };
            for (key, value) in deps {
                // `get` yields `None` for the plain-string form (`dep = "1"`),
                // so only table-form specs can carry a rename.
                let package = value
                    .get("package")
                    .and_then(|p| p.as_str())
                    .unwrap_or(key.as_str());
                out.push((package.to_string(), value.clone()));
            }
        }
    }
    out
}
fn dependency_features(dep_value: &toml::Value) -> Vec<String> {
let mut feats: Vec<String> = dep_value
.as_table()
.and_then(|t| t.get("features"))
.and_then(|f| f.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(str::to_string))
.collect()
})
.unwrap_or_default();
feats.sort();
feats.dedup();
feats
}
/// Returns the feature names declared in the manifest's `[features]` table,
/// or an empty set when the table is absent or not a table.
fn feature_table_keys(manifest: &toml::Value) -> BTreeSet<String> {
    match manifest.get("features").and_then(|section| section.as_table()) {
        Some(features) => features.keys().cloned().collect(),
        None => BTreeSet::new(),
    }
}
/// Maps a core-package feature name to the service kinds it pulls in.
///
/// Unrecognized features map to an empty slice.
fn feature_services(feature: &str) -> &'static [ServiceKind] {
    const POSTGRES: &[ServiceKind] = &[ServiceKind::Postgres];
    const FALKOR: &[ServiceKind] = &[ServiceKind::Falkor];
    const QDRANT: &[ServiceKind] = &[ServiceKind::Qdrant];
    // The `ai` feature is the only one that implies two boundaries.
    const AI: &[ServiceKind] = &[ServiceKind::EmbeddingApi, ServiceKind::Daemon];
    const NONE: &[ServiceKind] = &[];

    match feature {
        "postgres" => POSTGRES,
        "falkor" => FALKOR,
        "qdrant" => QDRANT,
        "ai" => AI,
        _ => NONE,
    }
}
/// Returns the human-readable display label for a service kind.
fn service_name(kind: ServiceKind) -> &'static str {
    match kind {
        // Local toolchains.
        ServiceKind::TreeSitter => "tree-sitter grammars",
        ServiceKind::DocumentToolchain => "Document toolchain (PDF/Office)",
        ServiceKind::MediaToolchain => "Media toolchain (ffmpeg)",
        // Remote / process boundaries.
        ServiceKind::EmbeddingApi => "Embedding API",
        ServiceKind::Daemon => "Gobby daemon",
        ServiceKind::GhookInbox => "ghook inbox",
        // Data stores.
        ServiceKind::Postgres => "PostgreSQL hub",
        ServiceKind::Falkor => "FalkorDB graph",
        ServiceKind::Qdrant => "Qdrant vectors",
    }
}
/// Derives the service boundaries of the workspace.
///
/// Provenance is gathered from four sources: the features each crate enables
/// on the core package, a binary-only member at `crates/ghook`, the presence
/// of `crates/gcore/Cargo.toml` under `repo_root`, and the toolchain
/// detection in `toolchain_boundaries`. Entries are grouped per kind; the
/// result is ordered by kind and each `pulled_in_by` list is sorted and
/// free of duplicates.
fn service_boundaries(
    core_features: &BTreeMap<String, Vec<String>>,
    dep_names_by_crate: &BTreeMap<String, BTreeSet<String>>,
    feature_keys_by_crate: &BTreeMap<String, BTreeSet<String>>,
    crates: &[Crate],
    repo_root: &Path,
) -> Vec<ServiceBoundary> {
    let mut provenance_by_kind: BTreeMap<ServiceKind, BTreeSet<String>> = BTreeMap::new();
    let mut record = |kind: ServiceKind, source: String| {
        provenance_by_kind.entry(kind).or_default().insert(source);
    };

    // 1. Features enabled on the core package.
    for (crate_name, feats) in core_features {
        for feat in feats {
            for &kind in feature_services(feat) {
                record(kind, format!("{crate_name} (feature: {feat})"));
            }
        }
    }

    // 2. The hook binary always owns the inbox boundary.
    let ghook_owner = crates
        .iter()
        .find(|c| c.path == "crates/ghook" && c.is_binary && !c.is_lib);
    if let Some(owner) = ghook_owner {
        record(ServiceKind::GhookInbox, format!("{} (always)", owner.name));
    }

    // 3. The core crate's manifest on disk implies the daemon boundary.
    if repo_root.join("crates/gcore/Cargo.toml").is_file() {
        record(
            ServiceKind::Daemon,
            "workspace (gobby_core::daemon_url, always)".to_string(),
        );
    }

    // 4. Toolchains inferred from dependencies, features and source files.
    for (kind, source) in
        toolchain_boundaries(dep_names_by_crate, feature_keys_by_crate, crates, repo_root)
    {
        record(kind, source);
    }

    let mut services: Vec<ServiceBoundary> = Vec::new();
    for (kind, sources) in provenance_by_kind {
        services.push(ServiceBoundary {
            name: service_name(kind).to_string(),
            kind,
            pulled_in_by: sources.into_iter().collect(),
        });
    }
    services.sort_by(|a, b| a.kind.cmp(&b.kind).then_with(|| a.name.cmp(&b.name)));
    services
}
/// Detects local toolchain boundaries, returning `(kind, provenance)` pairs.
///
/// Per crate (in ascending crate-name order):
/// * a `tree-sitter` dependency yields a `TreeSitter` entry that also counts
///   the crate's `tree-sitter-*` dependencies;
/// * a `documents` feature yields a `DocumentToolchain` entry; otherwise any
///   known PDF dependency yields one naming those dependencies.
///
/// Finally, if the member at `crates/gwiki` contains `src/media.rs`, a
/// `MediaToolchain` entry is appended.
fn toolchain_boundaries(
    dep_names_by_crate: &BTreeMap<String, BTreeSet<String>>,
    feature_keys_by_crate: &BTreeMap<String, BTreeSet<String>>,
    crates: &[Crate],
    repo_root: &Path,
) -> Vec<(ServiceKind, String)> {
    const PDF_DEPS: [&str; 3] = ["pdf-extract", "pdfium-render", "pdfium-auto"];

    let mut found: Vec<(ServiceKind, String)> = Vec::new();
    for (crate_name, deps) in dep_names_by_crate {
        if deps.contains("tree-sitter") {
            let grammar_count = deps
                .iter()
                .filter(|dep| dep.starts_with("tree-sitter-"))
                .count();
            found.push((
                ServiceKind::TreeSitter,
                format!("{crate_name} (deps: tree-sitter + {grammar_count} grammars)"),
            ));
        }

        let declares_documents = match feature_keys_by_crate.get(crate_name) {
            Some(keys) => keys.contains("documents"),
            None => false,
        };
        if declares_documents {
            // The feature is the stronger signal; do not also list the deps.
            found.push((
                ServiceKind::DocumentToolchain,
                format!("{crate_name} (feature: documents)"),
            ));
            continue;
        }

        let pdf_deps: Vec<&str> = PDF_DEPS
            .iter()
            .copied()
            .filter(|dep| deps.contains(*dep))
            .collect();
        if !pdf_deps.is_empty() {
            found.push((
                ServiceKind::DocumentToolchain,
                format!("{crate_name} (deps: {})", pdf_deps.join(", ")),
            ));
        }
    }

    let media_owner = crates
        .iter()
        .find(|c| c.path == "crates/gwiki")
        .filter(|c| repo_root.join(&c.path).join("src/media.rs").is_file());
    if let Some(gwiki) = media_owner {
        found.push((
            ServiceKind::MediaToolchain,
            format!("{} (src/media.rs, ffmpeg via PATH)", gwiki.name),
        ));
    }
    found
}
// Unit tests for `build_system_model`, run against throwaway workspaces that
// are written into temporary directories.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::path::PathBuf;
/// Creates a temporary workspace whose root `Cargo.toml` lists `members`.
///
/// Each entry is `(member path, manifest text)`; the member gets an empty
/// `src/` directory and its manifest, but no source files. The returned
/// `TempDir` must be kept alive for as long as the root path is used.
fn fixture_workspace(members: &[(&str, &str)]) -> (tempfile::TempDir, PathBuf) {
let dir = tempfile::tempdir().expect("create temp dir");
let root = dir.path().to_path_buf();
let member_list = members
.iter()
.map(|(path, _)| format!("\"{path}\""))
.collect::<Vec<_>>()
.join(", ");
fs::write(
root.join("Cargo.toml"),
format!("[workspace]\nmembers = [{member_list}]\nresolver = \"3\"\n"),
)
.expect("write root manifest");
for (path, manifest) in members {
let crate_dir = root.join(path);
fs::create_dir_all(crate_dir.join("src")).expect("create crate dir");
fs::write(crate_dir.join("Cargo.toml"), manifest).expect("write member manifest");
}
(dir, root)
}
/// Looks up a crate by package name, panicking with a clear message if absent.
fn crate_named<'a>(model: &'a SystemModel, name: &str) -> &'a Crate {
model
.crates
.iter()
.find(|c| c.name == name)
.unwrap_or_else(|| panic!("crate `{name}` missing from model"))
}
// A lib crate and a bin crate: checks target shape, the single internal edge
// (the external `serde` dependency must not produce one), and runtime modes.
#[test]
fn extracts_crates_internal_edges_and_target_shape() {
let lib_manifest = "[package]\nname = \"my-core\"\nversion = \"0.1.0\"\n\n[lib]\nname = \"my_core\"\npath = \"src/lib.rs\"\n";
let bin_manifest = "[package]\nname = \"my-app\"\nversion = \"0.1.0\"\n\n[[bin]]\nname = \"app\"\npath = \"src/main.rs\"\n\n[dependencies]\nmy-core = { path = \"../core\" }\nserde = \"1\"\n";
let (_dir, root) =
fixture_workspace(&[("crates/core", lib_manifest), ("crates/app", bin_manifest)]);
let model = build_system_model(&root);
assert!(
model.notes.is_empty(),
"unexpected notes: {:?}",
model.notes
);
assert_eq!(model.crates.len(), 2);
let app = crate_named(&model, "my-app");
assert!(app.is_binary);
assert!(!app.is_lib);
assert_eq!(app.path, "crates/app");
let core = crate_named(&model, "my-core");
assert!(!core.is_binary);
assert!(core.is_lib);
assert_eq!(
model.edges,
vec![Edge {
from: "my-app".to_string(),
to: "my-core".to_string(),
}]
);
assert!(model.runtime_modes.contains(&RuntimeMode::Standalone));
assert!(model.runtime_modes.contains(&RuntimeMode::DaemonAttached));
}
// Features enabled on the core dependency become service boundaries; features
// that are merely declared by the core crate but not enabled do not.
#[test]
fn maps_core_features_to_service_boundaries() {
let core_manifest = "[package]\nname = \"gobby-core\"\nversion = \"0.5.0\"\n\n[lib]\nname = \"gobby_core\"\npath = \"src/lib.rs\"\n\n[features]\npostgres = []\nqdrant = []\nfalkor = []\nai = []\n";
let consumer_manifest = "[package]\nname = \"gobby-code\"\nversion = \"1.0.0\"\n\n[[bin]]\nname = \"gcode\"\npath = \"src/main.rs\"\n\n[dependencies]\ngobby-core = { path = \"../gcore\", features = [\"postgres\", \"qdrant\"] }\n";
let (_dir, root) = fixture_workspace(&[
("crates/gcore", core_manifest),
("crates/gcode", consumer_manifest),
]);
let model = build_system_model(&root);
assert!(
model.notes.is_empty(),
"unexpected notes: {:?}",
model.notes
);
assert_eq!(
model.features_by_crate.get("gobby-code"),
Some(&vec!["postgres".to_string(), "qdrant".to_string()])
);
let pg = model
.services
.iter()
.find(|s| s.kind == ServiceKind::Postgres)
.expect("Postgres boundary present");
assert_eq!(
pg.pulled_in_by,
vec!["gobby-code (feature: postgres)".to_string()]
);
let qd = model
.services
.iter()
.find(|s| s.kind == ServiceKind::Qdrant)
.expect("Qdrant boundary present");
assert_eq!(
qd.pulled_in_by,
vec!["gobby-code (feature: qdrant)".to_string()]
);
assert!(
!model
.services
.iter()
.any(|s| s.kind == ServiceKind::EmbeddingApi),
"EmbeddingApi must not appear without the ai feature"
);
// The daemon boundary is present because `crates/gcore/Cargo.toml` exists.
assert!(model.services.iter().any(|s| s.kind == ServiceKind::Daemon));
assert!(
model
.services
.iter()
.all(|s| s.kind != ServiceKind::GhookInbox)
);
}
// A binary-only member at `crates/ghook` is reported as the inbox owner.
#[test]
fn ghook_binary_member_yields_inbox_boundary() {
let core_manifest = "[package]\nname = \"gobby-core\"\nversion = \"0.5.0\"\n\n[lib]\npath = \"src/lib.rs\"\n";
let ghook_manifest = "[package]\nname = \"gobby-hooks\"\nversion = \"0.5.0\"\n\n[[bin]]\nname = \"ghook\"\npath = \"src/main.rs\"\n";
let (_dir, root) = fixture_workspace(&[
("crates/gcore", core_manifest),
("crates/ghook", ghook_manifest),
]);
let model = build_system_model(&root);
let inbox = model
.services
.iter()
.find(|service| service.kind == ServiceKind::GhookInbox)
.expect("ghook inbox boundary present");
assert_eq!(inbox.pulled_in_by, vec!["gobby-hooks (always)".to_string()]);
}
// The `ai` feature maps to two boundaries; the daemon boundary additionally
// carries the workspace-level `daemon_url` provenance.
#[test]
fn ai_feature_pulls_in_embedding_api_and_daemon() {
let core_manifest = "[package]\nname = \"gobby-core\"\nversion = \"0.5.0\"\n\n[lib]\nname = \"gobby_core\"\npath = \"src/lib.rs\"\n\n[features]\nai = []\n";
let consumer_manifest = "[package]\nname = \"gobby-wiki\"\nversion = \"0.5.0\"\n\n[[bin]]\nname = \"gwiki\"\npath = \"src/main.rs\"\n\n[dependencies]\ngobby-core = { path = \"../gcore\", features = [\"ai\"] }\n";
let (_dir, root) = fixture_workspace(&[
("crates/gcore", core_manifest),
("crates/gwiki", consumer_manifest),
]);
let model = build_system_model(&root);
let embed = model
.services
.iter()
.find(|s| s.kind == ServiceKind::EmbeddingApi)
.expect("EmbeddingApi boundary present");
assert_eq!(
embed.pulled_in_by,
vec!["gobby-wiki (feature: ai)".to_string()]
);
let daemon = model
.services
.iter()
.find(|s| s.kind == ServiceKind::Daemon)
.expect("Daemon boundary present");
assert!(
daemon
.pulled_in_by
.contains(&"gobby-wiki (feature: ai)".to_string())
);
assert!(daemon.pulled_in_by.iter().any(|p| p.contains("daemon_url")));
}
// One good member, one missing directory, one unparsable manifest: the good
// crate is kept and each bad member produces exactly one note.
#[test]
fn degrades_to_partial_model_on_missing_and_malformed_manifests() {
let good_manifest = "[package]\nname = \"good-crate\"\nversion = \"0.1.0\"\n\n[lib]\npath = \"src/lib.rs\"\n";
let (_dir, root) = fixture_workspace(&[("crates/good", good_manifest)]);
// Overwrite the generated root manifest to reference the two bad members.
fs::write(
root.join("Cargo.toml"),
"[workspace]\nmembers = [\"crates/good\", \"crates/missing\", \"crates/broken\"]\n",
)
.expect("rewrite root manifest");
fs::create_dir_all(root.join("crates/broken")).expect("create broken dir");
fs::write(
root.join("crates/broken/Cargo.toml"),
"this is not = valid toml [[[",
)
.expect("write broken manifest");
let model = build_system_model(&root);
assert_eq!(model.crates.len(), 1);
assert_eq!(model.crates[0].name, "good-crate");
assert_eq!(model.notes.len(), 2, "notes: {:?}", model.notes);
assert!(model.notes.iter().any(|n| n.contains("crates/missing")));
assert!(model.notes.iter().any(|n| n.contains("crates/broken")));
assert_eq!(
model.runtime_modes,
vec![RuntimeMode::Standalone, RuntimeMode::DaemonAttached]
);
}
// An empty directory (no root manifest) yields an empty model with one note.
#[test]
fn missing_workspace_manifest_yields_empty_partial_model() {
let dir = tempfile::tempdir().expect("temp dir");
let model = build_system_model(dir.path());
assert!(model.crates.is_empty());
assert!(model.edges.is_empty());
assert_eq!(model.notes.len(), 1);
assert!(model.notes[0].contains("cannot read workspace manifest"));
assert_eq!(
model.runtime_modes,
vec![RuntimeMode::Standalone, RuntimeMode::DaemonAttached]
);
}
// `tree-sitter` plus two `tree-sitter-*` grammars: the provenance string
// reports a grammar count of 2.
#[test]
fn tree_sitter_dep_yields_tree_sitter_boundary_with_grammar_count() {
let manifest = "[package]\nname = \"parser-crate\"\nversion = \"0.1.0\"\n\n[lib]\npath = \"src/lib.rs\"\n\n[dependencies]\ntree-sitter = \"0.25\"\ntree-sitter-rust = \"0.24\"\ntree-sitter-python = \"0.25\"\nserde = \"1\"\n";
let (_dir, root) = fixture_workspace(&[("crates/parser", manifest)]);
let model = build_system_model(&root);
assert!(
model.notes.is_empty(),
"unexpected notes: {:?}",
model.notes
);
let ts = model
.services
.iter()
.find(|s| s.kind == ServiceKind::TreeSitter)
.expect("TreeSitter boundary present");
assert_eq!(ts.name, "tree-sitter grammars");
assert_eq!(
ts.pulled_in_by,
vec!["parser-crate (deps: tree-sitter + 2 grammars)".to_string()]
);
}
// A `documents` feature takes precedence over the PDF dependency: only the
// feature-based provenance string is reported.
#[test]
fn documents_feature_yields_document_toolchain_boundary() {
let manifest = "[package]\nname = \"vault-crate\"\nversion = \"0.1.0\"\n\n[lib]\npath = \"src/lib.rs\"\n\n[features]\ndefault = [\"documents\"]\ndocuments = [\"dep:pdf-extract\"]\n\n[dependencies]\npdf-extract = { version = \"0.10\", optional = true }\n";
let (_dir, root) = fixture_workspace(&[("crates/vault", manifest)]);
let model = build_system_model(&root);
assert!(
model.notes.is_empty(),
"unexpected notes: {:?}",
model.notes
);
let docs = model
.services
.iter()
.find(|s| s.kind == ServiceKind::DocumentToolchain)
.expect("DocumentToolchain boundary present");
assert_eq!(docs.name, "Document toolchain (PDF/Office)");
assert_eq!(
docs.pulled_in_by,
vec!["vault-crate (feature: documents)".to_string()]
);
}
// Without a `documents` feature, a known PDF dependency alone is enough.
#[test]
fn pdf_dep_yields_dependency_based_document_toolchain_boundary() {
let manifest = "[package]\nname = \"pdf-crate\"\nversion = \"0.1.0\"\n\n[lib]\npath = \"src/lib.rs\"\n\n[dependencies]\npdf-extract = \"0.10\"\n";
let (_dir, root) = fixture_workspace(&[("crates/pdf", manifest)]);
let model = build_system_model(&root);
let docs = model
.services
.iter()
.find(|s| s.kind == ServiceKind::DocumentToolchain)
.expect("DocumentToolchain boundary present");
assert_eq!(
docs.pulled_in_by,
vec!["pdf-crate (deps: pdf-extract)".to_string()]
);
}
// Negative case: a plain crate must not trigger any toolchain boundary.
#[test]
fn workspace_without_toolchains_omits_those_boundaries() {
let manifest = "[package]\nname = \"plain-crate\"\nversion = \"0.1.0\"\n\n[lib]\npath = \"src/lib.rs\"\n\n[dependencies]\nserde = \"1\"\n";
let (_dir, root) = fixture_workspace(&[("crates/plain", manifest)]);
let model = build_system_model(&root);
assert!(
model.notes.is_empty(),
"unexpected notes: {:?}",
model.notes
);
assert!(
!model
.services
.iter()
.any(|s| s.kind == ServiceKind::TreeSitter),
"TreeSitter must be omitted when no tree-sitter dep exists"
);
assert!(
!model
.services
.iter()
.any(|s| s.kind == ServiceKind::DocumentToolchain),
"DocumentToolchain must be omitted with no documents feature / pdf dep"
);
assert!(
!model
.services
.iter()
.any(|s| s.kind == ServiceKind::MediaToolchain),
"MediaToolchain must be omitted without a crates/gwiki member"
);
}
}