#![cfg(feature = "semantic")]
#![allow(clippy::unwrap_used, clippy::expect_used)]
mod common;
use std::path::Path;
use predicates::prelude::*;
use crate::common::{fixture_git_repo_with_files, repograph_cmd};
/// Registers `repo` under `name` by running the CLI's `add` subcommand.
///
/// The command is built by `repograph_cmd(config_dir)` and invoked as
/// `add <repo> --name <name>`.
///
/// # Panics
///
/// Panics (failing the calling test) if the command does not exit successfully.
fn register(config_dir: &Path, repo: &Path, name: &str) {
    let outcome = repograph_cmd(config_dir)
        .arg("add")
        .arg(repo)
        .arg("--name")
        .arg(name)
        .assert();
    outcome.success();
}
/// Collects the `path` string of every entry in the envelope's `hits` array,
/// preserving the array's order.
///
/// The returned slices borrow from `envelope`.
///
/// # Panics
///
/// Panics if `envelope["hits"]` is not a JSON array, or if any hit's `path`
/// is not a JSON string.
fn json_hit_paths(envelope: &serde_json::Value) -> Vec<&str> {
    let hits = envelope["hits"].as_array().unwrap();
    let mut paths = Vec::with_capacity(hits.len());
    for hit in hits {
        paths.push(hit["path"].as_str().unwrap());
    }
    paths
}
/// `index --semantic` on a registered single-file repo must succeed, mention
/// "with embeddings" on stderr, and emit no "note:" line on stderr.
// NOTE(review): "note:" is presumably the prefix of a degradation/fallback
// notice — confirm against the CLI's stderr output.
#[test]
#[ignore = "downloads ~127MB embedding model; run with --features semantic -- --ignored"]
fn index_semantic_writes_embeddings_and_reports_it() {
    // Scratch directory holding both the CLI config dir and the fixture repo.
    let scratch = tempfile::TempDir::new().unwrap();
    let config_dir = scratch.path().join("config");
    let repo = fixture_git_repo_with_files(
        scratch.path(),
        "svc",
        &[("retry.rs", "pub fn retry_with_backoff() {}\n")],
    );
    register(&config_dir, &repo, "svc");

    // Expectations on stderr, named so the assertion chain reads as intent.
    let reports_embeddings = predicates::str::contains("with embeddings");
    let emits_no_note = predicates::str::contains("note:").not();

    repograph_cmd(&config_dir)
        .arg("index")
        .arg("--semantic")
        .assert()
        .success()
        .stderr(reports_embeddings)
        .stderr(emits_no_note);
}
/// End-to-end check that `find --semantic --json` surfaces a file matched by
/// meaning and flags that in the JSON envelope.
///
/// Flow:
/// 1. Build and register a two-file fixture repo, then run `index --semantic`.
/// 2. Run `find <query> --json` without `--semantic` and require that
///    `resilience.rs` is *not* among the hits.
/// 3. Run the same query with `--semantic` and require `schema_version == 2`,
///    `semantic_used == true`, a null `degraded` field, and `resilience.rs`
///    among the hits.
#[test]
#[ignore = "downloads ~127MB embedding model; run with --features semantic -- --ignored"]
fn find_semantic_ranks_by_meaning_and_marks_json_envelope() {
    // One query drives both runs; only the `--semantic` flag differs between them.
    let query = "pause and try the request again after a transient failure";

    // Fixture sources: one about retry/backoff, one unrelated (color conversion).
    let resilience_src = "/// Retry an operation with exponential backoff between attempts.\n\
                          pub fn retry_with_backoff() {}\n";
    let color_src = "/// Convert an RGB triple to a hexadecimal color string.\n\
                     pub fn rgb_to_hex() {}\n";

    let scratch = tempfile::TempDir::new().unwrap();
    let config_dir = scratch.path().join("config");
    let repo = fixture_git_repo_with_files(
        scratch.path(),
        "svc",
        &[("resilience.rs", resilience_src), ("color.rs", color_src)],
    );
    register(&config_dir, &repo, "svc");

    repograph_cmd(&config_dir)
        .arg("index")
        .arg("--semantic")
        .assert()
        .success();

    // Baseline: the non-semantic search must not return the retry file.
    let lexical = repograph_cmd(&config_dir)
        .arg("find")
        .arg(query)
        .arg("--json")
        .assert()
        .success();
    let lv = serde_json::from_slice::<serde_json::Value>(&lexical.get_output().stdout).unwrap();
    let lexical_paths = json_hit_paths(&lv);
    assert!(
        lexical_paths.iter().all(|p| *p != "resilience.rs"),
        "BM25 alone must miss the zero-overlap paraphrase; it returned: {lexical_paths:?}"
    );

    // Same query with `--semantic`: check the envelope metadata, then the hits.
    let out = repograph_cmd(&config_dir)
        .arg("find")
        .arg(query)
        .arg("--semantic")
        .arg("--json")
        .assert()
        .success();
    let v = serde_json::from_slice::<serde_json::Value>(&out.get_output().stdout).unwrap();
    assert_eq!(v["schema_version"], 2);
    assert_eq!(
        v["semantic_used"], true,
        "semantic retrieval contributed to the ranking"
    );
    assert!(
        v["degraded"].is_null(),
        "model present and embeddings written — nothing degraded"
    );

    let paths = json_hit_paths(&v);
    assert!(
        paths.iter().any(|p| *p == "resilience.rs"),
        "semantic retrieval surfaces the meaning-matched file BM25 alone misses: {paths:?}"
    );
}