use std::path::Path;
/// Upper bound, in `char`s (not bytes), on the combined summary returned by
/// `scrape_metadata_summary`.
pub const MAX_SUMMARY_CHARS: usize = 3000;
/// Upper bound, in `char`s (not bytes), on the shortened text produced by
/// `make_display_summary`.
pub const MAX_DISPLAY_SUMMARY_CHARS: usize = 500;
/// Number of leading `char`s of the trimmed `README.md` that are kept by
/// `read_readme_head`.
const README_SLICE_CHARS: usize = 2000;
/// Builds a plain-text summary of the project rooted at `root_path` from
/// well-known metadata files.
///
/// Sources are consulted in a fixed priority order: `README.md` (head only),
/// `CLAUDE.md`, `AGENTS.md`, `Cargo.toml`, `package.json`, `pyproject.toml`,
/// `go.mod`, and finally the trusty-search YAML config. Each source that
/// yields text contributes one `"<label>: <text>"` section; sections are
/// separated by a blank line.
///
/// Returns `None` when no source produced any text. Otherwise the joined
/// text is cut to at most [`MAX_SUMMARY_CHARS`] characters.
pub fn scrape_metadata_summary(root_path: &Path) -> Option<String> {
    // Element order is the priority order of the final summary.
    let sections: Vec<Option<String>> = vec![
        read_readme_head(root_path).map(|s| format!("README: {s}")),
        read_full_text(root_path, "CLAUDE.md").map(|s| format!("CLAUDE.md: {s}")),
        read_full_text(root_path, "AGENTS.md").map(|s| format!("AGENTS.md: {s}")),
        read_cargo_toml(root_path).map(|s| format!("Cargo: {s}")),
        read_package_json(root_path).map(|s| format!("package.json: {s}")),
        read_pyproject_toml(root_path).map(|s| format!("pyproject: {s}")),
        read_go_mod(root_path).map(|s| format!("go.mod: {s}")),
        read_trusty_yaml_description(root_path).map(|s| format!("trusty-search.yaml: {s}")),
    ];

    // Keep only the sources that actually produced something.
    let present: Vec<String> = sections.into_iter().flatten().collect();
    if present.is_empty() {
        return None;
    }

    // `truncate_chars` hands back the text unchanged when it is already
    // within the limit, so no separate length check is needed here.
    Some(truncate_chars(&present.join("\n\n"), MAX_SUMMARY_CHARS))
}
pub fn make_display_summary(summary: &str) -> String {
truncate_chars(summary, MAX_DISPLAY_SUMMARY_CHARS)
}
/// Returns the first `max` characters of `s` as an owned `String`.
///
/// Lengths are measured in `char`s, not bytes; when `s` has `max` or fewer
/// characters it is returned whole.
fn truncate_chars(s: &str, max: usize) -> String {
    // The byte offset of the character at index `max` (if there is one) is
    // exactly where the first `max` characters end.
    match s.char_indices().nth(max) {
        Some((cut, _)) => s[..cut].to_string(),
        None => s.to_string(),
    }
}
/// Reads `README.md` under `root` and returns the beginning of its trimmed
/// contents, limited to [`README_SLICE_CHARS`] characters.
///
/// Returns `None` when the file cannot be read as UTF-8 text or when
/// nothing is left after trimming and slicing.
fn read_readme_head(root: &Path) -> Option<String> {
    let contents = std::fs::read_to_string(root.join("README.md")).ok()?;
    let excerpt = truncate_chars(contents.trim(), README_SLICE_CHARS);
    (!excerpt.is_empty()).then_some(excerpt)
}
/// Reads `file_name` under `root` and returns its contents with leading and
/// trailing whitespace removed.
///
/// Returns `None` when the file cannot be read as UTF-8 text or contains
/// only whitespace.
fn read_full_text(root: &Path, file_name: &str) -> Option<String> {
    std::fs::read_to_string(root.join(file_name))
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|body| !body.is_empty())
}
/// Extracts the `name` and `description` strings from the `[package]` table
/// of `Cargo.toml` under `root`.
///
/// Returns `None` when the file is unreadable, is not valid TOML, has no
/// `[package]` table, or neither field holds a non-blank string. When both
/// are present they are combined by `join_optional`.
fn read_cargo_toml(root: &Path) -> Option<String> {
    let manifest_path = root.join("Cargo.toml");
    let raw = std::fs::read_to_string(manifest_path).ok()?;
    let manifest: toml::Value = toml::from_str(&raw).ok()?;
    let package = manifest.get("package").and_then(toml::Value::as_table)?;

    // Non-string values (e.g. a table) are treated the same as a missing key.
    join_optional(&[
        package.get("name").and_then(toml::Value::as_str),
        package.get("description").and_then(toml::Value::as_str),
    ])
}
/// Extracts `name`, `description` and `keywords` from `package.json` under
/// `root`.
///
/// String entries of the `keywords` array are joined with `", "`; entries
/// that are not strings are skipped. Returns `None` when the file is
/// unreadable, is not valid JSON, or none of the three fields contributes
/// non-blank text.
fn read_package_json(root: &Path) -> Option<String> {
    let raw = std::fs::read_to_string(root.join("package.json")).ok()?;
    let manifest = serde_json::from_str::<serde_json::Value>(&raw).ok()?;

    // Owned here so the borrowed `&str` passed below stays valid.
    let keyword_list: Option<String> = manifest
        .get("keywords")
        .and_then(serde_json::Value::as_array)
        .map(|items| {
            let words: Vec<&str> = items
                .iter()
                .filter_map(serde_json::Value::as_str)
                .collect();
            words.join(", ")
        });

    join_optional(&[
        manifest.get("name").and_then(serde_json::Value::as_str),
        manifest.get("description").and_then(serde_json::Value::as_str),
        keyword_list.as_deref(),
    ])
}
/// Extracts `name` and `description` from `pyproject.toml` under `root`.
///
/// The `[project]` table is used when it exists; only when it is absent (or
/// not a table) is `[tool.poetry]` consulted instead. Returns `None` when
/// the file is unreadable, is not valid TOML, or the chosen table supplies
/// no non-blank `name`/`description` string.
fn read_pyproject_toml(root: &Path) -> Option<String> {
    let raw = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;
    let doc: toml::Value = toml::from_str(&raw).ok()?;

    // `[project]` wins outright: its mere presence suppresses the Poetry
    // fallback, even if it lacks the fields we want.
    let metadata = doc
        .get("project")
        .and_then(toml::Value::as_table)
        .or_else(|| {
            doc.get("tool")
                .and_then(toml::Value::as_table)
                .and_then(|tool| tool.get("poetry"))
                .and_then(toml::Value::as_table)
        });

    let name = metadata
        .and_then(|table| table.get("name"))
        .and_then(toml::Value::as_str);
    let description = metadata
        .and_then(|table| table.get("description"))
        .and_then(toml::Value::as_str);

    join_optional(&[name, description])
}
/// Returns the module path declared by the first usable `module` directive
/// in `go.mod` under `root`.
///
/// Compared with a plain `"module "` prefix match this also:
/// * drops a trailing `//` comment (for example a same-line
///   `// Deprecated: ...` note) so it does not leak into the path,
/// * accepts any whitespace (such as a tab) between the keyword and the path,
/// * strips surrounding `"` or `` ` `` quotes from a quoted module path.
///
/// Returns `None` when the file cannot be read or no such directive exists.
fn read_go_mod(root: &Path) -> Option<String> {
    let text = std::fs::read_to_string(root.join("go.mod")).ok()?;
    text.lines().find_map(|line| {
        // Everything from `//` onward is a comment, not part of the directive.
        let code = line.split("//").next().unwrap_or(line).trim();
        let rest = code.strip_prefix("module")?;
        // Require a separator so identifiers that merely start with
        // "module" (or a bare keyword with no path) are not matched.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        let module = rest.trim().trim_matches(|c: char| c == '"' || c == '`');
        (!module.is_empty()).then(|| module.to_string())
    })
}
/// Looks for a `description:` entry in `.trusty-search.yaml`, then
/// `trusty-search.yaml`, under `root`.
///
/// This is a line-oriented scan, not a YAML parse: each line is trimmed, and
/// the first one starting with `description:` whose remainder is non-empty
/// after stripping whitespace and surrounding `"` / `'` characters wins.
/// A file that cannot be read, or that has no such line, is skipped in
/// favour of the next candidate. Returns `None` when neither file yields a
/// value.
fn read_trusty_yaml_description(root: &Path) -> Option<String> {
    [".trusty-search.yaml", "trusty-search.yaml"]
        .iter()
        // Lazy: the second file is only read if the first gives nothing.
        .filter_map(|candidate| std::fs::read_to_string(root.join(candidate)).ok())
        .find_map(|contents| {
            contents.lines().find_map(|line| {
                let raw = line.trim().strip_prefix("description:")?;
                let value = raw.trim().trim_matches('"').trim_matches('\'').trim();
                (!value.is_empty()).then(|| value.to_string())
            })
        })
}
/// Joins the present, non-blank entries of `parts` with `" — "`.
///
/// Each entry is trimmed first; `None` entries and entries that are empty
/// after trimming are skipped. Returns `None` when nothing remains.
fn join_optional(parts: &[Option<&str>]) -> Option<String> {
    let mut kept: Vec<&str> = Vec::with_capacity(parts.len());
    for part in parts.iter().flatten() {
        let piece = part.trim();
        if !piece.is_empty() {
            kept.push(piece);
        }
    }
    (!kept.is_empty()).then(|| kept.join(" — "))
}
// Unit tests for the metadata scraper. Each test builds a throwaway
// directory with `tempfile::tempdir`, writes one or more metadata files into
// it, and inspects the result of `scrape_metadata_summary`.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::tempdir;
// An empty directory has no metadata sources, so no summary is produced.
#[test]
fn scrape_returns_none_when_no_metadata() {
let tmp = tempdir().expect("tempdir");
assert!(scrape_metadata_summary(tmp.path()).is_none());
}
// README.md content appears in the summary under the "README:" label.
#[test]
fn scrape_reads_readme_head() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("README.md"),
"# trusty-search\n\nBlazingly fast hybrid code search service.\n",
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("README:"));
assert!(summary.contains("trusty-search"));
assert!(summary.contains("hybrid code search"));
}
// Only `name` and `description` are taken from Cargo.toml's [package]
// table; other fields such as `version` must not leak into the summary.
#[test]
fn scrape_reads_cargo_toml_package() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("Cargo.toml"),
r#"[package]
name = "trusty-search"
description = "Hybrid code search"
version = "0.1.0"
"#,
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("Cargo:"));
assert!(summary.contains("trusty-search"));
assert!(summary.contains("Hybrid code search"));
assert!(!summary.contains("0.1.0"));
}
// package.json contributes its name, description and every keyword.
#[test]
fn scrape_reads_package_json() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("package.json"),
r#"{
"name": "my-app",
"description": "Frontend app",
"keywords": ["react", "ssr"],
"version": "1.0.0"
}"#,
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("my-app"));
assert!(summary.contains("Frontend app"));
assert!(summary.contains("react"));
assert!(summary.contains("ssr"));
}
// pyproject.toml with a [project] table: name and description are read
// from that table.
#[test]
fn scrape_reads_pyproject_project_table() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("pyproject.toml"),
r#"[project]
name = "myproj"
description = "Python data tools"
"#,
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("myproj"));
assert!(summary.contains("Python data tools"));
}
// pyproject.toml without [project]: the [tool.poetry] table is used as a
// fallback source for name and description.
#[test]
fn scrape_reads_pyproject_poetry_table() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("pyproject.toml"),
r#"[tool.poetry]
name = "legacy-poetry"
description = "Old style"
"#,
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("legacy-poetry"));
assert!(summary.contains("Old style"));
}
// The module path from go.mod's `module` line shows up in the summary.
#[test]
fn scrape_reads_go_mod_module_line() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("go.mod"),
"module github.com/example/proj\n\ngo 1.21\n",
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("github.com/example/proj"));
}
// The `description:` value from trusty-search.yaml shows up in the summary.
#[test]
fn scrape_reads_trusty_yaml_description() {
let tmp = tempdir().expect("tempdir");
fs::write(
tmp.path().join("trusty-search.yaml"),
"description: A multi-index polyrepo\nname: foo\n",
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.contains("A multi-index polyrepo"));
}
// An oversized README must still yield a summary no longer than
// MAX_SUMMARY_CHARS characters.
#[test]
fn scrape_truncates_to_max_summary_chars() {
let tmp = tempdir().expect("tempdir");
let big = "x".repeat(8000);
fs::write(tmp.path().join("README.md"), &big).unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
assert!(summary.chars().count() <= MAX_SUMMARY_CHARS);
}
// With several sources present, sections appear in priority order:
// README before CLAUDE.md before Cargo.
#[test]
fn scrape_combines_multiple_sources_in_priority_order() {
let tmp = tempdir().expect("tempdir");
fs::write(tmp.path().join("README.md"), "Pitch line.").unwrap();
fs::write(tmp.path().join("CLAUDE.md"), "AI brief.").unwrap();
fs::write(
tmp.path().join("Cargo.toml"),
"[package]\nname = \"p\"\ndescription = \"d\"\n",
)
.unwrap();
let summary = scrape_metadata_summary(tmp.path()).expect("summary");
let readme_idx = summary.find("README:").unwrap();
let claude_idx = summary.find("CLAUDE.md:").unwrap();
let cargo_idx = summary.find("Cargo:").unwrap();
assert!(readme_idx < claude_idx);
assert!(claude_idx < cargo_idx);
}
// make_display_summary caps its output at MAX_DISPLAY_SUMMARY_CHARS
// characters.
#[test]
fn display_summary_truncates() {
let full = "a".repeat(2000);
let short = make_display_summary(&full);
assert!(short.chars().count() <= MAX_DISPLAY_SUMMARY_CHARS);
}
// A README containing only whitespace counts as absent, so with no other
// sources the result is None.
#[test]
fn empty_readme_does_not_contribute() {
let tmp = tempdir().expect("tempdir");
fs::write(tmp.path().join("README.md"), " \n\n").unwrap();
assert!(scrape_metadata_summary(tmp.path()).is_none());
}
// Checks `crate::core::mmr::cosine_similarity`: identical vectors score
// ~1.0, orthogonal vectors score ~0.0, and inputs of different lengths
// score exactly 0.0.
// NOTE(review): this exercises a function defined outside the code visible
// here and appears unrelated to metadata scraping — confirm it belongs in
// this module.
#[test]
fn cosine_helper_contract() {
use crate::core::mmr::cosine_similarity;
assert!((cosine_similarity(&[1.0, 0.0], &[1.0, 0.0]) - 1.0).abs() < 1e-6);
assert!((cosine_similarity(&[1.0, 0.0], &[0.0, 1.0])).abs() < 1e-6);
assert_eq!(cosine_similarity(&[1.0, 0.0], &[1.0, 0.0, 0.0]), 0.0);
}
}