use crate::core::graph::Symbol;
use crate::languages::stopwords_for_language;
/// Builds the text that gets embedded for `symbol`.
///
/// The result is a list of fragments joined with `" | "`, emitted in this order:
///
/// 1. `"<kind> <name>"`.
/// 2. The name split at camelCase boundaries, if that differs from the name.
/// 3. The name with underscores split out, if that differs from the name.
/// 4. The signature, then the docstring, when present.
/// 5. `"in <parent>"` (plus a camelCase-split variant) when `parent_id` yields a name.
/// 6. `"path <dirs>"` built from the directory segments of `file_path`.
/// 7. `"called-by <callers>"` and `"calls <callees>"` when those slices are non-empty.
/// 8. An importance label: `"importance high core"` above `0.7`,
///    `"importance medium"` above `0.3`, nothing otherwise.
///
/// Names listed by `stopwords_for_language` for the symbol's language never
/// receive an importance label, whatever `importance` is.
pub fn build_embedding_text(
    symbol: &Symbol,
    callers: &[String],
    callees: &[String],
    importance: f64,
) -> String {
    let mut parts = Vec::new();
    parts.push(format!("{} {}", symbol.kind, symbol.name));

    // Add word-split variants of the name only when they carry new text.
    let split_name = split_camel_case(&symbol.name);
    if split_name != symbol.name {
        parts.push(split_name);
    }
    let snake_split = split_snake_case(&symbol.name);
    if snake_split != symbol.name {
        parts.push(snake_split);
    }

    if let Some(sig) = &symbol.signature {
        parts.push(sig.clone());
    }
    if let Some(doc) = &symbol.docstring {
        parts.push(doc.clone());
    }

    if let Some(parent_id) = &symbol.parent_id {
        if let Some(parent_name) = extract_name_from_id(parent_id) {
            parts.push(format!("in {parent_name}"));
            let split_parent = split_camel_case(parent_name);
            if split_parent != parent_name {
                parts.push(format!("in {split_parent}"));
            }
        }
    }

    // Keep only meaningful directory segments: drop empty ones (produced by a
    // leading or doubled `/`), the literal `src`, and anything containing a
    // dot (which removes the file name as well as `.` / `..`).
    let path_parts: Vec<&str> = symbol
        .file_path
        .split('/')
        .filter(|p| !p.is_empty() && *p != "src" && !p.contains('.'))
        .collect();
    if !path_parts.is_empty() {
        parts.push(format!("path {}", path_parts.join(" ")));
    }

    if !callers.is_empty() {
        parts.push(format!("called-by {}", callers.join(" ")));
    }
    if !callees.is_empty() {
        parts.push(format!("calls {}", callees.join(" ")));
    }

    // Stopword names are excluded from the importance label entirely.
    let stopwords = stopwords_for_language(&symbol.language);
    let is_generic = stopwords.contains(&symbol.name.as_str());
    if !is_generic {
        if importance > 0.7 {
            parts.push("importance high core".to_string());
        } else if importance > 0.3 {
            parts.push("importance medium".to_string());
        }
    }

    parts.join(" | ")
}
/// Inserts spaces at camelCase word boundaries in `name`.
///
/// A space is placed before an uppercase character (never the first one) when
/// either the previous character is lowercase (`processPayment` ->
/// `process Payment`), or the previous character is uppercase and the next one
/// is lowercase (`getHTTPResponse` -> `get HTTP Response`). All other
/// characters are copied through unchanged.
pub(crate) fn split_camel_case(name: &str) -> String {
    let mut out = String::with_capacity(name.len() + 4);
    let mut previous: Option<char> = None;
    let mut remaining = name.chars().peekable();

    while let Some(current) = remaining.next() {
        if let Some(before) = previous {
            if current.is_uppercase() {
                // lower -> Upper starts a new word.
                let starts_word = before.is_lowercase();
                // Upper -> Upper followed by lower: last capital of a run opens a word.
                let closes_acronym =
                    before.is_uppercase() && remaining.peek().is_some_and(|c| c.is_lowercase());
                if starts_word || closes_acronym {
                    out.push(' ');
                }
            }
        }
        out.push(current);
        previous = Some(current);
    }

    out
}
/// Splits a snake_case identifier into space-separated words.
///
/// Underscores act purely as separators: leading, trailing and repeated
/// underscores contribute no words, so `__init__` becomes `init` and
/// `_private_field` becomes `private field` rather than text padded with
/// stray spaces. A name without underscores is returned unchanged.
pub(crate) fn split_snake_case(name: &str) -> String {
    name.split('_')
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
/// Returns the second-to-last `::`-separated segment of `id`.
///
/// For an id such as `src/pay.ts::PaymentService::class` this is
/// `PaymentService`. Yields `None` when `id` has fewer than two segments.
fn extract_name_from_id(id: &str) -> Option<&str> {
    let mut segments = id.split("::");
    // Bail out early unless at least two segments exist.
    let mut second_last = segments.next()?;
    let mut last = segments.next()?;
    // Slide the two-segment window to the end of the id.
    for segment in segments {
        second_last = last;
        last = segment;
    }
    Some(second_last)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::graph::Symbol;

    /// Assembles a symbol spanning lines 1-10 with every optional field unset.
    fn bare_symbol(id: String, file_path: &str, name: &str, kind: &str, language: &str) -> Symbol {
        Symbol {
            id,
            name: name.to_string(),
            kind: kind.to_string(),
            file_path: file_path.to_string(),
            line_start: 1,
            line_end: 10,
            signature: None,
            docstring: None,
            parent_id: None,
            language: language.to_string(),
            metadata: "{}".to_string(),
        }
    }

    /// TypeScript symbol located in `test.ts`.
    fn make_symbol(name: &str, kind: &str) -> Symbol {
        bare_symbol(
            format!("test.ts::{name}::{kind}"),
            "test.ts",
            name,
            kind,
            "typescript",
        )
    }

    /// Symbol located in `test.rs` with a caller-chosen language.
    fn make_symbol_with_language(name: &str, kind: &str, language: &str) -> Symbol {
        bare_symbol(
            format!("test.rs::{name}::{kind}"),
            "test.rs",
            name,
            kind,
            language,
        )
    }

    /// Embedding text for a symbol that has neither callers nor callees.
    fn embed_alone(symbol: &Symbol, importance: f64) -> String {
        build_embedding_text(symbol, &[], &[], importance)
    }

    #[test]
    fn test_basic_embedding_text() {
        let class_symbol = make_symbol("PaymentService", "class");
        assert_eq!(
            embed_alone(&class_symbol, 0.0),
            "class PaymentService | Payment Service"
        );
    }

    #[test]
    fn test_basic_embedding_text_no_split_needed() {
        let plain = make_symbol("login", "function");
        assert_eq!(embed_alone(&plain, 0.0), "function login");
    }

    #[test]
    fn test_embedding_text_with_signature() {
        let with_signature = Symbol {
            signature: Some("(amount: number) => boolean".to_string()),
            ..make_symbol("processPayment", "method")
        };
        assert_eq!(
            embed_alone(&with_signature, 0.0),
            "method processPayment | process Payment | (amount: number) => boolean"
        );
    }

    #[test]
    fn test_embedding_text_with_docstring() {
        let documented = Symbol {
            docstring: Some("Authenticates a user".to_string()),
            ..make_symbol("login", "function")
        };
        assert_eq!(
            embed_alone(&documented, 0.0),
            "function login | Authenticates a user"
        );
    }

    #[test]
    fn test_embedding_text_with_parent() {
        let nested = Symbol {
            parent_id: Some("src/pay.ts::PaymentService::class".to_string()),
            signature: Some("(amount: number) => boolean".to_string()),
            docstring: Some("Process a payment".to_string()),
            ..make_symbol("processPayment", "method")
        };
        assert_eq!(
            embed_alone(&nested, 0.0),
            "method processPayment | process Payment | (amount: number) => boolean | Process a payment | in PaymentService | in Payment Service"
        );
    }

    #[test]
    fn test_embedding_text_with_callers_and_callees() {
        let method = make_symbol("processPayment", "method");
        let incoming = ["OrderController".to_string(), "RetryWorker".to_string()];
        let outgoing = ["validateAmount".to_string(), "chargeCard".to_string()];
        let rendered = build_embedding_text(&method, &incoming, &outgoing, 0.0);
        assert!(rendered.contains("called-by OrderController RetryWorker"));
        assert!(rendered.contains("calls validateAmount chargeCard"));
    }

    #[test]
    fn test_embedding_text_with_file_path_context() {
        let located = Symbol {
            file_path: "src/payments/services/PaymentService.ts".to_string(),
            ..make_symbol("processPayment", "method")
        };
        assert!(embed_alone(&located, 0.0).contains("path payments services"));
    }

    #[test]
    fn test_embedding_text_snake_case_name() {
        let snake = make_symbol("payment_retry_worker", "function");
        assert!(embed_alone(&snake, 0.0).contains("payment retry worker"));
    }

    #[test]
    fn test_split_camel_case() {
        let cases = [
            ("processPayment", "process Payment"),
            ("PaymentService", "Payment Service"),
            ("login", "login"),
            ("getHTTPResponse", "get HTTP Response"),
            ("URL", "URL"),
        ];
        for (input, expected) in cases {
            assert_eq!(split_camel_case(input), expected);
        }
    }

    #[test]
    fn test_split_snake_case() {
        let cases = [
            ("payment_retry_worker", "payment retry worker"),
            ("API_KEY", "API KEY"),
            ("login", "login"),
        ];
        for (input, expected) in cases {
            assert_eq!(split_snake_case(input), expected);
        }
    }

    #[test]
    fn test_extract_name_from_id() {
        let qualified = extract_name_from_id("src/pay.ts::PaymentService::class");
        assert_eq!(qualified, Some("PaymentService"));
        let unqualified = extract_name_from_id("foo");
        assert_eq!(unqualified, None);
    }

    #[test]
    fn test_embedding_text_high_importance() {
        let rendered = embed_alone(&make_symbol("PaymentService", "class"), 0.8);
        assert!(rendered.contains("importance high core"));
    }

    #[test]
    fn test_embedding_text_medium_importance() {
        let rendered = embed_alone(&make_symbol("PaymentService", "class"), 0.5);
        assert!(rendered.contains("importance medium"));
    }

    #[test]
    fn test_embedding_text_low_importance() {
        let rendered = embed_alone(&make_symbol("PaymentService", "class"), 0.1);
        assert!(!rendered.contains("importance"));
    }

    #[test]
    fn test_generic_name_deranked_rust_new() {
        let text = embed_alone(&make_symbol_with_language("new", "function", "rust"), 0.9);
        assert!(
            !text.contains("importance"),
            "generic name 'new' should not get importance boost, got: {text}"
        );
    }

    #[test]
    fn test_non_generic_name_gets_importance_rust() {
        let text = embed_alone(
            &make_symbol_with_language("processPayment", "method", "rust"),
            0.9,
        );
        assert!(
            text.contains("importance high core"),
            "non-generic name should get importance boost, got: {text}"
        );
    }

    #[test]
    fn test_generic_name_deranked_python_init() {
        let text = embed_alone(
            &make_symbol_with_language("__init__", "method", "python"),
            0.8,
        );
        assert!(
            !text.contains("importance"),
            "generic name '__init__' should not get importance boost, got: {text}"
        );
    }

    #[test]
    fn test_generic_name_deranked_typescript_constructor() {
        let text = embed_alone(
            &make_symbol_with_language("constructor", "method", "typescript"),
            0.8,
        );
        assert!(
            !text.contains("importance"),
            "generic name 'constructor' should not get importance boost, got: {text}"
        );
    }
}