use std::path::{Path, PathBuf};
use crate::validation::normalize_path;
use crate::SymbolKind;
/// Maps a file path to a language identifier using its file extension.
///
/// The extension is matched case-sensitively against a fixed table. A path
/// with no extension (including dotfiles such as `.gitignore` and the empty
/// string) or with an extension that is not in the table yields `"unknown"`.
///
/// # Returns
///
/// An owned, lowercase language name such as `"rust"` or `"typescript"`.
pub fn detect_language_from_path(path: &str) -> String {
    let extension = Path::new(path).extension().and_then(|raw| raw.to_str());

    // Resolve to a static name first so the allocation happens exactly once.
    let language = match extension {
        Some("rs") => "rust",
        Some("py") => "python",
        Some("js") => "javascript",
        Some("ts") | Some("tsx") => "typescript",
        Some("java") => "java",
        Some("c") => "c",
        Some("cpp") | Some("cc") | Some("cxx") | Some("hpp") => "cpp",
        Some("go") => "go",
        Some("rb") => "ruby",
        Some("php") => "php",
        _ => "unknown",
    };

    language.to_string()
}
/// Resolves `file_path` to a path string, optionally relative to `root`.
///
/// Resolution rules, in order:
///
/// 1. An absolute `file_path` is used as-is (`root` is ignored).
/// 2. A relative `file_path` with `Some(root)` is joined onto `root`; no
///    canonicalization happens in this branch.
/// 3. A relative `file_path` with no root is joined onto the current working
///    directory and canonicalized. If the working directory cannot be read or
///    canonicalization fails, `file_path` itself is used unchanged.
///
/// The resolved path is then passed through `normalize_path`; if that returns
/// an error, the lossy string form of the resolved path is returned instead.
pub fn resolve_path(file_path: &PathBuf, root: &Option<PathBuf>) -> String {
    let candidate = match (file_path.is_absolute(), root) {
        (true, _) => file_path.clone(),
        (false, Some(base)) => base.join(file_path),
        (false, None) => match std::env::current_dir() {
            Ok(cwd) => cwd
                .join(file_path)
                .canonicalize()
                .unwrap_or_else(|_| file_path.clone()),
            Err(_) => file_path.clone(),
        },
    };

    // Best effort: fall back to the raw path text when normalization fails.
    normalize_path(&candidate).unwrap_or_else(|_| candidate.to_string_lossy().to_string())
}
/// Returns the display label for a symbol kind.
///
/// Every label is spelled exactly like its variant name (for example
/// `SymbolKind::TypeAlias` becomes `"TypeAlias"`). The match has no catch-all
/// arm, so adding a variant to `SymbolKind` is a compile error here until a
/// label is supplied.
pub fn format_symbol_kind(kind: &SymbolKind) -> &'static str {
    let label: &'static str = match *kind {
        SymbolKind::Function => "Function",
        SymbolKind::Method => "Method",
        SymbolKind::Class => "Class",
        SymbolKind::Interface => "Interface",
        SymbolKind::Enum => "Enum",
        SymbolKind::Module => "Module",
        SymbolKind::Union => "Union",
        SymbolKind::Namespace => "Namespace",
        SymbolKind::TypeAlias => "TypeAlias",
        SymbolKind::Unknown => "Unknown",
    };
    label
}
/// Parses a user-supplied symbol-kind name.
///
/// Matching is case-insensitive (the input is lowercased first) but the input
/// is not trimmed. Besides the canonical names, a few aliases are accepted:
/// `fn`, `struct` (as class), `trait` (as interface), `mod`, `ns`, and
/// `type` / `type alias` (as type alias).
///
/// # Returns
///
/// `Some(kind)` for a recognized spelling, `None` otherwise. No spelling maps
/// to `SymbolKind::Unknown`.
pub fn parse_symbol_kind(s: &str) -> Option<SymbolKind> {
    let lowered = s.to_lowercase();
    let kind = match lowered.as_str() {
        "function" | "fn" => SymbolKind::Function,
        "method" => SymbolKind::Method,
        "class" | "struct" => SymbolKind::Class,
        "interface" | "trait" => SymbolKind::Interface,
        "enum" => SymbolKind::Enum,
        "module" | "mod" => SymbolKind::Module,
        "union" => SymbolKind::Union,
        "namespace" | "ns" => SymbolKind::Namespace,
        "type" | "typealias" | "type alias" => SymbolKind::TypeAlias,
        _ => return None,
    };
    Some(kind)
}
/// Returns the byte sub-slice `source[start..end]` without panicking.
///
/// # Returns
///
/// `None` when `start > end` or `end > source.len()`; otherwise the requested
/// sub-slice (which is empty when `start == end`).
pub fn safe_slice(source: &[u8], start: usize, end: usize) -> Option<&[u8]> {
    // Checked range indexing rejects inverted and out-of-bounds ranges.
    source.get(start..end)
}
/// Returns the string sub-slice `source[start..end]` without panicking.
///
/// Offsets are byte offsets into `source`.
///
/// # Returns
///
/// `None` when `start > end`, when `end > source.len()`, or when either
/// offset does not fall on a UTF-8 character boundary; otherwise the
/// requested sub-slice.
pub fn safe_str_slice(source: &str, start: usize, end: usize) -> Option<&str> {
    // Checked range indexing covers ordering, bounds, and char boundaries.
    source.get(start..end)
}
/// Extracts `source[byte_start..byte_end]` as an owned string without
/// panicking on bad offsets or split UTF-8 sequences.
///
/// The two ends are treated differently: a `byte_start` that falls inside a
/// multi-byte character is rejected, while a `byte_end` that does so is moved
/// back to the previous character boundary.
///
/// # Returns
///
/// `None` when the range is inverted or extends past `source`, when `source`
/// as a whole is not valid UTF-8, or when `byte_start` is not on a character
/// boundary. Otherwise the (possibly end-trimmed) text.
pub fn extract_symbol_content_safe(
    source: &[u8],
    byte_start: usize,
    byte_end: usize,
) -> Option<String> {
    if byte_end < byte_start || source.len() < byte_end {
        return None;
    }

    match std::str::from_utf8(source) {
        Ok(text) if text.is_char_boundary(byte_start) => {
            // Trim a split trailing character instead of failing outright.
            let end = find_char_boundary_before(text, byte_end);
            text.get(byte_start..end).map(str::to_owned)
        }
        _ => None,
    }
}
/// Extracts `source[byte_start..byte_end]` together with up to
/// `context_bytes` bytes of surrounding text on each side.
///
/// The widened window is clamped to the bounds of `source` and then shrunk
/// inward to the nearest UTF-8 character boundaries, so a multi-byte
/// character that straddles either edge of the window is dropped rather than
/// split.
///
/// `context_bytes` may be arbitrarily large (including `usize::MAX`); the
/// window simply saturates at the start and end of `source`.
///
/// # Returns
///
/// `None` when the range is inverted or extends past `source`, when `source`
/// is not valid UTF-8, or when shrinking to character boundaries leaves an
/// inverted window (the whole window lies inside a single multi-byte
/// character). Otherwise the extracted text.
pub fn extract_context_safe(
    source: &[u8],
    byte_start: usize,
    byte_end: usize,
    context_bytes: usize,
) -> Option<String> {
    if byte_start > byte_end || byte_end > source.len() {
        return None;
    }
    let source_str = std::str::from_utf8(source).ok()?;

    let context_start = byte_start.saturating_sub(context_bytes);
    // Saturate instead of using `+`: a huge `context_bytes` would otherwise
    // overflow (panic in debug builds, wrap to a tiny window in release).
    let context_end = byte_end.saturating_add(context_bytes).min(source.len());

    // Shrink inward so neither edge lands inside a multi-byte character.
    let adjusted_start = find_char_boundary_after(source_str, context_start);
    let adjusted_end = find_char_boundary_before(source_str, context_end);

    source_str
        .get(adjusted_start..adjusted_end)
        .map(str::to_owned)
}
/// Returns the largest UTF-8 character boundary in `s` that is less than or
/// equal to `offset`.
///
/// An `offset` past the end of `s` is clamped to `s.len()` first.
fn find_char_boundary_before(s: &str, offset: usize) -> usize {
    let upper = offset.min(s.len());
    // Index 0 is always a boundary, so the search cannot come up empty; the
    // fallback only exists to avoid an `unwrap`.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
/// Returns the smallest UTF-8 character boundary in `s` that is greater than
/// or equal to `offset`.
///
/// Unlike `find_char_boundary_before`, an `offset` past the end of `s` is not
/// clamped: it is returned unchanged.
fn find_char_boundary_after(s: &str, offset: usize) -> usize {
    let len = s.len();
    (offset..len)
        .find(|&idx| s.is_char_boundary(idx))
        // Nothing found before the end: settle on `len`, or on `offset`
        // itself when the caller started beyond the end.
        .unwrap_or_else(|| offset.max(len))
}
#[cfg(test)]
mod tests {
    // Unit tests for the helpers in this file.
    //
    // Several fixtures use U+1F44B (waving hand), written as `\u{1f44b}`. It
    // encodes to the four UTF-8 bytes F0 9F 91 8B, which makes it convenient
    // for exercising offsets that land inside a multi-byte character.
    use super::*;

    // ---- detect_language_from_path ----

    #[test]
    fn test_detect_language_from_path() {
        assert_eq!(detect_language_from_path("src/main.rs"), "rust");
        assert_eq!(detect_language_from_path("script.py"), "python");
        assert_eq!(detect_language_from_path("app.js"), "javascript");
        assert_eq!(detect_language_from_path("component.ts"), "typescript");
        assert_eq!(detect_language_from_path("component.tsx"), "typescript");
        assert_eq!(detect_language_from_path("Main.java"), "java");
        assert_eq!(detect_language_from_path("header.c"), "c");
        assert_eq!(detect_language_from_path("source.cpp"), "cpp");
        assert_eq!(detect_language_from_path("source.cc"), "cpp");
        assert_eq!(detect_language_from_path("source.cxx"), "cpp");
        assert_eq!(detect_language_from_path("header.hpp"), "cpp");
        assert_eq!(detect_language_from_path("main.go"), "go");
        assert_eq!(detect_language_from_path("file.rb"), "ruby");
        assert_eq!(detect_language_from_path("index.php"), "php");
    }

    #[test]
    fn test_detect_language_unknown() {
        assert_eq!(detect_language_from_path("file.xyz"), "unknown");
        assert_eq!(detect_language_from_path("README"), "unknown");
        assert_eq!(detect_language_from_path(".gitignore"), "unknown");
        assert_eq!(detect_language_from_path(""), "unknown");
    }

    // ---- resolve_path ----

    #[test]
    fn test_resolve_path_absolute() {
        let path = PathBuf::from("/absolute/path/to/file.rs");

        // An absolute path wins regardless of whether a root is supplied.
        let root = None;
        assert_eq!(resolve_path(&path, &root), "/absolute/path/to/file.rs");

        let root = Some(PathBuf::from("/some/root"));
        assert_eq!(resolve_path(&path, &root), "/absolute/path/to/file.rs");
    }

    #[test]
    fn test_resolve_path_relative_with_root() {
        // These checks are deliberately loose: the exact output depends on
        // what `normalize_path` does with the joined path.
        let path = PathBuf::from("src/main.rs");
        let root = Some(PathBuf::from("/project"));
        let result = resolve_path(&path, &root);
        assert!(result.contains("src/main.rs") || result.contains("project"));

        let path = PathBuf::from("../lib.rs");
        let root = Some(PathBuf::from("/project/src"));
        let result = resolve_path(&path, &root);
        assert!(result.contains("lib.rs") || result.contains("project"));
    }

    #[test]
    fn test_resolve_path_relative_no_root() {
        let path = PathBuf::from("src/main.rs");
        let root = None;
        let result = resolve_path(&path, &root);
        assert!(result.contains("src/main.rs"));
    }

    // ---- format_symbol_kind / parse_symbol_kind ----

    #[test]
    fn test_format_symbol_kind() {
        assert_eq!(format_symbol_kind(&SymbolKind::Function), "Function");
        assert_eq!(format_symbol_kind(&SymbolKind::Method), "Method");
        assert_eq!(format_symbol_kind(&SymbolKind::Class), "Class");
        assert_eq!(format_symbol_kind(&SymbolKind::Interface), "Interface");
        assert_eq!(format_symbol_kind(&SymbolKind::Enum), "Enum");
        assert_eq!(format_symbol_kind(&SymbolKind::Module), "Module");
        assert_eq!(format_symbol_kind(&SymbolKind::Union), "Union");
        assert_eq!(format_symbol_kind(&SymbolKind::Namespace), "Namespace");
        assert_eq!(format_symbol_kind(&SymbolKind::TypeAlias), "TypeAlias");
        assert_eq!(format_symbol_kind(&SymbolKind::Unknown), "Unknown");
    }

    #[test]
    fn test_parse_symbol_kind() {
        // Canonical names.
        assert_eq!(parse_symbol_kind("function"), Some(SymbolKind::Function));
        assert_eq!(parse_symbol_kind("method"), Some(SymbolKind::Method));
        assert_eq!(parse_symbol_kind("class"), Some(SymbolKind::Class));
        assert_eq!(parse_symbol_kind("interface"), Some(SymbolKind::Interface));
        assert_eq!(parse_symbol_kind("enum"), Some(SymbolKind::Enum));
        assert_eq!(parse_symbol_kind("module"), Some(SymbolKind::Module));
        assert_eq!(parse_symbol_kind("union"), Some(SymbolKind::Union));
        assert_eq!(parse_symbol_kind("namespace"), Some(SymbolKind::Namespace));
        assert_eq!(parse_symbol_kind("typealias"), Some(SymbolKind::TypeAlias));

        // Aliases.
        assert_eq!(parse_symbol_kind("fn"), Some(SymbolKind::Function));
        assert_eq!(parse_symbol_kind("struct"), Some(SymbolKind::Class));
        assert_eq!(parse_symbol_kind("trait"), Some(SymbolKind::Interface));
        assert_eq!(parse_symbol_kind("mod"), Some(SymbolKind::Module));
        assert_eq!(parse_symbol_kind("ns"), Some(SymbolKind::Namespace));
        assert_eq!(parse_symbol_kind("type"), Some(SymbolKind::TypeAlias));
        assert_eq!(parse_symbol_kind("type alias"), Some(SymbolKind::TypeAlias));
    }

    #[test]
    fn test_parse_symbol_kind_case_insensitive() {
        assert_eq!(parse_symbol_kind("FUNCTION"), Some(SymbolKind::Function));
        assert_eq!(parse_symbol_kind("Function"), Some(SymbolKind::Function));
        assert_eq!(parse_symbol_kind("Fn"), Some(SymbolKind::Function));
        assert_eq!(parse_symbol_kind("CLASS"), Some(SymbolKind::Class));
        assert_eq!(parse_symbol_kind("Struct"), Some(SymbolKind::Class));
    }

    #[test]
    fn test_parse_symbol_kind_unknown() {
        assert_eq!(parse_symbol_kind("unknown_kind"), None);
        assert_eq!(parse_symbol_kind(""), None);
        assert_eq!(parse_symbol_kind("xyz"), None);
    }

    // ---- safe_slice / safe_str_slice ----

    #[test]
    fn test_safe_slice() {
        let data: &[u8] = b"hello";
        let empty: &[u8] = b"";
        assert_eq!(safe_slice(data, 0, 5), Some(&b"hello"[..]));
        assert_eq!(safe_slice(data, 1, 3), Some(&b"el"[..]));
        assert_eq!(safe_slice(data, 2, 2), Some(empty));
        assert_eq!(safe_slice(data, 3, 2), None);
        assert_eq!(safe_slice(data, 0, 6), None);
    }

    #[test]
    fn test_safe_str_slice() {
        // 'é' (U+00E9) occupies bytes 1..3, so the string is 6 bytes long.
        let text = "h\u{e9}llo";
        assert_eq!(safe_str_slice(text, 0, 1), Some("h"));
        assert_eq!(safe_str_slice(text, 1, 3), Some("\u{e9}"));
        assert_eq!(safe_str_slice(text, 0, 6), Some(text));
        assert_eq!(safe_str_slice(text, 1, 2), None);
        assert_eq!(safe_str_slice(text, 0, 7), None);
        assert_eq!(safe_str_slice(text, 4, 2), None);
    }

    // ---- extract_symbol_content_safe ----

    #[test]
    fn test_extract_symbol_content_safe_ascii() {
        let source = b"fn hello() { return 42; }";
        let result = extract_symbol_content_safe(source, 0, source.len());
        assert_eq!(result, Some("fn hello() { return 42; }".to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_partial_range() {
        let source = b"fn hello() { return 42; }";
        let result = extract_symbol_content_safe(source, 3, 8);
        assert_eq!(result, Some("hello".to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_emoji() {
        let source = "fn test() { // \u{1f44b} }";
        let bytes = source.as_bytes();
        let result = extract_symbol_content_safe(bytes, 0, bytes.len());
        assert_eq!(result, Some(source.to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_emoji_splits_end() {
        // "hi" followed by the 4-byte emoji; an end offset of 4 lands inside
        // the emoji and must be pulled back to offset 2.
        let source: Vec<u8> = vec![b'h', b'i', 0xF0, 0x9F, 0x91, 0x8B];
        let result = extract_symbol_content_safe(&source, 0, 4);
        assert_eq!(result, Some("hi".to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_start_at_boundary() {
        // The emoji starts at byte 3, which is a valid boundary.
        let source = "abc\u{1f44b}xyz";
        let bytes = source.as_bytes();
        let result = extract_symbol_content_safe(bytes, 3, bytes.len());
        assert_eq!(result, Some("\u{1F44B}xyz".to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_start_splits_char_returns_none() {
        // Byte 4 is the second byte of the emoji: a split start is rejected.
        let source = "abc\u{1f44b}xyz";
        let bytes = source.as_bytes();
        let result = extract_symbol_content_safe(bytes, 4, bytes.len());
        assert_eq!(result, None);
    }

    #[test]
    fn test_extract_symbol_content_safe_out_of_bounds() {
        let source = b"hello";
        assert_eq!(extract_symbol_content_safe(source, 0, 100), None);
        assert_eq!(extract_symbol_content_safe(source, 10, 20), None);
        assert_eq!(extract_symbol_content_safe(source, 5, 3), None);
    }

    #[test]
    fn test_extract_symbol_content_safe_cjk() {
        let source = "fn 你好() { return 世界; }";
        let bytes = source.as_bytes();
        let result = extract_symbol_content_safe(bytes, 0, bytes.len());
        assert_eq!(result, Some(source.to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_accented() {
        let source = "fn héllo() { return café; }";
        let bytes = source.as_bytes();
        let result = extract_symbol_content_safe(bytes, 0, bytes.len());
        assert_eq!(result, Some(source.to_string()));
    }

    #[test]
    fn test_extract_symbol_content_safe_invalid_utf8() {
        let source: &[u8] = &[0xFF, 0xFE, 0xFD];
        let result = extract_symbol_content_safe(source, 0, 3);
        assert_eq!(result, None);
    }

    // ---- extract_context_safe ----

    #[test]
    fn test_extract_context_safe() {
        // "line2" spans bytes 6..11; three bytes of context on each side
        // widen the window to 3..14.
        let source = "line1\nline2\nline3\nline4";
        let bytes = source.as_bytes();
        let result = extract_context_safe(bytes, 6, 11, 3);
        assert_eq!(result, Some("e1\nline2\nli".to_string()));
    }

    #[test]
    fn test_extract_context_safe_with_emoji() {
        // "before" is bytes 0..6, the emoji 6..10, "after" 10..15.
        let source = "before\u{1f44b}after";
        let bytes = source.as_bytes();

        // Both window edges (4 and 12) already sit on boundaries.
        assert_eq!(
            extract_context_safe(bytes, 6, 10, 2),
            Some("re\u{1f44b}af".to_string())
        );
        // The window end (8) lands inside the emoji and is pulled back to 6.
        assert_eq!(
            extract_context_safe(bytes, 0, 6, 2),
            Some("before".to_string())
        );
        // The window start (8) lands inside the emoji and is pushed to 10.
        assert_eq!(
            extract_context_safe(bytes, 10, 15, 2),
            Some("after".to_string())
        );
    }

    #[test]
    fn test_extract_context_safe_out_of_bounds() {
        let source = b"hello";
        assert_eq!(extract_context_safe(source, 0, 100, 1), None);
        assert_eq!(extract_context_safe(source, 4, 2, 1), None);
        assert_eq!(extract_context_safe(&[0xFF, 0xFE, 0xFD], 0, 3, 1), None);
    }

    // ---- char-boundary helpers ----

    #[test]
    fn test_find_char_boundary_before() {
        // Boundaries of "a<emoji>b" are at 0, 1, 5 and 6.
        let s = "a\u{1f44b}b";
        assert_eq!(find_char_boundary_before(s, 5), 5);
        assert_eq!(find_char_boundary_before(s, 4), 1);
        assert_eq!(find_char_boundary_before(s, 3), 1);
        assert_eq!(find_char_boundary_before(s, 2), 1);
        assert_eq!(find_char_boundary_before(s, 1), 1);
        assert_eq!(find_char_boundary_before(s, 0), 0);
        // Offsets past the end are clamped to the string length.
        assert_eq!(find_char_boundary_before(s, 100), 6);
    }

    #[test]
    fn test_find_char_boundary_after() {
        // Boundaries of "a<emoji>b" are at 0, 1, 5 and 6.
        let s = "a\u{1f44b}b";
        assert_eq!(find_char_boundary_after(s, 0), 0);
        assert_eq!(find_char_boundary_after(s, 1), 1);
        assert_eq!(find_char_boundary_after(s, 2), 5);
        assert_eq!(find_char_boundary_after(s, 3), 5);
        assert_eq!(find_char_boundary_after(s, 4), 5);
        assert_eq!(find_char_boundary_after(s, 5), 5);
        assert_eq!(find_char_boundary_after(s, 6), 6);
    }
}
/// Locates the project root by walking upward from the current working
/// directory.
///
/// Starting at the working directory (or `"."` if it cannot be determined),
/// each directory that has a parent is checked for one of the well-known
/// project marker files; the first directory containing a marker is returned.
/// The final ancestor, which has no parent, is never checked.
///
/// # Returns
///
/// The nearest marked ancestor directory, or the starting directory itself
/// when no marker is found.
pub fn detect_project_root() -> PathBuf {
    // Files or directories whose presence identifies a project root.
    const MARKERS: [&str; 8] = [
        ".git",
        "Cargo.toml",
        "package.json",
        "go.mod",
        "pyproject.toml",
        "setup.py",
        "pom.xml",
        "build.gradle",
    ];

    let start = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));

    let located = start
        .ancestors()
        // Stop before the last ancestor (the one without a parent).
        .take_while(|dir| dir.parent().is_some())
        .find(|dir| MARKERS.iter().any(|marker| dir.join(marker).exists()))
        .map(|dir| dir.to_path_buf());

    match located {
        Some(root) => root,
        None => start,
    }
}