use crate::adapters::scip::parser::{find_enclosing_definition, parse_range};
use crate::domain::ports::SemanticDataSource;
use crate::domain::semantic::{
Definition, DocumentData, Reference, ReferenceRole, Relationship, RelationshipKind,
SemanticData, SourceRange, SymbolKind, SymbolMetadata,
};
use crate::scip;
use anyhow::{Context, Result};
/// Semantic data source backed by a SCIP index file on disk.
pub struct ScipDataSourceAdapter {
    /// Location of the SCIP index file that `load` reads.
    pub scip_path: std::path::PathBuf,
}

impl ScipDataSourceAdapter {
    /// Creates an adapter for the index at `path`.
    ///
    /// The path is only stored here; the file is not touched until the
    /// adapter is asked to load it.
    pub fn new<P>(path: P) -> Self
    where
        P: AsRef<std::path::Path>,
    {
        let scip_path = path.as_ref().to_owned();
        Self { scip_path }
    }
}
impl SemanticDataSource for ScipDataSourceAdapter {
fn load(&self) -> Result<SemanticData> {
let index = load_scip_index(&self.scip_path)?;
let external_symbols = index
.external_symbols
.iter()
.map(|sym| convert_symbol_info(sym, true))
.collect();
let mut symbol_map = std::collections::HashMap::new();
for sym in &index.external_symbols {
symbol_map.insert(sym.symbol.clone(), convert_symbol_info(sym, true));
}
for doc in &index.documents {
for sym in &doc.symbols {
symbol_map.insert(sym.symbol.clone(), convert_symbol_info(sym, false));
}
}
let documents = index
.documents
.iter()
.map(|doc| {
let (definitions, references) = partition_occurrences_with_map(doc, &symbol_map);
DocumentData {
relative_path: doc.relative_path.clone(),
language: doc.language.clone(),
definitions,
references,
}
})
.collect();
let mut semantic_data = SemanticData {
project_root: index
.metadata
.as_ref()
.map(|m| m.project_root.clone())
.unwrap_or_default(),
documents,
external_symbols,
};
enrich_semantic_data(&mut semantic_data)?;
Ok(semantic_data)
}
}
fn load_scip_index<P: AsRef<std::path::Path>>(path: P) -> Result<scip::Index> {
use memmap2::Mmap;
use prost::Message;
use std::fs::File;
let file = File::open(path).context("Failed to open SCIP index file")?;
let mmap = unsafe { Mmap::map(&file).context("Failed to mmap SCIP index file")? };
let index = scip::Index::decode(&mmap[..]).context("Failed to decode SCIP index")?;
Ok(index)
}
/// Splits the occurrences of `doc` into definitions and references.
///
/// * An occurrence whose role bits include `Definition` becomes a
///   [`Definition`]. Its metadata is looked up in `symbol_map`; when the
///   symbol is absent, default metadata is synthesised from the symbol string.
///   The enclosing range falls back to the occurrence's own range when the
///   index did not record one.
/// * Any other occurrence with a non-empty symbol that does not start with
///   `"local "` becomes a [`Reference`], tagged with the symbol of the
///   definition that encloses it (empty string when none is found).
/// * Everything else is dropped.
fn partition_occurrences_with_map(
    doc: &scip::Document,
    symbol_map: &std::collections::HashMap<String, SymbolMetadata>,
) -> (Vec<Definition>, Vec<Reference>) {
    let definition_bit = scip::SymbolRole::Definition as i32;

    // Converts a raw SCIP range into the domain representation.
    let to_source_range = |raw: &Vec<i32>| {
        let (start_line, start_column, end_line, end_column) = parse_range(raw);
        SourceRange {
            start_line: start_line as u32,
            start_column: start_column as u32,
            end_line: end_line as u32,
            end_column: end_column as u32,
        }
    };

    // (range, symbol) pairs of every named definition, handed to
    // `find_enclosing_definition` to resolve the enclosing symbol of a reference.
    let defs: Vec<(Vec<i32>, &str)> = doc
        .occurrences
        .iter()
        .filter_map(|occ| {
            if (occ.symbol_roles & definition_bit) == 0 || occ.symbol.is_empty() {
                return None;
            }
            let span = if occ.enclosing_range.is_empty() {
                &occ.range
            } else {
                &occ.enclosing_range
            };
            Some((span.clone(), occ.symbol.as_str()))
        })
        .collect();

    let mut definitions = Vec::new();
    let mut references = Vec::new();

    for occ in &doc.occurrences {
        if (occ.symbol_roles & definition_bit) != 0 {
            let metadata = match symbol_map.get(&occ.symbol) {
                Some(known) => known.clone(),
                None => create_default_metadata(&occ.symbol),
            };
            let range = to_source_range(&occ.range);
            let enclosing_raw = if occ.enclosing_range.is_empty() {
                &occ.range
            } else {
                &occ.enclosing_range
            };
            let enclosing_range = to_source_range(enclosing_raw);
            definitions.push(Definition {
                symbol: occ.symbol.clone(),
                range,
                enclosing_range,
                metadata,
            });
            continue;
        }

        // Unnamed occurrences and file-local symbols are not tracked as references.
        if occ.symbol.is_empty() || occ.symbol.starts_with("local ") {
            continue;
        }

        let enclosing_symbol = find_enclosing_definition(&occ.range, &defs)
            .unwrap_or("")
            .to_string();
        let range = to_source_range(&occ.range);
        references.push(Reference {
            symbol: occ.symbol.clone(),
            range,
            enclosing_symbol,
            role: convert_role(occ.symbol_roles),
        });
    }

    (definitions, references)
}
/// Converts a SCIP `SymbolInformation` record into domain [`SymbolMetadata`].
///
/// The kind recorded in the index is used when it maps to a known
/// [`SymbolKind`]; otherwise the kind is inferred from the shape of the symbol
/// string. An empty `enclosing_symbol` in the index becomes `None`.
/// `is_external` is copied into the result unchanged.
fn convert_symbol_info(sym: &scip::SymbolInformation, is_external: bool) -> SymbolMetadata {
    let kind = match convert_symbol_kind(sym.kind() as i32) {
        SymbolKind::Unknown => infer_kind_from_symbol(&sym.symbol),
        declared => declared,
    };

    let mut relationships = Vec::with_capacity(sym.relationships.len());
    for rel in &sym.relationships {
        relationships.push(Relationship {
            target_symbol: rel.symbol.clone(),
            kind: convert_relationship_kind(rel),
        });
    }

    let signature = sym
        .signature_documentation
        .as_ref()
        .map(|doc| doc.text.clone());
    let enclosing_symbol =
        (!sym.enclosing_symbol.is_empty()).then(|| sym.enclosing_symbol.clone());

    SymbolMetadata {
        symbol: sym.symbol.clone(),
        kind,
        display_name: sym.display_name.clone(),
        documentation: sym.documentation.clone(),
        signature,
        relationships,
        enclosing_symbol,
        is_external,
    }
}
/// Builds placeholder metadata for a symbol that has no entry in the index.
///
/// The kind is inferred from the symbol string, the display name is the symbol
/// string itself, and the symbol is marked as non-external with no
/// documentation, signature, relationships or enclosing symbol.
fn create_default_metadata(symbol: &str) -> SymbolMetadata {
    SymbolMetadata {
        kind: infer_kind_from_symbol(symbol),
        symbol: symbol.to_owned(),
        display_name: symbol.to_owned(),
        documentation: Vec::new(),
        relationships: Vec::new(),
        signature: None,
        enclosing_symbol: None,
        is_external: false,
    }
}
/// Guesses a [`SymbolKind`] from the trailing characters of a symbol string.
///
/// | suffix      | contains `#` | result      |
/// |-------------|--------------|-------------|
/// | `).`        | yes          | `Method`    |
/// | `).`        | no           | `Function`  |
/// | `)` or `]`  | —            | `Parameter` |
/// | `#`         | —            | `Class`     |
/// | `:`         | —            | `Module`    |
/// | `!`         | —            | `Macro`     |
/// | `/`         | —            | `Namespace` |
/// | `.`         | yes          | `Field`     |
/// | `.`         | no           | `Variable`  |
///
/// Anything else, including the empty string, is `Unknown`.
fn infer_kind_from_symbol(symbol: &str) -> SymbolKind {
    // The two-character `).` suffix must be tested before the plain `.` case.
    if symbol.ends_with(").") {
        return if symbol.contains('#') {
            SymbolKind::Method
        } else {
            SymbolKind::Function
        };
    }

    match symbol.chars().next_back() {
        Some(')') | Some(']') => SymbolKind::Parameter,
        Some('#') => SymbolKind::Class,
        Some(':') => SymbolKind::Module,
        Some('!') => SymbolKind::Macro,
        Some('/') => SymbolKind::Namespace,
        Some('.') if symbol.contains('#') => SymbolKind::Field,
        Some('.') => SymbolKind::Variable,
        _ => SymbolKind::Unknown,
    }
}
/// Maps the integer value of a SCIP symbol kind to the domain [`SymbolKind`].
///
/// Values without an arm below map to `SymbolKind::Unknown`.
fn convert_symbol_kind(kind: i32) -> SymbolKind {
    match kind {
        // Callables
        9 => SymbolKind::Constructor,
        17 => SymbolKind::Function,
        26 => SymbolKind::Method,
        66 => SymbolKind::AbstractMethod,
        80 => SymbolKind::StaticMethod,

        // Types
        7 => SymbolKind::Class,
        11 => SymbolKind::Enum,
        21 => SymbolKind::Interface,
        42 => SymbolKind::Protocol,
        49 => SymbolKind::Struct,
        53 => SymbolKind::Trait,
        55 => SymbolKind::TypeAlias,

        // Values
        8 => SymbolKind::Constant,
        15 => SymbolKind::Field,
        37 => SymbolKind::Parameter,
        61 => SymbolKind::Variable,

        // Containers and macros
        25 => SymbolKind::Macro,
        29 => SymbolKind::Module,
        30 => SymbolKind::Namespace,
        35 => SymbolKind::Package,

        _ => SymbolKind::Unknown,
    }
}
/// Collapses the boolean flags of a SCIP relationship into one
/// [`RelationshipKind`].
///
/// When several flags are set the first match wins, in the order
/// implementation, type definition, reference. A relationship with none of
/// these three flags set is reported as `Inherits`.
fn convert_relationship_kind(rel: &scip::Relationship) -> RelationshipKind {
    match (rel.is_implementation, rel.is_type_definition, rel.is_reference) {
        (true, _, _) => RelationshipKind::Implements,
        (false, true, _) => RelationshipKind::TypeDefinition,
        (false, false, true) => RelationshipKind::References,
        (false, false, false) => RelationshipKind::Inherits,
    }
}
/// Derives a [`ReferenceRole`] from a SCIP `symbol_roles` bitmask.
///
/// Flags are tested in priority order: write access, then read access, then
/// import. A mask with none of those bits set is classified as `Call`.
fn convert_role(symbol_roles: i32) -> ReferenceRole {
    let has_flag = |flag: scip::SymbolRole| (symbol_roles & (flag as i32)) != 0;

    if has_flag(scip::SymbolRole::WriteAccess) {
        return ReferenceRole::Write;
    }
    if has_flag(scip::SymbolRole::ReadAccess) {
        return ReferenceRole::Read;
    }
    if has_flag(scip::SymbolRole::Import) {
        return ReferenceRole::Import;
    }
    ReferenceRole::Call
}
/// Runs language-specific enrichment over every document in `data`.
///
/// Only Python documents are enriched. A document counts as Python when its
/// `language` field is exactly `"python"`, or when that field is empty and the
/// relative path ends with `.py`.
///
/// # Errors
///
/// Stops at, and returns, the first error reported while enriching a document.
fn enrich_semantic_data(data: &mut SemanticData) -> Result<()> {
    let project_root = &data.project_root;

    for document in data.documents.iter_mut() {
        // The file extension is consulted only when the index recorded no language.
        let is_python = if document.language.is_empty() {
            document.relative_path.ends_with(".py")
        } else {
            document.language == "python"
        };

        if is_python {
            enrich_python_return_types(document, project_root)?;
        }
    }

    Ok(())
}
fn enrich_python_return_types(doc: &mut DocumentData, project_root: &str) -> Result<()> {
use std::path::Path;
let root_path = project_root.strip_prefix("file://").unwrap_or(project_root);
let source_path = Path::new(root_path).join(&doc.relative_path);
let source_code = std::fs::read_to_string(&source_path)
.context(format!("Failed to read source file: {:?}", source_path))?;
let lines: Vec<&str> = source_code.lines().collect();
for definition in &mut doc.definitions {
if !matches!(
definition.metadata.kind,
SymbolKind::Function
| SymbolKind::Method
| SymbolKind::Constructor
| SymbolKind::StaticMethod
| SymbolKind::AbstractMethod
) {
continue;
}
for reference in &doc.references {
if reference.enclosing_symbol != definition.symbol {
continue;
}
if !matches!(
reference.role,
ReferenceRole::TypeUsage | ReferenceRole::Call | ReferenceRole::Read
) {
continue;
}
if is_python_return_type_annotation(reference, definition, &lines) {
let already_exists = definition.metadata.relationships.iter().any(|r| {
r.target_symbol == reference.symbol
&& matches!(r.kind, RelationshipKind::TypeDefinition)
});
if !already_exists {
definition.metadata.relationships.push(Relationship {
target_symbol: reference.symbol.clone(),
kind: RelationshipKind::TypeDefinition,
});
}
break;
}
}
}
Ok(())
}
/// Heuristically decides whether `type_ref` sits on the return-annotation line
/// of the Python function `function_def`.
///
/// The reference must start on the definition's start line or at most five
/// lines below it, that line must exist in `lines`, and its text must contain
/// `->` and end with `:` once trailing whitespace is ignored.
///
/// The check is line-based only: the column of the reference is not inspected.
fn is_python_return_type_annotation(
    type_ref: &Reference,
    function_def: &Definition,
    lines: &[&str],
) -> bool {
    let ref_line = type_ref.range.start_line as usize;
    let def_line = function_def.range.start_line as usize;

    // `checked_sub` yields `None` when the reference precedes the definition.
    match ref_line.checked_sub(def_line) {
        Some(offset) if offset <= 5 => {}
        _ => return false,
    }

    match lines.get(ref_line) {
        Some(text) => text.contains("->") && text.trim_end().ends_with(':'),
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use prost::Message;
    use std::io::Write;

    /// Writes `contents` to `file_name` inside a fresh temporary directory.
    ///
    /// The returned directory guard must be kept alive for as long as the file
    /// is needed; dropping it removes the directory.
    fn write_fixture(file_name: &str, contents: &[u8]) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(file_name);
        let mut file = std::fs::File::create(&path).unwrap();
        file.write_all(contents).unwrap();
        drop(file);
        (dir, path)
    }

    /// A range starting at column 0 of `start_line` and ending at column 0 of `end_line`.
    fn span(start_line: u32, end_line: u32) -> SourceRange {
        SourceRange {
            start_line,
            start_column: 0,
            end_line,
            end_column: 0,
        }
    }

    /// A read reference to `MyType` on `line`, enclosed by `func`.
    fn my_type_read_on(line: u32) -> Reference {
        Reference {
            symbol: "MyType".into(),
            range: span(line, line),
            enclosing_symbol: "func".into(),
            role: ReferenceRole::Read,
        }
    }

    #[test]
    fn test_load_nonexistent_file_returns_error() {
        let outcome = ScipDataSourceAdapter::new("/nonexistent/path/to/index.scip").load();
        assert!(outcome.is_err());

        let err_msg = format!("{:?}", outcome.unwrap_err());
        let mentions_open_failure =
            err_msg.contains("Failed to open") || err_msg.contains("nonexistent");
        assert!(
            mentions_open_failure,
            "expected open/decode error, got: {}",
            err_msg
        );
    }

    #[test]
    fn test_load_invalid_protobuf_returns_error() {
        let (_dir, path) = write_fixture("invalid.scip", b"not valid protobuf content");

        let outcome = ScipDataSourceAdapter::new(&path).load();
        assert!(outcome.is_err());

        let err_msg = format!("{:?}", outcome.unwrap_err());
        let mentions_decode_failure = err_msg.contains("decode") || err_msg.contains("Failed");
        assert!(
            mentions_decode_failure,
            "expected decode error, got: {}",
            err_msg
        );
    }

    #[test]
    fn test_empty_scip_index_returns_empty_data() {
        let mut encoded = Vec::new();
        scip::Index::default().encode(&mut encoded).unwrap();
        let (_dir, path) = write_fixture("empty.scip", &encoded);

        let data = ScipDataSourceAdapter::new(&path).load().unwrap();

        assert!(data.documents.is_empty());
        assert!(data.external_symbols.is_empty());
        assert!(data.project_root.is_empty());
    }

    #[test]
    fn test_infer_kind_from_symbol() {
        let cases = [
            ("abc/", SymbolKind::Namespace),
            ("abc#", SymbolKind::Class),
            ("abc().", SymbolKind::Function),
            ("Class#method().", SymbolKind::Method),
            ("func().(param)", SymbolKind::Parameter),
            ("func().[T]", SymbolKind::Parameter),
            ("module:", SymbolKind::Module),
            ("macro!", SymbolKind::Macro),
            ("var.", SymbolKind::Variable),
            ("Class#field.", SymbolKind::Field),
            ("unknown", SymbolKind::Unknown),
        ];
        for (symbol, expected) in cases.iter() {
            assert_eq!(infer_kind_from_symbol(symbol), *expected);
        }
    }

    #[test]
    fn test_convert_symbol_kind() {
        let cases = [
            (17, SymbolKind::Function),
            (26, SymbolKind::Method),
            (7, SymbolKind::Class),
            (61, SymbolKind::Variable),
            (15, SymbolKind::Field),
            (37, SymbolKind::Parameter),
            (30, SymbolKind::Namespace),
            (29, SymbolKind::Module),
            (25, SymbolKind::Macro),
            (999, SymbolKind::Unknown),
        ];
        for (raw_kind, expected) in cases.iter() {
            assert_eq!(convert_symbol_kind(*raw_kind), *expected);
        }
    }

    #[test]
    fn test_convert_role() {
        let cases = [
            (scip::SymbolRole::WriteAccess as i32, ReferenceRole::Write),
            (scip::SymbolRole::ReadAccess as i32, ReferenceRole::Read),
            (scip::SymbolRole::Import as i32, ReferenceRole::Import),
            (0, ReferenceRole::Call),
        ];
        for (mask, expected) in cases.iter() {
            assert_eq!(convert_role(*mask), *expected);
        }
    }

    #[test]
    fn test_is_python_return_type_annotation() {
        let func_def = Definition {
            symbol: "func".into(),
            range: span(0, 0),
            enclosing_range: span(0, 1),
            metadata: create_default_metadata("func"),
        };

        // Reference on the `def` line, which carries a return annotation.
        let on_header = my_type_read_on(0);
        let annotated = ["def func() -> MyType:"];
        assert!(is_python_return_type_annotation(
            &on_header, &func_def, &annotated
        ));

        // Same position, but the header has no `->`.
        let lines_no_arrow = ["def func(x: MyType):"];
        assert!(!is_python_return_type_annotation(
            &on_header,
            &func_def,
            &lines_no_arrow
        ));

        // Reference inside the body rather than on the header line.
        let in_body = my_type_read_on(1);
        let lines_wrong_line = ["def func():", " return MyType()"];
        assert!(!is_python_return_type_annotation(
            &in_body,
            &func_def,
            &lines_wrong_line
        ));
    }
}