use crate::parsing::{Import, LanguageId, PipelineSymbolCache, ResolveResult};
use crate::relationship::RelationshipMetadata;
use crate::symbol::ScopeContext;
use crate::types::{CompactString, FileId, Range, SymbolId};
use crate::{RelationKind, Symbol, SymbolKind, Visibility};
use std::path::PathBuf;
use std::sync::Arc;
/// A symbol as produced by a parser: name, kind and source range plus
/// optional descriptive attributes. It carries no symbol id or file id.
#[derive(Debug, Clone)]
pub struct RawSymbol {
    /// Name of the symbol.
    pub name: CompactString,
    /// Kind of the symbol (function, type, ...).
    pub kind: SymbolKind,
    /// Source range covered by the symbol.
    pub range: Range,
    /// Optional signature text.
    pub signature: Option<Box<str>>,
    /// Optional documentation comment text.
    pub doc_comment: Option<Box<str>>,
    /// Visibility of the symbol; [`RawSymbol::new`] defaults this to `Public`.
    pub visibility: Visibility,
    /// Optional scope information for the symbol.
    pub scope_context: Option<ScopeContext>,
}

impl RawSymbol {
    /// Creates a symbol with the mandatory fields set, no signature, no doc
    /// comment, no scope context and `Visibility::Public`.
    pub fn new(name: impl Into<CompactString>, kind: SymbolKind, range: Range) -> Self {
        let name = name.into();
        Self {
            name,
            kind,
            range,
            visibility: Visibility::Public,
            signature: None,
            doc_comment: None,
            scope_context: None,
        }
    }

    /// Returns the symbol with its signature replaced by `sig`.
    pub fn with_signature(self, sig: impl Into<Box<str>>) -> Self {
        Self {
            signature: Some(sig.into()),
            ..self
        }
    }

    /// Returns the symbol with its doc comment replaced by `doc`.
    pub fn with_doc_comment(self, doc: impl Into<Box<str>>) -> Self {
        Self {
            doc_comment: Some(doc.into()),
            ..self
        }
    }

    /// Returns the symbol with its visibility replaced by `vis`.
    pub fn with_visibility(self, vis: Visibility) -> Self {
        Self {
            visibility: vis,
            ..self
        }
    }

    /// Returns the symbol with its scope context replaced by `ctx`.
    pub fn with_scope_context(self, ctx: ScopeContext) -> Self {
        Self {
            scope_context: Some(ctx),
            ..self
        }
    }
}
/// An import statement as produced by a parser, not yet tied to a file id.
#[derive(Debug, Clone)]
pub struct RawImport {
    /// Imported path, exactly as given to [`RawImport::new`].
    pub path: String,
    /// Optional alias the import is bound to.
    pub alias: Option<String>,
    /// Whether this is a glob import.
    pub is_glob: bool,
    /// Whether this is a type-only import.
    pub is_type_only: bool,
}

impl RawImport {
    /// Creates a plain import of `path`: no alias, not a glob, not type-only.
    pub fn new(path: impl Into<String>) -> Self {
        let path = path.into();
        Self {
            path,
            alias: None,
            is_glob: false,
            is_type_only: false,
        }
    }

    /// Returns the import with its alias set to `alias`.
    pub fn with_alias(self, alias: impl Into<String>) -> Self {
        Self {
            alias: Some(alias.into()),
            ..self
        }
    }

    /// Returns the import marked as a glob import.
    pub fn as_glob(self) -> Self {
        Self {
            is_glob: true,
            ..self
        }
    }

    /// Returns the import marked as type-only.
    pub fn as_type_only(self) -> Self {
        Self {
            is_type_only: true,
            ..self
        }
    }

    /// Consumes the raw import and attaches `file_id`, producing an [`Import`]
    /// with all other fields carried over unchanged.
    pub fn into_import(self, file_id: FileId) -> Import {
        let Self {
            path,
            alias,
            is_glob,
            is_type_only,
        } = self;
        Import {
            file_id,
            path,
            alias,
            is_glob,
            is_type_only,
        }
    }
}
/// A relationship between two symbols identified by name and source range
/// only (no symbol ids).
#[derive(Debug, Clone)]
pub struct RawRelationship {
    /// Name of the source symbol.
    pub from_name: Arc<str>,
    /// Range associated with the source symbol.
    pub from_range: Range,
    /// Name of the target symbol.
    pub to_name: Arc<str>,
    /// Range associated with the target.
    pub to_range: Range,
    /// Kind of relationship.
    pub kind: RelationKind,
    /// Optional extra metadata.
    pub metadata: Option<RelationshipMetadata>,
}

impl RawRelationship {
    /// Creates a relationship of `kind` from `from_name` to `to_name` with no
    /// metadata attached.
    pub fn new(
        from_name: impl Into<Arc<str>>,
        from_range: Range,
        to_name: impl Into<Arc<str>>,
        to_range: Range,
        kind: RelationKind,
    ) -> Self {
        let from_name: Arc<str> = from_name.into();
        let to_name: Arc<str> = to_name.into();
        Self {
            from_name,
            from_range,
            to_name,
            to_range,
            kind,
            metadata: None,
        }
    }

    /// Returns the relationship with its metadata replaced by `metadata`.
    pub fn with_metadata(self, metadata: RelationshipMetadata) -> Self {
        Self {
            metadata: Some(metadata),
            ..self
        }
    }
}
/// Everything extracted from one source file: identifying data plus the raw
/// symbols, imports and relationships found in it.
#[derive(Debug)]
pub struct ParsedFile {
    /// Path of the file.
    pub path: PathBuf,
    /// Hash of the file content, as supplied by the caller.
    pub content_hash: String,
    /// Language of the file.
    pub language_id: LanguageId,
    /// Optional module path; `None` until set via `with_module_path`.
    pub module_path: Option<String>,
    /// Symbols found in the file.
    pub raw_symbols: Vec<RawSymbol>,
    /// Imports found in the file.
    pub raw_imports: Vec<RawImport>,
    /// Relationships found in the file.
    pub raw_relationships: Vec<RawRelationship>,
}

impl ParsedFile {
    /// Creates an empty result for `path`: no module path and empty symbol,
    /// import and relationship lists.
    pub fn new(path: PathBuf, content_hash: String, language_id: LanguageId) -> Self {
        Self {
            path,
            content_hash,
            language_id,
            module_path: None,
            raw_symbols: Vec::new(),
            raw_imports: Vec::new(),
            raw_relationships: Vec::new(),
        }
    }

    /// Returns the parsed file with its module path set to `module_path`.
    pub fn with_module_path(self, module_path: impl Into<String>) -> Self {
        Self {
            module_path: Some(module_path.into()),
            ..self
        }
    }

    /// Number of raw symbols collected.
    pub fn symbol_count(&self) -> usize {
        let symbols = &self.raw_symbols;
        symbols.len()
    }

    /// Number of raw imports collected.
    pub fn import_count(&self) -> usize {
        let imports = &self.raw_imports;
        imports.len()
    }

    /// Number of raw relationships collected.
    pub fn relationship_count(&self) -> usize {
        let relationships = &self.raw_relationships;
        relationships.len()
    }
}
/// Registration record describing one file known to the index.
#[derive(Debug, Clone)]
pub struct FileRegistration {
/// Path of the file.
pub path: PathBuf,
/// Identifier assigned to the file.
pub file_id: FileId,
/// Hash of the file content.
pub content_hash: String,
/// Language of the file.
pub language_id: LanguageId,
/// Timestamp for this registration.
/// NOTE(review): presumably the indexing time; unit/epoch not visible here — confirm.
pub timestamp: u64,
/// NOTE(review): presumably the file's modification time; unit not visible here — confirm.
pub mtime: u64,
}
/// A relationship whose target is known only by name and still has to be
/// resolved to a symbol id.
#[derive(Debug, Clone)]
pub struct UnresolvedRelationship {
/// Id of the source symbol, when already known.
pub from_id: Option<SymbolId>,
/// Name of the source symbol.
pub from_name: Arc<str>,
/// Name of the target symbol.
pub to_name: Arc<str>,
/// File the relationship belongs to.
pub file_id: FileId,
/// Kind of relationship.
pub kind: RelationKind,
/// Optional extra metadata.
pub metadata: Option<RelationshipMetadata>,
/// Range associated with the target, when available.
pub to_range: Option<Range>,
}
/// A batch of indexing output: symbols (each with its file path), imports,
/// relationships awaiting resolution, and file registrations.
#[derive(Debug)]
pub struct IndexBatch {
    /// Symbols paired with the path of the file they belong to.
    pub symbols: Vec<(Symbol, PathBuf)>,
    /// Imports collected for the batch.
    pub imports: Vec<Import>,
    /// Relationships that still need their target resolved.
    pub unresolved_relationships: Vec<UnresolvedRelationship>,
    /// File registrations collected for the batch.
    pub file_registrations: Vec<FileRegistration>,
}

impl IndexBatch {
    /// Creates an empty batch without reserving any capacity.
    pub fn new() -> Self {
        // A zero capacity request does not allocate, so this matches four
        // freshly created empty vectors.
        Self::with_capacity(0, 0, 0)
    }

    /// Creates an empty batch with space reserved for the given numbers of
    /// symbols, imports and relationships. File registrations get no
    /// up-front reservation.
    pub fn with_capacity(symbols: usize, imports: usize, rels: usize) -> Self {
        let symbols = Vec::with_capacity(symbols);
        let imports = Vec::with_capacity(imports);
        let unresolved_relationships = Vec::with_capacity(rels);
        Self {
            symbols,
            imports,
            unresolved_relationships,
            file_registrations: Vec::new(),
        }
    }

    /// Number of symbols in the batch.
    pub fn symbol_count(&self) -> usize {
        let symbols = &self.symbols;
        symbols.len()
    }

    /// Returns `true` when the batch has no symbols, no imports and no file
    /// registrations.
    ///
    /// NOTE(review): `unresolved_relationships` is not consulted, so a batch
    /// holding only relationships reports itself as empty — confirm this is
    /// intended.
    pub fn is_empty(&self) -> bool {
        let has_symbols = !self.symbols.is_empty();
        let has_imports = !self.imports.is_empty();
        let has_registrations = !self.file_registrations.is_empty();
        !(has_symbols || has_imports || has_registrations)
    }

    /// Moves every element of `other` onto the end of the matching list in
    /// `self`.
    pub fn merge(&mut self, other: IndexBatch) {
        let IndexBatch {
            symbols,
            imports,
            unresolved_relationships,
            file_registrations,
        } = other;
        self.symbols.extend(symbols);
        self.imports.extend(imports);
        self.unresolved_relationships.extend(unresolved_relationships);
        self.file_registrations.extend(file_registrations);
    }
}

impl Default for IndexBatch {
    /// Same as [`IndexBatch::new`].
    fn default() -> Self {
        IndexBatch::new()
    }
}
/// A batch of candidates queued for embedding.
#[derive(Debug)]
pub struct EmbeddingBatch {
    /// One entry per candidate: a symbol id and two text fields.
    /// NOTE(review): the meaning of the two strings is not visible in this
    /// file — confirm against the producer.
    pub candidates: Vec<(SymbolId, Box<str>, Box<str>)>,
}

impl EmbeddingBatch {
    /// Creates an empty batch without reserving capacity.
    pub fn new() -> Self {
        let candidates = Vec::new();
        Self { candidates }
    }

    /// Creates an empty batch with room reserved for `size` candidates.
    pub fn with_capacity(size: usize) -> Self {
        let candidates = Vec::with_capacity(size);
        Self { candidates }
    }

    /// Returns `true` when the batch holds no candidates.
    pub fn is_empty(&self) -> bool {
        let candidates = &self.candidates;
        candidates.is_empty()
    }

    /// Number of candidates in the batch.
    pub fn len(&self) -> usize {
        let candidates = &self.candidates;
        candidates.len()
    }
}

impl Default for EmbeddingBatch {
    /// Same as [`EmbeddingBatch::new`].
    fn default() -> Self {
        EmbeddingBatch::new()
    }
}
/// A file's path together with its text content and a hash string.
#[derive(Debug)]
pub struct FileContent {
    /// Path of the file.
    pub path: PathBuf,
    /// Text content of the file.
    pub content: String,
    /// Hash string supplied alongside the content.
    pub hash: String,
}

impl FileContent {
    /// Bundles `path`, `content` and `hash` into a `FileContent` without
    /// inspecting or modifying any of them.
    pub fn new(path: PathBuf, content: String, hash: String) -> Self {
        FileContent {
            hash,
            content,
            path,
        }
    }
}
pub use crate::parsing::CallerContext;
/// In-memory symbol index offering lookup by id, by name and by file.
///
/// All three maps are `DashMap`s, which is why every method, including
/// `insert`, takes `&self`.
#[derive(Debug)]
pub struct SymbolLookupCache {
    /// Primary storage: symbol id -> symbol.
    by_id: dashmap::DashMap<crate::types::SymbolId, crate::Symbol>,
    /// Secondary index: symbol name -> ids of the symbols carrying that name.
    by_name: dashmap::DashMap<Box<str>, Vec<crate::types::SymbolId>>,
    /// Secondary index: file id -> ids of the symbols inserted for that file.
    by_file_id: dashmap::DashMap<crate::types::FileId, Vec<crate::types::SymbolId>>,
}

impl Default for SymbolLookupCache {
    /// Same as [`SymbolLookupCache::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl SymbolLookupCache {
    /// Creates an empty cache without reserving capacity.
    pub fn new() -> Self {
        Self {
            by_id: dashmap::DashMap::new(),
            by_name: dashmap::DashMap::new(),
            by_file_id: dashmap::DashMap::new(),
        }
    }

    /// Creates an empty cache sized for roughly `symbols` symbols.
    ///
    /// The name index is pre-sized to a tenth and the file index to a
    /// fiftieth of `symbols`.
    pub fn with_capacity(symbols: usize) -> Self {
        Self {
            by_id: dashmap::DashMap::with_capacity(symbols),
            by_name: dashmap::DashMap::with_capacity(symbols / 10),
            by_file_id: dashmap::DashMap::with_capacity(symbols / 50),
        }
    }

    /// Inserts `symbol`, indexing it by id, name and file.
    ///
    /// If a symbol with the same id was already present it is replaced, and
    /// its old entries in the name and file indexes are removed first. Without
    /// that step a re-insert would list the same id twice (or leave it under a
    /// stale name/file), and name lookups would report one symbol as several
    /// candidates.
    pub fn insert(&self, symbol: crate::Symbol) {
        let id = symbol.id;
        let file_id = symbol.file_id;
        let name: Box<str> = symbol.name.as_ref().into();

        if let Some(previous) = self.by_id.insert(id, symbol) {
            let old_name: &str = previous.name.as_ref();
            // Each guard is dropped at the end of its `if let` block, before
            // the same map is touched again.
            if let Some(mut ids) = self.by_name.get_mut(old_name) {
                ids.retain(|existing| *existing != id);
            }
            // Do not leave empty buckets behind: they would inflate
            // `unique_names` / `file_count`.
            self.by_name.remove_if(old_name, |_, ids| ids.is_empty());

            if let Some(mut ids) = self.by_file_id.get_mut(&previous.file_id) {
                ids.retain(|existing| *existing != id);
            }
            self.by_file_id
                .remove_if(&previous.file_id, |_, ids| ids.is_empty());
        }

        self.by_name.entry(name).or_default().push(id);
        self.by_file_id.entry(file_id).or_default().push(id);
    }

    /// Returns a clone of the symbol stored under `id`, if any.
    pub fn get(&self, id: crate::types::SymbolId) -> Option<crate::Symbol> {
        self.by_id.get(&id).map(|r| r.value().clone())
    }

    /// Returns a borrowed view of the symbol stored under `id`, if any,
    /// avoiding the clone performed by [`SymbolLookupCache::get`].
    ///
    /// The returned value is a `DashMap` reference guard; drop it before
    /// modifying the cache again.
    pub fn get_ref(
        &self,
        id: crate::types::SymbolId,
    ) -> Option<dashmap::mapref::one::Ref<'_, crate::types::SymbolId, crate::Symbol>> {
        self.by_id.get(&id)
    }

    /// Returns the ids of all symbols named `name` (empty when none).
    pub fn lookup_candidates(&self, name: &str) -> Vec<crate::types::SymbolId> {
        self.by_name
            .get(name)
            .map(|r| r.value().clone())
            .unwrap_or_default()
    }

    /// Returns the ids of all symbols inserted for `file_id` (empty when none).
    pub fn symbols_in_file(&self, file_id: crate::types::FileId) -> Vec<crate::types::SymbolId> {
        self.by_file_id
            .get(&file_id)
            .map(|r| r.value().clone())
            .unwrap_or_default()
    }

    /// Number of distinct files present in the file index.
    pub fn file_count(&self) -> usize {
        self.by_file_id.len()
    }

    /// Number of symbols stored.
    pub fn len(&self) -> usize {
        self.by_id.len()
    }

    /// Returns `true` when no symbols are stored.
    pub fn is_empty(&self) -> bool {
        self.by_id.is_empty()
    }

    /// Number of distinct symbol names present in the name index.
    pub fn unique_names(&self) -> usize {
        self.by_name.len()
    }
}
impl PipelineSymbolCache for SymbolLookupCache {
    /// Resolves `name` as seen from `caller`.
    ///
    /// Resolution proceeds in tiers and stops at the first tier that matches:
    ///
    /// 1. No symbol has this name at all -> `NotFound`.
    /// 2. Symbols with this name in the caller's own file: exactly one ->
    ///    `Found`, several -> `Ambiguous`.
    /// 3. Imports whose alias, or whose last path segment, equals `name`: the
    ///    first one that `find_by_import_path` can map to a symbol -> `Found`.
    /// 4. Symbols of the caller's language that are in the caller's file, in
    ///    the caller's module, or `Public`: exactly one -> `Found`, several ->
    ///    `Ambiguous`.
    /// 5. Otherwise `NotFound`.
    ///
    /// `to_range` is accepted for interface compatibility but does not affect
    /// the result.
    fn resolve(
        &self,
        name: &str,
        caller: &CallerContext,
        to_range: Option<&Range>,
        imports: &[Import],
    ) -> ResolveResult {
        // Last segment of an import path. Separators are tried in order and
        // the first one present in the path wins. `/` is tried before `.`
        // because slash-separated paths routinely contain dots
        // ("./utils/helper", "github.com/user/pkg"), whereas dotted module
        // paths do not contain slashes.
        //
        // `rsplit_once` is used because `rsplit(..).next()` always yields
        // `Some` (the whole string when the separator is absent), which made
        // any fallback chained with `or_else` unreachable.
        fn last_segment(path: &str) -> &str {
            ["::", "/", "."]
                .iter()
                .find_map(|&sep| path.rsplit_once(sep).map(|(_, tail)| tail))
                .unwrap_or(path)
        }

        let candidates = self.lookup_candidates(name);
        if candidates.is_empty() {
            return ResolveResult::NotFound;
        }

        // The previous position check against `to_range` returned the
        // candidate on both of its branches, so it never filtered anything;
        // it is dropped here without changing results.
        let _ = to_range;

        // Tier 2: same-file candidates.
        let local_matches: Vec<_> = candidates
            .iter()
            .filter_map(|&id| {
                let sym = self.by_id.get(&id)?;
                if sym.file_id == caller.file_id {
                    Some(id)
                } else {
                    None
                }
            })
            .collect();
        match local_matches.len() {
            0 => {}
            1 => return ResolveResult::Found(local_matches[0]),
            _ => return ResolveResult::Ambiguous(local_matches),
        }

        // Tier 3: imports that bring `name` into scope, by alias or by the
        // last segment of the imported path.
        for import in imports {
            let by_alias = import.alias.as_deref() == Some(name);
            if by_alias || last_segment(&import.path) == name {
                if let Some(id) = self.find_by_import_path(&import.path, caller.language_id) {
                    return ResolveResult::Found(id);
                }
            }
        }

        // Tier 4: same-language candidates reachable from the caller.
        let same_language: Vec<_> = candidates
            .iter()
            .filter_map(|&id| {
                let sym = self.by_id.get(&id)?;
                if sym.language_id.as_ref() != Some(&caller.language_id) {
                    return None;
                }
                if sym.file_id == caller.file_id {
                    return Some(id);
                }
                if caller.is_same_module(sym.module_path.as_deref()) {
                    return Some(id);
                }
                if sym.visibility == crate::Visibility::Public {
                    return Some(id);
                }
                None
            })
            .collect();
        match same_language.len() {
            0 => ResolveResult::NotFound,
            1 => ResolveResult::Found(same_language[0]),
            _ => ResolveResult::Ambiguous(same_language),
        }
    }

    /// Returns a clone of the symbol stored under `id`, if any.
    fn get(&self, id: SymbolId) -> Option<Symbol> {
        self.by_id.get(&id).map(|r| r.value().clone())
    }

    /// Returns the ids of all symbols inserted for `file_id` (empty when none).
    fn symbols_in_file(&self, file_id: FileId) -> Vec<SymbolId> {
        self.by_file_id
            .get(&file_id)
            .map(|r| r.value().clone())
            .unwrap_or_default()
    }

    /// Returns the ids of all symbols named `name` (empty when none).
    fn lookup_candidates(&self, name: &str) -> Vec<SymbolId> {
        self.by_name
            .get(name)
            .map(|r| r.value().clone())
            .unwrap_or_default()
    }
}
impl SymbolLookupCache {
    /// Builds a cache from the symbols stored in `index`.
    ///
    /// The cache is pre-sized from `index.document_count()` (0 when that call
    /// fails) and filled with the result of `index.get_all_symbols`.
    ///
    /// NOTE(review): `get_all_symbols` is called with a hard-coded limit of
    /// 1,000,000; if that argument is a result cap, larger indexes would be
    /// loaded only partially — confirm.
    ///
    /// # Errors
    ///
    /// Returns `PipelineError::Index` wrapping the storage error when the
    /// symbols cannot be read.
    pub fn from_index(index: &crate::storage::DocumentIndex) -> PipelineResult<Self> {
        let count = index.document_count().unwrap_or(0) as usize;
        let cache = Self::with_capacity(count);
        let symbols = index
            .get_all_symbols(1_000_000)
            .map_err(|e| PipelineError::Index(crate::IndexError::Storage(e)))?;
        for symbol in symbols {
            cache.insert(symbol);
        }
        Ok(cache)
    }

    /// Finds the symbol an import `path` refers to, restricted to
    /// `language_id`.
    ///
    /// The symbol name is taken to be the last segment of `path`. Among the
    /// symbols with that name and language, the first one whose module path
    /// contains `path`, or is contained in it, is returned. Symbols without a
    /// module path never match.
    fn find_by_import_path(&self, path: &str, language_id: LanguageId) -> Option<SymbolId> {
        // Separators are tried in order and the first one present in the path
        // wins. `/` comes before `.` because slash-separated paths routinely
        // contain dots ("./utils/helper", "github.com/user/pkg"), whereas
        // dotted module paths do not contain slashes.
        //
        // `rsplit_once` is used because `rsplit(..).next()` always yields
        // `Some` (the whole string when the separator is absent), which made
        // any fallback chained with `or_else` unreachable.
        let name = ["::", "/", "."]
            .iter()
            .find_map(|&sep| path.rsplit_once(sep).map(|(_, tail)| tail))
            .unwrap_or(path);

        for id in self.lookup_candidates(name) {
            if let Some(sym) = self.by_id.get(&id) {
                if sym.language_id.as_ref() != Some(&language_id) {
                    continue;
                }
                if let Some(ref module_path) = sym.module_path {
                    if path.contains(module_path.as_ref()) || module_path.contains(path) {
                        return Some(id);
                    }
                }
            }
        }
        None
    }
}
/// Per-file state used while resolving relationships: the file's imports and
/// local symbols, a name-resolution scope, and the relationships still to be
/// resolved.
pub struct ResolutionContext {
    /// File this context belongs to.
    pub file_id: FileId,
    /// Language of the file.
    pub language_id: LanguageId,
    /// Imports of the file.
    pub imports: Vec<Import>,
    /// Ids of the symbols local to the file.
    pub local_symbols: Vec<SymbolId>,
    /// Scope used by [`ResolutionContext::resolve`] to look names up.
    pub scope: Box<dyn crate::parsing::ResolutionScope>,
    /// Relationships waiting to be resolved.
    pub unresolved_rels: Vec<UnresolvedRelationship>,
}

impl std::fmt::Debug for ResolutionContext {
    /// Prints the ids plus the *lengths* of the three lists; the `scope`
    /// trait object is left out.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let import_total = self.imports.len();
        let local_total = self.local_symbols.len();
        let unresolved_total = self.unresolved_rels.len();

        let mut out = f.debug_struct("ResolutionContext");
        out.field("file_id", &self.file_id);
        out.field("language_id", &self.language_id);
        out.field("imports", &import_total);
        out.field("local_symbols", &local_total);
        out.field("unresolved_rels", &unresolved_total);
        out.finish()
    }
}

impl ResolutionContext {
    /// Number of relationships still waiting to be resolved.
    pub fn relationship_count(&self) -> usize {
        let pending = &self.unresolved_rels;
        pending.len()
    }

    /// Looks `name` up through the context's scope.
    pub fn resolve(&self, name: &str) -> Option<SymbolId> {
        let scope: &dyn crate::parsing::ResolutionScope = &*self.scope;
        scope.resolve(name)
    }
}
/// A relationship whose two endpoints are both known symbol ids.
#[derive(Debug, Clone)]
pub struct ResolvedRelationship {
    /// Id of the source symbol.
    pub from_id: SymbolId,
    /// Id of the target symbol.
    pub to_id: SymbolId,
    /// Kind of relationship.
    pub kind: RelationKind,
    /// Optional extra metadata.
    pub metadata: Option<RelationshipMetadata>,
}

impl ResolvedRelationship {
    /// Creates a relationship of `kind` from `from_id` to `to_id` with no
    /// metadata attached.
    pub fn new(from_id: SymbolId, to_id: SymbolId, kind: RelationKind) -> Self {
        ResolvedRelationship {
            metadata: None,
            kind,
            to_id,
            from_id,
        }
    }

    /// Returns the relationship with its metadata replaced by `metadata`.
    pub fn with_metadata(self, metadata: RelationshipMetadata) -> Self {
        Self {
            metadata: Some(metadata),
            ..self
        }
    }
}
/// A batch of resolved relationships.
#[derive(Debug, Default)]
pub struct ResolvedBatch {
    /// The relationships, in insertion order.
    pub relationships: Vec<ResolvedRelationship>,
}

impl ResolvedBatch {
    /// Creates an empty batch without reserving capacity.
    pub fn new() -> Self {
        let relationships = Vec::new();
        Self { relationships }
    }

    /// Creates an empty batch with room reserved for `cap` relationships.
    pub fn with_capacity(cap: usize) -> Self {
        let relationships = Vec::with_capacity(cap);
        Self { relationships }
    }

    /// Appends `rel` to the batch.
    pub fn push(&mut self, rel: ResolvedRelationship) {
        let relationships = &mut self.relationships;
        relationships.push(rel);
    }

    /// Number of relationships in the batch.
    pub fn len(&self) -> usize {
        let relationships = &self.relationships;
        relationships.len()
    }

    /// Returns `true` when the batch holds no relationships.
    pub fn is_empty(&self) -> bool {
        let relationships = &self.relationships;
        relationships.is_empty()
    }

    /// Moves every relationship of `other` onto the end of this batch.
    pub fn merge(&mut self, other: ResolvedBatch) {
        let ResolvedBatch { relationships } = other;
        self.relationships.extend(relationships);
    }
}
/// Outcome of file discovery, split into new, modified and deleted paths.
#[derive(Debug, Default)]
pub struct DiscoverResult {
    /// Paths classified as new.
    pub new_files: Vec<PathBuf>,
    /// Paths classified as modified.
    pub modified_files: Vec<PathBuf>,
    /// Paths classified as deleted.
    pub deleted_files: Vec<PathBuf>,
}

impl DiscoverResult {
    /// Number of files that need processing: new plus modified.
    pub fn files_to_process(&self) -> usize {
        let added = self.new_files.len();
        let changed = self.modified_files.len();
        added + changed
    }

    /// Number of files that need cleanup: deleted plus modified. Modified
    /// files therefore count towards both totals.
    pub fn files_to_cleanup(&self) -> usize {
        let removed = self.deleted_files.len();
        let changed = self.modified_files.len();
        removed + changed
    }

    /// Returns `true` when all three lists are empty.
    pub fn is_empty(&self) -> bool {
        [&self.new_files, &self.modified_files, &self.deleted_files]
            .iter()
            .all(|files| files.is_empty())
    }
}
/// Errors produced by the indexing pipeline.
#[derive(Debug, thiserror::Error)]
pub enum PipelineError {
/// Reading the file at `path` failed with the I/O error `source`.
#[error("Failed to read file {path}: {source}")]
FileRead {
path: PathBuf,
source: std::io::Error,
},
/// Parsing the file at `path` failed; `reason` holds a description.
#[error("Failed to parse file {path}: {reason}")]
Parse { path: PathBuf, reason: String },
/// The file at `path` has a type the pipeline does not support.
#[error("Unsupported file type: {path}")]
UnsupportedFileType { path: PathBuf },
/// Sending on a channel failed; the payload is the error text.
#[error("Channel send error: {0}")]
ChannelSend(String),
/// Receiving from a channel failed; the payload is the error text.
#[error("Channel receive error: {0}")]
ChannelRecv(String),
/// An index error; convertible from `crate::IndexError` via `From`.
#[error("Index error: {0}")]
Index(#[from] crate::IndexError),
/// A storage error; convertible from `crate::storage::StorageError` via `From`.
#[error("Storage error: {0}")]
Storage(#[from] crate::storage::StorageError),
}
/// Result alias whose error type is [`PipelineError`].
pub type PipelineResult<T> = Result<T, PipelineError>;
/// Statistics describing the handling of a single file.
#[derive(Debug, Clone)]
pub struct SingleFileStats {
/// Id of the file.
pub file_id: crate::FileId,
/// Whether the file was indexed.
pub indexed: bool,
/// NOTE(review): presumably set when the file was served from cache instead of re-indexed — confirm.
pub cached: bool,
/// Number of symbols found in the file.
pub symbols_found: usize,
/// Number of relationships resolved for the file.
pub relationships_resolved: usize,
/// Time taken.
pub elapsed: std::time::Duration,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder keeps the name and kind and records the signature.
    #[test]
    fn test_raw_symbol_builder() {
        let span = Range::new(1, 0, 1, 10);
        let symbol = RawSymbol::new("test_fn", SymbolKind::Function, span)
            .with_signature("fn test_fn() -> i32")
            .with_visibility(Visibility::Public);

        assert_eq!(&*symbol.name, "test_fn");
        assert_eq!(symbol.kind, SymbolKind::Function);
        assert!(symbol.signature.is_some());
    }

    /// Converting a raw import carries path and alias over and attaches the
    /// file id.
    #[test]
    fn test_raw_import_conversion() {
        let file_id = FileId::new(1).unwrap();
        let converted = RawImport::new("std::collections::HashMap")
            .with_alias("Map")
            .into_import(file_id);

        assert_eq!(converted.path, "std::collections::HashMap");
        assert_eq!(converted.alias, Some("Map".to_string()));
        assert_eq!(converted.file_id, file_id);
    }

    /// The count helpers reflect what was pushed into the raw lists.
    #[test]
    fn test_parsed_file_counts() {
        let mut file = ParsedFile::new(
            PathBuf::from("test.rs"),
            "abc123def456".to_string(),
            LanguageId::new("rust"),
        );

        let foo = RawSymbol::new("foo", SymbolKind::Function, Range::new(1, 0, 1, 10));
        let bar = RawSymbol::new("bar", SymbolKind::Function, Range::new(2, 0, 2, 10));
        file.raw_symbols.push(foo);
        file.raw_symbols.push(bar);

        assert_eq!(file.symbol_count(), 2);
        assert_eq!(file.import_count(), 0);
    }

    /// Merging moves the other batch's imports into the receiver.
    #[test]
    fn test_index_batch_merge() {
        let mut target = IndexBatch::new();
        let mut incoming = IndexBatch::new();

        let import = Import {
            file_id: FileId::new(1).unwrap(),
            path: "test".to_string(),
            alias: None,
            is_glob: false,
            is_type_only: false,
        };
        incoming.imports.push(import);

        target.merge(incoming);
        assert_eq!(target.imports.len(), 1);
    }
}