codemem_engine/index/scip/
mod.rs1pub mod graph_builder;
7pub mod orchestrate;
8
9use codemem_core::NodeKind;
10use protobuf::Message;
11use scip::types::Index;
12
13#[derive(Debug, Clone)]
15pub struct ScipDefinition {
16 pub scip_symbol: String,
18 pub qualified_name: String,
20 pub file_path: String,
22 pub line_start: u32,
24 pub line_end: u32,
26 pub col_start: u32,
28 pub col_end: u32,
30 pub kind: NodeKind,
32 pub documentation: Vec<String>,
34 pub relationships: Vec<ScipRelationship>,
36 pub is_test: bool,
38 pub is_generated: bool,
40}
41
/// A non-definition occurrence of a symbol collected by `parse_scip_bytes`.
///
/// Derives `PartialEq`/`Eq` so references can be compared directly (for example
/// in assertions or when de-duplicating).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScipReference {
    /// Raw SCIP symbol string of the referenced symbol.
    pub scip_symbol: String,
    /// Relative path of the document that contains the occurrence.
    pub file_path: String,
    /// Start line of the occurrence's range.
    pub line: u32,
    /// Start column of the occurrence's range.
    pub col_start: u32,
    /// End column of the occurrence's range.
    pub col_end: u32,
    /// Unmodified `symbol_roles` bitmask of the occurrence; query it with
    /// `is_import_ref`, `is_read_ref` and `is_write_ref`.
    pub role_bitmask: i32,
}
58
59#[derive(Debug, Clone)]
61pub struct ScipExternal {
62 pub scip_symbol: String,
64 pub package_manager: String,
66 pub package_name: String,
68 pub package_version: String,
70 pub kind: NodeKind,
72 pub documentation: Vec<String>,
74}
75
/// A relationship from a defined symbol to another symbol, copied field by field
/// from the SCIP symbol information.
///
/// Derives `PartialEq`/`Eq` so relationships can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScipRelationship {
    /// SCIP symbol string of the symbol the relationship points at.
    pub target_symbol: String,
    /// SCIP `is_implementation` flag of the relationship.
    pub is_implementation: bool,
    /// SCIP `is_type_definition` flag of the relationship.
    pub is_type_definition: bool,
    /// SCIP `is_reference` flag of the relationship.
    pub is_reference: bool,
    /// SCIP `is_definition` flag of the relationship.
    pub is_definition: bool,
}
86
87#[derive(Debug, Clone)]
89pub struct ScipReadResult {
90 pub project_root: String,
92 pub definitions: Vec<ScipDefinition>,
94 pub references: Vec<ScipReference>,
96 pub externals: Vec<ScipExternal>,
98 pub covered_files: Vec<String>,
100}
101
// Bit masks tested against an occurrence's `symbol_roles` value.

/// Bit 0: the occurrence is a definition.
const ROLE_DEFINITION: i32 = 1 << 0;
/// Bit 1: the occurrence is an import.
pub(crate) const ROLE_IMPORT: i32 = 1 << 1;
/// Bit 2: the occurrence is a write access.
pub(crate) const ROLE_WRITE_ACCESS: i32 = 1 << 2;
/// Bit 3: the occurrence is a read access.
pub(crate) const ROLE_READ_ACCESS: i32 = 1 << 3;
/// Bit 4: the occurrence is in generated code.
const ROLE_GENERATED: i32 = 1 << 4;
/// Bit 5: the occurrence is in test code.
const ROLE_TEST: i32 = 1 << 5;
109
110pub fn parse_scip_bytes(bytes: &[u8]) -> Result<ScipReadResult, String> {
112 let index = Index::parse_from_bytes(bytes)
113 .map_err(|e| format!("Failed to parse SCIP protobuf: {e}"))?;
114
115 let project_root = index
116 .metadata
117 .as_ref()
118 .map(|m| m.project_root.clone())
119 .unwrap_or_default();
120
121 let mut definitions = Vec::new();
122 let mut references = Vec::new();
123 let mut covered_files = Vec::new();
124
125 for doc in &index.documents {
126 let file_path = &doc.relative_path;
127 let language = &doc.language;
128 covered_files.push(file_path.clone());
129
130 let lang_sep = detect_language_separator(language);
131
132 let mut sym_info_map = std::collections::HashMap::new();
134 for sym_info in &doc.symbols {
135 if !sym_info.symbol.is_empty() {
136 sym_info_map.insert(sym_info.symbol.as_str(), sym_info);
137 }
138 }
139
140 for occ in &doc.occurrences {
141 if occ.symbol.is_empty() || scip::symbol::is_local_symbol(&occ.symbol) {
142 continue;
143 }
144
145 let (start_line, start_col, end_line, end_col) = match parse_range(&occ.range) {
146 Some(r) => r,
147 None => continue,
148 };
149
150 let roles = occ.symbol_roles;
151 let is_def = (roles & ROLE_DEFINITION) != 0;
152 let is_test = (roles & ROLE_TEST) != 0;
153 let is_generated = (roles & ROLE_GENERATED) != 0;
154
155 if is_def {
156 let qualified_name = match scip_symbol_to_qualified_name(&occ.symbol, lang_sep) {
157 Some(q) => q,
158 None => continue,
159 };
160
161 let (kind, documentation, relationships) =
163 if let Some(info) = sym_info_map.get(occ.symbol.as_str()) {
164 let kind = resolve_node_kind(info.kind.value(), &occ.symbol);
165 let docs: Vec<String> =
166 info.documentation.iter().map(|s| s.to_string()).collect();
167 let rels: Vec<ScipRelationship> = info
168 .relationships
169 .iter()
170 .map(|r| ScipRelationship {
171 target_symbol: r.symbol.clone(),
172 is_implementation: r.is_implementation,
173 is_type_definition: r.is_type_definition,
174 is_reference: r.is_reference,
175 is_definition: r.is_definition,
176 })
177 .collect();
178 (kind, docs, rels)
179 } else {
180 (infer_kind_from_symbol(&occ.symbol), Vec::new(), Vec::new())
181 };
182
183 definitions.push(ScipDefinition {
184 scip_symbol: occ.symbol.clone(),
185 qualified_name,
186 file_path: file_path.clone(),
187 line_start: start_line,
188 line_end: end_line,
189 col_start: start_col,
190 col_end: end_col,
191 kind,
192 documentation,
193 relationships,
194 is_test,
195 is_generated,
196 });
197 } else {
198 references.push(ScipReference {
199 scip_symbol: occ.symbol.clone(),
200 file_path: file_path.clone(),
201 line: start_line,
202 col_start: start_col,
203 col_end: end_col,
204 role_bitmask: roles,
205 });
206 }
207 }
208 }
209
210 let externals = index
212 .external_symbols
213 .iter()
214 .filter(|ext| !ext.symbol.is_empty() && !scip::symbol::is_local_symbol(&ext.symbol))
215 .filter_map(|ext| {
216 let parsed = scip::symbol::parse_symbol(&ext.symbol).ok()?;
217 let package = parsed.package.as_ref()?;
218 let kind = resolve_node_kind(ext.kind.value(), &ext.symbol);
219 let documentation: Vec<String> =
220 ext.documentation.iter().map(|s| s.to_string()).collect();
221
222 Some(ScipExternal {
223 scip_symbol: ext.symbol.clone(),
224 package_manager: package.manager.clone(),
225 package_name: package.name.clone(),
226 package_version: package.version.clone(),
227 kind,
228 documentation,
229 })
230 })
231 .collect();
232
233 infer_definition_extents(&mut definitions);
238
239 Ok(ScipReadResult {
240 project_root,
241 definitions,
242 references,
243 externals,
244 covered_files,
245 })
246}
247
248fn infer_definition_extents(definitions: &mut [ScipDefinition]) {
254 use std::collections::HashMap;
255
256 let mut by_file: HashMap<String, Vec<usize>> = HashMap::new();
258 for (i, def) in definitions.iter().enumerate() {
259 by_file.entry(def.file_path.clone()).or_default().push(i);
260 }
261
262 for indices in by_file.values() {
263 let mut sorted: Vec<usize> = indices.clone();
265 sorted.sort_by_key(|&i| definitions[i].line_start);
266
267 let depths: Vec<usize> = sorted
269 .iter()
270 .map(|&i| descriptor_depth(&definitions[i].scip_symbol))
271 .collect();
272
273 for pos in 0..sorted.len() {
274 let idx = sorted[pos];
275 if definitions[idx].line_end > definitions[idx].line_start {
277 continue;
278 }
279
280 let my_depth = depths[pos];
281
282 let mut end_line = u32::MAX;
288 for next_pos in pos + 1..sorted.len() {
289 if depths[next_pos] <= my_depth {
290 end_line = definitions[sorted[next_pos]].line_start.saturating_sub(1);
291 break;
292 }
293 }
294
295 definitions[idx].line_end = end_line;
296 }
297 }
298}
299
300fn descriptor_depth(scip_symbol: &str) -> usize {
302 scip::symbol::parse_symbol(scip_symbol)
303 .map(|p| p.descriptors.len())
304 .unwrap_or(0)
305}
306
/// Converts a SCIP occurrence range into
/// `(start_line, start_col, end_line, end_col)`.
///
/// A three-element range is `[line, start_col, end_col]` and yields the same
/// line for start and end; a four-element range is
/// `[start_line, start_col, end_line, end_col]`. Returns `None` for any other
/// length or when a component does not fit in `u32` (i.e. is negative).
fn parse_range(range: &[i32]) -> Option<(u32, u32, u32, u32)> {
    let to_u32 = |value: i32| u32::try_from(value).ok();

    match *range {
        [line, start_col, end_col] => {
            let line = to_u32(line)?;
            Some((line, to_u32(start_col)?, line, to_u32(end_col)?))
        }
        [start_line, start_col, end_line, end_col] => Some((
            to_u32(start_line)?,
            to_u32(start_col)?,
            to_u32(end_line)?,
            to_u32(end_col)?,
        )),
        _ => None,
    }
}
328
329pub fn scip_symbol_to_qualified_name(scip_symbol: &str, lang_separator: &str) -> Option<String> {
334 let parsed = scip::symbol::parse_symbol(scip_symbol).ok()?;
335 let parts: Vec<&str> = parsed
336 .descriptors
337 .iter()
338 .map(|d| d.name.as_str())
339 .filter(|s| !s.is_empty())
340 .collect();
341 if parts.is_empty() {
342 return None;
343 }
344 Some(parts.join(lang_separator))
345}
346
/// Returns the path separator used when building qualified names for
/// `language`.
///
/// The comparison is case-insensitive: `"rust"`, `"cpp"` and `"c++"` map to
/// `"::"`; every other value (including the empty string) maps to `"."`.
pub fn detect_language_separator(language: &str) -> &'static str {
    let normalized = language.to_lowercase();
    if matches!(normalized.as_str(), "rust" | "cpp" | "c++") {
        "::"
    } else {
        "."
    }
}
356
357fn scip_kind_to_node_kind(kind: i32) -> Option<NodeKind> {
362 use scip::types::symbol_information::Kind;
363 match kind {
364 x if x == Kind::Class as i32 => Some(NodeKind::Class),
365 x if x == Kind::Interface as i32 => Some(NodeKind::Interface),
366 x if x == Kind::Trait as i32 => Some(NodeKind::Trait),
367 x if x == Kind::Enum as i32 => Some(NodeKind::Enum),
368 x if x == Kind::EnumMember as i32 => Some(NodeKind::EnumVariant),
369 x if x == Kind::Field as i32 => Some(NodeKind::Field),
370 x if x == Kind::TypeParameter as i32 => Some(NodeKind::TypeParameter),
371 x if x == Kind::Macro as i32 => Some(NodeKind::Macro),
372 x if x == Kind::Property as i32 => Some(NodeKind::Property),
373 x if x == Kind::Function as i32 || x == Kind::Constructor as i32 => {
374 Some(NodeKind::Function)
375 }
376 x if x == Kind::Method as i32 => Some(NodeKind::Method),
377 x if x == Kind::Namespace as i32 || x == Kind::Module as i32 => Some(NodeKind::Module),
378 x if x == Kind::Package as i32 => Some(NodeKind::Package),
379 x if x == Kind::TypeAlias as i32 || x == Kind::Type as i32 => Some(NodeKind::Type),
380 x if x == Kind::Constant as i32 => Some(NodeKind::Constant),
381 _ => None,
382 }
383}
384
385pub fn infer_kind_from_symbol(scip_symbol: &str) -> NodeKind {
395 let parsed = match scip::symbol::parse_symbol(scip_symbol) {
396 Ok(p) => p,
397 Err(_) => return NodeKind::Function,
398 };
399 infer_kind_from_parsed(&parsed)
400}
401
402pub fn infer_kind_from_parsed(parsed: &scip::types::Symbol) -> NodeKind {
404 let last = match parsed.descriptors.last() {
405 Some(d) => d,
406 None => return NodeKind::Function,
407 };
408 use scip::types::descriptor::Suffix;
409 match last.suffix.enum_value() {
410 Ok(Suffix::Package | Suffix::Namespace) => NodeKind::Module,
411 Ok(Suffix::Type) => NodeKind::Class,
412 Ok(Suffix::Method) => NodeKind::Method,
413 Ok(Suffix::Macro) => NodeKind::Macro,
414 Ok(Suffix::TypeParameter) => NodeKind::TypeParameter,
415 Ok(Suffix::Parameter) => NodeKind::Field,
416 Ok(Suffix::Term) => {
417 let parent = parsed.descriptors.iter().rev().nth(1);
420 match parent.and_then(|d| d.suffix.enum_value().ok()) {
421 Some(Suffix::Type) => NodeKind::Field,
422 _ => NodeKind::Function,
423 }
424 }
425 _ => NodeKind::Function, }
427}
428
429fn resolve_node_kind(kind: i32, scip_symbol: &str) -> NodeKind {
431 scip_kind_to_node_kind(kind).unwrap_or_else(|| infer_kind_from_symbol(scip_symbol))
432}
433
434pub fn is_import_ref(role_bitmask: i32) -> bool {
436 (role_bitmask & ROLE_IMPORT) != 0
437}
438
439pub fn is_read_ref(role_bitmask: i32) -> bool {
441 (role_bitmask & ROLE_READ_ACCESS) != 0
442}
443
444pub fn is_write_ref(role_bitmask: i32) -> bool {
446 (role_bitmask & ROLE_WRITE_ACCESS) != 0
447}
448
449#[cfg(test)]
450#[path = "../tests/scip_reader_tests.rs"]
451mod tests;