codemem_engine/index/scip/
mod.rs1pub mod graph_builder;
7pub mod orchestrate;
8
9use codemem_core::NodeKind;
10use protobuf::Message;
11use scip::types::Index;
12
13#[derive(Debug, Clone)]
15pub struct ScipDefinition {
16 pub scip_symbol: String,
18 pub qualified_name: String,
20 pub file_path: String,
22 pub line_start: u32,
24 pub line_end: u32,
26 pub col_start: u32,
28 pub col_end: u32,
30 pub kind: NodeKind,
32 pub documentation: Vec<String>,
34 pub relationships: Vec<ScipRelationship>,
36 pub is_test: bool,
38 pub is_generated: bool,
40}
41
/// A non-definition occurrence of a symbol inside an indexed document.
#[derive(Clone, Debug)]
pub struct ScipReference {
    /// Symbol string copied verbatim from the SCIP occurrence.
    pub scip_symbol: String,
    /// Relative path of the document that contains the occurrence.
    pub file_path: String,
    /// Start line taken from the occurrence range.
    pub line: u32,
    /// Start column taken from the occurrence range.
    pub col_start: u32,
    /// End column taken from the occurrence range.
    pub col_end: u32,
    /// Raw `symbol_roles` bitmask of the occurrence; query it with
    /// `is_import_ref`, `is_read_ref` and `is_write_ref`.
    pub role_bitmask: i32,
}
58
59#[derive(Debug, Clone)]
61pub struct ScipExternal {
62 pub scip_symbol: String,
64 pub package_manager: String,
66 pub package_name: String,
68 pub package_version: String,
70 pub kind: NodeKind,
72 pub documentation: Vec<String>,
74}
75
/// One relationship edge copied from a SCIP symbol information record.
#[derive(Clone, Debug)]
pub struct ScipRelationship {
    /// Symbol string of the related symbol.
    pub target_symbol: String,
    /// Copied from the relationship's `is_implementation` flag.
    pub is_implementation: bool,
    /// Copied from the relationship's `is_type_definition` flag.
    pub is_type_definition: bool,
    /// Copied from the relationship's `is_reference` flag.
    pub is_reference: bool,
    /// Copied from the relationship's `is_definition` flag.
    pub is_definition: bool,
}
86
87#[derive(Debug, Clone)]
89pub struct ScipReadResult {
90 pub project_root: String,
92 pub definitions: Vec<ScipDefinition>,
94 pub references: Vec<ScipReference>,
96 pub externals: Vec<ScipExternal>,
98 pub covered_files: Vec<String>,
100}
101
// Bit flags tested against an occurrence's `symbol_roles` bitmask.

/// Set when the occurrence is a definition of its symbol.
const ROLE_DEFINITION: i32 = 1 << 0;
/// Set when the occurrence is an import.
pub(crate) const ROLE_IMPORT: i32 = 1 << 1;
/// Set when the occurrence writes to the symbol.
pub(crate) const ROLE_WRITE_ACCESS: i32 = 1 << 2;
/// Set when the occurrence reads the symbol.
pub(crate) const ROLE_READ_ACCESS: i32 = 1 << 3;
/// Set when the occurrence lives in generated code.
const ROLE_GENERATED: i32 = 1 << 4;
/// Set when the occurrence lives in test code.
const ROLE_TEST: i32 = 1 << 5;
109
110pub fn parse_scip_bytes(bytes: &[u8]) -> Result<ScipReadResult, String> {
112 let index = Index::parse_from_bytes(bytes)
113 .map_err(|e| format!("Failed to parse SCIP protobuf: {e}"))?;
114
115 let project_root = index
116 .metadata
117 .as_ref()
118 .map(|m| m.project_root.clone())
119 .unwrap_or_default();
120
121 let mut definitions = Vec::new();
122 let mut references = Vec::new();
123 let mut covered_files = Vec::new();
124
125 for doc in &index.documents {
126 let file_path = &doc.relative_path;
127 let language = &doc.language;
128 covered_files.push(file_path.clone());
129
130 let lang_sep = detect_language_separator(language);
131
132 let mut sym_info_map = std::collections::HashMap::new();
134 for sym_info in &doc.symbols {
135 if !sym_info.symbol.is_empty() {
136 sym_info_map.insert(sym_info.symbol.as_str(), sym_info);
137 }
138 }
139
140 for occ in &doc.occurrences {
141 if occ.symbol.is_empty() || scip::symbol::is_local_symbol(&occ.symbol) {
142 continue;
143 }
144
145 let (start_line, start_col, end_line, end_col) = match parse_range(&occ.range) {
146 Some(r) => r,
147 None => continue,
148 };
149
150 let roles = occ.symbol_roles;
151 let is_def = (roles & ROLE_DEFINITION) != 0;
152 let is_test = (roles & ROLE_TEST) != 0;
153 let is_generated = (roles & ROLE_GENERATED) != 0;
154
155 if is_def {
156 if let Some(info) = sym_info_map.get(occ.symbol.as_str()) {
160 if is_noise_kind(info.kind.value()) {
161 continue;
162 }
163 }
164
165 let qualified_name = match scip_symbol_to_qualified_name(&occ.symbol, lang_sep) {
166 Some(q) => q,
167 None => continue,
168 };
169
170 let (kind, documentation, relationships) =
172 if let Some(info) = sym_info_map.get(occ.symbol.as_str()) {
173 let kind = resolve_node_kind(info.kind.value(), &occ.symbol);
174 let docs: Vec<String> =
175 info.documentation.iter().map(|s| s.to_string()).collect();
176 let rels: Vec<ScipRelationship> = info
177 .relationships
178 .iter()
179 .map(|r| ScipRelationship {
180 target_symbol: r.symbol.clone(),
181 is_implementation: r.is_implementation,
182 is_type_definition: r.is_type_definition,
183 is_reference: r.is_reference,
184 is_definition: r.is_definition,
185 })
186 .collect();
187 (kind, docs, rels)
188 } else {
189 (infer_kind_from_symbol(&occ.symbol), Vec::new(), Vec::new())
190 };
191
192 definitions.push(ScipDefinition {
193 scip_symbol: occ.symbol.clone(),
194 qualified_name,
195 file_path: file_path.clone(),
196 line_start: start_line,
197 line_end: end_line,
198 col_start: start_col,
199 col_end: end_col,
200 kind,
201 documentation,
202 relationships,
203 is_test,
204 is_generated,
205 });
206 } else {
207 references.push(ScipReference {
208 scip_symbol: occ.symbol.clone(),
209 file_path: file_path.clone(),
210 line: start_line,
211 col_start: start_col,
212 col_end: end_col,
213 role_bitmask: roles,
214 });
215 }
216 }
217 }
218
219 let externals = index
221 .external_symbols
222 .iter()
223 .filter(|ext| !ext.symbol.is_empty() && !scip::symbol::is_local_symbol(&ext.symbol))
224 .filter_map(|ext| {
225 let parsed = scip::symbol::parse_symbol(&ext.symbol).ok()?;
226 let package = parsed.package.as_ref()?;
227 let kind = resolve_node_kind(ext.kind.value(), &ext.symbol);
228 let documentation: Vec<String> =
229 ext.documentation.iter().map(|s| s.to_string()).collect();
230
231 Some(ScipExternal {
232 scip_symbol: ext.symbol.clone(),
233 package_manager: package.manager.clone(),
234 package_name: package.name.clone(),
235 package_version: package.version.clone(),
236 kind,
237 documentation,
238 })
239 })
240 .collect();
241
242 infer_definition_extents(&mut definitions);
247
248 Ok(ScipReadResult {
249 project_root,
250 definitions,
251 references,
252 externals,
253 covered_files,
254 })
255}
256
257fn infer_definition_extents(definitions: &mut [ScipDefinition]) {
263 use std::collections::HashMap;
264
265 let mut by_file: HashMap<String, Vec<usize>> = HashMap::new();
267 for (i, def) in definitions.iter().enumerate() {
268 by_file.entry(def.file_path.clone()).or_default().push(i);
269 }
270
271 for indices in by_file.values() {
272 let mut sorted: Vec<usize> = indices.clone();
274 sorted.sort_by_key(|&i| definitions[i].line_start);
275
276 let depths: Vec<usize> = sorted
278 .iter()
279 .map(|&i| descriptor_depth(&definitions[i].scip_symbol))
280 .collect();
281
282 for pos in 0..sorted.len() {
283 let idx = sorted[pos];
284 if definitions[idx].line_end > definitions[idx].line_start {
286 continue;
287 }
288
289 let my_depth = depths[pos];
290
291 let mut end_line = u32::MAX;
297 for next_pos in pos + 1..sorted.len() {
298 if depths[next_pos] <= my_depth {
299 end_line = definitions[sorted[next_pos]].line_start.saturating_sub(1);
300 break;
301 }
302 }
303
304 definitions[idx].line_end = end_line;
305 }
306 }
307}
308
309fn descriptor_depth(scip_symbol: &str) -> usize {
311 scip::symbol::parse_symbol(scip_symbol)
312 .map(|p| p.descriptors.len())
313 .unwrap_or(0)
314}
315
/// Converts a SCIP occurrence range into
/// `(start_line, start_col, end_line, end_col)`.
///
/// A three-element range is `[line, start_col, end_col]` and describes a span
/// on a single line; a four-element range is
/// `[start_line, start_col, end_line, end_col]`. Returns `None` for any other
/// length, or when a component is negative.
fn parse_range(range: &[i32]) -> Option<(u32, u32, u32, u32)> {
    let to_u32 = |value: i32| -> Option<u32> { value.try_into().ok() };

    match *range {
        [line, start_col, end_col] => {
            let line = to_u32(line)?;
            Some((line, to_u32(start_col)?, line, to_u32(end_col)?))
        }
        [start_line, start_col, end_line, end_col] => Some((
            to_u32(start_line)?,
            to_u32(start_col)?,
            to_u32(end_line)?,
            to_u32(end_col)?,
        )),
        _ => None,
    }
}
337
338pub fn scip_symbol_to_qualified_name(scip_symbol: &str, lang_separator: &str) -> Option<String> {
343 let parsed = scip::symbol::parse_symbol(scip_symbol).ok()?;
344 let parts: Vec<&str> = parsed
345 .descriptors
346 .iter()
347 .map(|d| d.name.as_str())
348 .filter(|s| !s.is_empty())
349 .collect();
350 if parts.is_empty() {
351 return None;
352 }
353 Some(parts.join(lang_separator))
354}
355
/// Returns the separator used to join qualified-name segments for `language`.
///
/// The comparison is case-insensitive: `"rust"`, `"cpp"` and `"c++"` yield
/// `"::"`; every other language name (including the empty string) yields `"."`.
pub fn detect_language_separator(language: &str) -> &'static str {
    let lowered = language.to_lowercase();
    if matches!(lowered.as_str(), "rust" | "cpp" | "c++") {
        "::"
    } else {
        "."
    }
}
365
366fn scip_kind_to_node_kind(kind: i32) -> Option<NodeKind> {
371 use scip::types::symbol_information::Kind;
372 match kind {
373 x if x == Kind::Class as i32 || x == Kind::Struct as i32 => Some(NodeKind::Class),
374 x if x == Kind::Interface as i32 || x == Kind::Protocol as i32 => Some(NodeKind::Interface),
375 x if x == Kind::Trait as i32 => Some(NodeKind::Trait),
376 x if x == Kind::Enum as i32 => Some(NodeKind::Enum),
377 x if x == Kind::EnumMember as i32 => Some(NodeKind::EnumVariant),
378 x if x == Kind::Field as i32
379 || x == Kind::StaticField as i32
380 || x == Kind::StaticDataMember as i32 =>
381 {
382 Some(NodeKind::Field)
383 }
384 x if x == Kind::Property as i32 || x == Kind::StaticProperty as i32 => {
385 Some(NodeKind::Property)
386 }
387 x if x == Kind::TypeParameter as i32 => Some(NodeKind::TypeParameter),
388 x if x == Kind::Macro as i32 => Some(NodeKind::Macro),
389 x if x == Kind::Function as i32 || x == Kind::Constructor as i32 => {
390 Some(NodeKind::Function)
391 }
392 x if x == Kind::Method as i32
393 || x == Kind::StaticMethod as i32
394 || x == Kind::AbstractMethod as i32
395 || x == Kind::TraitMethod as i32
396 || x == Kind::ProtocolMethod as i32
397 || x == Kind::PureVirtualMethod as i32
398 || x == Kind::MethodSpecification as i32
399 || x == Kind::Getter as i32
400 || x == Kind::Setter as i32
401 || x == Kind::Accessor as i32 =>
402 {
403 Some(NodeKind::Method)
404 }
405 x if x == Kind::Namespace as i32
406 || x == Kind::Module as i32
407 || x == Kind::PackageObject as i32 =>
408 {
409 Some(NodeKind::Module)
410 }
411 x if x == Kind::Package as i32 || x == Kind::Library as i32 => Some(NodeKind::Package),
412 x if x == Kind::TypeAlias as i32
413 || x == Kind::Type as i32
414 || x == Kind::AssociatedType as i32 =>
415 {
416 Some(NodeKind::Type)
417 }
418 x if x == Kind::Constant as i32 || x == Kind::StaticVariable as i32 => {
419 Some(NodeKind::Constant)
420 }
421 _ => None,
422 }
423}
424
425pub fn is_noise_kind(kind: i32) -> bool {
432 use scip::types::symbol_information::Kind;
433 matches!(kind,
434 x if x == Kind::Variable as i32
435 || x == Kind::Parameter as i32
436 || x == Kind::SelfParameter as i32
437 || x == Kind::ThisParameter as i32
438 || x == Kind::ParameterLabel as i32
439 || x == Kind::TypeParameter as i32
440 || x == Kind::Boolean as i32
442 || x == Kind::Number as i32
443 || x == Kind::String as i32
444 || x == Kind::Null as i32
445 || x == Kind::Array as i32
446 || x == Kind::Object as i32
447 || x == Kind::Key as i32
448 || x == Kind::Pattern as i32
449 || x == Kind::MethodReceiver as i32
451 || x == Kind::Error as i32
452 )
453}
454
455pub fn infer_kind_from_symbol(scip_symbol: &str) -> NodeKind {
465 let parsed = match scip::symbol::parse_symbol(scip_symbol) {
466 Ok(p) => p,
467 Err(_) => return NodeKind::Function,
468 };
469 infer_kind_from_parsed(&parsed)
470}
471
472pub fn infer_kind_from_parsed(parsed: &scip::types::Symbol) -> NodeKind {
474 let last = match parsed.descriptors.last() {
475 Some(d) => d,
476 None => return NodeKind::Function,
477 };
478 use scip::types::descriptor::Suffix;
479 match last.suffix.enum_value() {
480 Ok(Suffix::Package | Suffix::Namespace) => NodeKind::Module,
481 Ok(Suffix::Type) => NodeKind::Class,
482 Ok(Suffix::Method) => NodeKind::Method,
483 Ok(Suffix::Macro) => NodeKind::Macro,
484 Ok(Suffix::TypeParameter) => NodeKind::TypeParameter,
485 Ok(Suffix::Parameter) => NodeKind::Field,
486 Ok(Suffix::Term) => {
487 let parent = parsed.descriptors.iter().rev().nth(1);
490 match parent.and_then(|d| d.suffix.enum_value().ok()) {
491 Some(Suffix::Type) => NodeKind::Field,
492 _ => {
493 if is_constant_name(&last.name) {
497 NodeKind::Constant
498 } else {
499 NodeKind::Function
500 }
501 }
502 }
503 }
504 _ => NodeKind::Function, }
506}
507
/// Reports whether `name` looks like a SCREAMING_SNAKE_CASE constant: it
/// consists only of ASCII uppercase letters, ASCII digits and underscores, and
/// contains at least one uppercase letter. The empty string is not a constant
/// name.
fn is_constant_name(name: &str) -> bool {
    let mut saw_upper = false;
    for byte in name.bytes() {
        match byte {
            b'A'..=b'Z' => saw_upper = true,
            b'0'..=b'9' | b'_' => {}
            // Any other byte (including every byte of a non-ASCII character)
            // disqualifies the name.
            _ => return false,
        }
    }
    saw_upper
}
518
519fn resolve_node_kind(kind: i32, scip_symbol: &str) -> NodeKind {
521 scip_kind_to_node_kind(kind).unwrap_or_else(|| infer_kind_from_symbol(scip_symbol))
522}
523
524pub fn is_import_ref(role_bitmask: i32) -> bool {
526 (role_bitmask & ROLE_IMPORT) != 0
527}
528
529pub fn is_read_ref(role_bitmask: i32) -> bool {
531 (role_bitmask & ROLE_READ_ACCESS) != 0
532}
533
534pub fn is_write_ref(role_bitmask: i32) -> bool {
536 (role_bitmask & ROLE_WRITE_ACCESS) != 0
537}
538
539#[cfg(test)]
540#[path = "../tests/scip_reader_tests.rs"]
541mod tests;