splice 2.6.4

Span-safe refactoring kernel for 7 languages with Magellan code graph integration
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
//! C/C++-specific tree-sitter parsing logic.
//!
//! This module contains tree-sitter-cpp integration for extracting
//! functions, classes, structs, namespaces, enums, and other C/C++ constructs with byte spans.

use crate::error::{Result, SpliceError};
use ropey::Rope;
use std::path::Path;

/// Represents a C/C++ symbol with its byte and line/col spans.
///
/// Values are produced by [`extract_cpp_symbols`]. All span fields describe the
/// complete tree-sitter node the symbol was extracted from (for example the whole
/// function definition, not just its name).
#[derive(Debug, Clone, PartialEq)]
pub struct CppSymbol {
    /// Local (unqualified) symbol name (e.g., `foo`).
    pub name: String,

    /// Symbol kind (function, class, struct, namespace, enum, method, etc.).
    ///
    /// Note that `Method` is assigned by a naming heuristic during extraction,
    /// not by inspecting the enclosing declaration.
    pub kind: CppSymbolKind,

    /// Start byte offset of the symbol's node in the source buffer
    /// (taken from tree-sitter's `start_byte`).
    pub byte_start: usize,

    /// End byte offset of the symbol's node in the source buffer
    /// (taken from tree-sitter's `end_byte`).
    pub byte_end: usize,

    /// Start line (1-based), i.e. the line containing `byte_start`.
    pub line_start: usize,

    /// End line (1-based), i.e. the line containing `byte_end`.
    pub line_end: usize,

    /// Start column (0-based, in bytes): `byte_start` minus the byte offset at
    /// which the start line begins.
    pub col_start: usize,

    /// End column (0-based, in bytes): `byte_end` minus the byte offset at
    /// which the end line begins.
    pub col_end: usize,

    /// Function parameters (for functions and methods).
    ///
    /// Holds parameter *names* only. It is empty when the node has no function
    /// declarator as a direct child, and only parameters whose declaration carries
    /// a plain identifier as a direct child are collected.
    pub parameters: Vec<String>,

    /// Namespace path (e.g., `std::vector` or `Outer::Inner`).
    ///
    /// This is the `::`-joined chain of enclosing named namespaces, classes and
    /// structs; it is the empty string for symbols found at file level.
    pub namespace_path: String,

    /// Fully qualified name (e.g., `std::vector::iterator`).
    ///
    /// Equal to `name` when `namespace_path` is empty, otherwise
    /// `namespace_path` + `::` + `name`.
    pub fully_qualified: String,

    /// Whether this is a template declaration, i.e. the symbol was extracted
    /// from the declaration wrapped directly by a `template_declaration` node.
    pub is_template: bool,
}

/// Classification of an extracted C/C++ symbol.
///
/// The stable textual form of each variant is available through
/// [`CppSymbolKind::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CppSymbolKind {
    /// A free function (definition or declaration).
    Function,
    /// A `class`.
    Class,
    /// A `struct`.
    Struct,
    /// A `namespace`.
    Namespace,
    /// An `enum`.
    Enum,
    /// A single value inside an enum.
    Enumerator,
    /// A function that lives inside a class or struct.
    Method,
    /// A member variable of a class or struct.
    Field,
    /// A function declared under a `template<...>` header.
    TemplateFunction,
    /// A class declared under a `template<...>` header.
    TemplateClass,
}

impl CppSymbolKind {
    /// Returns the lowercase, snake_case identifier used when persisting this kind.
    ///
    /// The returned string is a static literal, so no allocation takes place.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Function => "function",
            Self::Class => "class",
            Self::Struct => "struct",
            Self::Namespace => "namespace",
            Self::Enum => "enum",
            Self::Enumerator => "enumerator",
            Self::Method => "method",
            Self::Field => "field",
            Self::TemplateFunction => "template_function",
            Self::TemplateClass => "template_class",
        }
    }
}

/// Parse a C/C++ source buffer and collect its symbols together with their spans.
///
/// The buffer is parsed with tree-sitter-cpp and the resulting tree is walked to
/// gather:
/// - functions (definitions and declarations)
/// - classes and the members nested in them
/// - structs
/// - namespaces
/// - enums
/// - template declarations
///
/// `path` is only used to label errors; the file is never read from disk here.
///
/// # Errors
///
/// Returns `SpliceError::Parse` when the C++ grammar cannot be installed on the
/// parser or when the parser yields no tree. A `source` buffer that is not valid
/// UTF-8 is reported through the crate's conversion from the UTF-8 decoding error.
pub fn extract_cpp_symbols(path: &Path, source: &[u8]) -> Result<Vec<CppSymbol>> {
    // Both failure modes below are reported against the same file.
    let parse_error = |message: String| SpliceError::Parse {
        file: path.to_path_buf(),
        message,
    };

    let mut parser = tree_sitter::Parser::new();
    let grammar = tree_sitter_cpp::language();
    parser
        .set_language(&grammar)
        .map_err(|e| parse_error(format!("Failed to set C++ language: {:?}", e)))?;

    let tree = parser
        .parse(source, None)
        .ok_or_else(|| parse_error("Parse failed - no tree returned".to_string()))?;

    // The rope is what turns byte offsets into line/column positions later on.
    let text = std::str::from_utf8(source)?;
    let rope = Rope::from_str(text);

    let mut collected = Vec::new();
    extract_symbols(tree.root_node(), source, &rope, &mut collected, "");

    Ok(collected)
}

/// Recursively walk `node` and append every recognised symbol to `symbols`.
///
/// `namespace_path` is the `::`-joined path of the enclosing *named* scopes (empty
/// at file level). It is recorded on each extracted symbol and extended when
/// descending into a named class, struct or namespace.
///
/// Handling by node kind:
/// - `template_declaration`: the wrapped function/class/struct is delegated to
///   `extract_symbol_with_template`; nothing else beneath it is visited here.
/// - Named class/struct/namespace: the symbol is recorded and the members of its
///   `body` are extracted with the extended scope path. Namespace members are all
///   visited; for classes/structs only nested classes, structs and function
///   definitions (direct or wrapped in a `field_declaration`) are visited.
/// - Unnamed class/struct/namespace (e.g. `namespace { ... }`): no symbol can be
///   recorded, but the body is still traversed with the *current* scope path so
///   the definitions inside it are not lost.
/// - `declaration` with a direct `function_declarator` child: recorded as a
///   function; the declarator itself is not descended into.
/// - Everything else: children are visited recursively.
fn extract_symbols(
    node: tree_sitter::Node,
    source: &[u8],
    rope: &Rope,
    symbols: &mut Vec<CppSymbol>,
    namespace_path: &str,
) {
    let node_kind = node.kind();

    // Templates: unwrap to the actual declaration and mark it as a template.
    if node_kind == "template_declaration" {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if matches!(
                child.kind(),
                "function_definition" | "class_specifier" | "struct_specifier"
            ) {
                extract_symbol_with_template(child, source, rope, symbols, namespace_path, true);
            }
        }
        // Children of a template_declaration are never visited generically.
        return;
    }

    let symbol_kind = match node_kind {
        "function_definition" => Some(CppSymbolKind::Function),
        "class_specifier" => Some(CppSymbolKind::Class),
        "struct_specifier" => Some(CppSymbolKind::Struct),
        "namespace_definition" => Some(CppSymbolKind::Namespace),
        "enum_specifier" => Some(CppSymbolKind::Enum),
        "declaration" => {
            // A declaration only counts as a function when it directly carries a
            // function_declarator (e.g. a prototype).
            let mut cursor = node.walk();
            let declares_function = node
                .children(&mut cursor)
                .any(|child| child.kind() == "function_declarator");
            if declares_function {
                Some(CppSymbolKind::Function)
            } else {
                None
            }
        }
        _ => None,
    };

    if let Some(symbol_kind) = symbol_kind {
        if let Some(symbol) =
            extract_symbol(node, source, rope, symbol_kind, namespace_path, false)
        {
            let opens_scope = matches!(
                symbol_kind,
                CppSymbolKind::Class | CppSymbolKind::Struct | CppSymbolKind::Namespace
            );

            if !opens_scope {
                symbols.push(symbol);
            } else {
                // Members are qualified with this symbol's name.
                let member_scope = if namespace_path.is_empty() {
                    symbol.name.clone()
                } else {
                    format!("{}::{}", namespace_path, symbol.name)
                };
                symbols.push(symbol);

                // Namespaces expose a declaration_list as `body`; classes and
                // structs expose the field_declaration_list itself.
                if let Some(body) = node.child_by_field_name("body") {
                    let mut body_cursor = body.walk();
                    for member in body.children(&mut body_cursor) {
                        if symbol_kind == CppSymbolKind::Namespace {
                            // Every namespace member goes through the full dispatch.
                            extract_symbols(member, source, rope, symbols, &member_scope);
                            continue;
                        }

                        match member.kind() {
                            // Nested classes/structs are wrapped in field_declaration.
                            "field_declaration" => {
                                let mut field_cursor = member.walk();
                                for nested in member.children(&mut field_cursor) {
                                    if matches!(
                                        nested.kind(),
                                        "class_specifier"
                                            | "struct_specifier"
                                            | "function_definition"
                                    ) {
                                        extract_symbols(
                                            nested,
                                            source,
                                            rope,
                                            symbols,
                                            &member_scope,
                                        );
                                    }
                                }
                            }
                            "class_specifier" | "struct_specifier" | "function_definition" => {
                                extract_symbols(member, source, rope, symbols, &member_scope);
                            }
                            _ => {}
                        }
                    }
                }

                // The body has been fully handled above.
                return;
            }
        }
    }

    // Generic recursion. Named classes/structs/namespaces returned above, so a
    // scope node that reaches this loop is unnamed; its body must be visited here
    // (with the unchanged scope path) or its contents would be dropped entirely.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        // The declarator of a function declaration was already consumed above.
        if node_kind == "declaration" && child.kind() == "function_declarator" {
            continue;
        }
        extract_symbols(child, source, rope, symbols, namespace_path);
    }
}

/// Extract the declaration wrapped by a `template_declaration`.
///
/// `node` must be the wrapped `function_definition`, `class_specifier` or
/// `struct_specifier`; any other node kind is ignored. The symbol is recorded as
/// `TemplateFunction`, `TemplateClass` or `Struct` respectively, with
/// `is_template` forwarded to the stored symbol.
///
/// For template classes and template structs the members of the body are
/// extracted as well, qualified with the type's name. Only nested classes,
/// structs and function definitions (direct, or wrapped in a `field_declaration`)
/// are visited, mirroring what `extract_symbols` does for non-template types.
///
/// Nothing is recorded when the declaration has no extractable name.
fn extract_symbol_with_template(
    node: tree_sitter::Node,
    source: &[u8],
    rope: &Rope,
    symbols: &mut Vec<CppSymbol>,
    namespace_path: &str,
    is_template: bool,
) {
    let kind = match node.kind() {
        "function_definition" => CppSymbolKind::TemplateFunction,
        "class_specifier" => CppSymbolKind::TemplateClass,
        "struct_specifier" => CppSymbolKind::Struct, // Template structs use Struct kind
        _ => return,
    };

    let symbol = match extract_symbol(node, source, rope, kind, namespace_path, is_template) {
        Some(symbol) => symbol,
        None => return,
    };

    // Only class-like templates have members to descend into.
    if !matches!(kind, CppSymbolKind::TemplateClass | CppSymbolKind::Struct) {
        symbols.push(symbol);
        return;
    }

    let member_scope = if namespace_path.is_empty() {
        symbol.name.clone()
    } else {
        format!("{}::{}", namespace_path, symbol.name)
    };
    symbols.push(symbol);

    // The `body` field of a class/struct specifier IS the field_declaration_list,
    // so its children are the members themselves. Searching those children for
    // another field_declaration_list never matches and would skip every member.
    let body = match node.child_by_field_name("body") {
        Some(body) => body,
        None => return,
    };

    let mut body_cursor = body.walk();
    for member in body.children(&mut body_cursor) {
        match member.kind() {
            // Nested classes/structs are wrapped in field_declaration.
            "field_declaration" => {
                let mut field_cursor = member.walk();
                for nested in member.children(&mut field_cursor) {
                    if matches!(
                        nested.kind(),
                        "class_specifier" | "struct_specifier" | "function_definition"
                    ) {
                        extract_symbols(nested, source, rope, symbols, &member_scope);
                    }
                }
            }
            "class_specifier" | "struct_specifier" | "function_definition" => {
                extract_symbols(member, source, rope, symbols, &member_scope);
            }
            _ => {}
        }
    }
}

/// Build a [`CppSymbol`] for a single tree-sitter node.
///
/// Returns `None` when no name can be extracted from `node`.
///
/// - Byte offsets come straight from the node.
/// - Lines are converted through `rope` and stored 1-based.
/// - Columns are 0-based byte offsets from the start of the respective line.
/// - `namespace_path` is stored verbatim and prefixed to the name (joined with
///   `::`) to form the fully qualified name.
/// - `kind` is stored as given, except that a `Function` is reclassified as a
///   `Method` by the heuristic described in the body.
fn extract_symbol(
    node: tree_sitter::Node,
    source: &[u8],
    rope: &Rope,
    kind: CppSymbolKind,
    namespace_path: &str,
    is_template: bool,
) -> Option<CppSymbol> {
    let name = extract_name(node, source)?;

    // Byte span of the whole node.
    let byte_start = node.start_byte();
    let byte_end = node.end_byte();

    // Byte offset -> char index -> 0-based line index.
    let line_start = rope.char_to_line(rope.byte_to_char(byte_start));
    let line_end = rope.char_to_line(rope.byte_to_char(byte_end));

    // Column is the byte distance from the beginning of the line (0-based).
    let col_start = byte_start - rope.line_to_byte(line_start);
    let col_end = byte_end - rope.line_to_byte(line_end);

    let parameters = extract_parameters(node, source);

    let fully_qualified = if namespace_path.is_empty() {
        name.clone()
    } else {
        format!("{}::{}", namespace_path, name)
    };

    // Method detection is a naming heuristic: a function counts as a method when
    // the scope that directly encloses it starts with an uppercase letter (likely
    // a class or struct). The *innermost* path segment is inspected, so that
    // `ns::Widget::draw` is judged by `Widget` rather than by the outer `ns`.
    // An empty path yields an empty segment and therefore never matches.
    let enclosing_scope = namespace_path
        .rsplit("::")
        .next()
        .unwrap_or(namespace_path);
    let enclosed_by_type = enclosing_scope
        .chars()
        .next()
        .map_or(false, char::is_uppercase);

    let actual_kind = if kind == CppSymbolKind::Function && enclosed_by_type {
        CppSymbolKind::Method
    } else {
        kind
    };

    Some(CppSymbol {
        name,
        kind: actual_kind,
        byte_start,
        byte_end,
        line_start: line_start + 1, // Convert to 1-based
        line_end: line_end + 1,     // Convert to 1-based
        col_start,
        col_end,
        parameters,
        namespace_path: namespace_path.to_string(),
        fully_qualified,
        is_template,
    })
}

/// Extract the name from a node.
fn extract_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    let kind = node.kind();

    match kind {
        "function_definition" | "declaration" => {
            // Find the identifier in function_declarator
            for child in node.children(&mut node.walk()) {
                if child.kind() == "function_declarator" {
                    return extract_name(child, source);
                }
            }
            None
        }
        "function_declarator" => {
            // Find the identifier
            for child in node.children(&mut node.walk()) {
                if child.kind() == "identifier" || child.kind() == "field_identifier" {
                    return child.utf8_text(source).ok().map(|s| s.to_string());
                }
            }
            None
        }
        "class_specifier" | "struct_specifier" | "enum_specifier" => {
            // Find type_identifier
            for child in node.children(&mut node.walk()) {
                if child.kind() == "type_identifier" {
                    return child.utf8_text(source).ok().map(|s| s.to_string());
                }
            }
            None
        }
        "namespace_definition" => {
            // Find the namespace name - uses namespace_identifier
            for child in node.children(&mut node.walk()) {
                if child.kind() == "namespace_identifier" {
                    return child.utf8_text(source).ok().map(|s| s.to_string());
                }
            }
            None
        }
        _ => None,
    }
}

/// Collect the parameter names declared by a function node.
///
/// Walks `node` -> `function_declarator` -> `parameter_list` ->
/// `parameter_declaration` and records the text of every `identifier` that is a
/// direct child of a parameter declaration, in source order. Identifiers whose
/// text is not valid UTF-8 are skipped. Nodes without a direct
/// `function_declarator` child produce an empty list.
fn extract_parameters(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    let mut names = Vec::new();

    let mut node_cursor = node.walk();
    for declarator in node.children(&mut node_cursor) {
        if declarator.kind() != "function_declarator" {
            continue;
        }

        let mut declarator_cursor = declarator.walk();
        for list in declarator.children(&mut declarator_cursor) {
            if list.kind() != "parameter_list" {
                continue;
            }

            let mut list_cursor = list.walk();
            for declaration in list.children(&mut list_cursor) {
                if declaration.kind() != "parameter_declaration" {
                    continue;
                }

                // Keep only the bare identifiers sitting directly in the declaration.
                let mut declaration_cursor = declaration.walk();
                names.extend(
                    declaration
                        .children(&mut declaration_cursor)
                        .filter(|part| part.kind() == "identifier")
                        .filter_map(|part| part.utf8_text(source).ok())
                        .map(String::from),
                );
            }
        }
    }

    names
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A lone free function must produce exactly one `Function` symbol named `foo`.
    #[test]
    fn test_extract_simple_function_basic() {
        let outcome = extract_cpp_symbols(Path::new("test.cpp"), b"int foo() { return 42; }\n");
        assert!(outcome.is_ok());

        let symbols = outcome.unwrap();
        assert_eq!(symbols.len(), 1);

        let only = &symbols[0];
        assert_eq!(only.name, "foo");
        assert_eq!(only.kind, CppSymbolKind::Function);
    }
}