Skip to main content

kdo_context/
extract.rs

1//! Tree-sitter based signature extraction.
2//!
3//! Extracts public API signatures (functions, structs, enums, traits, classes,
4//! interfaces, type aliases) WITHOUT function bodies.
5
6use kdo_core::Language;
7use serde::{Deserialize, Serialize};
8use std::path::Path;
9use tracing::debug;
10
/// Kind of extracted signature.
///
/// Shared across all supported languages, so each variant covers the closest
/// equivalent construct in every language. Serialized by serde in
/// `snake_case` (for example `TypeAlias` becomes `"type_alias"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SignatureKind {
    /// Function or method.
    Function,
    /// Struct or class (Rust structs, TypeScript/Python classes, Go structs).
    Struct,
    /// Enum definition.
    Enum,
    /// Trait or interface (Rust traits, TypeScript/Go interfaces).
    Trait,
    /// Type alias.
    TypeAlias,
    /// Constant or static (also exported TypeScript lexical declarations and
    /// Python top-level annotated assignments).
    Constant,
    /// Impl block header.
    Impl,
}
30
/// A single extracted signature.
///
/// Produced by the per-language extractors in this module; one value per
/// extracted item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Signature {
    /// The kind of signature.
    pub kind: SignatureKind,
    /// The signature text. Function bodies are stripped; type definitions
    /// (structs, enums, traits, interfaces) keep their full source text.
    pub text: String,
    /// Source file path, as passed to the extractor.
    pub file: String,
    /// 1-based line number in the source file where the item starts.
    pub line: usize,
}
43
44/// Extract all public API signatures from a source file.
45///
46/// Uses tree-sitter for parsing; falls back to line-based extraction on error.
47pub fn extract_signatures(file_path: &Path, language: &Language) -> Vec<Signature> {
48    let content = match std::fs::read_to_string(file_path) {
49        Ok(c) => c,
50        Err(_) => return Vec::new(),
51    };
52
53    let file_str = file_path.to_string_lossy().to_string();
54
55    match language {
56        Language::Rust | Language::Anchor => extract_rust_signatures(&content, &file_str),
57        Language::TypeScript | Language::JavaScript => extract_ts_signatures(&content, &file_str),
58        Language::Python => extract_python_signatures(&content, &file_str),
59        Language::Go => extract_go_signatures(&content, &file_str),
60    }
61}
62
63fn extract_rust_signatures(source: &str, file: &str) -> Vec<Signature> {
64    let mut parser = tree_sitter::Parser::new();
65    let ts_lang = tree_sitter_rust::language();
66    if parser.set_language(&ts_lang).is_err() {
67        return fallback_rust_extract(source, file);
68    }
69
70    let tree = match parser.parse(source, None) {
71        Some(t) => t,
72        None => return fallback_rust_extract(source, file),
73    };
74
75    let mut sigs = Vec::new();
76    let root = tree.root_node();
77    let mut cursor = root.walk();
78
79    for node in root.children(&mut cursor) {
80        match node.kind() {
81            "function_item" => {
82                if let Some(sig) = extract_rust_fn_sig(source, &node, file) {
83                    sigs.push(sig);
84                }
85            }
86            "struct_item" => {
87                if let Some(sig) = extract_rust_type_sig(source, &node, file, SignatureKind::Struct)
88                {
89                    sigs.push(sig);
90                }
91            }
92            "enum_item" => {
93                if let Some(sig) = extract_rust_type_sig(source, &node, file, SignatureKind::Enum) {
94                    sigs.push(sig);
95                }
96            }
97            "trait_item" => {
98                if let Some(sig) = extract_rust_type_sig(source, &node, file, SignatureKind::Trait)
99                {
100                    sigs.push(sig);
101                }
102            }
103            "impl_item" => {
104                if let Some(sig) = extract_rust_impl_sig(source, &node, file) {
105                    sigs.push(sig);
106                }
107            }
108            "type_item" => {
109                let text = node_text(source, &node);
110                sigs.push(Signature {
111                    kind: SignatureKind::TypeAlias,
112                    text,
113                    file: file.to_string(),
114                    line: node.start_position().row + 1,
115                });
116            }
117            "const_item" | "static_item" => {
118                if is_pub(source, &node) {
119                    let text = node_text(source, &node);
120                    sigs.push(Signature {
121                        kind: SignatureKind::Constant,
122                        text,
123                        file: file.to_string(),
124                        line: node.start_position().row + 1,
125                    });
126                }
127            }
128            _ => {}
129        }
130    }
131
132    debug!(file = file, count = sigs.len(), "extracted Rust signatures");
133    sigs
134}
135
136fn extract_rust_fn_sig(
137    source: &str,
138    node: &tree_sitter::Node<'_>,
139    file: &str,
140) -> Option<Signature> {
141    // Only extract pub functions
142    if !is_pub(source, node) {
143        return None;
144    }
145
146    // Get text up to the body (block)
147    let mut sig_end = node.end_byte();
148    let mut child_cursor = node.walk();
149    for child in node.children(&mut child_cursor) {
150        if child.kind() == "block" {
151            sig_end = child.start_byte();
152            break;
153        }
154    }
155
156    let text = source[node.start_byte()..sig_end].trim().to_string();
157    Some(Signature {
158        kind: SignatureKind::Function,
159        text,
160        file: file.to_string(),
161        line: node.start_position().row + 1,
162    })
163}
164
165fn extract_rust_type_sig(
166    source: &str,
167    node: &tree_sitter::Node<'_>,
168    file: &str,
169    kind: SignatureKind,
170) -> Option<Signature> {
171    if !is_pub(source, node) {
172        return None;
173    }
174
175    // For structs/enums, get the header before the body
176    let sig_end = node.end_byte();
177    // For structs with fields, include the whole thing but truncate bodies of methods
178    let text = source[node.start_byte()..sig_end].trim().to_string();
179
180    Some(Signature {
181        kind,
182        text,
183        file: file.to_string(),
184        line: node.start_position().row + 1,
185    })
186}
187
188fn extract_rust_impl_sig(
189    source: &str,
190    node: &tree_sitter::Node<'_>,
191    file: &str,
192) -> Option<Signature> {
193    // Get just the impl header, not the body
194    let mut sig_end = node.end_byte();
195    let mut child_cursor = node.walk();
196    for child in node.children(&mut child_cursor) {
197        if child.kind() == "declaration_list" {
198            sig_end = child.start_byte();
199            break;
200        }
201    }
202
203    let text = source[node.start_byte()..sig_end].trim().to_string();
204    Some(Signature {
205        kind: SignatureKind::Impl,
206        text,
207        file: file.to_string(),
208        line: node.start_position().row + 1,
209    })
210}
211
212fn extract_ts_signatures(source: &str, file: &str) -> Vec<Signature> {
213    let mut parser = tree_sitter::Parser::new();
214    let ts_lang = tree_sitter_typescript::language_typescript();
215    if parser.set_language(&ts_lang).is_err() {
216        return fallback_ts_extract(source, file);
217    }
218
219    let tree = match parser.parse(source, None) {
220        Some(t) => t,
221        None => return fallback_ts_extract(source, file),
222    };
223
224    let mut sigs = Vec::new();
225    let root = tree.root_node();
226    let mut cursor = root.walk();
227
228    for node in root.children(&mut cursor) {
229        if node.kind() != "export_statement" {
230            continue;
231        }
232        // Look at the exported declaration
233        let mut child_cursor = node.walk();
234        for child in node.children(&mut child_cursor) {
235            match child.kind() {
236                "function_declaration" | "function_signature" => {
237                    let mut sig_end = child.end_byte();
238                    let mut gc = child.walk();
239                    for grandchild in child.children(&mut gc) {
240                        if grandchild.kind() == "statement_block" {
241                            sig_end = grandchild.start_byte();
242                            break;
243                        }
244                    }
245                    let text = format!("export {}", source[child.start_byte()..sig_end].trim());
246                    sigs.push(Signature {
247                        kind: SignatureKind::Function,
248                        text,
249                        file: file.to_string(),
250                        line: child.start_position().row + 1,
251                    });
252                }
253                "class_declaration" => {
254                    let mut sig_end = child.end_byte();
255                    let mut gc = child.walk();
256                    for grandchild in child.children(&mut gc) {
257                        if grandchild.kind() == "class_body" {
258                            sig_end = grandchild.start_byte();
259                            break;
260                        }
261                    }
262                    let text = format!("export {}", source[child.start_byte()..sig_end].trim());
263                    sigs.push(Signature {
264                        kind: SignatureKind::Struct,
265                        text,
266                        file: file.to_string(),
267                        line: child.start_position().row + 1,
268                    });
269                }
270                "interface_declaration" => {
271                    let text = format!("export {}", node_text(source, &child));
272                    sigs.push(Signature {
273                        kind: SignatureKind::Trait,
274                        text,
275                        file: file.to_string(),
276                        line: child.start_position().row + 1,
277                    });
278                }
279                "type_alias_declaration" => {
280                    let text = format!("export {}", node_text(source, &child));
281                    sigs.push(Signature {
282                        kind: SignatureKind::TypeAlias,
283                        text,
284                        file: file.to_string(),
285                        line: child.start_position().row + 1,
286                    });
287                }
288                "lexical_declaration" => {
289                    let text = format!("export {}", node_text(source, &child));
290                    sigs.push(Signature {
291                        kind: SignatureKind::Constant,
292                        text,
293                        file: file.to_string(),
294                        line: child.start_position().row + 1,
295                    });
296                }
297                _ => {}
298            }
299        }
300    }
301
302    debug!(file = file, count = sigs.len(), "extracted TS signatures");
303    sigs
304}
305
306fn extract_python_signatures(source: &str, file: &str) -> Vec<Signature> {
307    let mut parser = tree_sitter::Parser::new();
308    let py_lang = tree_sitter_python::language();
309    if parser.set_language(&py_lang).is_err() {
310        return fallback_python_extract(source, file);
311    }
312
313    let tree = match parser.parse(source, None) {
314        Some(t) => t,
315        None => return fallback_python_extract(source, file),
316    };
317
318    let mut sigs = Vec::new();
319    let root = tree.root_node();
320    let mut cursor = root.walk();
321
322    for node in root.children(&mut cursor) {
323        match node.kind() {
324            "function_definition" => {
325                // Get signature line only (def ... :)
326                let mut sig_end = node.end_byte();
327                let mut child_cursor = node.walk();
328                for child in node.children(&mut child_cursor) {
329                    if child.kind() == "block" {
330                        sig_end = child.start_byte();
331                        break;
332                    }
333                }
334                let text = source[node.start_byte()..sig_end].trim().to_string();
335                // Skip private functions (starting with _)
336                if !text.contains("def _") || text.contains("def __init__") {
337                    sigs.push(Signature {
338                        kind: SignatureKind::Function,
339                        text,
340                        file: file.to_string(),
341                        line: node.start_position().row + 1,
342                    });
343                }
344            }
345            "class_definition" => {
346                // Get class header only
347                let mut sig_end = node.end_byte();
348                let mut child_cursor = node.walk();
349                for child in node.children(&mut child_cursor) {
350                    if child.kind() == "block" {
351                        sig_end = child.start_byte();
352                        break;
353                    }
354                }
355                let text = source[node.start_byte()..sig_end].trim().to_string();
356                sigs.push(Signature {
357                    kind: SignatureKind::Struct,
358                    text,
359                    file: file.to_string(),
360                    line: node.start_position().row + 1,
361                });
362            }
363            "expression_statement" => {
364                // Top-level type-annotated assignments
365                let text = node_text(source, &node);
366                if text.contains(':') && !text.starts_with('_') {
367                    sigs.push(Signature {
368                        kind: SignatureKind::Constant,
369                        text,
370                        file: file.to_string(),
371                        line: node.start_position().row + 1,
372                    });
373                }
374            }
375            _ => {}
376        }
377    }
378
379    debug!(
380        file = file,
381        count = sigs.len(),
382        "extracted Python signatures"
383    );
384    sigs
385}
386
387/// Check if a Rust node has a `pub` visibility modifier.
388fn is_pub(source: &str, node: &tree_sitter::Node<'_>) -> bool {
389    let mut cursor = node.walk();
390    for child in node.children(&mut cursor) {
391        if child.kind() == "visibility_modifier" {
392            let text = node_text(source, &child);
393            return text.starts_with("pub");
394        }
395    }
396    false
397}
398
399/// Get the text of a tree-sitter node.
400fn node_text(source: &str, node: &tree_sitter::Node<'_>) -> String {
401    source[node.start_byte()..node.end_byte()].to_string()
402}
403
404// Fallback extractors for when tree-sitter parsing fails
405
406fn fallback_rust_extract(source: &str, file: &str) -> Vec<Signature> {
407    let mut sigs = Vec::new();
408    for (i, line) in source.lines().enumerate() {
409        let trimmed = line.trim();
410        if trimmed.starts_with("pub fn ")
411            || trimmed.starts_with("pub struct ")
412            || trimmed.starts_with("pub enum ")
413            || trimmed.starts_with("pub trait ")
414        {
415            let kind = if trimmed.starts_with("pub fn") {
416                SignatureKind::Function
417            } else if trimmed.starts_with("pub struct") {
418                SignatureKind::Struct
419            } else if trimmed.starts_with("pub enum") {
420                SignatureKind::Enum
421            } else {
422                SignatureKind::Trait
423            };
424            sigs.push(Signature {
425                kind,
426                text: trimmed.trim_end_matches('{').trim().to_string(),
427                file: file.to_string(),
428                line: i + 1,
429            });
430        }
431    }
432    sigs
433}
434
435fn fallback_ts_extract(source: &str, file: &str) -> Vec<Signature> {
436    let mut sigs = Vec::new();
437    for (i, line) in source.lines().enumerate() {
438        let trimmed = line.trim();
439        if trimmed.starts_with("export function ")
440            || trimmed.starts_with("export class ")
441            || trimmed.starts_with("export interface ")
442            || trimmed.starts_with("export type ")
443            || trimmed.starts_with("export const ")
444        {
445            sigs.push(Signature {
446                kind: SignatureKind::Function,
447                text: trimmed.trim_end_matches('{').trim().to_string(),
448                file: file.to_string(),
449                line: i + 1,
450            });
451        }
452    }
453    sigs
454}
455
456fn fallback_python_extract(source: &str, file: &str) -> Vec<Signature> {
457    let mut sigs = Vec::new();
458    for (i, line) in source.lines().enumerate() {
459        let trimmed = line.trim();
460        if (trimmed.starts_with("def ") || trimmed.starts_with("class "))
461            && !trimmed.starts_with("def _")
462        {
463            let kind = if trimmed.starts_with("def ") {
464                SignatureKind::Function
465            } else {
466                SignatureKind::Struct
467            };
468            sigs.push(Signature {
469                kind,
470                text: trimmed.trim_end_matches(':').trim().to_string(),
471                file: file.to_string(),
472                line: i + 1,
473            });
474        }
475    }
476    sigs
477}
478
479/// Go signature extractor — line-based (no Go tree-sitter grammar bundled).
480///
481/// Extracts exported functions (`func Foo`), types (`type Foo`), and interfaces.
482fn extract_go_signatures(source: &str, file: &str) -> Vec<Signature> {
483    let mut sigs = Vec::new();
484    for (i, line) in source.lines().enumerate() {
485        let trimmed = line.trim();
486        // Exported function: "func Foo(" or "func (r Receiver) Foo("
487        if trimmed.starts_with("func ") {
488            // Exported if the function name starts with uppercase
489            let is_exported = trimmed
490                .trim_start_matches("func ")
491                .trim_start_matches('(') // skip receiver
492                .chars()
493                .next()
494                .map(|c| c.is_uppercase())
495                .unwrap_or(false)
496                // Also check after closing paren of receiver
497                || {
498                    if let Some(close) = trimmed.find(')') {
499                        trimmed[close..]
500                            .trim_start_matches(')')
501                            .trim()
502                            .chars()
503                            .next()
504                            .map(|c| c.is_uppercase())
505                            .unwrap_or(false)
506                    } else {
507                        false
508                    }
509                };
510            if is_exported {
511                // Signature up to opening `{`
512                let sig = trimmed.trim_end_matches('{').trim().to_string();
513                sigs.push(Signature {
514                    kind: SignatureKind::Function,
515                    text: sig,
516                    file: file.to_string(),
517                    line: i + 1,
518                });
519            }
520        } else if trimmed.starts_with("type ") {
521            // Exported types and interfaces
522            let rest = trimmed.trim_start_matches("type ").trim();
523            let first_char = rest.chars().next().unwrap_or(' ');
524            if first_char.is_uppercase() {
525                let kind = if rest.contains("interface") {
526                    SignatureKind::Trait
527                } else if rest.contains("struct") {
528                    SignatureKind::Struct
529                } else {
530                    SignatureKind::Constant
531                };
532                sigs.push(Signature {
533                    kind,
534                    text: trimmed.trim_end_matches('{').trim().to_string(),
535                    file: file.to_string(),
536                    line: i + 1,
537                });
538            }
539        }
540    }
541    sigs
542}
543
#[cfg(test)]
mod tests {
    use super::*;

    /// Public Rust items are extracted without function bodies; private ones
    /// are skipped.
    #[test]
    fn test_rust_extraction() {
        let source = r#"
pub fn hello(name: &str) -> String {
    format!("hello {name}")
}

fn private_fn() {}

pub struct Foo {
    pub bar: u32,
}

pub enum Color {
    Red,
    Green,
    Blue,
}
"#;
        let sigs = extract_rust_signatures(source, "test.rs");
        assert!(sigs.iter().any(|s| s.text.contains("pub fn hello")));
        assert!(!sigs.iter().any(|s| s.text.contains("private_fn")));
        assert!(sigs.iter().any(|s| s.text.contains("pub struct Foo")));
        assert!(sigs
            .iter()
            .any(|s| s.kind == SignatureKind::Enum && s.text.contains("pub enum Color")));
        // Function bodies must be stripped from the signature text.
        assert!(!sigs.iter().any(|s| s.text.contains("format!")));
        // Line numbers are 1-based; the source starts with a blank line.
        assert!(sigs
            .iter()
            .any(|s| s.kind == SignatureKind::Function && s.line == 2));
        assert!(sigs.iter().all(|s| s.file == "test.rs"));
    }

    /// Public top-level Python defs and classes are extracted; private
    /// functions, nested methods and bodies are not.
    #[test]
    fn test_python_extraction() {
        let source = r#"
def hello(name: str) -> str:
    return f"hello {name}"

def _private():
    pass

class Greeter:
    def __init__(self):
        pass
"#;
        let sigs = extract_python_signatures(source, "test.py");
        assert!(sigs
            .iter()
            .any(|s| s.text.starts_with("def hello(name: str) -> str")));
        assert!(!sigs.iter().any(|s| s.text.contains("_private")));
        assert!(sigs
            .iter()
            .any(|s| s.kind == SignatureKind::Struct && s.text.contains("class Greeter")));
        // Only top-level items are visited, so the method is not reported.
        assert!(!sigs.iter().any(|s| s.text.contains("__init__")));
        // Function bodies must be stripped from the signature text.
        assert!(!sigs.iter().any(|s| s.text.contains("return")));
    }
}