Skip to main content

arbor_core/
fallback_parser.rs

1//! Lightweight fallback parser for emerging language support.
2//!
3//! This parser is intentionally heuristic-based (line scanner + simple token rules)
4//! so Arbor can provide useful symbol indexing for languages that are not yet
5//! wired to a full Tree-sitter grammar in every runtime path.
6
7use crate::node::{CodeNode, NodeKind};
8
/// Extra language extensions supported via fallback parsing.
///
/// Entries are lowercase and carry no leading dot.
pub const FALLBACK_EXTENSIONS: &[&str] = &[
    // Kotlin
    "kt",
    "kts",
    // Swift
    "swift",
    // Ruby
    "rb",
    // PHP
    "php",
    "phtml",
    // Shell
    "sh",
    "bash",
    "zsh",
];
17
18pub fn is_fallback_supported_extension(ext: &str) -> bool {
19    let ext = ext.to_ascii_lowercase();
20    FALLBACK_EXTENSIONS.iter().any(|e| *e == ext)
21}
22
23pub fn parse_fallback_source(source: &str, file_path: &str, ext: &str) -> Vec<CodeNode> {
24    let ext = ext.to_ascii_lowercase();
25    let mut nodes = Vec::new();
26
27    for (idx, line) in source.lines().enumerate() {
28        let line_no = idx as u32 + 1;
29        let trimmed = line.trim_start();
30
31        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("//") {
32            continue;
33        }
34
35        let candidate = match ext.as_str() {
36            "kt" | "kts" => parse_kotlin_line(trimmed),
37            "swift" => parse_swift_line(trimmed),
38            "rb" => parse_ruby_line(trimmed),
39            "php" | "phtml" => parse_php_line(trimmed),
40            "sh" | "bash" | "zsh" => parse_shell_line(trimmed),
41            _ => None,
42        };
43
44        if let Some((name, kind)) = candidate {
45            let col = (line.len().saturating_sub(trimmed.len())) as u32;
46            let node = CodeNode::new(&name, &name, kind, file_path)
47                .with_lines(line_no, line_no)
48                .with_column(col)
49                .with_signature(trimmed.to_string());
50            nodes.push(node);
51        }
52    }
53
54    nodes
55}
56
57fn parse_kotlin_line(line: &str) -> Option<(String, NodeKind)> {
58    if let Some(rest) = line.strip_prefix("fun ") {
59        return take_ident(rest).map(|name| (name, NodeKind::Function));
60    }
61
62    if let Some(rest) = line.strip_prefix("class ") {
63        return take_ident(rest).map(|name| (name, NodeKind::Class));
64    }
65
66    if let Some(rest) = line.strip_prefix("interface ") {
67        return take_ident(rest).map(|name| (name, NodeKind::Interface));
68    }
69
70    if let Some(rest) = line.strip_prefix("object ") {
71        return take_ident(rest).map(|name| (name, NodeKind::Class));
72    }
73
74    if let Some(rest) = line.strip_prefix("enum class ") {
75        return take_ident(rest).map(|name| (name, NodeKind::Enum));
76    }
77
78    None
79}
80
81fn parse_swift_line(line: &str) -> Option<(String, NodeKind)> {
82    if let Some(rest) = line.strip_prefix("func ") {
83        return take_ident(rest).map(|name| (name, NodeKind::Function));
84    }
85
86    if let Some(rest) = line.strip_prefix("class ") {
87        return take_ident(rest).map(|name| (name, NodeKind::Class));
88    }
89
90    if let Some(rest) = line.strip_prefix("struct ") {
91        return take_ident(rest).map(|name| (name, NodeKind::Struct));
92    }
93
94    if let Some(rest) = line.strip_prefix("enum ") {
95        return take_ident(rest).map(|name| (name, NodeKind::Enum));
96    }
97
98    if let Some(rest) = line.strip_prefix("protocol ") {
99        return take_ident(rest).map(|name| (name, NodeKind::Interface));
100    }
101
102    if let Some(rest) = line.strip_prefix("extension ") {
103        return take_ident(rest).map(|name| (name, NodeKind::Module));
104    }
105
106    None
107}
108
109fn parse_ruby_line(line: &str) -> Option<(String, NodeKind)> {
110    if let Some(rest) = line.strip_prefix("def ") {
111        return take_ident(rest.trim_start_matches("self.")).map(|name| (name, NodeKind::Function));
112    }
113
114    if let Some(rest) = line.strip_prefix("class ") {
115        return take_ident(rest).map(|name| (name, NodeKind::Class));
116    }
117
118    if let Some(rest) = line.strip_prefix("module ") {
119        return take_ident(rest).map(|name| (name, NodeKind::Module));
120    }
121
122    None
123}
124
125fn parse_php_line(line: &str) -> Option<(String, NodeKind)> {
126    if let Some(rest) = line.strip_prefix("function ") {
127        return take_ident(rest).map(|name| (name, NodeKind::Function));
128    }
129
130    if let Some(rest) = line.strip_prefix("class ") {
131        return take_ident(rest).map(|name| (name, NodeKind::Class));
132    }
133
134    if let Some(rest) = line.strip_prefix("interface ") {
135        return take_ident(rest).map(|name| (name, NodeKind::Interface));
136    }
137
138    if let Some(rest) = line.strip_prefix("trait ") {
139        return take_ident(rest).map(|name| (name, NodeKind::Interface));
140    }
141
142    None
143}
144
145fn parse_shell_line(line: &str) -> Option<(String, NodeKind)> {
146    if let Some(rest) = line.strip_prefix("function ") {
147        return take_ident(rest).map(|name| (name, NodeKind::Function));
148    }
149
150    // foo() {
151    if let Some(paren_idx) = line.find("()") {
152        let name = line[..paren_idx].trim();
153        if !name.is_empty() {
154            return Some((name.to_string(), NodeKind::Function));
155        }
156    }
157
158    None
159}
160
/// Returns the leading run of identifier characters (alphanumeric or `_`) of
/// `input` as an owned string, or `None` when `input` does not start with one.
fn take_ident(input: &str) -> Option<String> {
    // Byte offset of the first character that cannot be part of an identifier.
    let end = input
        .find(|c: char| !(c.is_alphanumeric() || c == '_'))
        .unwrap_or(input.len());

    match &input[..end] {
        "" => None,
        ident => Some(ident.to_owned()),
    }
}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// The primary extension of each fallback language is reported as supported.
    #[test]
    fn fallback_supports_requested_extensions() {
        let requested = ["kt", "swift", "rb", "php", "sh"];
        assert!(requested
            .iter()
            .all(|ext| is_fallback_supported_extension(ext)));
    }

    /// A top-level Kotlin `fun` is indexed under its function name.
    #[test]
    fn parses_kotlin_function() {
        let nodes = parse_fallback_source(
            "fun fetchUser(id: String): User = TODO()",
            "sample.kt",
            "kt",
        );
        let found = nodes.iter().any(|node| node.name == "fetchUser");
        assert!(found);
    }

    /// A `name() { ... }` shell definition is indexed under its function name.
    #[test]
    fn parses_shell_function() {
        let nodes = parse_fallback_source("deploy_prod() { echo hi; }", "deploy.sh", "sh");
        let found = nodes.iter().any(|node| node.name == "deploy_prod");
        assert!(found);
    }
}