Skip to main content

arbor_core/
fallback_parser.rs

//! Lightweight fallback parser for emerging language support.
//!
//! This parser is intentionally heuristic-based (line scanner + simple token rules)
//! so Arbor can provide useful symbol indexing for languages that are not yet
//! wired to a full Tree-sitter grammar in every runtime path.
6
7use crate::node::{CodeNode, NodeKind};
8
/// File extensions routed through the fallback parser.
///
/// Entries are lowercase and carry no leading dot.
pub const FALLBACK_EXTENSIONS: &[&str] = &[
    // Kotlin
    "kt",
    "kts",
    // Swift
    "swift",
    // Ruby
    "rb",
    // PHP
    "php",
    "phtml",
    // Shell
    "sh",
    "bash",
    "zsh",
    // Markdown for knowledge graphs (Lattice)
    "md",
    "markdown",
];
18
19pub fn is_fallback_supported_extension(ext: &str) -> bool {
20    let ext = ext.to_ascii_lowercase();
21    FALLBACK_EXTENSIONS.iter().any(|e| *e == ext)
22}
23
24pub fn parse_fallback_source(source: &str, file_path: &str, ext: &str) -> Vec<CodeNode> {
25    let ext = ext.to_ascii_lowercase();
26    let mut nodes = Vec::new();
27
28    for (idx, line) in source.lines().enumerate() {
29        let line_no = idx as u32 + 1;
30        let trimmed = line.trim_start();
31
32        let candidate = match ext.as_str() {
33            "md" | "markdown" => parse_markdown_line(trimmed),
34            "kt" | "kts" => parse_kotlin_line(trimmed),
35            "swift" => parse_swift_line(trimmed),
36            "rb" => parse_ruby_line(trimmed),
37            "php" | "phtml" => parse_php_line(trimmed),
38            "sh" | "bash" | "zsh" => parse_shell_line(trimmed),
39            _ => None,
40        };
41
42        if trimmed.is_empty() || (trimmed.starts_with('#') || trimmed.starts_with("//")) && candidate.is_none() {
43            continue;
44        }
45
46        if let Some((name, kind)) = candidate {
47            let col = (line.len().saturating_sub(trimmed.len())) as u32;
48            let node = CodeNode::new(&name, &name, kind, file_path)
49                .with_lines(line_no, line_no)
50                .with_column(col)
51                .with_signature(trimmed.to_string());
52            nodes.push(node);
53        }
54    }
55
56    nodes
57}
58
59fn parse_kotlin_line(line: &str) -> Option<(String, NodeKind)> {
60    if let Some(rest) = line.strip_prefix("fun ") {
61        return take_ident(rest).map(|name| (name, NodeKind::Function));
62    }
63
64    if let Some(rest) = line.strip_prefix("class ") {
65        return take_ident(rest).map(|name| (name, NodeKind::Class));
66    }
67
68    if let Some(rest) = line.strip_prefix("data class ") {
69        return take_ident(rest).map(|name| (name, NodeKind::Class));
70    }
71
72    if let Some(rest) = line.strip_prefix("interface ") {
73        return take_ident(rest).map(|name| (name, NodeKind::Interface));
74    }
75
76    if let Some(rest) = line.strip_prefix("object ") {
77        return take_ident(rest).map(|name| (name, NodeKind::Class));
78    }
79
80    if let Some(rest) = line.strip_prefix("enum class ") {
81        return take_ident(rest).map(|name| (name, NodeKind::Enum));
82    }
83
84    None
85}
86
87fn parse_swift_line(line: &str) -> Option<(String, NodeKind)> {
88    if let Some(rest) = line.strip_prefix("func ") {
89        return take_ident(rest).map(|name| (name, NodeKind::Function));
90    }
91
92    if let Some(rest) = line.strip_prefix("class ") {
93        return take_ident(rest).map(|name| (name, NodeKind::Class));
94    }
95
96    if let Some(rest) = line.strip_prefix("struct ") {
97        return take_ident(rest).map(|name| (name, NodeKind::Struct));
98    }
99
100    if let Some(rest) = line.strip_prefix("enum ") {
101        return take_ident(rest).map(|name| (name, NodeKind::Enum));
102    }
103
104    if let Some(rest) = line.strip_prefix("protocol ") {
105        return take_ident(rest).map(|name| (name, NodeKind::Interface));
106    }
107
108    if let Some(rest) = line.strip_prefix("extension ") {
109        return take_ident(rest).map(|name| (name, NodeKind::Module));
110    }
111
112    None
113}
114
115fn parse_ruby_line(line: &str) -> Option<(String, NodeKind)> {
116    if let Some(rest) = line.strip_prefix("def ") {
117        return take_ident(rest.trim_start_matches("self.")).map(|name| (name, NodeKind::Function));
118    }
119
120    if let Some(rest) = line.strip_prefix("class ") {
121        return take_ident(rest).map(|name| (name, NodeKind::Class));
122    }
123
124    if let Some(rest) = line.strip_prefix("module ") {
125        return take_ident(rest).map(|name| (name, NodeKind::Module));
126    }
127
128    None
129}
130
131fn parse_php_line(line: &str) -> Option<(String, NodeKind)> {
132    if let Some(rest) = line.strip_prefix("function ") {
133        return take_ident(rest).map(|name| (name, NodeKind::Function));
134    }
135
136    if let Some(rest) = line.strip_prefix("class ") {
137        return take_ident(rest).map(|name| (name, NodeKind::Class));
138    }
139
140    if let Some(rest) = line.strip_prefix("interface ") {
141        return take_ident(rest).map(|name| (name, NodeKind::Interface));
142    }
143
144    if let Some(rest) = line.strip_prefix("trait ") {
145        return take_ident(rest).map(|name| (name, NodeKind::Interface));
146    }
147
148    None
149}
150
151fn parse_shell_line(line: &str) -> Option<(String, NodeKind)> {
152    if let Some(rest) = line.strip_prefix("function ") {
153        return take_ident(rest).map(|name| (name, NodeKind::Function));
154    }
155
156    // foo() {
157    if let Some(paren_idx) = line.find("()") {
158        let name = line[..paren_idx].trim();
159        if !name.is_empty() {
160            return Some((name.to_string(), NodeKind::Function));
161        }
162    }
163
164    None
165}
166
167fn parse_markdown_line(line: &str) -> Option<(String, NodeKind)> {
168    let trimmed = line.trim_start();
169    if let Some(rest) = trimmed.strip_prefix("# ") {
170        return take_ident(rest).map(|name| (name, NodeKind::Section));
171    }
172    if let Some(rest) = trimmed.strip_prefix("## ") {
173        return take_ident(rest).map(|name| (name, NodeKind::Section));
174    }
175    if let Some(rest) = trimmed.strip_prefix("### ") {
176        return take_ident(rest).map(|name| (name, NodeKind::Section));
177    }
178    // Support ## Heading with ID or other variants
179    if trimmed.starts_with("#") && trimmed.contains(' ') {
180        let name = trimmed.split_whitespace().nth(1).unwrap_or(trimmed).trim_start_matches('#').trim();
181        if !name.is_empty() {
182            return Some((name.to_string(), NodeKind::Section));
183        }
184    }
185    None
186}
187
/// Returns the longest prefix of `input` made of alphanumeric characters,
/// `_` or `-`, or `None` when `input` does not start with such a character.
fn take_ident(input: &str) -> Option<String> {
    let ident: String = input
        .chars()
        .take_while(|&c| c.is_alphanumeric() || matches!(c, '_' | '-'))
        .collect();

    if ident.is_empty() {
        None
    } else {
        Some(ident)
    }
}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Every advertised language family has at least one accepted extension.
    #[test]
    fn fallback_supports_requested_extensions() {
        for ext in ["kt", "swift", "rb", "php", "sh", "md"] {
            assert!(is_fallback_supported_extension(ext));
        }
    }

    /// Extension lookup ignores ASCII case.
    #[test]
    fn fallback_extension_check_ignores_ascii_case() {
        for ext in ["KT", "Swift", "MD"] {
            assert!(is_fallback_supported_extension(ext));
        }
    }

    #[test]
    fn parses_kotlin_function() {
        let source = "fun fetchUser(id: String): User = TODO()";
        let nodes = parse_fallback_source(source, "sample.kt", "kt");
        assert!(nodes.iter().any(|n| n.name == "fetchUser"));
    }

    #[test]
    fn parses_shell_function() {
        let source = "deploy_prod() { echo hi; }";
        let nodes = parse_fallback_source(source, "deploy.sh", "sh");
        assert!(nodes.iter().any(|n| n.name == "deploy_prod"));
    }

    #[test]
    fn parses_swift_source() {
        let source = r#"
class UserManager {
    func getUser() -> User {
        return User()
    }
}

struct Point {
    var x: Double
    var y: Double
}

enum Status {
    case active
    case inactive
}
"#;
        let nodes = parse_fallback_source(source, "Users.swift", "swift");
        assert!(nodes
            .iter()
            .any(|n| n.name == "UserManager" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "getUser" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Point" && matches!(n.kind, NodeKind::Struct)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Status" && matches!(n.kind, NodeKind::Enum)));
    }

    #[test]
    fn parses_ruby_source() {
        let source = r#"
class ApplicationController
  def index
    render json: { status: "ok" }
  end

  def show
    @user = User.find(params[:id])
  end
end

module Authentication
end
"#;
        let nodes = parse_fallback_source(source, "controller.rb", "rb");
        assert!(nodes
            .iter()
            .any(|n| n.name == "ApplicationController" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "index" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "show" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Authentication" && matches!(n.kind, NodeKind::Module)));
    }

    #[test]
    fn parses_php_source() {
        let source = r#"
class PaymentProcessor {
    function processPayment($amount) {
        return true;
    }
}

interface Gateway {
}

function helper() {
}
"#;
        let nodes = parse_fallback_source(source, "payment.php", "php");
        assert!(nodes
            .iter()
            .any(|n| n.name == "PaymentProcessor" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "processPayment" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Gateway" && matches!(n.kind, NodeKind::Interface)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "helper" && matches!(n.kind, NodeKind::Function)));
    }

    #[test]
    fn parses_markdown_headings() {
        let source = "# Overview\n\nSome text.\n\n## Details\n";
        let nodes = parse_fallback_source(source, "README.md", "md");
        assert!(nodes
            .iter()
            .any(|n| n.name == "Overview" && matches!(n.kind, NodeKind::Section)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Details" && matches!(n.kind, NodeKind::Section)));
    }

    #[test]
    fn fallback_ignores_comments() {
        // Lines starting with // or # should not produce nodes
        let source = r#"
// class NotAClass
# def not_a_function
fun realFunction(x: Int): Int = x
"#;
        let nodes = parse_fallback_source(source, "test.kt", "kt");
        assert!(!nodes.iter().any(|n| n.name == "NotAClass"));
        assert!(!nodes.iter().any(|n| n.name == "not_a_function"));
        assert!(nodes.iter().any(|n| n.name == "realFunction"));
    }

    #[test]
    fn fallback_kotlin_class_and_data_class() {
        let source = r#"
class Repository {
}
data class UserDto(val name: String)
object Singleton
"#;
        let nodes = parse_fallback_source(source, "models.kt", "kt");
        assert!(nodes
            .iter()
            .any(|n| n.name == "Repository" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "UserDto" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Singleton" && matches!(n.kind, NodeKind::Class)));
    }

    #[test]
    fn fallback_empty_source_returns_empty() {
        let nodes = parse_fallback_source("", "empty.kt", "kt");
        assert!(nodes.is_empty());
    }

    #[test]
    fn fallback_unsupported_extension_returns_empty() {
        // Unsupported extensions are rejected by the lookup...
        assert!(!is_fallback_supported_extension("xyz"));
        assert!(!is_fallback_supported_extension("rs"));

        // ...and parsing with one neither panics nor produces nodes.
        let nodes = parse_fallback_source("fn main() {}", "main.rs", "rs");
        assert!(nodes.is_empty());
    }

    #[test]
    fn parses_shell_function_keyword() {
        let source = "function deploy_staging { echo staging; }";
        let nodes = parse_fallback_source(source, "deploy.bash", "bash");
        assert!(nodes.iter().any(|n| n.name == "deploy_staging"));
    }
}