Skip to main content

arbor_core/
fallback_parser.rs

1//! Lightweight fallback parser for emerging language support.
2//!
3//! This parser is intentionally heuristic-based (line scanner + simple token rules)
4//! so Arbor can provide useful symbol indexing for languages that are not yet
5//! wired to a full Tree-sitter grammar in every runtime path.
6
7use crate::node::{CodeNode, NodeKind};
8
/// File extensions (lowercase, no leading dot) that are handled by the
/// heuristic fallback parser in this module.
///
/// Use [`is_fallback_supported_extension`] for a case-insensitive membership test.
pub const FALLBACK_EXTENSIONS: &[&str] = &[
    // Kotlin
    "kt",
    "kts",
    // Swift
    "swift",
    // Ruby
    "rb",
    // PHP
    "php",
    "phtml",
    // Shell
    "sh",
    "bash",
    "zsh",
    // Markdown for knowledge graphs (Lattice)
    "md",
    "markdown",
];
18
19pub fn is_fallback_supported_extension(ext: &str) -> bool {
20    let ext = ext.to_ascii_lowercase();
21    FALLBACK_EXTENSIONS.iter().any(|e| *e == ext)
22}
23
24pub fn parse_fallback_source(source: &str, file_path: &str, ext: &str) -> Vec<CodeNode> {
25    let ext = ext.to_ascii_lowercase();
26    let mut nodes = Vec::new();
27
28    for (idx, line) in source.lines().enumerate() {
29        let line_no = idx as u32 + 1;
30        let trimmed = line.trim_start();
31
32        let candidate = match ext.as_str() {
33            "md" | "markdown" => parse_markdown_line(trimmed),
34            "kt" | "kts" => parse_kotlin_line(trimmed),
35            "swift" => parse_swift_line(trimmed),
36            "rb" => parse_ruby_line(trimmed),
37            "php" | "phtml" => parse_php_line(trimmed),
38            "sh" | "bash" | "zsh" => parse_shell_line(trimmed),
39            _ => None,
40        };
41
42        if trimmed.is_empty()
43            || (trimmed.starts_with('#') || trimmed.starts_with("//")) && candidate.is_none()
44        {
45            continue;
46        }
47
48        if let Some((name, kind)) = candidate {
49            let col = (line.len().saturating_sub(trimmed.len())) as u32;
50            let node = CodeNode::new(&name, &name, kind, file_path)
51                .with_lines(line_no, line_no)
52                .with_column(col)
53                .with_signature(trimmed.to_string());
54            nodes.push(node);
55        }
56    }
57
58    nodes
59}
60
61fn parse_kotlin_line(line: &str) -> Option<(String, NodeKind)> {
62    if let Some(rest) = line.strip_prefix("fun ") {
63        return take_ident(rest).map(|name| (name, NodeKind::Function));
64    }
65
66    if let Some(rest) = line.strip_prefix("class ") {
67        return take_ident(rest).map(|name| (name, NodeKind::Class));
68    }
69
70    if let Some(rest) = line.strip_prefix("data class ") {
71        return take_ident(rest).map(|name| (name, NodeKind::Class));
72    }
73
74    if let Some(rest) = line.strip_prefix("interface ") {
75        return take_ident(rest).map(|name| (name, NodeKind::Interface));
76    }
77
78    if let Some(rest) = line.strip_prefix("object ") {
79        return take_ident(rest).map(|name| (name, NodeKind::Class));
80    }
81
82    if let Some(rest) = line.strip_prefix("enum class ") {
83        return take_ident(rest).map(|name| (name, NodeKind::Enum));
84    }
85
86    None
87}
88
89fn parse_swift_line(line: &str) -> Option<(String, NodeKind)> {
90    if let Some(rest) = line.strip_prefix("func ") {
91        return take_ident(rest).map(|name| (name, NodeKind::Function));
92    }
93
94    if let Some(rest) = line.strip_prefix("class ") {
95        return take_ident(rest).map(|name| (name, NodeKind::Class));
96    }
97
98    if let Some(rest) = line.strip_prefix("struct ") {
99        return take_ident(rest).map(|name| (name, NodeKind::Struct));
100    }
101
102    if let Some(rest) = line.strip_prefix("enum ") {
103        return take_ident(rest).map(|name| (name, NodeKind::Enum));
104    }
105
106    if let Some(rest) = line.strip_prefix("protocol ") {
107        return take_ident(rest).map(|name| (name, NodeKind::Interface));
108    }
109
110    if let Some(rest) = line.strip_prefix("extension ") {
111        return take_ident(rest).map(|name| (name, NodeKind::Module));
112    }
113
114    None
115}
116
117fn parse_ruby_line(line: &str) -> Option<(String, NodeKind)> {
118    if let Some(rest) = line.strip_prefix("def ") {
119        return take_ident(rest.trim_start_matches("self.")).map(|name| (name, NodeKind::Function));
120    }
121
122    if let Some(rest) = line.strip_prefix("class ") {
123        return take_ident(rest).map(|name| (name, NodeKind::Class));
124    }
125
126    if let Some(rest) = line.strip_prefix("module ") {
127        return take_ident(rest).map(|name| (name, NodeKind::Module));
128    }
129
130    None
131}
132
133fn parse_php_line(line: &str) -> Option<(String, NodeKind)> {
134    if let Some(rest) = line.strip_prefix("function ") {
135        return take_ident(rest).map(|name| (name, NodeKind::Function));
136    }
137
138    if let Some(rest) = line.strip_prefix("class ") {
139        return take_ident(rest).map(|name| (name, NodeKind::Class));
140    }
141
142    if let Some(rest) = line.strip_prefix("interface ") {
143        return take_ident(rest).map(|name| (name, NodeKind::Interface));
144    }
145
146    if let Some(rest) = line.strip_prefix("trait ") {
147        return take_ident(rest).map(|name| (name, NodeKind::Interface));
148    }
149
150    None
151}
152
153fn parse_shell_line(line: &str) -> Option<(String, NodeKind)> {
154    if let Some(rest) = line.strip_prefix("function ") {
155        return take_ident(rest).map(|name| (name, NodeKind::Function));
156    }
157
158    // foo() {
159    if let Some(paren_idx) = line.find("()") {
160        let name = line[..paren_idx].trim();
161        if !name.is_empty() {
162            return Some((name.to_string(), NodeKind::Function));
163        }
164    }
165
166    None
167}
168
169fn parse_markdown_line(line: &str) -> Option<(String, NodeKind)> {
170    let trimmed = line.trim_start();
171    if let Some(rest) = trimmed.strip_prefix("# ") {
172        return take_ident(rest).map(|name| (name, NodeKind::Section));
173    }
174    if let Some(rest) = trimmed.strip_prefix("## ") {
175        return take_ident(rest).map(|name| (name, NodeKind::Section));
176    }
177    if let Some(rest) = trimmed.strip_prefix("### ") {
178        return take_ident(rest).map(|name| (name, NodeKind::Section));
179    }
180    // Support ## Heading with ID or other variants
181    if trimmed.starts_with("#") && trimmed.contains(' ') {
182        let name = trimmed
183            .split_whitespace()
184            .nth(1)
185            .unwrap_or(trimmed)
186            .trim_start_matches('#')
187            .trim();
188        if !name.is_empty() {
189            return Some((name.to_string(), NodeKind::Section));
190        }
191    }
192    None
193}
194
/// Returns the longest prefix of `input` made of alphanumeric characters, `_`
/// or `-`, or `None` when `input` does not start with such a character.
///
/// Alphanumeric is Unicode-aware (`char::is_alphanumeric`). Leading whitespace
/// is not skipped.
fn take_ident(input: &str) -> Option<String> {
    let ident: String = input
        .chars()
        .take_while(|&c| c.is_alphanumeric() || c == '_' || c == '-')
        .collect();

    Some(ident).filter(|found| !found.is_empty())
}
211
// Unit tests for the fallback parser: extension lookup plus one or more
// representative sources per supported language.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fallback_supports_requested_extensions() {
        for ext in ["kt", "swift", "rb", "php", "sh", "md"] {
            assert!(is_fallback_supported_extension(ext));
        }
    }

    #[test]
    fn fallback_supports_every_listed_extension() {
        for ext in FALLBACK_EXTENSIONS {
            assert!(is_fallback_supported_extension(ext));
        }
    }

    #[test]
    fn fallback_extension_check_is_case_insensitive() {
        assert!(is_fallback_supported_extension("KT"));
        assert!(is_fallback_supported_extension("Md"));
    }

    #[test]
    fn parses_kotlin_function() {
        let source = "fun fetchUser(id: String): User = TODO()";
        let nodes = parse_fallback_source(source, "sample.kt", "kt");
        assert!(nodes.iter().any(|n| n.name == "fetchUser"));
    }

    #[test]
    fn parses_shell_function() {
        let source = "deploy_prod() { echo hi; }";
        let nodes = parse_fallback_source(source, "deploy.sh", "sh");
        assert!(nodes.iter().any(|n| n.name == "deploy_prod"));
    }

    #[test]
    fn parses_swift_source() {
        let source = r#"
class UserManager {
    func getUser() -> User {
        return User()
    }
}

struct Point {
    var x: Double
    var y: Double
}

enum Status {
    case active
    case inactive
}
"#;
        let nodes = parse_fallback_source(source, "Users.swift", "swift");
        assert!(nodes
            .iter()
            .any(|n| n.name == "UserManager" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "getUser" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Point" && matches!(n.kind, NodeKind::Struct)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Status" && matches!(n.kind, NodeKind::Enum)));
    }

    #[test]
    fn parses_ruby_source() {
        let source = r#"
class ApplicationController
  def index
    render json: { status: "ok" }
  end

  def show
    @user = User.find(params[:id])
  end
end

module Authentication
end
"#;
        let nodes = parse_fallback_source(source, "controller.rb", "rb");
        assert!(nodes
            .iter()
            .any(|n| n.name == "ApplicationController" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "index" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "show" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Authentication" && matches!(n.kind, NodeKind::Module)));
    }

    #[test]
    fn parses_php_source() {
        let source = r#"
class PaymentProcessor {
    function processPayment($amount) {
        return true;
    }
}

interface Gateway {
}

function helper() {
}
"#;
        let nodes = parse_fallback_source(source, "payment.php", "php");
        assert!(nodes
            .iter()
            .any(|n| n.name == "PaymentProcessor" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "processPayment" && matches!(n.kind, NodeKind::Function)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Gateway" && matches!(n.kind, NodeKind::Interface)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "helper" && matches!(n.kind, NodeKind::Function)));
    }

    #[test]
    fn parses_markdown_headings() {
        let source = "# Overview\n\nSome body text.\n\n## Details\n### Notes\n";
        let nodes = parse_fallback_source(source, "README.md", "md");
        for heading in ["Overview", "Details", "Notes"] {
            assert!(nodes
                .iter()
                .any(|n| n.name == heading && matches!(n.kind, NodeKind::Section)));
        }
        // Plain paragraphs must not be indexed.
        assert!(!nodes.iter().any(|n| n.name == "Some"));
    }

    #[test]
    fn fallback_ignores_comments() {
        // Lines starting with // or # should not produce nodes
        let source = r#"
// class NotAClass
# def not_a_function
fun realFunction(x: Int): Int = x
"#;
        let nodes = parse_fallback_source(source, "test.kt", "kt");
        assert!(!nodes.iter().any(|n| n.name == "NotAClass"));
        assert!(!nodes.iter().any(|n| n.name == "not_a_function"));
        assert!(nodes.iter().any(|n| n.name == "realFunction"));
    }

    #[test]
    fn fallback_kotlin_class_and_data_class() {
        let source = r#"
class Repository {
}
data class UserDto(val name: String)
object Singleton
"#;
        let nodes = parse_fallback_source(source, "models.kt", "kt");
        assert!(nodes
            .iter()
            .any(|n| n.name == "Repository" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "UserDto" && matches!(n.kind, NodeKind::Class)));
        assert!(nodes
            .iter()
            .any(|n| n.name == "Singleton" && matches!(n.kind, NodeKind::Class)));
    }

    #[test]
    fn fallback_empty_source_returns_empty() {
        let nodes = parse_fallback_source("", "empty.kt", "kt");
        assert!(nodes.is_empty());
    }

    #[test]
    fn fallback_unsupported_extension_returns_empty() {
        assert!(!is_fallback_supported_extension("xyz"));
        assert!(!is_fallback_supported_extension("rs"));

        // Actually run the parser with an unsupported extension: it must neither
        // panic nor produce nodes, even for text that looks like a declaration.
        let nodes = parse_fallback_source("fn main() {}\nclass Foo {}", "main.rs", "rs");
        assert!(nodes.is_empty());
    }

    #[test]
    fn parses_shell_function_keyword() {
        let source = "function deploy_staging { echo staging; }";
        let nodes = parse_fallback_source(source, "deploy.bash", "bash");
        assert!(nodes.iter().any(|n| n.name == "deploy_staging"));
    }
}
385}