Skip to main content

sem_core/parser/plugins/
erb.rs

1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use crate::model::entity::{build_entity_id, SemanticEntity};
5use crate::parser::plugin::SemanticParserPlugin;
6use crate::utils::hash::content_hash;
7
8thread_local! {
9    static ERB_PARSER: RefCell<tree_sitter::Parser> = RefCell::new({
10        let mut p = tree_sitter::Parser::new();
11        let lang: tree_sitter::Language = tree_sitter_embedded_template::LANGUAGE.into();
12        let _ = p.set_language(&lang);
13        p
14    });
15}
16
17pub struct ErbParserPlugin;
18
19impl SemanticParserPlugin for ErbParserPlugin {
20    fn id(&self) -> &str {
21        "erb"
22    }
23
24    fn extensions(&self) -> &[&str] {
25        &[".erb"]
26    }
27
28    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
29        let lines: Vec<&str> = content.lines().collect();
30        if lines.is_empty() {
31            return Vec::new();
32        }
33
34        let mut entities = Vec::new();
35
36        // Top-level template entity
37        let template_name = extract_template_name(file_path);
38        let template_id = build_entity_id(file_path, "template", &template_name, None);
39        entities.push(SemanticEntity {
40            id: template_id.clone(),
41            file_path: file_path.to_string(),
42            entity_type: "template".to_string(),
43            name: template_name,
44            parent_id: None,
45            content: content.to_string(),
46            content_hash: content_hash(content),
47            structural_hash: None,
48            start_line: 1,
49            end_line: lines.len(),
50            metadata: None,
51        });
52
53        // Parse with tree-sitter and extract tags
54        let tags = ERB_PARSER.with(|parser| {
55            let mut parser = parser.borrow_mut();
56            match parser.parse(content.as_bytes(), None) {
57                Some(tree) => extract_tags_from_tree(&tree, content),
58                None => Vec::new(),
59            }
60        });
61
62        let mut block_stack: Vec<ErbTag> = Vec::new();
63        let mut name_counts: HashMap<String, usize> = HashMap::new();
64
65        for tag in tags {
66            match tag.kind {
67                TagKind::BlockOpen => {
68                    block_stack.push(tag);
69                }
70                TagKind::BlockClose => {
71                    if let Some(opener) = block_stack.pop() {
72                        let block_content =
73                            lines[opener.start_line - 1..tag.end_line].join("\n");
74                        let name = unique_name(&opener.name, &mut name_counts);
75                        entities.push(SemanticEntity {
76                            id: build_entity_id(
77                                file_path,
78                                "erb_block",
79                                &name,
80                                Some(&template_id),
81                            ),
82                            file_path: file_path.to_string(),
83                            entity_type: "erb_block".to_string(),
84                            name,
85                            parent_id: Some(template_id.clone()),
86                            content: block_content.clone(),
87                            content_hash: content_hash(&block_content),
88                            structural_hash: None,
89                            start_line: opener.start_line,
90                            end_line: tag.end_line,
91                            metadata: None,
92                        });
93                    }
94                }
95                TagKind::Expression => {
96                    let expr_content =
97                        lines[tag.start_line - 1..tag.end_line].join("\n");
98                    let name = unique_name(&tag.name, &mut name_counts);
99                    entities.push(SemanticEntity {
100                        id: build_entity_id(
101                            file_path,
102                            "erb_expression",
103                            &name,
104                            Some(&template_id),
105                        ),
106                        file_path: file_path.to_string(),
107                        entity_type: "erb_expression".to_string(),
108                        name,
109                        parent_id: Some(template_id.clone()),
110                        content: expr_content.clone(),
111                        content_hash: content_hash(&expr_content),
112                        structural_hash: None,
113                        start_line: tag.start_line,
114                        end_line: tag.end_line,
115                        metadata: None,
116                    });
117                }
118                // No separate Code variant needed; expressions cover all non-block tags
119            }
120        }
121
122        entities
123    }
124}
125
126// --- Internal types ---
127
/// Role an ERB tag plays when pairing tags into blocks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TagKind {
    /// Tag that starts a block (e.g. `if ...` or `... do |x|`).
    BlockOpen,
    /// Tag that closes the most recently opened block (`end`).
    BlockClose,
    /// Any other tag: an output expression or a standalone statement.
    Expression,
}

/// One classified ERB tag together with the lines it occupies.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ErbTag {
    /// How the tag participates in block pairing.
    kind: TagKind,
    /// Display name: `"end"` for closers, otherwise the (possibly truncated) tag code.
    name: String,
    /// First line of the tag, 1-indexed.
    start_line: usize,
    /// Last line of the tag, 1-indexed and inclusive.
    end_line: usize,
}
142
143// --- Helpers ---
144
/// Derive the template name from a path: the text after the last `/`, with one
/// trailing `.erb` removed if present.
fn extract_template_name(file_path: &str) -> String {
    let file_name = match file_path.rfind('/') {
        Some(slash) => &file_path[slash + 1..],
        None => file_path,
    };
    match file_name.strip_suffix(".erb") {
        Some(stem) => stem.to_owned(),
        None => file_name.to_owned(),
    }
}
149
150/// Walk the tree-sitter AST and classify each directive node.
151fn extract_tags_from_tree(tree: &tree_sitter::Tree, source: &str) -> Vec<ErbTag> {
152    let mut tags = Vec::new();
153    let root = tree.root_node();
154    let mut cursor = root.walk();
155
156    for node in root.children(&mut cursor) {
157        let start_line = node.start_position().row + 1; // 1-indexed
158        let end_line = node.end_position().row + 1;
159
160        match node.kind() {
161            "directive" | "output_directive" => {
162                if let Some(code_text) = code_child_text(&node, source) {
163                    let trimmed = code_text.trim();
164                    if trimmed.is_empty() {
165                        continue;
166                    }
167
168                    if let Some(tag) = classify_code(trimmed, start_line, end_line) {
169                        tags.push(tag);
170                    }
171                }
172            }
173            // comment_directive, content -> skip
174            _ => {}
175        }
176    }
177
178    tags
179}
180
181/// Classify a code snippet from inside an ERB tag.
182/// Returns None for mid-block keywords (else, elsif, etc.) that should be skipped.
183fn classify_code(trimmed: &str, start_line: usize, end_line: usize) -> Option<ErbTag> {
184    let first_word = trimmed.split_whitespace().next().unwrap_or("");
185
186    if first_word == "end" {
187        Some(ErbTag {
188            kind: TagKind::BlockClose,
189            name: "end".to_string(),
190            start_line,
191            end_line,
192        })
193    } else if is_block_opener(trimmed) {
194        Some(ErbTag {
195            kind: TagKind::BlockOpen,
196            name: truncate_name(trimmed),
197            start_line,
198            end_line,
199        })
200    } else if is_mid_block_keyword(first_word) {
201        None
202    } else {
203        // Expression or standalone code
204        Some(ErbTag {
205            kind: TagKind::Expression,
206            name: truncate_name(trimmed),
207            start_line,
208            end_line,
209        })
210    }
211}
212
213fn code_child_text<'a>(node: &tree_sitter::Node, source: &'a str) -> Option<&'a str> {
214    let mut cursor = node.walk();
215    for child in node.children(&mut cursor) {
216        if child.kind() == "code" {
217            return child.utf8_text(source.as_bytes()).ok();
218        }
219    }
220    None
221}
222
/// Report whether the code inside an ERB tag starts a block that a later `end` closes.
///
/// A tag opens a block when either:
/// * its first word is a block keyword (`if`, `unless`, `for`, `while`, `until`,
///   `case`, `begin`), or
/// * it contains a `do` token in block position: the last token of the tag, or
///   directly followed by block parameters (`do |item|`).
///
/// A bare `do` elsewhere is ignored. This keeps a tag such as
/// `t("what to do next")` from being treated as an opener, which would leave a stray
/// entry on the block stack and pair it with an unrelated `end`.
fn is_block_opener(content: &str) -> bool {
    let mut words = content.split_whitespace().peekable();

    if matches!(
        words.peek().copied(),
        Some("if" | "unless" | "for" | "while" | "until" | "case" | "begin")
    ) {
        return true;
    }

    // Catch `.each do |item|`, `.times do`, etc.
    while let Some(word) = words.next() {
        if word == "do" && words.peek().map_or(true, |next| next.starts_with('|')) {
            return true;
        }
    }
    false
}
234
/// Report whether `word` is a keyword that continues an already open block
/// (`else`, `elsif`, `when`, `rescue`, `ensure`) rather than opening or closing one.
fn is_mid_block_keyword(word: &str) -> bool {
    const MID_BLOCK_KEYWORDS: [&str; 5] = ["else", "elsif", "when", "rescue", "ensure"];
    MID_BLOCK_KEYWORDS.contains(&word)
}
238
/// Trim `s` and shorten it to a display name of at most 60 bytes.
///
/// Names that fit are returned unchanged. Longer names are cut at the last UTF-8
/// character boundary at or before byte 57 and suffixed with `...`. The limit is
/// measured in bytes, not characters, so multi-byte text is cut after fewer characters.
fn truncate_name(s: &str) -> String {
    const MAX_BYTES: usize = 60;
    const ELLIPSIS: &str = "...";

    let s = s.trim();
    if s.len() <= MAX_BYTES {
        return s.to_string();
    }

    // Back off to a character boundary so the slice below cannot split a code point.
    // Done by hand instead of `str::floor_char_boundary` so the code also builds on
    // toolchains that predate that method's stabilisation. Index 0 is always a
    // boundary, so the loop terminates.
    let mut cut = MAX_BYTES - ELLIPSIS.len();
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}{}", &s[..cut], ELLIPSIS)
}
248
/// Return a name that is unique among the names handed out through `counts`.
///
/// The first request for `base` returns it unchanged; the n-th request (n > 1) returns
/// `base#n`. `counts` is updated to record the request.
fn unique_name(base: &str, counts: &mut HashMap<String, usize>) -> String {
    let seen = counts.entry(base.to_owned()).or_default();
    *seen += 1;
    match *seen {
        n if n > 1 => format!("{}#{}", base, n),
        _ => base.to_owned(),
    }
}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Template mixing an if/else block, an `each` block, a comment and standalone code.
    const DASHBOARD: &str = r#"<div class="container">
  <% if @user.admin? %>
    <h1>Admin Panel</h1>
    <%= @user.name %>
  <% else %>
    <p>Access denied</p>
  <% end %>

  <% @items.each do |item| %>
    <li><%= item.title %></li>
  <% end %>

  <%# This is a comment, should be skipped %>
  <% @count = @items.length %>
</div>
"#;

    /// Template with an `each` block nested inside an `if` block.
    const NESTED: &str = r#"<% if @show %>
  <% @items.each do |item| %>
    <%= item %>
  <% end %>
<% end %>
"#;

    /// Template using the whitespace-stripping `<%-` tag variant.
    const DASH_VARIANT: &str = r#"<header>
  <%- if @show %>
    <%= @title %>
  <%- else %>
    <p>nope</p>
  <%- end if %>
</header>
"#;

    /// Template that outputs the same expression twice.
    const DUPLICATES: &str = r#"<%= @title %>
<%= @title %>
"#;

    /// Run the ERB plugin over `template` as if it were stored at `path`.
    fn extract(template: &str, path: &str) -> Vec<SemanticEntity> {
        ErbParserPlugin.extract_entities(template, path)
    }

    /// First entity whose name is exactly `name`; panics when there is none.
    fn entity_named<'a>(entities: &'a [SemanticEntity], name: &str) -> &'a SemanticEntity {
        entities.iter().find(|e| e.name == name).unwrap()
    }

    /// `(name, entity_type)` pairs for debug output.
    fn summary(entities: &[SemanticEntity]) -> Vec<(&str, &str)> {
        entities
            .iter()
            .map(|e| (e.name.as_str(), e.entity_type.as_str()))
            .collect()
    }

    /// Entities of the given type, in extraction order.
    fn of_type<'a>(entities: &'a [SemanticEntity], entity_type: &str) -> Vec<&'a SemanticEntity> {
        entities
            .iter()
            .filter(|e| e.entity_type == entity_type)
            .collect()
    }

    #[test]
    fn test_erb_extraction() {
        let entities = extract(DASHBOARD, "views/dashboard.html.erb");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        eprintln!("ERB entities: {:?}", summary(&entities));

        // Template entity
        let template = &entities[0];
        assert_eq!(template.entity_type, "template");
        assert_eq!(template.name, "dashboard.html");
        assert_eq!(template.start_line, 1);

        // if block (lines 2-7)
        let if_block = entity_named(&entities, "if @user.admin?");
        assert_eq!(if_block.entity_type, "erb_block");
        assert_eq!(if_block.start_line, 2);
        assert_eq!(if_block.end_line, 7);
        assert!(if_block.parent_id.is_some());

        // each block (lines 9-11)
        let each_block = entity_named(&entities, "@items.each do |item|");
        assert_eq!(each_block.entity_type, "erb_block");
        assert_eq!(each_block.start_line, 9);
        assert_eq!(each_block.end_line, 11);

        // Expressions
        assert!(names.contains(&"@user.name"));
        assert!(names.contains(&"item.title"));
        let user_name = entity_named(&entities, "@user.name");
        assert_eq!(user_name.entity_type, "erb_expression");
        assert_eq!(user_name.start_line, 4);

        // Standalone code shows as expression
        let code = entity_named(&entities, "@count = @items.length");
        assert_eq!(code.entity_type, "erb_expression");
        assert_eq!(code.start_line, 14);

        // Comment should be skipped
        assert!(!names.iter().any(|n| n.contains("comment")));

        // else should be skipped (mid-block keyword)
        assert!(!names.iter().any(|n| *n == "else"));
    }

    #[test]
    fn test_erb_nested_blocks() {
        let entities = extract(NESTED, "nested.html.erb");
        let blocks = of_type(&entities, "erb_block");
        assert_eq!(
            blocks.len(),
            2,
            "Should have 2 blocks: {:?}",
            blocks.iter().map(|b| &b.name).collect::<Vec<_>>()
        );

        // Inner block (each) closes first
        let each = blocks.iter().find(|b| b.name.contains("each")).unwrap();
        assert_eq!(each.start_line, 2);
        assert_eq!(each.end_line, 4);

        // Outer block (if) closes second
        let if_block = blocks.iter().find(|b| b.name.contains("if")).unwrap();
        assert_eq!(if_block.start_line, 1);
        assert_eq!(if_block.end_line, 5);
    }

    #[test]
    fn test_erb_template_name() {
        let cases = [
            ("views/best.html.erb", "best.html"),
            ("loading.erb", "loading"),
            ("app/views/_partial.html.erb", "_partial.html"),
        ];
        for (path, expected) in cases {
            assert_eq!(extract_template_name(path), expected);
        }
    }

    #[test]
    fn test_erb_dash_variant() {
        // <%- is the whitespace-stripping variant, should produce blocks like <%
        let entities = extract(DASH_VARIANT, "test.html.erb");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        eprintln!("Dash variant: {:?}", summary(&entities));

        // <%- if %> ... <%- end if %> should create a block
        let if_block = entity_named(&entities, "if @show");
        assert_eq!(if_block.entity_type, "erb_block");
        assert_eq!(if_block.start_line, 2);
        assert_eq!(if_block.end_line, 6);

        // else should be skipped
        assert!(!names.iter().any(|n| *n == "else"));
    }

    #[test]
    fn test_erb_duplicate_expressions() {
        let entities = extract(DUPLICATES, "test.erb");
        let exprs = of_type(&entities, "erb_expression");

        assert_eq!(exprs.len(), 2);
        assert_eq!(exprs[0].name, "@title");
        assert_eq!(exprs[1].name, "@title#2");
    }
}
415}