css_variable_lsp/
dom_tree.rs

1use crate::types::DOMNodeInfo;
2
3#[derive(Debug, Clone)]
4pub struct DomNode {
5    pub tag: String,
6    pub id: Option<String>,
7    pub classes: Vec<String>,
8    pub start: usize,
9    pub end: usize,
10    pub parent: Option<usize>,
11    pub children: Vec<usize>,
12}
13
14#[derive(Debug, Clone)]
15pub struct DomTree {
16    nodes: Vec<DomNode>,
17    roots: Vec<usize>,
18}
19
20#[derive(Debug, Clone)]
21pub struct StyleBlock {
22    pub content: String,
23    pub content_start: usize,
24}
25
26#[derive(Debug, Clone)]
27pub struct InlineStyle {
28    pub value: String,
29    pub value_start: usize,
30    pub attribute_start: usize,
31}
32
33#[derive(Debug, Clone)]
34pub struct HtmlParseResult {
35    pub dom_tree: DomTree,
36    pub style_blocks: Vec<StyleBlock>,
37    pub inline_styles: Vec<InlineStyle>,
38}
39
40impl DomTree {
41    pub fn parse(html: &str) -> HtmlParseResult {
42        let bytes = html.as_bytes();
43        let len = bytes.len();
44        let mut i = 0;
45        let mut comment_depth = 0usize;
46        let mut nodes: Vec<DomNode> = Vec::new();
47        let mut roots: Vec<usize> = Vec::new();
48        let mut stack: Vec<usize> = Vec::new();
49        let mut style_blocks = Vec::new();
50        let mut inline_styles = Vec::new();
51
52        while i < len {
53            if comment_depth > 0 {
54                if starts_with(bytes, i, b"<!--") {
55                    comment_depth += 1;
56                    i += 4;
57                    continue;
58                }
59                if starts_with(bytes, i, b"-->") {
60                    comment_depth -= 1;
61                    i += 3;
62                    continue;
63                }
64                i += 1;
65                continue;
66            }
67
68            if starts_with(bytes, i, b"<!--") {
69                comment_depth = 1;
70                i += 4;
71                continue;
72            }
73
74            if bytes[i] != b'<' {
75                i += 1;
76                continue;
77            }
78
79            if i + 1 >= len {
80                break;
81            }
82
83            if bytes[i + 1] == b'/' {
84                // end tag
85                if let Some((tag_name, end_pos)) = parse_end_tag(html, i) {
86                    let mut match_index = None;
87                    for (pos, node_idx) in stack.iter().enumerate().rev() {
88                        if nodes[*node_idx].tag == tag_name {
89                            match_index = Some(pos);
90                            break;
91                        }
92                    }
93                    if let Some(pos) = match_index {
94                        let node_idx = stack[pos];
95                        nodes[node_idx].end = end_pos;
96                        stack.truncate(pos);
97                    }
98                    i = end_pos;
99                    continue;
100                }
101            }
102
103            if bytes[i + 1] == b'!' {
104                // doctype or other markup, skip to >
105                if let Some(end_pos) = find_char(bytes, i + 2, b'>') {
106                    i = end_pos + 1;
107                    continue;
108                }
109            }
110
111            let tag_start = i;
112            i += 1;
113            while i < len && bytes[i].is_ascii_whitespace() {
114                i += 1;
115            }
116            let name_start = i;
117            while i < len && is_tag_name_char(bytes[i]) {
118                i += 1;
119            }
120            if name_start == i {
121                i += 1;
122                continue;
123            }
124            let tag_name = html[name_start..i].to_lowercase();
125
126            let mut id: Option<String> = None;
127            let mut classes: Vec<String> = Vec::new();
128            let mut self_closing = false;
129
130            while i < len {
131                while i < len && bytes[i].is_ascii_whitespace() {
132                    i += 1;
133                }
134                if i >= len {
135                    break;
136                }
137                if bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'>' {
138                    self_closing = true;
139                    i += 2;
140                    break;
141                }
142                if bytes[i] == b'>' {
143                    i += 1;
144                    break;
145                }
146
147                let attr_name_start = i;
148                while i < len && is_attr_name_char(bytes[i]) {
149                    i += 1;
150                }
151                if attr_name_start == i {
152                    i += 1;
153                    continue;
154                }
155                let attr_name = html[attr_name_start..i].to_lowercase();
156                while i < len && bytes[i].is_ascii_whitespace() {
157                    i += 1;
158                }
159
160                let mut value: Option<String> = None;
161                let mut value_start = None;
162                if i < len && bytes[i] == b'=' {
163                    i += 1;
164                    while i < len && bytes[i].is_ascii_whitespace() {
165                        i += 1;
166                    }
167                    if i < len && (bytes[i] == b'"' || bytes[i] == b'\'') {
168                        let quote = bytes[i];
169                        i += 1;
170                        let start = i;
171                        while i < len && bytes[i] != quote {
172                            i += 1;
173                        }
174                        let end = i.min(len);
175                        value = Some(html[start..end].to_string());
176                        value_start = Some(start);
177                        if i < len {
178                            i += 1;
179                        }
180                    } else {
181                        let start = i;
182                        while i < len && !bytes[i].is_ascii_whitespace() && bytes[i] != b'>' {
183                            i += 1;
184                        }
185                        let end = i;
186                        value = Some(html[start..end].to_string());
187                        value_start = Some(start);
188                    }
189                }
190
191                match attr_name.as_str() {
192                    "id" => {
193                        if let Some(v) = &value {
194                            if !v.is_empty() {
195                                id = Some(v.to_string());
196                            }
197                        }
198                    }
199                    "class" | "classname" => {
200                        if let Some(v) = &value {
201                            classes.extend(v.split_whitespace().map(|c| c.to_string()));
202                        }
203                    }
204                    "style" => {
205                        if let (Some(v), Some(v_start)) = (value.clone(), value_start) {
206                            inline_styles.push(InlineStyle {
207                                value: v,
208                                value_start: v_start,
209                                attribute_start: attr_name_start,
210                            });
211                        }
212                    }
213                    _ => {}
214                }
215            }
216
217            let tag_end = i;
218            let node_idx = nodes.len();
219            let parent = stack.last().copied();
220            nodes.push(DomNode {
221                tag: tag_name.clone(),
222                id,
223                classes,
224                start: tag_start,
225                end: tag_end,
226                parent,
227                children: Vec::new(),
228            });
229
230            if let Some(parent_idx) = parent {
231                nodes[parent_idx].children.push(node_idx);
232            } else {
233                roots.push(node_idx);
234            }
235
236            if tag_name == "style" {
237                if let Some((content_start, content_end, close_end)) =
238                    find_block_content(html, tag_end, "style")
239                {
240                    let content = html[content_start..content_end].to_string();
241                    style_blocks.push(StyleBlock {
242                        content,
243                        content_start,
244                    });
245                    nodes[node_idx].end = close_end;
246                    i = close_end;
247                    continue;
248                }
249            }
250
251            if tag_name == "script" {
252                if let Some((_, _, close_end)) = find_block_content(html, tag_end, "script") {
253                    nodes[node_idx].end = close_end;
254                    i = close_end;
255                    continue;
256                }
257            }
258
259            if self_closing || is_void_tag(&tag_name) {
260                nodes[node_idx].end = tag_end;
261            } else {
262                stack.push(node_idx);
263            }
264        }
265
266        let final_end = html.len();
267        for idx in stack {
268            if nodes[idx].end < final_end {
269                nodes[idx].end = final_end;
270            }
271        }
272
273        HtmlParseResult {
274            dom_tree: DomTree { nodes, roots },
275            style_blocks,
276            inline_styles,
277        }
278    }
279
280    pub fn find_node_at_position(&self, position: usize) -> Option<DOMNodeInfo> {
281        for &root_idx in &self.roots {
282            if let Some(info) = self.find_node_recursive(root_idx, position) {
283                return Some(info);
284            }
285        }
286        None
287    }
288
289    fn find_node_recursive(&self, idx: usize, position: usize) -> Option<DOMNodeInfo> {
290        let node = &self.nodes[idx];
291        if position < node.start || position > node.end {
292            return None;
293        }
294        for &child in &node.children {
295            if let Some(found) = self.find_node_recursive(child, position) {
296                return Some(found);
297            }
298        }
299        Some(self.to_info(idx))
300    }
301
302    pub fn matches_selector(&self, node_index: usize, selector: &str) -> bool {
303        let selector = selector.trim();
304        if selector.is_empty() {
305            return false;
306        }
307        if selector == ":root" {
308            return true;
309        }
310        let selectors: Vec<&str> = selector.split(',').map(|s| s.trim()).collect();
311        for sel in selectors {
312            if sel.is_empty() {
313                continue;
314            }
315            let parts = parse_selector_parts(sel);
316            if matches_selector_parts(self, node_index, &parts) {
317                return true;
318            }
319        }
320        false
321    }
322
323    fn to_info(&self, idx: usize) -> DOMNodeInfo {
324        let node = &self.nodes[idx];
325        DOMNodeInfo {
326            tag: node.tag.clone(),
327            id: node.id.clone(),
328            classes: node.classes.clone(),
329            position: node.start,
330            node_index: Some(idx),
331        }
332    }
333}
334
335#[derive(Debug, Clone, Copy)]
336enum Combinator {
337    Descendant,
338    Child,
339}
340
341#[derive(Debug, Clone)]
342struct SimpleSelector {
343    tag: Option<String>,
344    id: Option<String>,
345    classes: Vec<String>,
346}
347
348#[derive(Debug, Clone)]
349struct SelectorPart {
350    combinator: Combinator,
351    selector: SimpleSelector,
352}
353
354fn parse_selector_parts(selector: &str) -> Vec<SelectorPart> {
355    let mut tokens: Vec<String> = Vec::new();
356    let mut current = String::new();
357    let mut in_attr = 0usize;
358    let mut in_paren = 0usize;
359    let mut last_was_space = false;
360
361    for ch in selector.chars() {
362        match ch {
363            '[' => {
364                in_attr += 1;
365                current.push(ch);
366                last_was_space = false;
367            }
368            ']' => {
369                in_attr = in_attr.saturating_sub(1);
370                current.push(ch);
371                last_was_space = false;
372            }
373            '(' => {
374                in_paren += 1;
375                current.push(ch);
376                last_was_space = false;
377            }
378            ')' => {
379                in_paren = in_paren.saturating_sub(1);
380                current.push(ch);
381                last_was_space = false;
382            }
383            '>' if in_attr == 0 && in_paren == 0 => {
384                if !current.trim().is_empty() {
385                    tokens.push(current.trim().to_string());
386                }
387                tokens.push(">".to_string());
388                current.clear();
389                last_was_space = false;
390            }
391            ch if ch.is_whitespace() && in_attr == 0 && in_paren == 0 => {
392                if !current.trim().is_empty() {
393                    tokens.push(current.trim().to_string());
394                    current.clear();
395                }
396                if !last_was_space {
397                    tokens.push(" ".to_string());
398                    last_was_space = true;
399                }
400            }
401            _ => {
402                current.push(ch);
403                last_was_space = false;
404            }
405        }
406    }
407
408    if !current.trim().is_empty() {
409        tokens.push(current.trim().to_string());
410    }
411
412    let mut parts = Vec::new();
413    let mut next_combinator = Combinator::Descendant;
414
415    for token in tokens {
416        if token == ">" {
417            next_combinator = Combinator::Child;
418            continue;
419        }
420        if token == " " {
421            next_combinator = Combinator::Descendant;
422            continue;
423        }
424        let selector = parse_simple_selector(&token);
425        parts.push(SelectorPart {
426            combinator: next_combinator,
427            selector,
428        });
429        next_combinator = Combinator::Descendant;
430    }
431
432    parts
433}
434
435fn parse_simple_selector(token: &str) -> SimpleSelector {
436    let mut tag: Option<String> = None;
437    let mut id: Option<String> = None;
438    let mut classes: Vec<String> = Vec::new();
439
440    let mut slice = token;
441    if let Some(idx) = slice.find([':', '[']) {
442        slice = &slice[..idx];
443    }
444
445    let mut current = String::new();
446    let mut mode = 't'; // t=tag, c=class, i=id
447
448    for ch in slice.chars() {
449        match ch {
450            '#' => {
451                if mode == 't' && !current.is_empty() && tag.is_none() {
452                    tag = Some(current.clone());
453                } else if mode == 'c' && !current.is_empty() {
454                    classes.push(current.clone());
455                }
456                current.clear();
457                mode = 'i';
458            }
459            '.' => {
460                if mode == 't' && !current.is_empty() && tag.is_none() {
461                    tag = Some(current.clone());
462                } else if mode == 'i' && !current.is_empty() {
463                    id = Some(current.clone());
464                } else if mode == 'c' && !current.is_empty() {
465                    classes.push(current.clone());
466                }
467                current.clear();
468                mode = 'c';
469            }
470            _ => current.push(ch),
471        }
472    }
473
474    if !current.is_empty() {
475        match mode {
476            't' => {
477                if current != "*" {
478                    tag = Some(current);
479                }
480            }
481            'i' => id = Some(current),
482            'c' => classes.push(current),
483            _ => {}
484        }
485    }
486
487    SimpleSelector { tag, id, classes }
488}
489
490fn matches_selector_parts(tree: &DomTree, node_index: usize, parts: &[SelectorPart]) -> bool {
491    if parts.is_empty() {
492        return false;
493    }
494
495    let mut current_index = Some(node_index);
496    for (idx, part) in parts.iter().enumerate().rev() {
497        let node_idx = match current_index {
498            Some(i) => i,
499            None => return false,
500        };
501        if !matches_simple_selector(&tree.nodes[node_idx], &part.selector) {
502            return false;
503        }
504
505        if idx == 0 {
506            return true;
507        }
508
509        let next_part = &parts[idx - 1];
510        match part.combinator {
511            Combinator::Child => {
512                current_index = tree.nodes[node_idx].parent;
513                if let Some(parent_idx) = current_index {
514                    if !matches_simple_selector(&tree.nodes[parent_idx], &next_part.selector) {
515                        return false;
516                    }
517                } else {
518                    return false;
519                }
520            }
521            Combinator::Descendant => {
522                let mut parent = tree.nodes[node_idx].parent;
523                let mut matched = false;
524                while let Some(parent_idx) = parent {
525                    if matches_simple_selector(&tree.nodes[parent_idx], &next_part.selector) {
526                        matched = true;
527                        current_index = Some(parent_idx);
528                        break;
529                    }
530                    parent = tree.nodes[parent_idx].parent;
531                }
532                if !matched {
533                    return false;
534                }
535            }
536        }
537    }
538
539    true
540}
541
542fn matches_simple_selector(node: &DomNode, selector: &SimpleSelector) -> bool {
543    if let Some(tag) = &selector.tag {
544        if node.tag != tag.to_lowercase() {
545            return false;
546        }
547    }
548    if let Some(id) = &selector.id {
549        if node.id.as_deref() != Some(id) {
550            return false;
551        }
552    }
553    for class in &selector.classes {
554        if !node.classes.iter().any(|c| c == class) {
555            return false;
556        }
557    }
558    true
559}
560
561fn is_tag_name_char(b: u8) -> bool {
562    b.is_ascii_alphanumeric() || b == b'-' || b == b':'
563}
564
565fn is_attr_name_char(b: u8) -> bool {
566    b.is_ascii_alphanumeric() || b == b'-' || b == b'_' || b == b':'
567}
568
569fn is_void_tag(tag: &str) -> bool {
570    matches!(
571        tag,
572        "area"
573            | "base"
574            | "br"
575            | "col"
576            | "embed"
577            | "hr"
578            | "img"
579            | "input"
580            | "link"
581            | "meta"
582            | "param"
583            | "source"
584            | "track"
585            | "wbr"
586    )
587}
588
589fn starts_with(bytes: &[u8], idx: usize, pattern: &[u8]) -> bool {
590    bytes.len() >= idx + pattern.len() && &bytes[idx..idx + pattern.len()] == pattern
591}
592
593fn find_char(bytes: &[u8], start: usize, target: u8) -> Option<usize> {
594    (start..bytes.len()).find(|&i| bytes[i] == target)
595}
596
597fn parse_end_tag(html: &str, start: usize) -> Option<(String, usize)> {
598    let bytes = html.as_bytes();
599    let len = bytes.len();
600    if start + 2 >= len {
601        return None;
602    }
603    let mut i = start + 2;
604    while i < len && bytes[i].is_ascii_whitespace() {
605        i += 1;
606    }
607    let name_start = i;
608    while i < len && is_tag_name_char(bytes[i]) {
609        i += 1;
610    }
611    if name_start == i {
612        return None;
613    }
614    let tag_name = html[name_start..i].to_lowercase();
615    if let Some(end_pos) = find_char(bytes, i, b'>') {
616        return Some((tag_name, end_pos + 1));
617    }
618    None
619}
620
621fn find_block_content(html: &str, start: usize, tag: &str) -> Option<(usize, usize, usize)> {
622    let lower = html.to_lowercase();
623    let bytes = lower.as_bytes();
624    let len = bytes.len();
625    let target = format!("</{}", tag.to_lowercase());
626    let target_bytes = target.as_bytes();
627    let mut i = start;
628    while i + target_bytes.len() < len {
629        if bytes[i] == b'<' && starts_with(bytes, i, target_bytes) {
630            if let Some(end_pos) = find_char(bytes, i + target_bytes.len(), b'>') {
631                return Some((start, i, end_pos + 1));
632            }
633        }
634        i += 1;
635    }
636    None
637}
638
639#[cfg(test)]
640mod tests {
641    use super::*;
642
643    #[test]
644    fn test_dom_tree_basic() {
645        let html = r#"
646            <html>
647                <body>
648                    <div class="container">
649                        <p id="text">Hello</p>
650                    </div>
651                </body>
652            </html>
653        "#;
654
655        let result = DomTree::parse(html);
656        let tree = result.dom_tree;
657        assert!(!tree.roots.is_empty());
658
659        // Should be able to find nodes
660        let node = tree.find_node_at_position(50);
661        assert!(node.is_some());
662    }
663
664    #[test]
665    fn test_dom_tree_nested_structure() {
666        let html =
667            r#"<div class="outer"><div class="inner"><span id="item">Text</span></div></div>"#;
668
669        let result = DomTree::parse(html);
670        let tree = result.dom_tree;
671
672        // Tree should have nodes
673        assert!(!tree.roots.is_empty());
674
675        // We can verify the parse result has the expected structure
676        assert_eq!(tree.nodes.len(), 3); // div, div, span
677    }
678
679    #[test]
680    fn test_dom_tree_multiple_classes() {
681        let html = r#"<div class="class1 class2 class3">Content</div>"#;
682
683        let result = DomTree::parse(html);
684        assert!(!result.dom_tree.roots.is_empty());
685        // Classes are parsed correctly
686    }
687
688    #[test]
689    fn test_dom_tree_self_closing_tags() {
690        let html = r#"<div><img src="test.jpg" /><br /><input type="text" /></div>"#;
691
692        let result = DomTree::parse(html);
693        let tree = result.dom_tree;
694
695        assert!(!tree.roots.is_empty());
696        // Self-closing tags are handled
697    }
698
699    #[test]
700    fn test_dom_tree_find_node_at_position() {
701        let html = r#"<div class="outer"><p id="para">Text</p></div>"#;
702
703        let result = DomTree::parse(html);
704        let tree = result.dom_tree;
705
706        // Position in div tag
707        let node = tree.find_node_at_position(5);
708        assert!(node.is_some());
709    }
710
711    #[test]
712    fn test_dom_tree_empty_html() {
713        let html = "";
714        let result = DomTree::parse(html);
715        let tree = result.dom_tree;
716        assert!(tree.roots.is_empty());
717    }
718
719    #[test]
720    fn test_dom_tree_malformed_html() {
721        // Missing closing tag
722        let html = r#"<div><p>Text"#;
723        let result = DomTree::parse(html);
724        // Should still parse what it can
725        assert!(!result.dom_tree.roots.is_empty());
726    }
727
728    #[test]
729    fn test_parse_inline_styles() {
730        let html = r#"<div style="color: red; background: blue;"></div>"#;
731
732        let parsed = DomTree::parse(html);
733        assert_eq!(parsed.inline_styles.len(), 1);
734
735        let inline = &parsed.inline_styles[0];
736        assert!(inline.value.contains("color: red"));
737        assert!(inline.value.contains("background: blue"));
738    }
739
740    #[test]
741    fn test_parse_style_blocks() {
742        let html = r#"
743            <html>
744                <head>
745                    <style>
746                        .class { color: red; }
747                    </style>
748                </head>
749                <body>
750                    <style>
751                        #id { background: blue; }
752                    </style>
753                </body>
754            </html>
755        "#;
756
757        let parsed = DomTree::parse(html);
758        assert_eq!(parsed.style_blocks.len(), 2);
759
760        assert!(parsed.style_blocks[0].content.contains("color: red"));
761        assert!(parsed.style_blocks[1].content.contains("background: blue"));
762    }
763
764    #[test]
765    fn test_parse_nested_style_tags() {
766        let html = r#"<style>outer { color: red; }<style>inner</style></style>"#;
767
768        let parsed = DomTree::parse(html);
769        // Should handle nested style tags
770        assert!(!parsed.style_blocks.is_empty());
771    }
772
773    #[test]
774    fn test_dom_tree_comment_handling() {
775        let html = r#"<div><!-- This is a comment --><p>Text</p></div>"#;
776
777        let result = DomTree::parse(html);
778        let tree = result.dom_tree;
779
780        // Comments should be handled properly
781        assert!(!tree.roots.is_empty());
782    }
783
784    #[test]
785    fn test_attributes_with_quotes() {
786        let html = r#"<div class="test" id='myid' data-value=unquoted></div>"#;
787
788        let result = DomTree::parse(html);
789        let tree = result.dom_tree;
790
791        // Should parse attributes correctly
792        assert!(!tree.roots.is_empty());
793    }
794
795    #[test]
796    fn test_dom_tree_classname_alias() {
797        let html = r#"<div className="react-class">Content</div>"#;
798        let result = DomTree::parse(html);
799        let tree = result.dom_tree;
800        assert!(!tree.roots.is_empty());
801        let node = &tree.nodes[tree.roots[0]];
802        assert!(node.classes.contains(&"react-class".to_string()));
803    }
804}