ast_grep_language/
html.rs

1use super::pre_process_pattern;
2use ast_grep_core::matcher::{Pattern, PatternBuilder, PatternError};
3use ast_grep_core::tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange};
4use ast_grep_core::Language;
5use ast_grep_core::{matcher::KindMatcher, Doc, Node};
6use std::collections::HashMap;
7
8// tree-sitter-html uses locale dependent iswalnum for tagName
9// https://github.com/tree-sitter/tree-sitter-html/blob/b5d9758e22b4d3d25704b72526670759a9e4d195/src/scanner.c#L194
10#[derive(Clone, Copy, Debug)]
11pub struct Html;
12impl Language for Html {
13  fn expando_char(&self) -> char {
14    'z'
15  }
16  fn pre_process_pattern<'q>(&self, query: &'q str) -> std::borrow::Cow<'q, str> {
17    pre_process_pattern(self.expando_char(), query)
18  }
19  fn kind_to_id(&self, kind: &str) -> u16 {
20    crate::parsers::language_html().id_for_node_kind(kind, true)
21  }
22  fn field_to_id(&self, field: &str) -> Option<u16> {
23    crate::parsers::language_html()
24      .field_id_for_name(field)
25      .map(|f| f.get())
26  }
27  fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
28    builder.build(|src| StrDoc::try_new(src, *self))
29  }
30}
31impl LanguageExt for Html {
32  fn get_ts_language(&self) -> TSLanguage {
33    crate::parsers::language_html()
34  }
35  fn injectable_languages(&self) -> Option<&'static [&'static str]> {
36    Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"])
37  }
38  fn extract_injections<L: LanguageExt>(
39    &self,
40    root: Node<StrDoc<L>>,
41  ) -> HashMap<String, Vec<TSRange>> {
42    let lang = root.lang();
43    let mut map = HashMap::new();
44    let matcher = KindMatcher::new("script_element", lang.clone());
45    for script in root.find_all(matcher) {
46      let injected = find_lang(&script).unwrap_or_else(|| "js".into());
47      let content = script.children().find(|c| c.kind() == "raw_text");
48      if let Some(content) = content {
49        map
50          .entry(injected)
51          .or_insert_with(Vec::new)
52          .push(node_to_range(&content));
53      };
54    }
55    let matcher = KindMatcher::new("style_element", lang.clone());
56    for style in root.find_all(matcher) {
57      let injected = find_lang(&style).unwrap_or_else(|| "css".into());
58      let content = style.children().find(|c| c.kind() == "raw_text");
59      if let Some(content) = content {
60        map
61          .entry(injected)
62          .or_insert_with(Vec::new)
63          .push(node_to_range(&content));
64      };
65    }
66    map
67  }
68}
69
70fn find_lang<D: Doc>(node: &Node<D>) -> Option<String> {
71  let html = node.lang();
72  let attr_matcher = KindMatcher::new("attribute", html.clone());
73  let name_matcher = KindMatcher::new("attribute_name", html.clone());
74  let val_matcher = KindMatcher::new("attribute_value", html.clone());
75  node.find_all(attr_matcher).find_map(|attr| {
76    let name = attr.find(&name_matcher)?;
77    if name.text() != "lang" {
78      return None;
79    }
80    let val = attr.find(&val_matcher)?;
81    Some(val.text().to_string())
82  })
83}
84
85fn node_to_range<D: Doc>(node: &Node<D>) -> TSRange {
86  let r = node.range();
87  let start = node.start_pos();
88  let sp = start.byte_point();
89  let sp = tree_sitter::Point::new(sp.0, sp.1);
90  let end = node.end_pos();
91  let ep = end.byte_point();
92  let ep = tree_sitter::Point::new(ep.0, ep.1);
93  TSRange {
94    start_byte: r.start,
95    end_byte: r.end,
96    start_point: sp,
97    end_point: ep,
98  }
99}
100
#[cfg(test)]
mod test {
  use super::*;
  use crate::test::{test_match_lang, test_non_match_lang, test_replace_lang};

  /// Asserts that `query` matches somewhere in `source` when parsed as HTML.
  fn test_match(query: &str, source: &str) {
    test_match_lang(query, source, Html);
  }

  /// Asserts that `query` does not match `source` when parsed as HTML.
  fn test_non_match(query: &str, source: &str) {
    test_non_match_lang(query, source, Html);
  }

  /// Applies `replacer` to matches of `pattern` in `src` and returns the result.
  fn test_replace(src: &str, pattern: &str, replacer: &str) -> String {
    test_replace_lang(src, pattern, replacer, Html)
  }

  /// Parses `src` as HTML and returns the injections extracted from its root.
  fn extract(src: &str) -> HashMap<String, Vec<TSRange>> {
    let doc = Html.ast_grep(src);
    Html.extract_injections(doc.root())
  }

  #[test]
  fn test_html_match() {
    let matching = [
      ("<input>", "<input>"),
      ("<$TAG>", "<input>"),
      ("<$TAG class='foo'>$$$</$TAG>", "<div class='foo'></div>"),
      ("<div>$$$</div>", "<div>123</div>"),
    ];
    for (query, source) in matching {
      test_match(query, source);
    }

    let non_matching = [
      ("<$TAG class='foo'>$$$</$TAG>", "<div></div>"),
      ("<div>$$$</div>", "<div class='foo'>123</div>"),
    ];
    for (query, source) in non_matching {
      test_non_match(query, source);
    }
  }

  #[test]
  fn test_html_replace() {
    let source = r#"<div class='foo'>bar</div>"#;
    let pattern = r#"<$TAG class='foo'>$$$B</$TAG>"#;
    let replacer = r#"<$TAG class='$$$B'>foo</$TAG>"#;
    let rewritten = test_replace(source, pattern, replacer);
    assert_eq!(rewritten, r#"<div class='bar'>foo</div>"#);
  }

  #[test]
  fn test_html_extraction() {
    let injections = extract("<script>a</script><style>.a{}</style>");
    // Both default languages must be present, each with exactly one range.
    for lang in ["css", "js"] {
      assert!(injections.contains_key(lang));
    }
    for lang in ["css", "js"] {
      assert_eq!(injections[lang].len(), 1);
    }
  }

  #[test]
  fn test_explicit_lang() {
    // Covers single-quoted, unquoted and double-quoted `lang` values.
    let source = "<script lang='ts'>a</script><script lang=ts>.a{}</script><style lang=scss></style><style lang=\"scss\"></style>";
    let injections = extract(source);
    assert!(injections.contains_key("ts"));
    assert_eq!(injections["ts"].len(), 2);
    assert_eq!(injections["scss"].len(), 2);
  }
}