Skip to main content

ast_grep_language/
html.rs

1use super::pre_process_pattern;
2use ast_grep_core::matcher::{Pattern, PatternBuilder, PatternError};
3use ast_grep_core::tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange};
4use ast_grep_core::Language;
5use ast_grep_core::{matcher::KindMatcher, Doc, Node};
6
// tree-sitter-html uses locale dependent iswalnum for tagName
// https://github.com/tree-sitter/tree-sitter-html/blob/b5d9758e22b4d3d25704b72526670759a9e4d195/src/scanner.c#L194
/// Zero-sized marker type representing the HTML language.
///
/// It carries no data; all behavior comes from the trait implementations
/// provided for it in this file.
#[derive(Debug, Clone, Copy)]
pub struct Html;
11impl Language for Html {
12  fn expando_char(&self) -> char {
13    'z'
14  }
15  fn pre_process_pattern<'q>(&self, query: &'q str) -> std::borrow::Cow<'q, str> {
16    pre_process_pattern(self.expando_char(), query)
17  }
18  fn kind_to_id(&self, kind: &str) -> u16 {
19    crate::parsers::language_html().id_for_node_kind(kind, true)
20  }
21  fn field_to_id(&self, field: &str) -> Option<u16> {
22    crate::parsers::language_html()
23      .field_id_for_name(field)
24      .map(|f| f.get())
25  }
26  fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
27    builder.build(|src| StrDoc::try_new(src, *self))
28  }
29}
30impl LanguageExt for Html {
31  fn get_ts_language(&self) -> TSLanguage {
32    crate::parsers::language_html()
33  }
34  fn injectable_languages(&self) -> Option<&'static [&'static str]> {
35    Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"])
36  }
37  fn extract_injections<L: LanguageExt>(
38    &self,
39    root: Node<StrDoc<L>>,
40  ) -> Vec<(String, Vec<TSRange>)> {
41    let lang = root.lang();
42    let mut ret = Vec::new();
43    let matcher = KindMatcher::new("script_element", lang.clone());
44    for script in root.find_all(matcher) {
45      let injected = find_lang(&script).unwrap_or_else(|| "js".into());
46      let content = script.children().find(|c| c.kind() == "raw_text");
47      if let Some(content) = content {
48        ret.push((injected, vec![node_to_range(&content)]));
49      };
50    }
51    let matcher = KindMatcher::new("style_element", lang.clone());
52    for style in root.find_all(matcher) {
53      let injected = find_lang(&style).unwrap_or_else(|| "css".into());
54      let content = style.children().find(|c| c.kind() == "raw_text");
55      if let Some(content) = content {
56        ret.push((injected, vec![node_to_range(&content)]));
57      };
58    }
59    ret
60  }
61}
62
63fn find_lang<D: Doc>(node: &Node<D>) -> Option<String> {
64  let html = node.lang();
65  let attr_matcher = KindMatcher::new("attribute", html.clone());
66  let name_matcher = KindMatcher::new("attribute_name", html.clone());
67  let val_matcher = KindMatcher::new("attribute_value", html.clone());
68  node.find_all(attr_matcher).find_map(|attr| {
69    let name = attr.find(&name_matcher)?;
70    if name.text() != "lang" {
71      return None;
72    }
73    let val = attr.find(&val_matcher)?;
74    Some(val.text().to_string())
75  })
76}
77
78fn node_to_range<D: Doc>(node: &Node<D>) -> TSRange {
79  let r = node.range();
80  let start = node.start_pos();
81  let sp = start.byte_point();
82  let sp = tree_sitter::Point::new(sp.0, sp.1);
83  let end = node.end_pos();
84  let ep = end.byte_point();
85  let ep = tree_sitter::Point::new(ep.0, ep.1);
86  TSRange {
87    start_byte: r.start,
88    end_byte: r.end,
89    start_point: sp,
90    end_point: ep,
91  }
92}
93
#[cfg(test)]
mod test {
  use super::*;
  use crate::test::{test_match_lang, test_non_match_lang, test_replace_lang};

  /// Asserts that `query` matches somewhere in `source` when parsed as HTML.
  fn test_match(query: &str, source: &str) {
    test_match_lang(query, source, Html);
  }

  /// Asserts that `query` does not match `source` when parsed as HTML.
  fn test_non_match(query: &str, source: &str) {
    test_non_match_lang(query, source, Html);
  }

  #[test]
  fn test_html_match() {
    let matching = [
      ("<input>", "<input>"),
      ("<$TAG>", "<input>"),
      ("<$TAG class='foo'>$$$</$TAG>", "<div class='foo'></div>"),
      ("<div>$$$</div>", "<div>123</div>"),
    ];
    for (query, source) in matching {
      test_match(query, source);
    }

    let non_matching = [
      ("<$TAG class='foo'>$$$</$TAG>", "<div></div>"),
      ("<div>$$$</div>", "<div class='foo'>123</div>"),
    ];
    for (query, source) in non_matching {
      test_non_match(query, source);
    }
  }

  /// Applies `replacer` to the matches of `pattern` in `src` and returns the
  /// rewritten HTML.
  fn test_replace(src: &str, pattern: &str, replacer: &str) -> String {
    test_replace_lang(src, pattern, replacer, Html)
  }

  #[test]
  fn test_html_replace() {
    let rewritten = test_replace(
      r#"<div class='foo'>bar</div>"#,
      r#"<$TAG class='foo'>$$$B</$TAG>"#,
      r#"<$TAG class='$$$B'>foo</$TAG>"#,
    );
    assert_eq!(rewritten, r#"<div class='bar'>foo</div>"#);
  }

  /// Parses `src` as HTML and returns its extracted injections.
  fn extract(src: &str) -> Vec<(String, Vec<TSRange>)> {
    let grep = Html.ast_grep(src);
    Html.extract_injections(grep.root())
  }

  #[test]
  fn test_html_extraction() {
    let entries = extract("<script>a</script><style>.a{}</style>");
    // (language, number of ranges) per entry, in extraction order
    let summary: Vec<(&str, usize)> = entries
      .iter()
      .map(|(lang, ranges)| (lang.as_str(), ranges.len()))
      .collect();
    assert_eq!(summary, [("js", 1), ("css", 1)]);
  }

  #[test]
  fn test_explicit_lang() {
    let entries = extract("<script lang='ts'>a</script><script lang=ts>.a{}</script><style lang=scss></style><style lang=\"scss\"></style>");
    let langs: Vec<&str> = entries.iter().map(|(lang, _)| lang.as_str()).collect();
    assert_eq!(langs, ["ts", "ts", "scss", "scss"]);
    // each entry has exactly one range (independent parse tree)
    for (_, ranges) in &entries {
      assert_eq!(ranges.len(), 1);
    }
  }
}