ast_grep_language/
html.rs1use super::pre_process_pattern;
2use ast_grep_core::matcher::{Pattern, PatternBuilder, PatternError};
3use ast_grep_core::tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange};
4use ast_grep_core::Language;
5use ast_grep_core::{matcher::KindMatcher, Doc, Node};
6
/// Marker type that selects HTML as the language to parse and match.
///
/// This is a field-less unit struct; its behavior comes from the
/// `Language` and `LanguageExt` implementations in this file.
#[derive(Debug, Clone, Copy)]
pub struct Html;
11impl Language for Html {
12 fn expando_char(&self) -> char {
13 'z'
14 }
15 fn pre_process_pattern<'q>(&self, query: &'q str) -> std::borrow::Cow<'q, str> {
16 pre_process_pattern(self.expando_char(), query)
17 }
18 fn kind_to_id(&self, kind: &str) -> u16 {
19 crate::parsers::language_html().id_for_node_kind(kind, true)
20 }
21 fn field_to_id(&self, field: &str) -> Option<u16> {
22 crate::parsers::language_html()
23 .field_id_for_name(field)
24 .map(|f| f.get())
25 }
26 fn build_pattern(&self, builder: &PatternBuilder) -> Result<Pattern, PatternError> {
27 builder.build(|src| StrDoc::try_new(src, *self))
28 }
29}
30impl LanguageExt for Html {
31 fn get_ts_language(&self) -> TSLanguage {
32 crate::parsers::language_html()
33 }
34 fn injectable_languages(&self) -> Option<&'static [&'static str]> {
35 Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"])
36 }
37 fn extract_injections<L: LanguageExt>(
38 &self,
39 root: Node<StrDoc<L>>,
40 ) -> Vec<(String, Vec<TSRange>)> {
41 let lang = root.lang();
42 let mut ret = Vec::new();
43 let matcher = KindMatcher::new("script_element", lang.clone());
44 for script in root.find_all(matcher) {
45 let injected = find_lang(&script).unwrap_or_else(|| "js".into());
46 let content = script.children().find(|c| c.kind() == "raw_text");
47 if let Some(content) = content {
48 ret.push((injected, vec![node_to_range(&content)]));
49 };
50 }
51 let matcher = KindMatcher::new("style_element", lang.clone());
52 for style in root.find_all(matcher) {
53 let injected = find_lang(&style).unwrap_or_else(|| "css".into());
54 let content = style.children().find(|c| c.kind() == "raw_text");
55 if let Some(content) = content {
56 ret.push((injected, vec![node_to_range(&content)]));
57 };
58 }
59 ret
60 }
61}
62
63fn find_lang<D: Doc>(node: &Node<D>) -> Option<String> {
64 let html = node.lang();
65 let attr_matcher = KindMatcher::new("attribute", html.clone());
66 let name_matcher = KindMatcher::new("attribute_name", html.clone());
67 let val_matcher = KindMatcher::new("attribute_value", html.clone());
68 node.find_all(attr_matcher).find_map(|attr| {
69 let name = attr.find(&name_matcher)?;
70 if name.text() != "lang" {
71 return None;
72 }
73 let val = attr.find(&val_matcher)?;
74 Some(val.text().to_string())
75 })
76}
77
78fn node_to_range<D: Doc>(node: &Node<D>) -> TSRange {
79 let r = node.range();
80 let start = node.start_pos();
81 let sp = start.byte_point();
82 let sp = tree_sitter::Point::new(sp.0, sp.1);
83 let end = node.end_pos();
84 let ep = end.byte_point();
85 let ep = tree_sitter::Point::new(ep.0, ep.1);
86 TSRange {
87 start_byte: r.start,
88 end_byte: r.end,
89 start_point: sp,
90 end_point: ep,
91 }
92}
93
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::{test_match_lang, test_non_match_lang, test_replace_lang};

    /// Asserts that `query` matches somewhere in `source` when parsed as HTML.
    fn test_match(query: &str, source: &str) {
        test_match_lang(query, source, Html);
    }

    /// Asserts that `query` does not match `source` when parsed as HTML.
    fn test_non_match(query: &str, source: &str) {
        test_non_match_lang(query, source, Html);
    }

    /// Applies `replacer` to matches of `pattern` in `src` and returns the result.
    fn test_replace(src: &str, pattern: &str, replacer: &str) -> String {
        test_replace_lang(src, pattern, replacer, Html)
    }

    /// Parses `src` as HTML and returns the injections extracted from its root.
    fn extract(src: &str) -> Vec<(String, Vec<TSRange>)> {
        let root = Html.ast_grep(src);
        Html.extract_injections(root.root())
    }

    #[test]
    fn test_html_match() {
        // (query, source) pairs that must match.
        let matching = [
            ("<input>", "<input>"),
            ("<$TAG>", "<input>"),
            ("<$TAG class='foo'>$$$</$TAG>", "<div class='foo'></div>"),
            ("<div>$$$</div>", "<div>123</div>"),
        ];
        for &(query, source) in &matching {
            test_match(query, source);
        }

        // (query, source) pairs that must not match.
        let non_matching = [
            ("<$TAG class='foo'>$$$</$TAG>", "<div></div>"),
            ("<div>$$$</div>", "<div class='foo'>123</div>"),
        ];
        for &(query, source) in &non_matching {
            test_non_match(query, source);
        }
    }

    #[test]
    fn test_html_replace() {
        let source = r#"<div class='foo'>bar</div>"#;
        let pattern = r#"<$TAG class='foo'>$$$B</$TAG>"#;
        let replacer = r#"<$TAG class='$$$B'>foo</$TAG>"#;
        let replaced = test_replace(source, pattern, replacer);
        assert_eq!(replaced, r#"<div class='bar'>foo</div>"#);
    }

    #[test]
    fn test_html_extraction() {
        let entries = extract("<script>a</script><style>.a{}</style>");
        assert_eq!(entries.len(), 2);

        let (script_lang, script_ranges) = &entries[0];
        assert_eq!(script_lang, "js");
        assert_eq!(script_ranges.len(), 1);

        let (style_lang, style_ranges) = &entries[1];
        assert_eq!(style_lang, "css");
        assert_eq!(style_ranges.len(), 1);
    }

    #[test]
    fn test_explicit_lang() {
        let entries = extract("<script lang='ts'>a</script><script lang=ts>.a{}</script><style lang=scss></style><style lang=\"scss\"></style>");
        // Checks both the number of entries and their order in one comparison.
        let langs: Vec<&str> = entries.iter().map(|(lang, _)| lang.as_str()).collect();
        assert_eq!(langs, ["ts", "ts", "scss", "scss"]);
        assert!(entries.iter().all(|(_, ranges)| ranges.len() == 1));
    }
}