autocorrect/code/
code.rs

1// autocorrect: false
2use super::*;
3use crate::config::toggle;
4pub use crate::result::*;
5use crate::rule::CJK_RE;
6use crate::Config;
7use pest::error::Error;
8use pest::iterators::{Pair, Pairs};
9use pest::RuleType;
10use std::result::Result;
11
12trait RuleTypeToString {
13    fn to_string(&self) -> String;
14}
15
16impl<R: RuleType> RuleTypeToString for R {
17    fn to_string(&self) -> String {
18        format!("{self:?}")
19    }
20}
21
22pub fn format_pairs<R: RuleType, O: Results>(out: O, pairs: Result<Pairs<R>, Error<R>>) -> O {
23    // Limit parse stack max depth for avoiding some complex parser will hangs indefinitely.
24    pest::set_call_limit(Some(10_000_000usize.try_into().unwrap()));
25
26    let mut out = out;
27
28    match pairs {
29        Ok(pairs) => {
30            for pair in pairs {
31                format_pair(&mut out, pair);
32            }
33        }
34        Err(_err) => {
35            out.error(&_err.to_string());
36        }
37    }
38
39    out
40}
41
42fn format_pair<R: RuleType, O: Results>(results: &mut O, pair: Pair<R>) {
43    let rule = pair.as_rule();
44    let rule_name = rule.to_string();
45    let rule_name = rule_name.as_str();
46
47    // println!("rule: {}, {}", rule_name, item.as_str());
48    match rule_name {
49        "string" | "link_string" | "mark_string" | "text" | "inner_text" | "comment"
50        | "COMMENT" => {
51            format_or_lint(results, rule_name, pair);
52        }
53        "inline_style" | "inline_javascript" | "codeblock" => {
54            format_or_lint_for_inline_scripts(results, pair, rule_name);
55        }
56        _ => {
57            let mut has_child = false;
58            let pair_str = pair.as_str();
59            let sub_pairs = pair.into_inner();
60
61            // Special hotfix for Markdown block / paragraph / blockquote
62            // If they has CJK chars, disable `halfwidth-punctuation` rule temporary.
63            let mut last_toggle = None;
64            if rule_name == "block" && CJK_RE.is_match(pair_str) {
65                last_toggle = Some(results.get_toggle().clone());
66                results.toggle_merge_for_codeblock();
67            }
68
69            for child in sub_pairs {
70                format_pair(results, child);
71                has_child = true;
72            }
73
74            // Restore toggle if last_toggle is some
75            if let Some(t) = &last_toggle {
76                results.toggle(t);
77            }
78
79            if !has_child {
80                results.ignore(pair_str);
81            }
82        }
83    };
84}
85
86/// Format or Lint a matched item
87pub fn format_or_lint<R: RuleType, O: Results>(results: &mut O, rule_name: &str, pair: Pair<R>) {
88    let part = pair.as_str();
89    let (line, col) = pair.line_col();
90
91    // Check AutoCorrect enable/disable toggle marker
92    // If disable results.is_enabled() will be false
93    if rule_name == "comment" || rule_name == "COMMENT" {
94        results.toggle(&toggle::parse(part));
95    }
96
97    let disabled_rules = results.get_toggle().disable_rules();
98    if results.is_lint() {
99        // Skip lint if AutoCorrect disabled
100        if !results.is_enabled() {
101            return;
102        }
103
104        let lines = part.split('\n');
105
106        // sub line in a part
107        let mut sub_line = 0;
108        for line_str in lines {
109            // format trimmed string
110            let line_result =
111                crate::rule::format_or_lint_with_disable_rules(line_str, true, &disabled_rules);
112
113            // skip, when no difference
114            if line_result.severity.is_pass() {
115                sub_line += 1;
116                continue;
117            }
118
119            // trim start whitespace
120            let mut trimmed = line_str.trim_start();
121            // number of start whitespace in this line
122            let leading_spaces = line_str.len() - trimmed.len();
123            // trim end whitespace
124            trimmed = trimmed.trim_end();
125            // println!("{}||{},{}", line_result.out, trimmed, new_line.eq(trimmed));
126
127            let current_line = line + sub_line;
128            let current_col = if sub_line > 0 {
129                // col will equal numner of removed leading whitespace
130                leading_spaces + 1
131            } else {
132                col
133            };
134
135            // Add error lint result, if new_line has get changed result
136            results.push(LineResult {
137                line: current_line,
138                col: current_col,
139                old: String::from(trimmed),
140                new: line_result.out.trim().to_string(),
141                severity: line_result.severity,
142            });
143
144            sub_line += 1;
145        }
146    } else {
147        let mut new_part = String::from(part);
148
149        // Skip format if AutoCorrect disabled
150        if results.is_enabled() {
151            let lines = part.split('\n');
152
153            new_part = lines
154                .into_iter()
155                .map(|l| {
156                    crate::rule::format_or_lint_with_disable_rules(l, false, &disabled_rules).out
157                })
158                .collect::<Vec<_>>()
159                .join("\n");
160        }
161
162        results.push(LineResult {
163            line,
164            col,
165            old: String::from(part),
166            new: new_part,
167            severity: Severity::Pass,
168        });
169    }
170}
171
172/// Format / Lint for the inline scripts.
173///
174/// For example, The script / style in HTML or Codeblock in Markdown.
175fn format_or_lint_for_inline_scripts<R: RuleType, O: Results>(
176    results: &mut O,
177    pair: Pair<R>,
178    rule_name: &str,
179) {
180    let part = pair.as_str();
181    let (base_line, _) = pair.line_col();
182
183    let is_enable_context =
184        rule_name != "codeblock" || Config::current().is_enabled_context("codeblock");
185
186    if results.is_lint() {
187        // Skip lint if AutoCorrect disabled
188        if !results.is_enabled() {
189            return;
190        }
191
192        if !is_enable_context {
193            return;
194        }
195
196        let sub_result = match rule_name {
197            "inline_style" => Some(lint_for(part, "css")),
198            "inline_javascript" => Some(lint_for(part, "js")),
199            "codeblock" => {
200                let codeblock = Codeblock::from_pair(pair);
201                Some(lint_for(&codeblock.code, &codeblock.lang))
202            }
203            _ => None,
204        };
205
206        if let Some(result) = sub_result {
207            if result.has_error() {
208                results.error(&result.error);
209            }
210
211            for mut line in result.lines {
212                // Inline script's lines need add base_line - 1 offset.
213                line.line += base_line - 1;
214                results.push(line);
215            }
216        }
217    } else {
218        let mut new_part = String::from(part);
219
220        // Skip format if AutoCorrect disabled
221        if results.is_enabled() && is_enable_context {
222            let sub_result = match rule_name {
223                "inline_style" => Some(format_for(part, "css")),
224                "inline_javascript" => Some(format_for(part, "js")),
225                "codeblock" => {
226                    // WARNING: nested codeblock, when call format_for again.
227                    // Because codeblock.data has wrap chars, this make overflowed its stack.
228                    let mut codeblock = Codeblock::from_pair(pair);
229
230                    let mut result = format_for(&codeblock.code, &codeblock.lang);
231                    codeblock.update_data(&result.out);
232                    result.out = codeblock.data;
233                    Some(result)
234                }
235                _ => None,
236            };
237
238            if let Some(result) = sub_result {
239                if result.has_error() {
240                    results.error(&result.error);
241                }
242
243                new_part = result.out;
244            }
245        }
246
247        results.push(LineResult {
248            line: 1,
249            col: 1,
250            old: String::from(part),
251            new: new_part,
252            severity: Severity::Pass,
253        });
254    }
255}
256
257struct Codeblock {
258    pub lang: String,
259    // All string of codeblock
260    pub data: String,
261    // Code string of codeblock
262    pub code: String,
263}
264
265impl Codeblock {
266    // Update codeblock data replace code as new code.
267    pub fn update_data(&mut self, new_code: &str) {
268        self.data = self.data.replace(&self.code, new_code);
269        self.code = new_code.to_string();
270    }
271
272    pub fn from_pair<R: RuleType>(item: Pair<R>) -> Codeblock {
273        let mut codeblock = Codeblock {
274            lang: String::new(),
275            data: String::new(),
276            code: String::new(),
277        };
278
279        codeblock.data = item.as_str().to_string();
280
281        for child in item.into_inner() {
282            match child.as_rule().to_string().as_str() {
283                "codeblock_lang" => {
284                    codeblock.lang = child.as_str().to_string();
285                }
286                "codeblock_code" => {
287                    codeblock.code = child.as_str().to_string();
288                }
289                _ => {}
290            }
291        }
292
293        codeblock
294    }
295}
296
// Unit tests for formatting / linting by file type, for `Codeblock` and for
// the enable/disable toggle comments.
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;
    use pretty_assertions::assert_eq;

    // `format_for` inserts a space between "Hello" and the CJK text for the
    // `rust` and `js` types, and returns the input unchanged for `ruby` and
    // for an unknown type.
    // NOTE(review): the `ruby` case is presumably untouched because `//` does
    // not start a Ruby comment — confirm against the Ruby grammar.
    #[test]
    fn test_format_for() {
        let mut raw = "// Hello你好";
        let mut result = format_for(raw, "rust");
        assert_eq!(result.out, "// Hello 你好");

        result = format_for(raw, "js");
        assert_eq!(result.out, "// Hello 你好");

        result = format_for(raw, "ruby");
        assert_eq!(result.out, "// Hello你好");

        raw = "// Hello你好";
        result = format_for(raw, "not-exist-type");
        assert_eq!(result.out, raw);
    }

    // `lint_for` reports one line for `rust` and `js`, and none for `ruby` or
    // for an unknown type, on the same input as above.
    #[test]
    fn test_lint_for() {
        let mut raw = "// Hello你好";
        let mut result = lint_for(raw, "rust");
        assert_eq!(result.lines.len(), 1);

        result = lint_for(raw, "js");
        assert_eq!(result.lines.len(), 1);

        result = lint_for(raw, "ruby");
        assert_eq!(result.lines.len(), 0);

        raw = "// Hello你好";
        result = lint_for(raw, "not-exist-type");
        assert_eq!(result.lines.len(), 0);
    }

    // `Codeblock::update_data` replaces the code inside `data` and updates
    // `code`, leaving the surrounding fence and language tag in place.
    #[test]
    fn test_codeblock() {
        let mut codeblock = Codeblock {
            data: "```rb\nhello\n```".to_string(),
            code: "\nhello\n".to_string(),
            lang: "rb".to_string(),
        };

        codeblock.update_data("\nhello world\n");
        assert_eq!(codeblock.data, "```rb\nhello world\n```".to_string());
        assert_eq!(codeblock.code, "\nhello world\n".to_string());
    }

    // Lint results coming from code inside Markdown codeblocks must carry line
    // numbers relative to the whole Markdown document (lines 4, 5 and 14
    // below), alongside the result for the plain Markdown heading (line 8).
    #[test]
    fn test_inline_script_line_number() {
        let raw = indoc! { r###"
        Hello world

        ```ts
        // hello世界
        const a = "string字符串";
        ```

        ### 外部test

        Second line

        ```rb
        class User
            # 查找user
            def find
            end
        end
        ```
        "###};

        let expected = indoc! { r###"
        {
          "filepath": "md",
          "lines": [
              {
              "l": 4,
              "c": 1,
              "new": "// hello 世界",
              "old": "// hello世界",
              "severity": 1
              },
              {
              "l": 5,
              "c": 11,
              "new": "\"string 字符串\"",
              "old": "\"string字符串\"",
              "severity": 1
              },
              {
              "l": 8,
              "c": 5,
              "new": "外部 test",
              "old": "外部test",
              "severity": 1
              },
              {
              "l": 14,
              "c": 5,
              "new": "# 查找 user",
              "old": "# 查找user",
              "severity": 1
              }
          ],
          "error": ""
          }
        "###};

        let result = lint_for(raw, "md");
        assert_json_eq!(expected, result.to_json_pretty());
    }

    // Toggle comments: `autocorrect-disable` turns everything off until
    // `autocorrect-enable`; `autocorrect-disable <rules>` switches off only the
    // listed rules for the lines that follow. The final string literal comes
    // after both rules were disabled and stays unchanged.
    #[test]
    fn test_disable_rules_all() {
        let raw = r#"// autocorrect-disable
        // hello世界
        // autocorrect-enable
        // hello世界
        // autocorrect-disable space-word
        // hello世界.
        // autocorrect-disable fullwidth
        // hello世界.
        // autocorrect-disable space-word,fullwidth
        // hello世界.
        const a = "hello世界."
        “"#;

        let expected = r#"// autocorrect-disable
        // hello世界
        // autocorrect-enable
        // hello 世界
        // autocorrect-disable space-word
        // hello世界。
        // autocorrect-disable fullwidth
        // hello 世界.
        // autocorrect-disable space-word,fullwidth
        // hello世界.
        const a = "hello世界."
        “"#;

        assert_eq!(expected, format_for(raw, "js").out);
        // Only the three lines that actually change are reported by lint.
        let result = lint_for(raw, "js");
        assert_eq!(result.lines.len(), 3);
        assert_eq!(result.lines[0].new, "// hello 世界");
        assert_eq!(result.lines[1].new, "// hello世界。");
        assert_eq!(result.lines[2].new, "// hello 世界.");
    }
}