tex_fmt/
regexes.rs

1//! Regexes and matching utilities
2
3use crate::LINE_END;
4use regex::{Regex, RegexSet};
5use std::sync::LazyLock;
6
/// Literal text of the LaTeX `\item` command.
///
/// This is a plain string for substring/prefix checks, not a regex.
pub const ITEM: &str = "\\item";
9
/// Literal opening of a LaTeX `\begin{...}` command.
///
/// Covers the command name up to and including the opening brace.
pub const ENV_BEGIN: &str = "\\begin{";
12
/// Literal opening of a LaTeX `\end{...}` command.
///
/// Covers the command name up to and including the opening brace.
pub const ENV_END: &str = "\\end{";
15
/// File extensions accepted as LaTeX files.
///
/// Each entry includes the leading dot.
pub const EXTENSIONS: [&str; 4] = [".tex", ".bib", ".sty", ".cls"];
/// Literal opening of a LaTeX `\verb|...|` command.
///
/// Only the form that uses `|` as its delimiter is covered by this string.
pub const VERB: &str = "\\verb|";
20
/// Regex source patterns for the "splitting" commands.
///
/// Each entry is an uncompiled pattern; the statics in this module compile
/// them individually or join them into a single alternation.
const SPLITTING: [&str; 6] = [
    // Start of an environment: `\begin{`
    r"\\begin\{",
    // End of an environment: `\end{`
    r"\\end\{",
    // `\item` followed by end of input or a non-letter, so that longer
    // command names which merely start with `\item` are not matched
    r"\\item(?:$|[^a-zA-Z])",
    // `\section{`, `\subsection{`, `\subsubsection{`, optionally starred
    r"\\(?:sub){0,2}section\*?\{",
    // `\chapter{`, optionally starred
    r"\\chapter\*?\{",
    // `\part{`, optionally starred
    r"\\part\*?\{",
];
30
31// A static `String` which is a regex to match any of [`SPLITTING_COMMANDS`].
32static SPLITTING_STRING: LazyLock<String> =
33    LazyLock::new(|| ["(", SPLITTING.join("|").as_str(), ")"].concat());
34
35// Regex to match newlines
36pub static RE_NEWLINES: LazyLock<Regex> = LazyLock::new(|| {
37    Regex::new(&format!(r"{LINE_END}{LINE_END}({LINE_END})+")).unwrap()
38});
39
40// Regex to match trailing new ines
41pub static RE_TRAIL: LazyLock<Regex> =
42    LazyLock::new(|| Regex::new(&format!(r" +{LINE_END}")).unwrap());
43
44// Regex that matches splitting commands
45pub static RE_SPLITTING: LazyLock<RegexSet> =
46    LazyLock::new(|| RegexSet::new(SPLITTING).unwrap());
47
48// Matches splitting commands with non-whitespace characters before it.
49pub static RE_SPLITTING_SHARED_LINE: LazyLock<Regex> = LazyLock::new(|| {
50    Regex::new(
51        [r"(:?\S.*?)", "(:?", SPLITTING_STRING.as_str(), ".*)"]
52            .concat()
53            .as_str(),
54    )
55    .unwrap()
56});
57
58// Matches any splitting command with non-whitespace
59// characters before it, catches the previous text in a group called
60// "prev" and captures the command itself and the remaining text
61// in a group called "env".
62pub static RE_SPLITTING_SHARED_LINE_CAPTURE: LazyLock<Regex> =
63    LazyLock::new(|| {
64        Regex::new(
65            [
66                r"(?P<prev>\S.*?)",
67                "(?P<env>",
68                SPLITTING_STRING.as_str(),
69                ".*)",
70            ]
71            .concat()
72            .as_str(),
73        )
74        .unwrap()
75    });