rof_rs/object_format/
ignore_str_split.rs

/// The kind of delimiter a [`SplitIgnoreRule`] tracks in order to decide
/// whether splitting is currently suppressed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SplitIgnoreRuleType {
    /// Won't split while within a pair of this character, e.g. single or
    /// double quotation marks.
    PAIR(char),
    /// Won't split while within a nest of characters. Unlike
    /// [`SplitIgnoreRuleType::PAIR`], nests can be stacked, and splitting is
    /// only allowed again once every open nest has been closed.
    NEST(
        // Starts a new nest, e.g. an opening curly bracket.
        char,
        // Ends the innermost nest, e.g. a closing curly bracket.
        char,
    ),
}
9
10#[derive(Clone)]
11pub struct SplitIgnoreRule {
12    split_ignore_rule_type: SplitIgnoreRuleType,
13    within_pair: bool,
14    nest_index: usize,
15    encapsulates_raw_text: bool,
16}
17
18impl SplitIgnoreRule {
19    pub fn new(split_ignore_rule_type: SplitIgnoreRuleType) -> Self {
20        Self {
21            split_ignore_rule_type,
22            within_pair: false,
23            nest_index: 0,
24            encapsulates_raw_text: false,
25        }
26    }
27
28    // Defines whether other split ignore rules should be ignored and whether escape characters can be used to ignore all rule checks, and all split checks
29
30    pub fn set_ecapsulates_raw_text(mut self, encapsulates_raw_text: bool) -> Self {
31        self.encapsulates_raw_text = encapsulates_raw_text;
32
33        self
34    }
35
36    pub fn read_char(&mut self, character: char) {
37        match self.split_ignore_rule_type {
38            SplitIgnoreRuleType::PAIR(pair_character) => {
39                if character == pair_character {
40                    self.within_pair = !self.within_pair;
41                }
42            }
43            SplitIgnoreRuleType::NEST(nest_start, nest_end) => {
44                if character == nest_start {
45                    self.nest_index += 1;
46                }
47
48                if character == nest_end {
49                    if self.nest_index > 0 {
50                        self.nest_index -= 1;
51                    }
52                }
53            }
54        }
55    }
56
57    pub fn in_raw_text(&self) -> bool {
58        self.should_ignore() && self.encapsulates_raw_text
59    }
60
61    pub fn should_ignore(&self) -> bool {
62        self.within_pair || self.nest_index > 0
63    }
64}
65
66// Used for splitting struct properties at semicolons, but not splitting at the semicolons when within a pair of single or double quotes, escape sequences are also checked so that single and double quotes can be written in strings without confusing the compiler
67
68fn ignoring_compliant_split_str_max_splits(
69    input_str: &str,
70    split_character: char,
71    retain_backslashes: bool,
72    ignore_rules: Vec<SplitIgnoreRule>,
73    max_splits: Option<usize>,
74) -> Vec<String> {
75    let mut str_fragments: Vec<String> = Vec::new();
76
77    let mut built_str_fragment: String = String::new();
78
79    let mut ignore_rules = ignore_rules.clone();
80
81    let mut char_iter = input_str.chars();
82
83    let mut splits: usize = 0;
84
85    loop {
86        match char_iter.next() {
87            Some(str_char) => {
88                if ignore_rules
89                    .iter()
90                    .any(|ignore_rule| ignore_rule.in_raw_text())
91                {
92                    // Only check for rules activating the raw text state
93
94                    ignore_rules
95                        .iter_mut()
96                        .filter(|ignore_rule| ignore_rule.in_raw_text())
97                        .for_each(|ignore_rule| ignore_rule.read_char(str_char));
98                } else {
99                    // Check for any rule
100
101                    ignore_rules
102                        .iter_mut()
103                        .for_each(|ignore_rule| ignore_rule.read_char(str_char));
104                }
105
106                if str_char == '\\' {
107                    match char_iter.next() {
108                        Some(escape_char) => {
109                            if retain_backslashes {
110                                built_str_fragment.push('\\');
111                            }
112
113                            built_str_fragment.push(escape_char);
114                        }
115                        None => (),
116                    }
117                } else if str_char == split_character
118                    && !ignore_rules
119                        .iter()
120                        .any(|ignore_rule| ignore_rule.should_ignore())
121                    && (max_splits.is_none() || splits < max_splits.unwrap())
122                {
123                    str_fragments.push(built_str_fragment.clone());
124
125                    splits += 1;
126
127                    built_str_fragment.clear();
128                } else {
129                    built_str_fragment.push(str_char);
130                }
131            }
132            None => break,
133        }
134    }
135
136    if !built_str_fragment.is_empty() {
137        str_fragments.push(built_str_fragment);
138    }
139
140    str_fragments
141}
142
143pub fn ignoring_compliant_split_str<'a>(
144    input_str: &str,
145    split_character: char,
146    retain_backslashes: bool,
147    ignore_rules: Vec<SplitIgnoreRule>,
148) -> Vec<String> {
149    ignoring_compliant_split_str_max_splits(
150        input_str,
151        split_character,
152        retain_backslashes,
153        ignore_rules,
154        None,
155    )
156}
157
158pub fn ignoring_compliant_split_once(
159    input_str: &str,
160    split_character: char,
161    retain_backslashes: bool,
162    ignore_rules: Vec<SplitIgnoreRule>,
163) -> Option<(String, String)> {
164    let split_data = ignoring_compliant_split_str_max_splits(
165        input_str,
166        split_character,
167        retain_backslashes,
168        ignore_rules,
169        Some(1),
170    );
171
172    match split_data.len() {
173        2 => Some((
174            split_data.get(0).unwrap().clone(),
175            split_data.get(1).unwrap().clone(),
176        )),
177        _ => None,
178    }
179}