Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// An owned string token (in this file's tests: one word of a command line).
///
/// `Token` dereferences to `str`, so string-slice methods such as `starts_with` or `len`
/// can be called on it directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token(String);

impl Deref for Token {
    type Target = str;

    /// Borrows the wrapped string as a `str` slice.
    fn deref(&self) -> &Self::Target {
        self.0.as_str()
    }
}
12
13#[derive(Copy, Clone)]
14pub struct WordSet(&'static [&'static str]);
15
16impl WordSet {
17    pub const fn new(words: &'static [&'static str]) -> Self {
18        let mut i = 1;
19        while i < words.len() {
20            assert!(
21                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
22                "WordSet: entries must be sorted, no duplicates"
23            );
24            i += 1;
25        }
26        Self(words)
27    }
28
29    pub const fn flags(words: &'static [&'static str]) -> Self {
30        let mut i = 0;
31        while i < words.len() {
32            let b = words[i].as_bytes();
33            assert!(b.len() >= 2, "WordSet::flags: flag too short (need at least 2 chars)");
34            assert!(b[0] == b'-', "WordSet::flags: flag must start with '-'");
35            if b[1] == b'-' {
36                assert!(b.len() >= 3, "WordSet::flags: long flag needs at least 3 chars (e.g. --x)");
37            }
38            i += 1;
39        }
40        Self::new(words)
41    }
42
43    pub fn contains(&self, s: &str) -> bool {
44        self.0.binary_search(&s).is_ok()
45    }
46
47    pub fn contains_short(&self, b: u8) -> bool {
48        let target = [b'-', b];
49        std::str::from_utf8(&target).is_ok_and(|s| self.0.binary_search(&s).is_ok())
50    }
51
52    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
53        self.0.iter().copied()
54    }
55}
56
/// Strict lexicographic "less than" on byte slices, usable in const contexts.
///
/// Returns `true` only when `a` sorts strictly before `b`: the first differing byte
/// decides, and when one slice is a prefix of the other the shorter one is smaller.
/// Equal slices yield `false`.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut pos = 0;
    while pos < a.len() && pos < b.len() {
        if a[pos] != b[pos] {
            return a[pos] < b[pos];
        }
        pos += 1;
    }
    // Every shared position matched, so only the lengths can break the tie.
    a.len() < b.len()
}
71
72impl Token {
73    pub(crate) fn from_raw(s: String) -> Self {
74        Self(s)
75    }
76
77    #[cfg(test)]
78    pub(crate) fn from_test(s: &str) -> Self {
79        Self(s.to_string())
80    }
81
82    pub fn as_str(&self) -> &str {
83        &self.0
84    }
85
86    pub fn command_name(&self) -> &str {
87        let s = self.as_str();
88        if s.starts_with('@') {
89            return s;
90        }
91        s.rsplit('/').next().unwrap_or(s)
92    }
93
94    pub fn is_one_of(&self, options: &[&str]) -> bool {
95        options.contains(&self.as_str())
96    }
97
98    pub fn split_value(&self, sep: &str) -> Option<&str> {
99        self.as_str().split_once(sep).map(|(_, v)| v)
100    }
101
102    pub fn content_outside_double_quotes(&self) -> String {
103        let bytes = self.as_str().as_bytes();
104        let mut result = Vec::with_capacity(bytes.len());
105        let mut i = 0;
106        while i < bytes.len() {
107            if bytes[i] == b'"' {
108                result.push(b' ');
109                i += 1;
110                while i < bytes.len() {
111                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
112                        i += 2;
113                        continue;
114                    }
115                    if bytes[i] == b'"' {
116                        i += 1;
117                        break;
118                    }
119                    i += 1;
120                }
121            } else {
122                result.push(bytes[i]);
123                i += 1;
124            }
125        }
126        String::from_utf8(result).unwrap_or_default()
127    }
128}
129
130impl PartialEq<str> for Token {
131    fn eq(&self, other: &str) -> bool {
132        self.0 == other
133    }
134}
135
136impl PartialEq<&str> for Token {
137    fn eq(&self, other: &&str) -> bool {
138        self.0 == *other
139    }
140}
141
142impl PartialEq<Token> for str {
143    fn eq(&self, other: &Token) -> bool {
144        self == other.as_str()
145    }
146}
147
148impl PartialEq<Token> for &str {
149    fn eq(&self, other: &Token) -> bool {
150        *self == other.as_str()
151    }
152}
153
154impl std::fmt::Display for Token {
155    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156        f.write_str(&self.0)
157    }
158}
159
160pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
161    for token in &tokens[1..] {
162        if token == "--" {
163            return false;
164        }
165        if let Some(long_flag) = long
166            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
167        {
168            return true;
169        }
170        if let Some(short_flag) = short {
171            let short_char = short_flag.trim_start_matches('-');
172            if token.starts_with('-')
173                && !token.starts_with("--")
174                && token[1..].contains(short_char)
175            {
176                return true;
177            }
178        }
179    }
180    false
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Token` from a string slice.
    fn tok(text: &str) -> Token {
        Token(String::from(text))
    }

    /// Builds a token list from a slice of words.
    fn toks(words: &[&str]) -> Vec<Token> {
        words.iter().copied().map(tok).collect()
    }

    // --- has_flag ---------------------------------------------------------------------

    #[test]
    fn has_flag_short() {
        let argv = toks(&["sed", "-i", "s/foo/bar/"]);
        let found = has_flag(&argv, Some("-i"), Some("--in-place"));
        assert!(found);
    }

    #[test]
    fn has_flag_long_with_eq() {
        let argv = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        let found = has_flag(&argv, Some("-i"), Some("--in-place"));
        assert!(found);
    }

    #[test]
    fn has_flag_combined_short() {
        let argv = toks(&["sed", "-ni", "s/foo/bar/p"]);
        let found = has_flag(&argv, Some("-i"), Some("--in-place"));
        assert!(found);
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let argv = toks(&["cmd", "--", "-i"]);
        let found = has_flag(&argv, Some("-i"), Some("--in-place"));
        assert!(!found);
    }

    #[test]
    fn has_flag_long_only() {
        let argv = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
        let found = has_flag(&argv, None, Some("--compress-program"));
        assert!(found);
    }

    #[test]
    fn has_flag_long_only_eq() {
        let argv = toks(&["sort", "--compress-program=gzip", "file.txt"]);
        let found = has_flag(&argv, None, Some("--compress-program"));
        assert!(found);
    }

    #[test]
    fn has_flag_long_only_absent() {
        let argv = toks(&["sort", "-r", "file.txt"]);
        let found = has_flag(&argv, None, Some("--compress-program"));
        assert!(!found);
    }

    // --- Token::command_name ----------------------------------------------------------

    #[test]
    fn command_name_simple() {
        let token = tok("ls");
        assert_eq!(token.command_name(), "ls");
    }

    #[test]
    fn command_name_with_path() {
        let token = tok("/usr/bin/ls");
        assert_eq!(token.command_name(), "ls");
    }

    #[test]
    fn command_name_relative_path() {
        let token = tok("./scripts/test.sh");
        assert_eq!(token.command_name(), "test.sh");
    }

    #[test]
    fn command_name_scoped_package() {
        let token = tok("@herb-tools/linter");
        assert_eq!(token.command_name(), "@herb-tools/linter");
    }

    // --- Token comparisons and helpers ------------------------------------------------

    #[test]
    fn reverse_partial_eq() {
        // The string literal is deliberately on the left-hand side.
        let token = tok("hello");
        assert!("hello" == token);
        assert!("world" != token);
    }

    #[test]
    fn token_deref() {
        let token = tok("--flag");
        assert!(token.starts_with("--"));
        assert!(token.contains("fl"));
        assert_eq!(token.len(), 6);
    }

    #[test]
    fn token_is_one_of() {
        let verbose_flags = ["-v", "--verbose"];
        assert!(tok("-v").is_one_of(&verbose_flags));
        assert!(!tok("-q").is_one_of(&verbose_flags));
    }

    #[test]
    fn token_split_value() {
        let with_value = tok("--method=GET");
        let without_value = tok("--flag");
        assert_eq!(with_value.split_value("="), Some("GET"));
        assert_eq!(without_value.split_value("="), None);
    }

    // --- WordSet ----------------------------------------------------------------------

    #[test]
    fn word_set_contains() {
        let verbs = WordSet::new(&["list", "show", "view"]);
        assert!(verbs.contains(&tok("list")));
        assert!(verbs.contains(&tok("view")));
        assert!(!verbs.contains(&tok("delete")));
    }

    #[test]
    fn word_set_iter() {
        let letters = WordSet::new(&["a", "b", "c"]);
        let collected: Vec<&str> = letters.iter().collect();
        assert_eq!(collected, vec!["a", "b", "c"]);
    }

    // --- Token::content_outside_double_quotes -----------------------------------------

    #[test]
    fn content_outside_double_quotes_strips_string() {
        let token = tok(r#""system""#);
        assert_eq!(token.content_outside_double_quotes(), " ");
    }

    #[test]
    fn content_outside_double_quotes_preserves_code() {
        let token = tok(r#"{print "hello"} END{print NR}"#);
        let stripped = token.content_outside_double_quotes();
        assert_eq!(stripped, r#"{print  } END{print NR}"#);
    }

    #[test]
    fn content_outside_double_quotes_escaped() {
        let token = tok(r#"{print "he said \"hi\""}"#);
        let stripped = token.content_outside_double_quotes();
        assert_eq!(stripped, "{print  }");
    }

    #[test]
    fn content_outside_double_quotes_no_quotes() {
        let token = tok("{print $1}");
        assert_eq!(token.content_outside_double_quotes(), "{print $1}");
    }

    #[test]
    fn content_outside_double_quotes_empty() {
        let token = tok("");
        assert_eq!(token.content_outside_double_quotes(), "");
    }
}
325}