hostlist_parser/
lib.rs

// Copyright (c) 2022 DDN. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
4
5mod structures;
6
7use crate::structures::{flatten_ranges, Part, RangeOutput};
8use combine::{
9    attempt, between, choice, eof,
10    error::{ParseError, StreamError},
11    many1, not_followed_by, optional,
12    parser::{
13        char::{alpha_num, digit, spaces},
14        combinator::ignore,
15        repeat::repeat_until,
16        EasyParser,
17    },
18    sep_by1,
19    stream::{Stream, StreamErrorFor},
20    token, Parser,
21};
22use itertools::Itertools as _;
23
24fn comma<I>() -> impl Parser<I, Output = char>
25where
26    I: Stream<Token = char>,
27    I::Error: ParseError<I::Token, I::Range, I::Position>,
28{
29    token(',')
30}
31
32fn open_bracket<I>() -> impl Parser<I, Output = char>
33where
34    I: Stream<Token = char>,
35    I::Error: ParseError<I::Token, I::Range, I::Position>,
36{
37    token('[')
38}
39
40fn close_bracket<I>() -> impl Parser<I, Output = char>
41where
42    I: Stream<Token = char>,
43    I::Error: ParseError<I::Token, I::Range, I::Position>,
44{
45    token(']')
46}
47
48fn dash<I>() -> impl Parser<I, Output = char>
49where
50    I: Stream<Token = char>,
51    I::Error: ParseError<I::Token, I::Range, I::Position>,
52{
53    token('-')
54}
55
56fn optional_spaces<I>() -> impl Parser<I, Output = Option<()>>
57where
58    I: Stream<Token = char>,
59    I::Error: ParseError<I::Token, I::Range, I::Position>,
60{
61    optional(spaces())
62}
63
64fn host_elements<I>() -> impl Parser<I, Output = String>
65where
66    I: Stream<Token = char>,
67    I::Error: ParseError<I::Token, I::Range, I::Position>,
68{
69    many1(alpha_num().or(dash()).or(token('.')))
70}
71
72fn digits<I>() -> impl Parser<I, Output = String>
73where
74    I: Stream<Token = char>,
75    I::Error: ParseError<I::Token, I::Range, I::Position>,
76{
77    many1(digit())
78}
79
80fn leading_zeros<I>() -> impl Parser<I, Output = (usize, u64)>
81where
82    I: Stream<Token = char>,
83    I::Error: ParseError<I::Token, I::Range, I::Position>,
84{
85    digits().and_then(|x| {
86        let mut digits = x.chars().take_while(|x| x == &'0').count();
87
88        if x.len() == digits {
89            digits -= 1;
90        }
91
92        x.parse::<u64>()
93            .map(|num| (digits, num))
94            .map_err(StreamErrorFor::<I>::other)
95    })
96}
97
98fn range_digits<I>() -> impl Parser<I, Output = RangeOutput>
99where
100    I: Stream<Token = char>,
101    I::Error: ParseError<I::Token, I::Range, I::Position>,
102{
103    attempt((
104        leading_zeros(),
105        optional_spaces().with(dash()),
106        optional_spaces().with(leading_zeros()),
107    ))
108    .and_then(|((start_zeros, start), _, (end_zeros, end))| {
109        let mut xs = [start, end];
110        xs.sort_unstable();
111
112        let same_prefix_len = start_zeros == end_zeros;
113
114        let (range, start_zeros, end_zeros) = if start > end {
115            (
116                RangeOutput::RangeReversed(end_zeros, same_prefix_len, end, start),
117                end_zeros,
118                start_zeros,
119            )
120        } else {
121            (
122                RangeOutput::Range(start_zeros, same_prefix_len, start, end),
123                start_zeros,
124                end_zeros,
125            )
126        };
127
128        if end_zeros > start_zeros {
129            Err(StreamErrorFor::<I>::unexpected_static_message(
130                "larger end padding",
131            ))
132        } else {
133            Ok(range)
134        }
135    })
136}
137
138fn disjoint_digits<I>() -> impl Parser<I, Output = RangeOutput>
139where
140    I: Stream<Token = char>,
141    I::Error: ParseError<I::Token, I::Range, I::Position>,
142{
143    let not_name = not_followed_by(
144        optional_spaces()
145            .with(digits())
146            .skip(optional_spaces())
147            .skip(dash())
148            .map(|_| ""),
149    );
150
151    sep_by1(
152        optional_spaces()
153            .with(leading_zeros())
154            .skip(optional_spaces()),
155        attempt(comma().skip(not_name)),
156    )
157    .map(RangeOutput::Disjoint)
158}
159
160fn range<I>() -> impl Parser<I, Output = Vec<RangeOutput>>
161where
162    I: Stream<Token = char>,
163    I::Error: ParseError<I::Token, I::Range, I::Position>,
164{
165    between(
166        open_bracket(),
167        close_bracket(),
168        sep_by1(range_digits().or(disjoint_digits()), comma()),
169    )
170}
171
172fn hostlist<I>() -> impl Parser<I, Output = Vec<Part>>
173where
174    I: Stream<Token = char>,
175    I::Error: ParseError<I::Token, I::Range, I::Position>,
176{
177    repeat_until(
178        choice([
179            range().map(Part::Range).left(),
180            optional_spaces()
181                .with(host_elements())
182                .map(Part::String)
183                .right(),
184        ]),
185        attempt(optional_spaces().skip(ignore(comma()).or(eof()))),
186    )
187    .and_then(|xs: Vec<_>| {
188        if xs.is_empty() {
189            Err(StreamErrorFor::<I>::unexpected_static_message(
190                "no host found",
191            ))
192        } else {
193            Ok(xs)
194        }
195    })
196}
197
198fn hostlists<I>() -> impl Parser<I, Output = Vec<Vec<Part>>>
199where
200    I: Stream<Token = char>,
201    I::Error: ParseError<I::Token, I::Range, I::Position>,
202{
203    sep_by1(hostlist(), optional_spaces().with(comma()))
204}
205
206pub fn parse(input: &str) -> Result<Vec<String>, combine::stream::easy::Errors<char, &str, usize>> {
207    let (hosts, _) = hostlists()
208        .easy_parse(input)
209        .map_err(|err| err.map_position(|p| p.translate_position(input)))?;
210
211    let mut xs = vec![];
212
213    for parts in hosts {
214        let x_prod: Vec<_> = parts
215            .iter()
216            .filter_map(Part::get_ranges)
217            .map(|xs| flatten_ranges(xs))
218            .multi_cartesian_product()
219            .collect();
220
221        // No ranges means no interpolation
222        if x_prod.is_empty() {
223            let mut s = String::new();
224
225            for p in parts.clone() {
226                if let Part::String(x) = p {
227                    s.push_str(&x)
228                }
229            }
230
231            xs.push(s);
232        } else {
233            for ys in x_prod {
234                let mut it = ys.iter();
235
236                let mut s = String::new();
237
238                for p in parts.clone() {
239                    match p {
240                        Part::String(x) => s.push_str(&x),
241                        Part::Range(_) => s.push_str(it.next().unwrap()),
242                    }
243                }
244
245                xs.push(s);
246            }
247        }
248    }
249
250    Ok(xs.into_iter().unique().collect())
251}
252
/// Snapshot tests for the individual parsers and for the public `parse` entry point.
///
/// NOTE(review): assertions without an explicit name are presumably matched to their
/// stored snapshots by position within the test function, so their order and count are
/// kept as they are - confirm against the snapshot files before reordering or removing.
#[cfg(test)]
mod tests {
    use super::*;
    use combine::parser::EasyParser;
    use insta::assert_debug_snapshot;

    /// Padding / value extraction for digit runs with and without leading zeros.
    #[test]
    fn test_leading_zeros() {
        assert_debug_snapshot!(leading_zeros().easy_parse("001"));
        assert_debug_snapshot!(leading_zeros().easy_parse("0001"));
        assert_debug_snapshot!(leading_zeros().easy_parse("01"));
        assert_debug_snapshot!(leading_zeros().easy_parse("00"));
        assert_debug_snapshot!(leading_zeros().easy_parse("0"));
        assert_debug_snapshot!(leading_zeros().easy_parse("042"));
        // Same input as the previous assertion; retained so the number of unnamed
        // snapshots in this test does not change.
        assert_debug_snapshot!(leading_zeros().easy_parse("042"));
    }

    /// Dashed ranges: padded, with whitespace around the dash, ascending and descending.
    #[test]
    fn test_range_digits() {
        assert_debug_snapshot!(range_digits().easy_parse("001-003"));
        assert_debug_snapshot!(range_digits().easy_parse("001 -  003"));
        assert_debug_snapshot!(range_digits().easy_parse("1-100"));
        assert_debug_snapshot!(range_digits().easy_parse("100-0"));
    }

    /// Comma separated numbers, including lists that run into a dashed range.
    #[test]
    fn test_disjoint_digits() {
        assert_debug_snapshot!(disjoint_digits().easy_parse("1,2,3,4,5]"));
        assert_debug_snapshot!(disjoint_digits().easy_parse("1,2,3-5"));
        assert_debug_snapshot!(disjoint_digits().easy_parse("1,2,006,0007,3-5"));
    }

    /// Bracketed expressions mixing single numbers and dashed ranges.
    #[test]
    fn test_range() {
        assert_debug_snapshot!(range().easy_parse("[1,2,3,4,5]"));
        assert_debug_snapshot!(range().easy_parse("[1,2,3-5]"));
        assert_debug_snapshot!(range().easy_parse("[1,2,3-5,6,7,8-10]"));
        assert_debug_snapshot!(range().easy_parse("[01-10]"));
    }

    /// A single host expression, with and without a bracketed range.
    #[test]
    fn test_hostlist() {
        assert_debug_snapshot!(hostlist().easy_parse("oss1.local"));
        assert_debug_snapshot!(hostlist().easy_parse("oss[1,2].local"));
        assert_debug_snapshot!(hostlist().easy_parse(
            "hostname[2,6,7].iml.com,hostname[10,11-12,2-3,5].iml.com,hostname[15-17].iml.com"
        ));
    }

    /// Several host expressions separated by commas, with and without spaces.
    #[test]
    fn test_hostlists() {
        assert_debug_snapshot!(hostlists().easy_parse("oss1.local"));
        assert_debug_snapshot!(hostlists().easy_parse("oss[1,2].local"));
        assert_debug_snapshot!(hostlists().easy_parse(
            "hostname[2,6,7].iml.com,hostname[10,11-12,2-3,5].iml.com,hostname[15-17].iml.com"
        ));
        assert_debug_snapshot!(hostlists().easy_parse(
            "hostname[2,6,7].iml.com, hostname[10,11-12,2-3,5].iml.com, hostname[15-17].iml.com"
        ));
    }

    /// End-to-end expansion through `parse`, covering valid and invalid expressions.
    #[test]
    fn test_parse() {
        // --- Unnamed snapshots (order matters) ---
        assert_debug_snapshot!(parse("oss[1,2].local"));

        assert_debug_snapshot!(parse("oss1.local"));

        assert_debug_snapshot!(parse("hostname[10,11-12,002-003,5].iml.com"));

        assert_debug_snapshot!(parse(
            "hostname[2,6,7].iml.com,hostname[10,11-12,2-3,5].iml.com,hostname[15-17].iml.com"
        ));

        assert_debug_snapshot!(parse(
            "hostname[2,6,7].iml.com,  hostname[10,11-12,2-3,5].iml.com, hostname[15-17].iml.com"
        ));

        // --- Named snapshots: expressions expected to parse ---
        assert_debug_snapshot!("single item without ranges", parse("hostname1.iml.com"));

        assert_debug_snapshot!(
            "two items without ranges",
            parse("hostname1.iml.com, hostname2.iml.com")
        );

        assert_debug_snapshot!(
            "single item with single range",
            parse("hostname[6].iml.com")
        );

        assert_debug_snapshot!(
            "single item with single range and nothing after the range",
            parse("hostname[6]")
        );

        assert_debug_snapshot!(
            "single item with single digit prefixed range",
            parse("hostname[09-11]")
        );
        assert_debug_snapshot!(
            "single item with double digit prefixed range",
            parse("hostname[009-011]")
        );

        assert_debug_snapshot!("single item in reverse order", parse("hostname[7-5]"));

        assert_debug_snapshot!(
            "multiple items combined into regular and reverse order",
            parse("hostname[7-5], hostname[8,9], hostname[3,2,1]")
        );

        assert_debug_snapshot!("long range with prefix", parse("hostname[001-999]"));

        assert_debug_snapshot!(
            "single item with two ranges",
            parse("hostname[6,7]-[9-11].iml.com")
        );

        assert_debug_snapshot!(
            "single item with range containing mixture of comma and dash",
            parse("hostname[7,9-11].iml.com")
        );

        assert_debug_snapshot!(
            "Single range per hostname with dup",
            parse(
                "hostname[2,6,7].iml.com,hostname[10,11-12,2-4,5].iml.com, hostname[15-17].iml.com"
            )
        );

        assert_debug_snapshot!(
            "Multiple ranges per hostname in which the difference is 1",
            parse("hostname[1,2-3].iml[2,3].com,hostname[3,4,5].iml[2,3].com,hostname[5-6,7].iml[2,3].com")
        );

        assert_debug_snapshot!(
            "Multiple ranges per hostname in which the difference is 1 two formats",
            parse("hostname[1,2-3].iml[2,3].com,hostname[1,2,3].iml[2,4].com")
        );

        assert_debug_snapshot!(
            "Multiple ranges per hostname in which the difference is gt 1",
            parse("hostname[1,2-3].iml[2,3].com,hostname[4,5].iml[3,4].com")
        );

        assert_debug_snapshot!(
            "no prefix to prefix should throw an error",
            parse("hostname[9-0011]").unwrap_err()
        );

        assert_debug_snapshot!(
            "Overlapping ranges",
            parse("hostname[1,2-3].iml[2,3].com,hostname[3,4,5].iml[3,4].com")
        );

        assert_debug_snapshot!(
            "Duplicate without using a range",
            parse("hostname4.iml.com,hostname4.iml.com")
        );

        assert_debug_snapshot!(
            "Single item with single range and additional characters after range",
            parse("hostname[0-3]-eth0.iml.com")
        );

        assert_debug_snapshot!(
            "Single item with three ranges separated by character",
            parse("hostname[1,2]-[3-4]-[5,6].iml.com")
        );

        assert_debug_snapshot!(
            "Single item with two ranges and no separation between the ranges",
            parse("hostname[1,2][3,4].iml.com")
        );

        assert_debug_snapshot!(
            "Single item with prefix range starting at 0",
            parse("test[000-002].localdomain")
        );

        assert_debug_snapshot!(
            "Combined Ranges",
            parse(
                "hostname[2,6,7].iml.com,hostname[10,11-12,2-3,5].iml.com, hostname[15-17].iml.com"
            )
        );

        assert_debug_snapshot!(
            "Padding with a single and double digit number",
            parse("hostname[06-10]")
        );

        // --- Named snapshots: malformed or unusual expressions ---
        assert_debug_snapshot!(
            "Invalid character in range (snowman)",
            parse("test[00☃-002].localdomain")
        );

        assert_debug_snapshot!("Empty expression", parse(""));

        assert_debug_snapshot!(
            "No separation between comma's",
            parse("hostname[1,,2].iml.com")
        );

        assert_debug_snapshot!(
            "No separation between dashes",
            parse("hostname[1--2].iml.com")
        );

        assert_debug_snapshot!(
            "No separation between dash and comma",
            parse("hostname[1-,2].iml.com")
        );

        assert_debug_snapshot!(
            "No separation between comma and dash",
            parse("hostname[1,-2].iml.com")
        );

        assert_debug_snapshot!("Missing closing brace", parse("hostname[1"));

        assert_debug_snapshot!("Ending an expression with a comma", parse("hostname[1],"));

        assert_debug_snapshot!(
            "Beginning and ending prefixes don't match two single digit numbers",
            parse("hostname[01-009]")
        );

        assert_debug_snapshot!(
            "Having a closing brace before an opening brace",
            parse("hostname]00[asdf")
        );
    }

    /// A long, real-world style expression made of many padded ranges must parse.
    #[test]
    fn test_parse_large_expression() {
        let input = "atla-pio-03-o[048-051],atla-pio-05-o[052-055],atla-pio-07-o[056-059],atla-pio-09-o[060-063],atla-pio-11-o[064-067],atla-pio-13-o[068-071],atla-pip-03-o[072-075],atla-pip-05-o[076-079],atla-pip-07-o[080-083],atla-pip-11-o[088-091],atla-pip-13-o[092-095],atla-pip-09-o[085,087],atla-piq-03-o[096-099],atla-piq-05-o[100-103],atla-piq-07-o[104-107],atla-piq-09-o[108-111],atla-piq-11-o[112-115],atla-piq-13-o[116-119],atla-pir-03-o[120-123],atla-pir-05-o[124-127],atla-pir-07-o[128-131],atla-pir-09-o[132-135],atla-pir-11-o[136-139],atla-pir-13-o[140-143],atla-pis-03-m[000-003],atla-pis-05-o[000-003],atla-pis-07-o[004-007],atla-pis-09-o[008-011],atla-pis-11-o[012-015],atla-pis-13-o[016-019],atla-pis-15-o[020-023],atla-pit-03-m[004-007],atla-pit-05-o[024-027],atla-pit-07-o[028-031],atla-pit-09-o[032-035],atla-pit-11-o[036-039],atla-pit-15-o[044-047],atla-pit-13-o[040,042]";

        let xs = parse(input).unwrap();

        assert_debug_snapshot!("Large expression", xs);
    }

    /// Digits in the literal prefix must not interfere with a zero-padded list.
    #[test]
    fn test_parse_osts() {
        assert_debug_snapshot!("Leading 0s", parse("OST01[00,01]"));
    }
}
495}