clitest_lib/parser/
v0.rs

1use grok::Grok;
2use std::sync::Arc;
3
4use crate::command::CommandLine;
5use crate::output::*;
6use crate::script::*;
7use crate::util::ShellBit;
8use crate::util::shell_split;
9
/// Classification assigned to a run of script lines while segmenting.
///
/// Consecutive lines of the same classification are grouped into a single
/// `ScriptV0Block`.
#[derive(Debug, Clone, derive_more::IsVariant, derive_more::Unwrap)]
enum BlockType {
    /// A command block (a line starting with `$`), carrying the parsed command.
    Command(CommandLine),
    /// Comments (`#`) and whitespace lines.
    Ineffectual,
    /// Pattern lines.
    Pattern,
    /// Meta lines (`%EXPECT_FAILURE`, `%EXIT`, etc.).
    Meta,
    /// Any (`*`) block.
    Any,
}
23
24impl BlockType {
25    fn is_same_type_as(&self, other: &Self) -> bool {
26        match (self, other) {
27            (BlockType::Command(_), BlockType::Command(_)) => true,
28            (BlockType::Ineffectual, BlockType::Ineffectual) => true,
29            (BlockType::Pattern, BlockType::Pattern) => true,
30            (BlockType::Meta, BlockType::Meta) => true,
31            (BlockType::Any, BlockType::Any) => true,
32            _ => false,
33        }
34    }
35}
36
/// A run of consecutive script lines that share one `BlockType`.
struct ScriptV0Block {
    /// Location of the line that started this block.
    location: ScriptLocation,
    /// Classification shared by every line in `lines`.
    block_type: BlockType,
    /// The source lines that make up the block, in script order.
    lines: Vec<ScriptLine>,
}
42
43impl ScriptV0Block {
44    /// Take the current block, replacing with an empty block at the given location.
45    pub fn take(&mut self, location: ScriptLocation, block_type: BlockType) -> Self {
46        Self {
47            location: std::mem::replace(&mut self.location, location),
48            block_type: std::mem::replace(&mut self.block_type, block_type),
49            lines: std::mem::take(&mut self.lines),
50        }
51    }
52
53    /// Split the first pattern line from the rest. If not a pattern block,
54    /// return None. May leave an empty block if the first line is the only line.
55    pub fn split_first(&mut self) -> Option<Self> {
56        match self.block_type {
57            BlockType::Pattern => {
58                let lines = &mut self.lines;
59                if lines.is_empty() {
60                    debug_assert!(false, "split_first called on empty pattern block");
61                    return None;
62                }
63                let first = lines.remove(0);
64                Some(Self {
65                    location: first.location.clone(),
66                    block_type: BlockType::Pattern,
67                    lines: vec![first],
68                })
69            }
70            _ => None,
71        }
72    }
73}
74
75impl std::fmt::Debug for ScriptV0Block {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        if f.alternate() {
78            let indent = f.width().unwrap_or_default();
79            let indent = " ".repeat(indent);
80            // HACK: Repurpose width as indent
81            // Left-pad by "indent" spaces
82            let c = match self.block_type {
83                BlockType::Command(_) => "$",
84                BlockType::Ineffectual => "#",
85                BlockType::Pattern => "",
86                BlockType::Meta => "%",
87                BlockType::Any => "*",
88            };
89            writeln!(f, "{indent}:{} {c}[", self.location.line)?;
90            for line in &self.lines {
91                writeln!(f, "{indent}  {:?}", line.text())?;
92            }
93            write!(f, "{indent}]")?;
94            Ok(())
95        } else {
96            f.debug_struct("ScriptBlock")
97                .field("location", &self.location)
98                .field("block_type", &self.block_type)
99                .field("lines", &self.lines)
100                .finish()
101        }
102    }
103}
104
/// A segment of a script. This is the first stage of parsing, where we split
/// the script.
enum ScriptV0Segment {
    /// A run of lines sharing one block type.
    Block(ScriptV0Block),
    /// A brace-delimited block: location of the opening line, the leading
    /// keyword, its shell-split arguments, and the segments nested inside.
    SubBlock(ScriptLocation, String, Vec<ShellBit>, Vec<ScriptV0Segment>),
    /// A single `;`-terminated line: location, the leading keyword, and its
    /// shell-split arguments.
    Semi(ScriptLocation, String, Vec<ShellBit>),
}
112
113impl std::fmt::Debug for ScriptV0Segment {
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        if f.alternate() {
116            let indent = f.width().unwrap_or_default();
117            let indent_str = " ".repeat(indent);
118            // HACK: Indent the segments by using width, but don't print indent here
119            match self {
120                ScriptV0Segment::Block(block) => writeln!(f, "{:#indent$?}", block),
121                ScriptV0Segment::SubBlock(location, text, args, segments) => {
122                    writeln!(f, "{indent_str}:{} {text:?}{args:?} {{", location.line)?;
123                    for segment in segments {
124                        write!(f, "{segment:#indent$?}", indent = indent + 2)?;
125                    }
126                    writeln!(f, "{indent_str}}}")?;
127                    Ok(())
128                }
129                ScriptV0Segment::Semi(location, text, args) => {
130                    writeln!(f, "{indent_str}:{} {text:?}{args:?};", location.line)?;
131                    Ok(())
132                }
133            }
134        } else {
135            match self {
136                ScriptV0Segment::Block(block) => f
137                    .debug_struct("Block")
138                    .field("location", &block.location)
139                    .field("block_type", &block.block_type)
140                    .field("lines", &block.lines)
141                    .finish(),
142                ScriptV0Segment::SubBlock(location, text, args, segments) => f
143                    .debug_struct("SubBlock")
144                    .field("location", &location)
145                    .field("text", &text)
146                    .field("args", &args)
147                    .field("segments", &segments)
148                    .finish(),
149                ScriptV0Segment::Semi(location, text, args) => f
150                    .debug_struct("Semi")
151                    .field("location", &location)
152                    .field("text", &text)
153                    .field("args", &args)
154                    .finish(),
155            }
156        }
157    }
158}
159
160impl ScriptV0Segment {
161    fn is_empty(&self) -> bool {
162        match self {
163            ScriptV0Segment::Block(block) => block.lines.is_empty(),
164            ScriptV0Segment::SubBlock(_, text, _args, segments) => {
165                text != "*"
166                    && (segments.is_empty() || segments.iter().all(|segment| segment.is_empty()))
167            }
168            ScriptV0Segment::Semi(..) => false,
169        }
170    }
171
172    /// Used by wildcard handling.
173    fn split_first(&mut self) -> Option<Self> {
174        match self {
175            ScriptV0Segment::Block(block) => block.split_first().map(ScriptV0Segment::Block),
176            &mut ScriptV0Segment::SubBlock(ref location, ..) => {
177                if self.is_command_block() {
178                    None
179                } else {
180                    Some(std::mem::replace(
181                        self,
182                        ScriptV0Segment::Block(ScriptV0Block {
183                            location: location.clone(),
184                            block_type: BlockType::Ineffectual,
185                            lines: vec![],
186                        }),
187                    ))
188                }
189            }
190            ScriptV0Segment::Semi(..) => None,
191        }
192    }
193
194    /// Returns true if this segment is a command block, or the first block it
195    /// contains is a command block. Note that this should only be called on
196    /// normalized segments.
197    fn is_command_block(&self) -> bool {
198        match self {
199            ScriptV0Segment::Block(block) => block.block_type.is_command(),
200            ScriptV0Segment::SubBlock(.., segments) => {
201                segments.iter().any(|segment| segment.is_command_block())
202            }
203            ScriptV0Segment::Semi(..) => true,
204        }
205    }
206
207    #[expect(unused)]
208    fn is_meta_block(&self) -> bool {
209        match self {
210            ScriptV0Segment::Block(block) => block.block_type.is_meta(),
211            _ => false,
212        }
213    }
214
215    fn location(&self) -> &ScriptLocation {
216        match self {
217            ScriptV0Segment::Block(block) => &block.location,
218            ScriptV0Segment::SubBlock(location, ..) => location,
219            ScriptV0Segment::Semi(location, ..) => location,
220        }
221    }
222
223    #[allow(unused)]
224    fn last_location(&self) -> &ScriptLocation {
225        match self {
226            ScriptV0Segment::Block(block) => &block.lines.last().unwrap().location,
227            ScriptV0Segment::SubBlock(location, .., segments) => {
228                if let Some(last) = segments.last() {
229                    last.last_location()
230                } else {
231                    location
232                }
233            }
234            ScriptV0Segment::Semi(location, ..) => location,
235        }
236    }
237}
238
239pub fn parse_script(file_name: ScriptFile, script: &str) -> Result<Script, ScriptError> {
240    let lines = ScriptLine::parse(file_name.clone(), script);
241    let segments = segment_script(true, &mut lines.as_slice())?;
242    let normalized = normalize_segments(segments);
243    parse_normalized_script_v0(&normalized, file_name)
244}
245
/// Split the script into parsing segments. These allow us to more easily parse
/// in later phases because we avoid having to check for block boundaries.
///
/// `top_level` is `true` for the outermost call and `false` when recursing
/// into a `{ ... }` sub-block. A nested call returns when it reaches the
/// closing `}` and advances `*lines_slice` to the lines that follow it.
///
/// # Errors
///
/// * `InvalidBlockEnd` for a `}` at the top level, for a sub-block opener
///   with no lines after it, or for a sub-block that is never closed.
/// * `InvalidBlockArgs` when a sub-block's arguments cannot be shell-split.
/// * Any error from `parse_command_line` for a `$` command; unclosed quotes
///   or backslashes are only reported once the input runs out of lines.
fn segment_script(
    top_level: bool,
    lines_slice: &mut &[ScriptLine],
) -> Result<Vec<ScriptV0Segment>, ScriptError> {
    let mut segments = Vec::new();
    // The block currently being accumulated from non-command lines, if any.
    let mut current_segment = None;

    /// Recognizes `keyword args {` and `keyword args;` lines. Returns
    /// `(is_semi, keyword, args)` when the line starts with an alphabetic
    /// character and ends with `{` or `;`.
    fn is_subblock(text: &str) -> Option<(bool, &str, &str)> {
        // Workaround for missing let chains
        if text.starts_with(|c: char| c.is_alphabetic()) {
            let is_semi = text.ends_with(';');
            text.strip_suffix(|c: char| c == '{' || c == ';')
                .map(|text| {
                    if let Some((block_type, args)) = text.trim().split_once(char::is_whitespace) {
                        (is_semi, block_type.trim(), args.trim())
                    } else {
                        (is_semi, text.trim(), "")
                    }
                })
        } else {
            None
        }
    }

    let mut lines = lines_slice.iter();
    // Kept so an unterminated sub-block can be reported at its last line.
    let orig_slice = *lines_slice;
    // `Some(fence)` while inside a `!!!` / `???` multi-line pattern.
    let mut multiline_terminator = None;
    while let Some(line) = lines.next() {
        // Track multi-line fences: a fence line opens the region and the same
        // fence closes it.
        // NOTE(review): the `$`, sub-block and `}` checks below run before this
        // state is consulted, so such lines inside a fence are still handled
        // structurally — confirm this is intended.
        if let Some(terminator) = multiline_terminator {
            if line.text() == terminator {
                multiline_terminator = None;
            }
        } else if line.text() == "!!!" {
            multiline_terminator = Some("!!!");
        } else if line.text() == "???" {
            multiline_terminator = Some("???");
        }

        // For commands, we greedily consume all lines until we successfully
        // parse a command (or fail to parse).
        if line.starts_with("$") {
            // A command always ends whatever block was being accumulated.
            if let Some(segment) = current_segment.take() {
                segments.push(ScriptV0Segment::Block(segment));
            }
            let mut block_lines = vec![line.clone()];
            let mut command = line.text()[1..].trim().to_string();
            let mut line_count = 1;
            let command = loop {
                match parse_command_line(line.location.clone(), line_count, &command) {
                    Ok(command) => break command,
                    // An unclosed quote or trailing backslash means the command
                    // continues on the next line; append it and retry.
                    Err(e @ ScriptErrorType::UnclosedQuote)
                    | Err(e @ ScriptErrorType::UnclosedBackslash) => match lines.next() {
                        Some(line) => {
                            block_lines.push(line.clone());
                            command.push('\n');
                            command.push_str(line.text());
                            line_count += 1;
                        }
                        None => {
                            // Out of input: report at the command's first line.
                            return Err(ScriptError::new(e, line.location.clone()));
                        }
                    },
                    Err(e) => {
                        return Err(ScriptError::new(e, line.location.clone()));
                    }
                }
            };

            segments.push(ScriptV0Segment::Block(ScriptV0Block {
                block_type: BlockType::Command(command),
                lines: block_lines,
                location: line.location.clone(),
            }));
        } else if let Some((is_semi, block_type, args)) = is_subblock(line.text()) {
            if let Some(segment) = current_segment.take() {
                segments.push(ScriptV0Segment::Block(segment));
            }

            let args = shell_split(args).map_err(|_| {
                ScriptError::new_with_data(
                    ScriptErrorType::InvalidBlockArgs,
                    line.location.clone(),
                    format!("{block_type} {args}"),
                )
            })?;

            if is_semi {
                segments.push(ScriptV0Segment::Semi(
                    line.location.clone(),
                    block_type.to_string(),
                    args,
                ));
            } else {
                // Temporarily swap from iterator to slice so the recursive call
                // can report how many lines it consumed.
                let mut rest = lines.as_slice();
                if rest.is_empty() {
                    return Err(ScriptError::new(
                        ScriptErrorType::InvalidBlockEnd,
                        line.location.clone(),
                    ));
                }

                segments.push(ScriptV0Segment::SubBlock(
                    line.location.clone(),
                    block_type.to_string(),
                    args,
                    segment_script(false, &mut rest)?,
                ));
                // Resume after the sub-block's closing brace.
                lines = rest.iter();
            }
        } else if line.text() == "}" {
            // Note that the closing brace is not included in the current
            // segment, we omit these lines from the segment tree.
            if top_level {
                return Err(ScriptError::new(
                    ScriptErrorType::InvalidBlockEnd,
                    line.location.clone(),
                ));
            }
            // Hand the unconsumed lines back to the caller.
            *lines_slice = lines.as_slice();
            if let Some(segment) = current_segment.take() {
                segments.push(ScriptV0Segment::Block(segment));
            }
            return Ok(segments);
        } else {
            // Split into ineffectual and non-ineffectual lines
            let block_type = if multiline_terminator.is_some() {
                BlockType::Pattern
            } else if line.starts_with("#") || line.is_empty() {
                BlockType::Ineffectual
            } else if line.starts_with("%") {
                BlockType::Meta
            } else if line.starts_with("*") {
                BlockType::Any
            } else {
                BlockType::Pattern
            };

            let segment = current_segment.get_or_insert(ScriptV0Block {
                block_type: block_type.clone(),
                lines: Vec::new(),
                location: line.location.clone(),
            });
            // A change of block type flushes the accumulated block and starts
            // a fresh one at this line.
            if !segment.block_type.is_same_type_as(&block_type) {
                segments.push(ScriptV0Segment::Block(
                    segment.take(line.location.clone(), block_type),
                ));
            }
            segment.lines.push(line.clone());
        }
    }

    // Running out of lines inside a sub-block means its `}` is missing.
    if !top_level {
        return Err(ScriptError::new(
            ScriptErrorType::InvalidBlockEnd,
            orig_slice.last().unwrap().location.clone(),
        ));
    }

    if let Some(segment) = current_segment.take() {
        segments.push(ScriptV0Segment::Block(segment));
    }

    Ok(segments)
}
413
414fn insert_virtual_end_block(location: ScriptLocation, segments: &mut Vec<ScriptV0Segment>) {
415    let line = ScriptLine::new(location.file.clone(), location.line - 1, "end");
416
417    segments.push(ScriptV0Segment::Block(ScriptV0Block {
418        location: line.location.clone(),
419        block_type: BlockType::Pattern,
420        lines: vec![line],
421    }));
422}
423
424/// Remove all ineffectual blocks, and merge consecutive blocks that are of the same type.
425fn normalize_segments(segments: Vec<ScriptV0Segment>) -> Vec<ScriptV0Segment> {
426    let mut new_segments = vec![];
427    let mut command_needs_end = false;
428
429    let Some(last_line) = segments.last().map(|segment| segment.location().clone()) else {
430        return segments;
431    };
432
433    for mut segment in segments {
434        if segment.is_command_block() && command_needs_end {
435            insert_virtual_end_block(segment.location().clone(), &mut new_segments);
436            command_needs_end = false;
437        }
438        match segment {
439            ScriptV0Segment::Block(ref mut block) => {
440                debug_assert!(
441                    !block.lines.is_empty(),
442                    "empty blocks should not exist here"
443                );
444                if block.block_type.is_ineffectual() {
445                    continue;
446                }
447                if block.block_type.is_command() {
448                    command_needs_end = true;
449                }
450                if let Some(ScriptV0Segment::Block(last_block)) = new_segments.last_mut() {
451                    if block.block_type.is_command() {
452                        new_segments.push(segment);
453                    } else if block.block_type.is_same_type_as(&last_block.block_type) {
454                        last_block.lines.extend(std::mem::take(&mut block.lines));
455                    } else {
456                        new_segments.push(segment);
457                    }
458                } else {
459                    new_segments.push(segment);
460                }
461            }
462            ScriptV0Segment::SubBlock(location, text, args, segments) => {
463                let normalized = normalize_segments(segments);
464                new_segments.push(ScriptV0Segment::SubBlock(location, text, args, normalized));
465            }
466            ScriptV0Segment::Semi(location, text, args) => {
467                new_segments.push(ScriptV0Segment::Semi(location, text, args));
468            }
469        }
470    }
471
472    // Add a virtual "end" block to the end of the last command block.
473    if command_needs_end {
474        insert_virtual_end_block(last_line, &mut new_segments);
475    }
476
477    // Pass 2: Convert any "any"-type blocks to sub-blocks and steal the next line or non-command subblock.
478    let mut i = 0;
479    while i < new_segments.len() {
480        if let ScriptV0Segment::Block(block) = &mut new_segments[i] {
481            if block.block_type.is_any() {
482                let location = block.location.clone();
483                new_segments[i] =
484                    ScriptV0Segment::SubBlock(location.clone(), "*".to_string(), vec![], vec![]);
485
486                if i + 1 < new_segments.len() {
487                    if let Some(first) = new_segments[i + 1].split_first() {
488                        new_segments[i] = ScriptV0Segment::SubBlock(
489                            location.clone(),
490                            "*".to_string(),
491                            vec![],
492                            vec![first],
493                        );
494                    }
495                }
496            }
497        }
498        if new_segments[i].is_empty() {
499            new_segments.remove(i);
500        } else {
501            i += 1;
502        }
503    }
504
505    new_segments
506}
507
508pub fn parse_command_line(
509    location: ScriptLocation,
510    line_count: usize,
511    command: &str,
512) -> Result<CommandLine, ScriptErrorType> {
513    let command_str = command.to_string();
514    // Process the accumulated command
515    const SEPARATORS: &[&str] = &[
516        "&&", "||", "1>&2", "2>&1", "1>", "2>", "&", "|", ";", "(", ")", ">", "<", "=",
517    ];
518    let command = match shellish_parse::multiparse(
519        command,
520        shellish_parse::ParseOptions::default(),
521        SEPARATORS,
522    ) {
523        Ok(command) => command,
524        Err(shellish_parse::ParseError::DanglingString) => {
525            return Err(ScriptErrorType::UnclosedQuote);
526        }
527        Err(shellish_parse::ParseError::DanglingBackslash) => {
528            return Err(ScriptErrorType::UnclosedBackslash);
529        }
530        _ => {
531            return Err(ScriptErrorType::IllegalShellCommand);
532        }
533    };
534    let mut command_bits = vec![];
535    for (_, seperator) in command {
536        if let Some(seperator) = seperator {
537            if SEPARATORS[seperator] == "&" {
538                return Err(ScriptErrorType::BackgroundProcessNotAllowed);
539            }
540            if SEPARATORS[seperator] == ">&" {
541                return Err(ScriptErrorType::UnsupportedRedirection);
542            }
543            command_bits.push(SEPARATORS[seperator].to_string());
544        }
545    }
546
547    Ok(CommandLine::new(command_str, location, line_count))
548}
549
/// Accumulates the output patterns collected while walking a run of
/// non-command segments.
#[derive(Default)]
struct OutputPatternBuilder {
    /// Patterns that end up in a command's `pattern.ignore` list.
    ignore: Vec<OutputPattern>,
    /// Patterns that end up in a command's `pattern.reject` list.
    reject: Vec<OutputPattern>,
    /// Patterns to match in sequence against a command's output.
    patterns: Vec<OutputPattern>,
}
556
557fn parse_normalized_script_v0(
558    segments: &[ScriptV0Segment],
559    file: ScriptFile,
560) -> Result<Script, ScriptError> {
561    // Handle the preamble before the first command block
562
563    let preamble_index = segments
564        .iter()
565        .position(|segment| segment.is_command_block())
566        .unwrap_or(segments.len());
567    let (preamble, rest) = segments.split_at(preamble_index);
568
569    let mut grok = Grok::with_default_patterns();
570
571    let builder = parse_script_v0_segments(preamble, &mut grok)?;
572    if let Some(pattern) = builder.patterns.first() {
573        return Err(ScriptError::new(
574            ScriptErrorType::InvalidGlobalPattern,
575            pattern.location.clone(),
576        ));
577    }
578    let global_ignore = builder.ignore;
579    let global_reject = builder.reject;
580
581    let commands =
582        parse_normalized_script_v0_commands(rest, &mut grok, &global_ignore, &global_reject)?;
583
584    Ok(Script {
585        commands,
586        file,
587        grok,
588    })
589}
590
591fn parse_normalized_script_v0_commands(
592    mut segments: &[ScriptV0Segment],
593    grok: &mut Grok,
594    global_ignore: &Vec<OutputPattern>,
595    global_reject: &Vec<OutputPattern>,
596) -> Result<Vec<ScriptBlock>, ScriptError> {
597    let mut commands = vec![];
598    while let Some((command, remaining)) = segments.split_first() {
599        debug_assert!(
600            command.is_command_block(),
601            "not a command block: {command:?}"
602        );
603
604        if let ScriptV0Segment::SubBlock(_, block_type, args, sub_segments) = command {
605            let blocks = parse_normalized_script_v0_commands(
606                sub_segments,
607                grok,
608                global_ignore,
609                global_reject,
610            )?;
611
612            if block_type == "if" {
613                let condition = parse_if_condition(command.location().clone(), args)?;
614                commands.push(ScriptBlock::If(condition, blocks));
615            } else if block_type == "for" {
616                if args.len() >= 3 && args[1] == "in" {
617                    commands.push(ScriptBlock::For(
618                        ForCondition::Env(args[0].to_string(), args[2..].to_vec()),
619                        blocks,
620                    ));
621                } else {
622                    return Err(ScriptError::new_with_data(
623                        ScriptErrorType::InvalidBlockType,
624                        command.location().clone(),
625                        format!("for {args:?}"),
626                    ));
627                }
628            } else if block_type == "background" {
629                commands.push(ScriptBlock::Background(blocks));
630            } else if block_type == "retry" {
631                commands.push(ScriptBlock::Retry(blocks));
632            } else if block_type == "defer" {
633                commands.push(ScriptBlock::Defer(blocks));
634            } else {
635                return Err(ScriptError::new_with_data(
636                    ScriptErrorType::InvalidBlockType,
637                    command.location().clone(),
638                    block_type.clone(),
639                ));
640            }
641
642            segments = remaining;
643            continue;
644        }
645
646        if let ScriptV0Segment::Semi(location, text, args) = command {
647            segments = remaining;
648            if text == "using" {
649                if args.len() == 1 && args[0] == "tempdir" {
650                    commands.push(ScriptBlock::InternalCommand(InternalCommand::UsingTempdir));
651                    continue;
652                }
653                if args.len() == 2 && args[0] == "dir" {
654                    commands.push(ScriptBlock::InternalCommand(InternalCommand::UsingDir(
655                        args[1].clone(),
656                        false,
657                    )));
658                    continue;
659                }
660                if args.len() == 3 && args[0] == "new" && args[1] == "dir" {
661                    commands.push(ScriptBlock::InternalCommand(InternalCommand::UsingDir(
662                        args[2].clone(),
663                        true,
664                    )));
665                    continue;
666                }
667            }
668            if text == "cd" && args.len() == 1 {
669                commands.push(ScriptBlock::InternalCommand(InternalCommand::ChangeDir(
670                    args[0].clone(),
671                )));
672                continue;
673            }
674            if text == "set" && args.len() == 2 {
675                commands.push(ScriptBlock::InternalCommand(InternalCommand::Set(
676                    args[0].to_string(),
677                    args[1].clone(),
678                )));
679                continue;
680            }
681            return Err(ScriptError::new_with_data(
682                ScriptErrorType::InvalidInternalCommand,
683                location.clone(),
684                format!("{text} {args:?}"),
685            ));
686        }
687
688        let next_command = remaining
689            .iter()
690            .position(|segment| segment.is_command_block())
691            .unwrap_or(remaining.len());
692        let mut pattern;
693        (pattern, segments) = remaining.split_at(next_command);
694
695        let location = command.location().clone();
696        let mut command = ScriptCommand {
697            command: match command {
698                ScriptV0Segment::Block(block) => block.block_type.clone().unwrap_command(),
699                _ => unreachable!(),
700            },
701            pattern: OutputPattern {
702                pattern: OutputPatternType::None,
703                ignore: Default::default(),
704                reject: Default::default(),
705                location: location.clone(),
706            },
707            exit: CommandExit::Success,
708            expect_failure: false,
709            set_var: None,
710        };
711
712        if let Some(ScriptV0Segment::Block(maybe_meta)) = pattern.first() {
713            if maybe_meta.block_type.is_meta() {
714                pattern = pattern.split_first().unwrap().1;
715
716                for line in maybe_meta.lines.iter() {
717                    if line.starts_with("%SET") {
718                        if let Some(var) = line.text()[4..].split_whitespace().next() {
719                            command.set_var = Some(var.to_string());
720                        } else {
721                            return Err(ScriptError::new(
722                                ScriptErrorType::InvalidSetVariable,
723                                line.location.clone(),
724                            ));
725                        }
726                    } else if line.starts_with("%EXPECT_FAILURE") {
727                        command.expect_failure = true;
728                    } else if line.starts_with("%EXIT any") {
729                        command.exit = CommandExit::Any;
730                    } else if line.starts_with("%EXIT ") {
731                        if let Ok(status) = line.text()[6..].parse::<i32>() {
732                            command.exit = CommandExit::Failure(status);
733                        } else {
734                            return Err(ScriptError::new(
735                                ScriptErrorType::InvalidExitStatus,
736                                line.location.clone(),
737                            ));
738                        }
739                    }
740                }
741            }
742        }
743
744        let builder = parse_script_v0_segments(pattern, grok)?;
745        command.pattern = OutputPattern::new_sequence(location, builder.patterns);
746        command.pattern.ignore = global_ignore
747            .iter()
748            .cloned()
749            .chain(builder.ignore.iter().cloned())
750            .collect::<Vec<_>>()
751            .into();
752        command.pattern.reject = global_reject
753            .iter()
754            .cloned()
755            .chain(builder.reject.iter().cloned())
756            .collect::<Vec<_>>()
757            .into();
758        commands.push(ScriptBlock::Command(command));
759    }
760    Ok(commands)
761}
762
763fn parse_script_v0_segments(
764    segments: &[ScriptV0Segment],
765    grok: &mut Grok,
766) -> Result<OutputPatternBuilder, ScriptError> {
767    let mut builder = OutputPatternBuilder::default();
768    for segment in segments {
769        parse_script_v0_segment(grok, &mut builder, segment)?;
770    }
771    Ok(builder)
772}
773
774fn parse_script_v0_segment(
775    grok: &mut Grok,
776    builder: &mut OutputPatternBuilder,
777    segment: &ScriptV0Segment,
778) -> Result<(), ScriptError> {
779    if segment.is_command_block() {
780        return Err(ScriptError::new(
781            ScriptErrorType::UnsupportedCommandPosition,
782            segment.location().clone(),
783        ));
784    }
785    match segment {
786        ScriptV0Segment::Block(block) => {
787            let mut pattern = block.lines.as_slice();
788            while let Some((line, rest)) = pattern.split_first() {
789                pattern = rest;
790                if line.text() == "!!!" {
791                    let indent = line.text_untrimmed().find("!!!").unwrap();
792                    while let Some((line, rest)) = pattern.split_first() {
793                        pattern = rest;
794                        if line.text() == "!!!" {
795                            break;
796                        } else {
797                            builder.patterns.push(parse_pattern_line(
798                                grok,
799                                line.location.clone(),
800                                &line.text_untrimmed()[indent.min(line.text_untrimmed().len())..],
801                                '!',
802                            )?);
803                        }
804                    }
805                } else if line.text() == "???" {
806                    let indent = line.text_untrimmed().find("???").unwrap();
807                    while let Some((line, rest)) = pattern.split_first() {
808                        pattern = rest;
809                        if line.text() == "???" {
810                            break;
811                        } else {
812                            builder.patterns.push(parse_pattern_line(
813                                grok,
814                                line.location.clone(),
815                                &line.text_untrimmed()[indent.min(line.text_untrimmed().len())..],
816                                '?',
817                            )?);
818                        }
819                    }
820                } else if line.text() == "!" || line.text() == "?" {
821                    builder.patterns.push(parse_pattern_line(
822                        grok,
823                        line.location.clone(),
824                        "",
825                        line.first_char().unwrap(),
826                    )?);
827                } else if line.starts_with("! ") || line.starts_with("? ") {
828                    builder.patterns.push(parse_pattern_line(
829                        grok,
830                        line.location.clone(),
831                        &line.text()[2..],
832                        line.first_char().unwrap(),
833                    )?);
834                } else if let Some(pattern) = line.strip_prefix("pattern ") {
835                    if let Some((name, pattern)) = pattern.split_once(' ') {
836                        grok.add_pattern(name, pattern);
837                    } else {
838                        return Err(ScriptError::new(
839                            ScriptErrorType::InvalidPatternDefinition,
840                            line.location.clone(),
841                        ));
842                    }
843                } else if line.text() == "end" {
844                    builder.patterns.push(OutputPattern {
845                        pattern: OutputPatternType::End,
846                        ignore: Default::default(),
847                        reject: Default::default(),
848                        location: line.location.clone(),
849                    });
850                } else if line.text() == "none" {
851                    builder.patterns.push(OutputPattern {
852                        pattern: OutputPatternType::None,
853                        ignore: Default::default(),
854                        reject: Default::default(),
855                        location: line.location.clone(),
856                    });
857                } else {
858                    return Err(ScriptError::new_with_data(
859                        ScriptErrorType::InvalidPattern,
860                        line.location.clone(),
861                        format!("{:?}", line.text()),
862                    ));
863                }
864            }
865        }
866        ScriptV0Segment::SubBlock(location, text, args, segments) => {
867            if text != "if" && !args.is_empty() {
868                return Err(ScriptError::new_with_data(
869                    ScriptErrorType::InvalidPattern,
870                    location.clone(),
871                    format!("{text} {args:?}"),
872                ));
873            }
874            if text == "reject" {
875                let next = parse_script_v0_segments(segments, grok)?;
876                if !next.ignore.is_empty() || !next.reject.is_empty() {
877                    return Err(ScriptError::new(
878                        ScriptErrorType::InvalidPattern,
879                        location.clone(),
880                    ));
881                }
882                builder.reject.extend(next.patterns);
883            } else if text == "ignore" {
884                let next = parse_script_v0_segments(segments, grok)?;
885                if !next.ignore.is_empty() || !next.reject.is_empty() {
886                    return Err(ScriptError::new(
887                        ScriptErrorType::InvalidPattern,
888                        location.clone(),
889                    ));
890                }
891                builder.ignore.extend(next.patterns);
892            } else if text == "if" {
893                let condition = parse_if_condition(location.clone(), args)?;
894                let new_builder = parse_script_v0_segments(segments, grok)?;
895                let pattern = OutputPattern {
896                    pattern: OutputPatternType::If(
897                        condition,
898                        Box::new(OutputPattern::new_sequence(
899                            location.clone(),
900                            new_builder.patterns,
901                        )),
902                    ),
903                    ignore: Arc::new(new_builder.ignore),
904                    reject: Arc::new(new_builder.reject),
905                    location: location.clone(),
906                };
907                builder.patterns.push(pattern);
908            } else {
909                let factory: &dyn Fn(&ScriptLocation, Vec<OutputPattern>) -> OutputPatternType =
910                    match text.as_str() {
911                        "repeat" => &|location, patterns| {
912                            OutputPatternType::Repeat(Box::new(OutputPattern::new_sequence(
913                                location.clone(),
914                                patterns,
915                            )))
916                        },
917                        "choice" => &|_location, patterns| OutputPatternType::Choice(patterns),
918                        "unordered" => {
919                            &|_location, patterns| OutputPatternType::Unordered(patterns)
920                        }
921                        "sequence" => &|_location, patterns| OutputPatternType::Sequence(patterns),
922                        "optional" => &|location, patterns| {
923                            OutputPatternType::Optional(Box::new(OutputPattern::new_sequence(
924                                location.clone(),
925                                patterns,
926                            )))
927                        },
928                        "*" => &|location: &ScriptLocation, patterns| {
929                            OutputPatternType::Any(Box::new(OutputPattern::new_sequence(
930                                location.clone(),
931                                patterns,
932                            )))
933                        },
934                        _ => {
935                            return Err(ScriptError::new_with_data(
936                                ScriptErrorType::InvalidPattern,
937                                location.clone(),
938                                text.to_string(),
939                            ));
940                        }
941                    };
942
943                let new_builder = parse_script_v0_segments(segments, grok)?;
944                let pattern = OutputPattern {
945                    pattern: factory(location, new_builder.patterns),
946                    ignore: Arc::new(new_builder.ignore),
947                    reject: Arc::new(new_builder.reject),
948                    location: location.clone(),
949                };
950                builder.patterns.push(pattern);
951            }
952        }
953        ScriptV0Segment::Semi(location, text, args) => {
954            return Err(ScriptError::new_with_data(
955                ScriptErrorType::UnsupportedCommandPosition,
956                location.clone(),
957                format!("{text} {args:?}"),
958            ));
959        }
960    }
961    Ok(())
962}
963
964fn parse_if_condition(
965    location: ScriptLocation,
966    args: &[ShellBit],
967) -> Result<IfCondition, ScriptError> {
968    if args.len() == 1 && args[0] == "true" {
969        Ok(IfCondition::True)
970    } else if args.len() == 1 && args[0] == "false" {
971        Ok(IfCondition::False)
972    } else if args.len() == 3 && args[1] == "==" {
973        Ok(IfCondition::EnvEq(
974            false,
975            args[0].to_string(),
976            args[2].clone(),
977        ))
978    } else if args.len() == 3 && args[1] == "!=" {
979        Ok(IfCondition::EnvEq(
980            true,
981            args[0].to_string(),
982            args[2].clone(),
983        ))
984    } else {
985        return Err(ScriptError::new_with_data(
986            ScriptErrorType::InvalidIfCondition,
987            location.clone(),
988            format!("{args:?}"),
989        ));
990    }
991}
992
993fn parse_pattern_line(
994    grok: &mut Grok,
995    location: ScriptLocation,
996    text: &str,
997    line_start: char,
998) -> Result<OutputPattern, ScriptError> {
999    if text.is_empty() {
1000        return Ok(OutputPattern {
1001            pattern: OutputPatternType::Literal("".to_string()),
1002            ignore: Default::default(),
1003            reject: Default::default(),
1004            location,
1005        });
1006    }
1007
1008    let text = text.trim_end();
1009
1010    if line_start == '!' {
1011        if !text.contains("%") {
1012            return Ok(OutputPattern {
1013                pattern: OutputPatternType::Literal(text.to_string()),
1014                ignore: Default::default(),
1015                reject: Default::default(),
1016                location,
1017            });
1018        }
1019
1020        let pattern = GrokPattern::compile(grok, text, true).map_err(|e| {
1021            ScriptError::new_with_data(
1022                ScriptErrorType::InvalidPattern,
1023                location.clone(),
1024                e.to_string(),
1025            )
1026        })?;
1027        Ok(OutputPattern {
1028            pattern: OutputPatternType::Pattern(Arc::new(pattern)),
1029            ignore: Default::default(),
1030            reject: Default::default(),
1031            location,
1032        })
1033    } else if line_start == '?' {
1034        let text = if text.ends_with('$') {
1035            format!(r#"^{text}"#)
1036        } else {
1037            format!(r#"^{text}\s*$"#)
1038        };
1039        let pattern = GrokPattern::compile(grok, &text, false).map_err(|e| {
1040            ScriptError::new_with_data(
1041                ScriptErrorType::InvalidPattern,
1042                location.clone(),
1043                e.to_string(),
1044            )
1045        })?;
1046        Ok(OutputPattern {
1047            pattern: OutputPatternType::Pattern(Arc::new(pattern)),
1048            ignore: Default::default(),
1049            reject: Default::default(),
1050            location,
1051        })
1052    } else {
1053        unreachable!("Invalid line start: {line_start}");
1054    }
1055}
1056
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `pattern` as a v0 script fragment and returns the first
    /// top-level pattern it produces.
    fn parse_pattern(pattern: &str) -> Result<OutputPattern, ScriptError> {
        let lines = ScriptLine::parse(ScriptFile::new("test.cli"), pattern);
        let segments = segment_script(true, &mut lines.as_slice()).unwrap();
        let normalized = normalize_segments(segments);
        let builder = parse_script_v0_segments(&normalized, &mut Grok::with_default_patterns())?;
        Ok(builder
            .patterns
            .first()
            .expect("fragment should produce at least one pattern")
            .clone())
    }

    /// Wraps the lines of `lines` in a `Lines` value for matching.
    fn parse_lines(lines: &str) -> Result<Lines, ScriptError> {
        Ok(Lines::new(
            lines.lines().map(|l| l.to_string()).collect::<Vec<_>>(),
        ))
    }

    /// Single-line (`! x`) and multi-line (`!!!`) literal syntax both yield
    /// patterns that match the same output.
    #[test]
    fn test_v0_patterns() {
        let sources = ["! a\n! b\n! c\n", "!!!\na\nb\nc\n!!!\n"];

        let context = ScriptRunContext::default();
        let context = OutputMatchContext::new(&context);
        let output = parse_lines("a\nb\nc\n").unwrap();

        for source in sources {
            let pattern = parse_pattern(source).unwrap();
            let result = pattern.matches(context.clone(), output.clone());
            assert!(result.is_ok(), "pattern parsed from {source:?} should match");
        }
    }

    /// Nested sub-blocks parse, and the outermost block determines the type
    /// of the resulting pattern.
    #[test]
    fn test_v0_block_pattern() {
        let source = r#"
        repeat {
            choice {
    ? pattern1 %{DATA}
    ? pattern2 %{DATA}
    ? pattern3 %{DATA}
            }
        }
        "#;
        let pattern = parse_pattern(source).unwrap();
        assert!(
            matches!(pattern.pattern, OutputPatternType::Repeat(_)),
            "expected a repeat pattern, got {pattern:?}"
        );
    }
}