//! osp_cli/completion/parse.rs — command-line tokenization and parsing for
//! interactive shell completion.
1use crate::completion::model::{CommandLine, CursorState, FlagOccurrence, ParsedLine, QuoteStyle};
2use std::collections::BTreeMap;
3
/// A lexed token together with the byte range it occupies in the source line.
///
/// `value` has quoting and escapes resolved, while `start..end` covers the
/// raw text including any surrounding quote characters (e.g. for
/// `"alice smith"` the span covers both quotes but `value` is `alice smith`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Token text with quotes/escapes stripped.
    pub value: String,
    /// Byte offset of the token's first raw character (or opening quote).
    pub start: usize,
    /// Byte offset one past the token's last raw character (or closing quote).
    pub end: usize,
}
10
/// Quote/escape state machine shared by every tokenizer and quote tracker in
/// this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexState {
    /// Outside any quote or escape.
    Normal,
    /// Inside `'...'`; only a closing `'` leaves this state (no escapes).
    SingleQuote,
    /// Inside `"..."`; `\` starts an escape, `"` terminates.
    DoubleQuote,
    /// Just saw `\` outside quotes; the next char is taken literally.
    EscapeNormal,
    /// Just saw `\` inside double quotes; the next char is taken literally.
    EscapeDouble,
}
19
/// Parsed line assembly after tokenization.
///
/// The parser keeps the command head separate until it sees the first option-like
/// token. After that point the rest of the line is interpreted as flags, args,
/// or pipes. That mirrors how the completer reasons about scope: command path
/// first, then option/value mode.
#[derive(Debug, Default)]
struct ParseState {
    /// Command-path tokens seen before the first flag-like token.
    head: Vec<String>,
    /// Flags and positionals in original order.
    tail: Vec<crate::completion::model::TailItem>,
    /// Values aggregated per flag name, merged across repeated occurrences.
    flag_values: BTreeMap<String, Vec<String>>,
    /// Tokens after the first `|`, handed off to the DSL side.
    pipes: Vec<String>,
    /// True once a `|` was seen, even if nothing follows it.
    has_pipe: bool,
}
34
35impl ParseState {
36    fn finish(self) -> CommandLine {
37        CommandLine {
38            head: self.head,
39            tail: self.tail,
40            flag_values: self.flag_values,
41            pipes: self.pipes,
42            has_pipe: self.has_pipe,
43        }
44    }
45
46    fn start_pipe<'a>(&mut self, iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>) {
47        self.has_pipe = true;
48        self.pipes.extend(iter.cloned());
49    }
50
51    fn collect_positional_tail<'a>(
52        &mut self,
53        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
54    ) {
55        while let Some(next) = iter.next() {
56            if next == "|" {
57                self.start_pipe(iter);
58                break;
59            }
60            self.tail
61                .push(crate::completion::model::TailItem::Positional(next.clone()));
62        }
63    }
64
65    fn parse_flag_tail<'a>(
66        &mut self,
67        first_token: String,
68        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
69    ) {
70        // Once the parser has seen the first flag-like token, the rest of the
71        // line stays in "tail mode". From that point on we only distinguish
72        // between more flags, their values, `--`, and a pipe into DSL mode.
73        let mut current = first_token;
74        loop {
75            if current == "|" {
76                self.start_pipe(iter);
77                return;
78            }
79
80            if current == "--" {
81                self.collect_positional_tail(iter);
82                return;
83            }
84
85            if let Some((flag, value)) = split_inline_flag_value(&current) {
86                let mut occurrence_values = Vec::new();
87                if !value.is_empty() {
88                    self.flag_values
89                        .entry(flag.clone())
90                        .or_default()
91                        .push(value.clone());
92                    occurrence_values.push(value);
93                } else {
94                    self.flag_values.entry(flag.clone()).or_default();
95                }
96                self.tail
97                    .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
98                        name: flag.clone(),
99                        values: occurrence_values,
100                    }));
101                let Some(next) = iter.next().cloned() else {
102                    break;
103                };
104                current = next;
105                continue;
106            }
107
108            let flag = current;
109            let values = self.consume_flag_values(iter);
110            self.tail
111                .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
112                    name: flag.clone(),
113                    values: values.clone(),
114                }));
115            self.flag_values
116                .entry(flag.clone())
117                .or_default()
118                .extend(values);
119
120            let Some(next) = iter.next().cloned() else {
121                break;
122            };
123            current = next;
124        }
125    }
126
127    fn consume_flag_values<'a>(
128        &mut self,
129        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
130    ) -> Vec<String> {
131        let mut values = Vec::new();
132
133        while let Some(next) = iter.peek() {
134            if *next == "|" || *next == "--" {
135                break;
136            }
137            if looks_like_flag_start(next) {
138                break;
139            }
140
141            values.push((*next).clone());
142            iter.next();
143        }
144
145        values
146    }
147}
148
/// Stateless tokenizer/parser for interactive command lines.
#[derive(Debug, Clone, Default)]
pub struct CommandLineParser;
151
/// Result of `CommandLineParser::analyze`: the parsed line plus the
/// cursor-local completion state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCursorLine {
    /// Tokenization/parse of the full line and of the text before the cursor.
    pub parsed: ParsedLine,
    /// Stub text, replace range, and quote style at the cursor.
    pub cursor: CursorState,
}
157
/// Intermediate result of the single-pass cursor-aware tokenization.
#[derive(Debug, Clone)]
struct CursorTokenization {
    /// Tokens of the whole line.
    full_tokens: Vec<String>,
    /// Snapshot of the tokens (including any partial token) up to the cursor.
    cursor_tokens: Vec<String>,
    /// Quote the cursor sits inside at its position, if any.
    cursor_quote_style: Option<QuoteStyle>,
}
164
impl CommandLineParser {
    /// Tokenization is intentionally permissive for interactive use.
    ///
    /// If the user is mid-quote while pressing tab, we retry with a synthetic
    /// closing quote before finally falling back to whitespace splitting.
    pub fn tokenize(&self, line: &str) -> Vec<String> {
        self.tokenize_inner(line)
            .or_else(|| self.tokenize_inner(&format!("{line}\"")))
            .or_else(|| self.tokenize_inner(&format!("{line}'")))
            .unwrap_or_else(|| line.split_whitespace().map(str::to_string).collect())
    }

    /// Tokenize `line` while recording each token's byte span.
    ///
    /// Spans cover the raw text including surrounding quotes; the `value`
    /// inside has quotes/escapes stripped. Falls back to whitespace splitting
    /// when the line ends in an unmatched quote.
    pub fn tokenize_with_spans(&self, line: &str) -> Vec<TokenSpan> {
        self.tokenize_with_spans_inner(line)
            .or_else(|| self.tokenize_with_spans_fallback(line))
            .unwrap_or_default()
    }

    /// Parse the full line and the cursor-local prefix from one lexical walk.
    ///
    /// The common case keeps completion analysis in one tokenization pass. If
    /// the line ends in an unmatched quote we fall back to the permissive
    /// tokenization path so interactive behavior stays unchanged.
    pub fn analyze(&self, line: &str, cursor: usize) -> ParsedCursorLine {
        // Clamp first to the line length, then back to a char boundary so the
        // slice below cannot panic on multi-byte characters.
        let safe_cursor = clamp_to_char_boundary(line, cursor.min(line.len()));
        let before_cursor = &line[..safe_cursor];

        if let Some(tokenized) = self.tokenize_with_cursor_inner(line, safe_cursor) {
            let full_cmd = self.parse(&tokenized.full_tokens);
            let cursor_cmd = self.parse(&tokenized.cursor_tokens);
            let cursor = self.build_cursor_state(
                before_cursor,
                safe_cursor,
                &tokenized.cursor_tokens,
                tokenized.cursor_quote_style,
            );

            return ParsedCursorLine {
                parsed: ParsedLine {
                    safe_cursor,
                    full_tokens: tokenized.full_tokens,
                    cursor_tokens: tokenized.cursor_tokens,
                    full_cmd,
                    cursor_cmd,
                },
                cursor,
            };
        }

        // Unbalanced quote: fall back to two permissive tokenization passes
        // (full line + prefix) so behavior matches the interactive path.
        let full_tokens = self.tokenize(line);
        let cursor_tokens = self.tokenize(before_cursor);
        let full_cmd = self.parse(&full_tokens);
        let cursor_cmd = self.parse(&cursor_tokens);
        let cursor = self.cursor_state(before_cursor, safe_cursor);

        ParsedCursorLine {
            parsed: ParsedLine {
                safe_cursor,
                full_tokens,
                cursor_tokens,
                full_cmd,
                cursor_cmd,
            },
            cursor,
        }
    }

    /// Strict tokenization: returns `None` when the line ends inside a quote,
    /// letting `tokenize` retry with a synthetic closing quote.
    fn tokenize_inner(&self, line: &str) -> Option<Vec<String>> {
        let mut out = Vec::new();
        let mut state = LexState::Normal;
        let mut current = String::new();

        for ch in line.chars() {
            match state {
                LexState::Normal => {
                    if ch.is_whitespace() {
                        push_current(&mut out, &mut current);
                    } else {
                        match ch {
                            // `|` is always its own token, even unspaced.
                            '|' => {
                                push_current(&mut out, &mut current);
                                out.push("|".to_string());
                            }
                            '\\' => state = LexState::EscapeNormal,
                            '\'' => state = LexState::SingleQuote,
                            '"' => state = LexState::DoubleQuote,
                            _ => current.push(ch),
                        }
                    }
                }
                LexState::SingleQuote => {
                    if ch == '\'' {
                        state = LexState::Normal;
                    } else {
                        current.push(ch);
                    }
                }
                LexState::DoubleQuote => match ch {
                    '"' => state = LexState::Normal,
                    '\\' => state = LexState::EscapeDouble,
                    _ => current.push(ch),
                },
                LexState::EscapeNormal => {
                    current.push(ch);
                    state = LexState::Normal;
                }
                LexState::EscapeDouble => {
                    current.push(ch);
                    state = LexState::DoubleQuote;
                }
            }
        }

        // A trailing escape (EscapeNormal/EscapeDouble) or open quote means
        // the lex is incomplete; signal the caller to retry.
        match state {
            LexState::Normal => {
                push_current(&mut out, &mut current);
                Some(out)
            }
            _ => None,
        }
    }

    /// Span-tracking twin of `tokenize_inner`; same state machine, but also
    /// records where each token starts (quote characters included in spans).
    fn tokenize_with_spans_inner(&self, line: &str) -> Option<Vec<TokenSpan>> {
        let mut out = Vec::new();
        let mut state = LexState::Normal;
        let mut current = String::new();
        // Byte index where the current token's raw text began, if a token is
        // in progress.
        let mut current_start = None;

        for (idx, ch) in line.char_indices() {
            match state {
                LexState::Normal => {
                    if ch.is_whitespace() {
                        push_current_span(&mut out, &mut current, &mut current_start, idx);
                    } else {
                        match ch {
                            '|' => {
                                push_current_span(&mut out, &mut current, &mut current_start, idx);
                                out.push(TokenSpan {
                                    value: "|".to_string(),
                                    start: idx,
                                    end: idx + ch.len_utf8(),
                                });
                            }
                            '\\' => {
                                current_start.get_or_insert(idx);
                                state = LexState::EscapeNormal;
                            }
                            '\'' => {
                                // Token span starts at the opening quote.
                                current_start.get_or_insert(idx);
                                state = LexState::SingleQuote;
                            }
                            '"' => {
                                current_start.get_or_insert(idx);
                                state = LexState::DoubleQuote;
                            }
                            _ => {
                                current_start.get_or_insert(idx);
                                current.push(ch);
                            }
                        }
                    }
                }
                LexState::SingleQuote => {
                    if ch == '\'' {
                        state = LexState::Normal;
                    } else {
                        current.push(ch);
                    }
                }
                LexState::DoubleQuote => match ch {
                    '"' => state = LexState::Normal,
                    '\\' => state = LexState::EscapeDouble,
                    _ => current.push(ch),
                },
                LexState::EscapeNormal => {
                    current.push(ch);
                    state = LexState::Normal;
                }
                LexState::EscapeDouble => {
                    current.push(ch);
                    state = LexState::DoubleQuote;
                }
            }
        }

        match state {
            LexState::Normal => {
                push_current_span(&mut out, &mut current, &mut current_start, line.len());
                Some(out)
            }
            _ => None,
        }
    }

    /// Whitespace-split fallback for unbalanced lines, recovering byte
    /// offsets by searching each token in order from the previous token's end.
    fn tokenize_with_spans_fallback(&self, line: &str) -> Option<Vec<TokenSpan>> {
        let mut out = Vec::new();
        let mut search_from = 0usize;
        for token in line.split_whitespace() {
            let rel = line.get(search_from..)?.find(token)?;
            let start = search_from + rel;
            let end = start + token.len();
            out.push(TokenSpan {
                value: token.to_string(),
                start,
                end,
            });
            search_from = end;
        }
        Some(out)
    }

    /// Parse tokens into head / tail / pipe segments.
    ///
    /// Head tokens accumulate until the first `|`, `--`, or `-`-prefixed
    /// token, after which the matching tail-mode routine consumes the rest.
    pub fn parse(&self, tokens: &[String]) -> CommandLine {
        let mut state = ParseState::default();
        let mut iter = tokens.iter().peekable();

        while let Some(token) = iter.next() {
            if token == "|" {
                state.start_pipe(&mut iter);
                return state.finish();
            }
            if token == "--" {
                state.collect_positional_tail(&mut iter);
                return state.finish();
            }
            // NOTE(review): any `-`-prefixed token ends the head here, so a
            // bare negative number directly after the command (e.g. `cmd -5`)
            // is treated as a flag — confirm that is acceptable.
            if token.starts_with('-') {
                state.parse_flag_tail(token.clone(), &mut iter);
                return state.finish();
            }
            state.head.push(token.clone());
        }

        state.finish()
    }

    /// Build the cursor state from the text before the cursor alone
    /// (re-tokenizes; `analyze` uses the single-pass variant instead).
    pub fn cursor_state(&self, text_before_cursor: &str, safe_cursor: usize) -> CursorState {
        let tokens = self.tokenize(text_before_cursor);
        self.build_cursor_state(
            text_before_cursor,
            safe_cursor,
            &tokens,
            self.compute_stub_quote(text_before_cursor),
        )
    }

    /// Assemble the `CursorState`: the cleaned stub (quotes stripped), the
    /// raw stub text, and the byte range a completion should replace.
    fn build_cursor_state(
        &self,
        text_before_cursor: &str,
        safe_cursor: usize,
        tokens: &[String],
        quote_style: Option<QuoteStyle>,
    ) -> CursorState {
        let token_stub = self.compute_stub(text_before_cursor, tokens);
        let replace_start = token_replace_start(text_before_cursor, safe_cursor, quote_style);
        let raw_stub = text_before_cursor
            .get(replace_start..safe_cursor)
            .unwrap_or("")
            .to_string();

        CursorState::new(
            token_stub,
            raw_stub,
            replace_start..safe_cursor,
            quote_style,
        )
    }

    /// Single-pass tokenization that snapshots the token list at the cursor.
    ///
    /// The snapshot (tokens so far plus any partial token) is taken *before*
    /// consuming the character at `safe_cursor`; an end-of-line cursor is
    /// handled after the loop. Returns `None` on an unbalanced quote so
    /// `analyze` can fall back.
    fn tokenize_with_cursor_inner(
        &self,
        line: &str,
        safe_cursor: usize,
    ) -> Option<CursorTokenization> {
        let mut out = Vec::new();
        let mut state = LexState::Normal;
        let mut current = String::new();
        let mut cursor_tokens = None;
        let mut cursor_quote_style = None;

        for (idx, ch) in line.char_indices() {
            if idx == safe_cursor && cursor_tokens.is_none() {
                cursor_tokens = Some(snapshot_tokens(&out, &current));
                cursor_quote_style = Some(quote_style_for_state(state));
            }

            match state {
                LexState::Normal => {
                    if ch.is_whitespace() {
                        push_current(&mut out, &mut current);
                    } else {
                        match ch {
                            '|' => {
                                push_current(&mut out, &mut current);
                                out.push("|".to_string());
                            }
                            '\\' => state = LexState::EscapeNormal,
                            '\'' => state = LexState::SingleQuote,
                            '"' => state = LexState::DoubleQuote,
                            _ => current.push(ch),
                        }
                    }
                }
                LexState::SingleQuote => {
                    if ch == '\'' {
                        state = LexState::Normal;
                    } else {
                        current.push(ch);
                    }
                }
                LexState::DoubleQuote => match ch {
                    '"' => state = LexState::Normal,
                    '\\' => state = LexState::EscapeDouble,
                    _ => current.push(ch),
                },
                LexState::EscapeNormal => {
                    current.push(ch);
                    state = LexState::Normal;
                }
                LexState::EscapeDouble => {
                    current.push(ch);
                    state = LexState::DoubleQuote;
                }
            }
        }

        // Cursor sits at end-of-line: snapshot against the final lex state.
        if safe_cursor == line.len() && cursor_tokens.is_none() {
            cursor_tokens = Some(snapshot_tokens(&out, &current));
            cursor_quote_style = Some(quote_style_for_state(state));
        }

        match state {
            LexState::Normal => {
                push_current(&mut out, &mut current);
                Some(CursorTokenization {
                    full_tokens: out,
                    // `cursor_quote_style` is Option<Option<_>>; flatten the
                    // "never snapshotted" case to "no quote".
                    cursor_tokens: cursor_tokens.unwrap_or_default(),
                    cursor_quote_style: cursor_quote_style.unwrap_or(None),
                })
            }
            _ => None,
        }
    }

    /// Derive the completion stub (the partial word being typed) from the
    /// tokens before the cursor.
    fn compute_stub(&self, text_before_cursor: &str, tokens: &[String]) -> String {
        // NOTE(review): only a literal space is treated as "between tokens";
        // a trailing tab would still look mid-token — confirm intended.
        if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
            return String::new();
        }
        let Some(last) = tokens.last() else {
            return String::new();
        };

        // `--flag=` means the user is about to type a value: empty stub.
        // (The `contains('=')` check is implied by `ends_with('=')`.)
        if last.starts_with("--") && last.ends_with('=') && last.contains('=') {
            return String::new();
        }

        last.clone()
    }

    /// Quote style the cursor would be inside given the text before it.
    pub fn compute_stub_quote(&self, text_before_cursor: &str) -> Option<QuoteStyle> {
        current_quote_state(text_before_cursor)
    }
}
525
/// Copy the completed tokens and append the in-progress token (if any) so the
/// cursor snapshot includes the partial word being typed.
fn snapshot_tokens(out: &[String], current: &str) -> Vec<String> {
    out.iter()
        .cloned()
        .chain((!current.is_empty()).then(|| current.to_string()))
        .collect()
}
533
/// Walk `cursor` back to the nearest UTF-8 char boundary at or before it, so
/// slicing `&input[..cursor]` cannot panic inside a multi-byte character.
fn clamp_to_char_boundary(input: &str, cursor: usize) -> usize {
    (0..=cursor)
        .rev()
        .find(|&idx| input.is_char_boundary(idx))
        .unwrap_or(0) // 0 is always a boundary, so this is unreachable.
}
544
545fn quote_style_for_state(state: LexState) -> Option<QuoteStyle> {
546    match state {
547        LexState::SingleQuote => Some(QuoteStyle::Single),
548        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
549        LexState::Normal | LexState::EscapeNormal => None,
550    }
551}
552
/// Split an inline `--flag=value` token into `(flag, value)`.
///
/// Returns `None` for tokens that are not long options or carry no `=`, so a
/// bare `--flag` stays on the flag/value path while `--flag=` yields an empty
/// value. Only the first `=` splits: `--a=b=c` gives `("--a", "b=c")`.
fn split_inline_flag_value(token: &str) -> Option<(String, String)> {
    if !token.starts_with("--") {
        return None;
    }
    // `split_once` replaces the old `contains('=')` + `splitn` pair: it
    // already returns `None` when there is no `=`.
    let (flag, value) = token.split_once('=')?;
    Some((flag.to_string(), value.to_string()))
}
563
/// Flush the in-progress token buffer into `out`; an empty buffer emits no
/// token. The buffer is left empty and reusable.
fn push_current(out: &mut Vec<String>, current: &mut String) {
    if current.is_empty() {
        return;
    }
    out.push(std::mem::take(current));
}
569
570fn push_current_span(
571    out: &mut Vec<TokenSpan>,
572    current: &mut String,
573    current_start: &mut Option<usize>,
574    end: usize,
575) {
576    if !current.is_empty() {
577        out.push(TokenSpan {
578            value: std::mem::take(current),
579            start: current_start.take().unwrap_or(end),
580            end,
581        });
582    } else {
583        *current_start = None;
584    }
585}
586
/// True when a token should end a flag's value list and start a new flag.
///
/// A lone `-` and negative numbers (`-5`, `-2.5`) are values, not flags.
fn looks_like_flag_start(token: &str) -> bool {
    if !token.starts_with('-') || token == "-" {
        return false;
    }
    // Same check as `is_number`, inlined: numeric tokens are flag values.
    token.parse::<f64>().is_err()
}
590
/// True when `text` parses as an `f64` — covers integers, decimals, and
/// exponents. Note `f64` parsing also accepts spellings like `inf`/`NaN`.
fn is_number(text: &str) -> bool {
    text.parse::<f64>().is_ok()
}
594
595fn current_quote_state(text: &str) -> Option<QuoteStyle> {
596    let mut state = LexState::Normal;
597
598    for ch in text.chars() {
599        match state {
600            LexState::Normal => match ch {
601                '\\' => state = LexState::EscapeNormal,
602                '\'' => state = LexState::SingleQuote,
603                '"' => state = LexState::DoubleQuote,
604                _ => {}
605            },
606            LexState::SingleQuote => {
607                if ch == '\'' {
608                    state = LexState::Normal;
609                }
610            }
611            LexState::DoubleQuote => match ch {
612                '"' => state = LexState::Normal,
613                '\\' => state = LexState::EscapeDouble,
614                _ => {}
615            },
616            LexState::EscapeNormal => state = LexState::Normal,
617            LexState::EscapeDouble => state = LexState::DoubleQuote,
618        }
619    }
620
621    match state {
622        LexState::SingleQuote => Some(QuoteStyle::Single),
623        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
624        LexState::Normal | LexState::EscapeNormal => None,
625    }
626}
627
/// Compute the byte offset where a completion replacement should begin.
///
/// Scans the text before the cursor with the shared quote state machine,
/// tracking where the last token started. When the cursor is inside an open
/// quote, `quote_start` (the byte just *after* the opening quote) is used so
/// the replacement preserves the quote character itself; otherwise the token
/// start is used. A cursor right after a space replaces nothing
/// (`safe_cursor..safe_cursor`).
fn token_replace_start(
    text_before_cursor: &str,
    safe_cursor: usize,
    quote_style: Option<QuoteStyle>,
) -> usize {
    // NOTE(review): as in `compute_stub`, only a literal trailing space is
    // treated as "between tokens"; a trailing tab is not — confirm intended.
    if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
        return safe_cursor;
    }

    let mut state = LexState::Normal;
    // Start of the token currently being scanned.
    let mut token_start = 0usize;
    // Whether we are inside a token (vs. between whitespace runs).
    let mut token_active = false;
    // Byte just after the most recent opening quote in the current token.
    let mut quote_start = None;

    for (idx, ch) in text_before_cursor.char_indices() {
        match state {
            LexState::Normal => {
                if ch.is_whitespace() {
                    token_active = false;
                    token_start = idx + ch.len_utf8();
                    quote_start = None;
                    continue;
                }

                if !token_active {
                    token_active = true;
                    token_start = idx;
                }

                match ch {
                    '\'' => {
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::SingleQuote;
                    }
                    '"' => {
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::DoubleQuote;
                    }
                    '\\' => state = LexState::EscapeNormal,
                    _ => {}
                }
            }
            LexState::SingleQuote => {
                if ch == '\'' {
                    state = LexState::Normal;
                }
            }
            LexState::DoubleQuote => match ch {
                '"' => state = LexState::Normal,
                '\\' => state = LexState::EscapeDouble,
                _ => {}
            },
            LexState::EscapeNormal => state = LexState::Normal,
            LexState::EscapeDouble => state = LexState::DoubleQuote,
        }
    }

    // Inside an open quote, replace only the quoted content; otherwise
    // replace from the token start.
    match quote_style {
        Some(_) => quote_start.unwrap_or(token_start),
        None => token_start,
    }
}
690
// End-to-end coverage of tokenization, spans, parsing (flags / `--` / pipes),
// and cursor-state analysis, including the unbalanced-quote fallbacks.
#[cfg(test)]
mod tests {
    use crate::completion::model::{FlagOccurrence, QuoteStyle};

    use super::CommandLineParser;

    #[test]
    fn tokenize_handles_pipes_and_quotes() {
        let parser = CommandLineParser;
        // A `|` inside quotes is data; the bare `|` is its own token.
        let tokens = parser.tokenize("orch provision --request 'name=a|b' | F name");
        assert_eq!(
            tokens,
            vec![
                "orch",
                "provision",
                "--request",
                "name=a|b",
                "|",
                "F",
                "name"
            ]
        );
    }

    #[test]
    fn tokenize_falls_back_for_unmatched_quotes() {
        let parser = CommandLineParser;
        let tokens = parser.tokenize("--os 'alma");
        assert_eq!(tokens, vec!["--os", "alma"]);
    }

    #[test]
    fn parse_handles_flags_and_pipes() {
        let parser = CommandLineParser;
        let tokens = parser.tokenize("orch provision --provider vmware --os rhel | F name");
        let cmd = parser.parse(&tokens);
        assert_eq!(cmd.head(), ["orch".to_string(), "provision".to_string()]);
        assert_eq!(
            cmd.flag_values("--provider"),
            Some(&["vmware".to_string()][..])
        );
        assert_eq!(cmd.flag_values("--os"), Some(&vec!["rhel".to_string()][..]));
        assert!(cmd.has_pipe());
        assert_eq!(cmd.pipes(), ["F".to_string(), "name".to_string()]);
    }

    #[test]
    fn parse_handles_end_of_options_and_negative_numbers() {
        let parser = CommandLineParser;

        let tokens = parser.tokenize("cmd -- --not-a-flag");
        let cmd = parser.parse(&tokens);
        assert_eq!(cmd.head(), ["cmd".to_string()]);
        assert_eq!(
            cmd.positional_args().cloned().collect::<Vec<_>>(),
            vec!["--not-a-flag".to_string()]
        );

        // `-5` is a negative number, so it stays a value of `--count`.
        let tokens = parser.tokenize("cmd --count -5");
        let cmd = parser.parse(&tokens);
        assert_eq!(
            cmd.flag_values("--count"),
            Some(&vec!["-5".to_string()][..])
        );

        // `--os=` registers the flag with no values.
        let tokens = parser.tokenize("cmd --os=");
        let cmd = parser.parse(&tokens);
        assert_eq!(cmd.flag_values("--os"), Some(&[][..]));
    }

    #[test]
    fn parse_preserves_repeated_flag_occurrence_boundaries() {
        let parser = CommandLineParser;
        let tokens = parser.tokenize("cmd --tag red --mode fast --tag blue");
        let cmd = parser.parse(&tokens);
        let occurrences = cmd.flag_occurrences().cloned().collect::<Vec<_>>();

        assert_eq!(occurrences.len(), 3);
        assert_eq!(occurrences[0].name, "--tag");
        assert_eq!(occurrences[0].values, vec!["red".to_string()]);
        assert_eq!(occurrences[1].name, "--mode");
        assert_eq!(occurrences[1].values, vec!["fast".to_string()]);
        assert_eq!(occurrences[2].name, "--tag");
        assert_eq!(occurrences[2].values, vec!["blue".to_string()]);
    }

    #[test]
    fn compute_stub_respects_equals_boundary() {
        let parser = CommandLineParser;
        let before = "cmd --flag=";
        let cursor = parser.cursor_state(before, before.len());
        assert_eq!(cursor.token_stub, "");
    }

    #[test]
    fn compute_stub_quote_tracks_unfinished_quotes() {
        let parser = CommandLineParser;
        assert_eq!(
            parser.compute_stub_quote("cmd --name \"al"),
            Some(QuoteStyle::Double)
        );
        assert_eq!(
            parser.compute_stub_quote("cmd --name 'al"),
            Some(QuoteStyle::Single)
        );
        assert_eq!(parser.compute_stub_quote("cmd --name al"), None);
    }

    #[test]
    fn cursor_state_tracks_replace_range_inside_open_quotes() {
        let parser = CommandLineParser;
        let line = "ldap user \"oi";
        let cursor = parser.cursor_state(line, line.len());

        // Replace range starts just after the opening quote (byte 11).
        assert_eq!(cursor.token_stub, "oi");
        assert_eq!(cursor.raw_stub, "oi");
        assert_eq!(cursor.replace_range, 11..13);
        assert_eq!(cursor.quote_style, Some(QuoteStyle::Double));
    }

    #[test]
    fn analyze_reuses_one_cursor_snapshot_for_full_and_prefix_parse() {
        let parser = CommandLineParser;
        let line = "orch provision --provider vmware --os rhel | F name";
        let cursor = "orch provision --provider vmware".len();
        let analyzed = parser.analyze(line, cursor);

        assert_eq!(
            analyzed.parsed.full_tokens,
            vec![
                "orch",
                "provision",
                "--provider",
                "vmware",
                "--os",
                "rhel",
                "|",
                "F",
                "name",
            ]
        );
        assert_eq!(
            analyzed.parsed.cursor_tokens,
            vec!["orch", "provision", "--provider", "vmware"]
        );
        assert_eq!(
            analyzed.parsed.cursor_cmd.flag_values("--provider"),
            Some(&["vmware".to_string()][..])
        );
    }

    #[test]
    fn analyze_preserves_cursor_quote_state_inside_balanced_line() {
        let parser = CommandLineParser;
        let line = r#"ldap user "oi ste" --format json"#;
        let cursor = r#"ldap user "oi"#.len();
        let analyzed = parser.analyze(line, cursor);

        assert_eq!(analyzed.cursor.token_stub, "oi");
        assert_eq!(analyzed.cursor.raw_stub, "oi");
        assert_eq!(analyzed.cursor.quote_style, Some(QuoteStyle::Double));
    }

    #[test]
    fn tokenize_with_spans_preserves_offsets_for_quotes_and_pipes() {
        let parser = CommandLineParser;
        let source = r#"ldap user "alice smith" | P uid"#;
        let spans = parser.tokenize_with_spans(source);

        assert_eq!(spans[0].value, "ldap");
        assert_eq!(spans[0].start, 0);
        assert_eq!(spans[2].value, "alice smith");
        // The span covers the quotes even though `value` has them stripped.
        assert_eq!(&source[spans[2].start..spans[2].end], "\"alice smith\"");
        assert_eq!(spans[3].value, "|");
    }

    #[test]
    fn parse_keeps_inline_flag_values_and_repeated_empty_occurrences() {
        let parser = CommandLineParser;
        let tokens = parser.tokenize("cmd --format=json --os= --format=table");
        let cmd = parser.parse(&tokens);

        assert_eq!(
            cmd.flag_occurrences().cloned().collect::<Vec<_>>(),
            vec![
                FlagOccurrence {
                    name: "--format".to_string(),
                    values: vec!["json".to_string()],
                },
                FlagOccurrence {
                    name: "--os".to_string(),
                    values: vec![],
                },
                FlagOccurrence {
                    name: "--format".to_string(),
                    values: vec!["table".to_string()],
                },
            ]
        );
    }

    #[test]
    fn analyze_clamps_non_char_boundary_cursor_back_to_safe_boundary() {
        let parser = CommandLineParser;
        // 'å' is two bytes; len() - 1 falls inside it.
        let line = "ldap user å";
        let analyzed = parser.analyze(line, line.len() - 1);

        assert!(analyzed.parsed.safe_cursor < line.len());
        assert_eq!(analyzed.cursor.token_stub, "");
    }

    #[test]
    fn parse_switches_to_tail_mode_after_first_flag_like_token() {
        let parser = CommandLineParser;
        let tokens = parser.tokenize("ldap user --provider vmware region eu-central");
        let cmd = parser.parse(&tokens);

        // After the first flag, bare tokens become that flag's values.
        assert_eq!(cmd.head(), ["ldap".to_string(), "user".to_string()]);
        assert_eq!(
            cmd.flag_values("--provider"),
            Some(
                &[
                    "vmware".to_string(),
                    "region".to_string(),
                    "eu-central".to_string()
                ][..]
            )
        );
    }

    #[test]
    fn parse_treats_pipe_after_double_dash_as_dsl_boundary() {
        let parser = CommandLineParser;
        let tokens = parser.tokenize("cmd -- literal | F name");
        let cmd = parser.parse(&tokens);

        assert_eq!(cmd.head(), ["cmd".to_string()]);
        assert_eq!(
            cmd.positional_args().cloned().collect::<Vec<_>>(),
            vec!["literal".to_string()]
        );
        assert!(cmd.has_pipe());
        assert_eq!(cmd.pipes(), ["F".to_string(), "name".to_string()]);
    }

    #[test]
    fn tokenize_with_spans_falls_back_for_unmatched_quotes() {
        let parser = CommandLineParser;
        let spans = parser.tokenize_with_spans("cmd --name 'alice");

        // Fallback is plain whitespace splitting: the quote stays in `value`.
        assert_eq!(spans.len(), 3);
        assert_eq!(spans[0].value, "cmd");
        assert_eq!(spans[1].value, "--name");
        assert_eq!(spans[2].value, "'alice");
    }

    #[test]
    fn analyze_falls_back_when_cursor_is_inside_unbalanced_quote() {
        let parser = CommandLineParser;
        let line = r#"ldap user "alice"#;
        let analyzed = parser.analyze(line, line.len());

        assert_eq!(analyzed.parsed.full_tokens, vec!["ldap", "user", "alice"]);
        assert_eq!(analyzed.parsed.cursor_tokens, vec!["ldap", "user", "alice"]);
        assert_eq!(analyzed.cursor.quote_style, Some(QuoteStyle::Double));
        assert_eq!(analyzed.cursor.token_stub, "alice");
    }
}