Skip to main content

osp_cli/completion/
parse.rs

1//! Shell-like tokenization and cursor analysis for completion.
2//!
3//! This module exists to turn a partially typed input line plus a cursor offset
4//! into the structured data the completion engine actually needs: command path,
5//! tail items, pipe mode, and the active replacement span.
6//!
7//! Contract:
8//!
9//! - parsing here stays permissive for interactive use
10//! - the parser owns lexical structure, not suggestion ranking
11//! - callers should rely on `ParsedCursorLine` and `CursorState` rather than
12//!   re-deriving cursor spans themselves
13
14use crate::completion::model::{CommandLine, CursorState, FlagOccurrence, ParsedLine, QuoteStyle};
15use std::collections::BTreeMap;
16
/// Token value with byte offsets into the original input line.
///
/// `value` holds the unescaped, unquoted text, while `start..end` covers the
/// raw source span including any quote or escape characters — so
/// `end - start` may be larger than `value.len()` (e.g. `"alice smith"`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Unescaped token text.
    pub value: String,
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset.
    pub end: usize,
}
27
/// Lexer state shared by every shell-like scanner in this module.
///
/// `tokenize_inner`, `tokenize_with_spans_inner`, `tokenize_with_cursor_inner`,
/// `current_quote_state`, and `token_replace_start` all drive the same state
/// machine; their match arms must stay in sync with these states.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexState {
    /// Outside any quote: whitespace splits tokens and `|` is special.
    Normal,
    /// Inside `'...'`: everything up to the closing quote is literal.
    SingleQuote,
    /// Inside `"..."`: a backslash starts an escape.
    DoubleQuote,
    /// Just saw `\` outside quotes: the next char is taken literally.
    EscapeNormal,
    /// Just saw `\` inside double quotes: the next char is taken literally.
    EscapeDouble,
}
36
/// Parsed line assembly after tokenization.
///
/// The parser keeps the command head separate until it sees the first option-like
/// token. After that point the rest of the line is interpreted as flags, args,
/// or pipes. That mirrors how the completer reasons about scope: command path
/// first, then option/value mode.
#[derive(Debug, Default)]
struct ParseState {
    /// Command-path tokens seen before the first flag, `--`, or pipe.
    head: Vec<String>,
    /// Ordered flag/positional items after the head; preserves repeated flags.
    tail: Vec<crate::completion::model::TailItem>,
    /// Values aggregated per flag name, merged across repeated occurrences.
    flag_values: BTreeMap<String, Vec<String>>,
    /// Tokens following the first unquoted `|`.
    pipes: Vec<String>,
    /// Whether an unquoted `|` was seen anywhere on the line.
    has_pipe: bool,
}
51
impl ParseState {
    /// Consumes the accumulator and produces the public `CommandLine` view.
    fn finish(self) -> CommandLine {
        CommandLine {
            head: self.head,
            tail: self.tail,
            flag_values: self.flag_values,
            pipes: self.pipes,
            has_pipe: self.has_pipe,
        }
    }

    /// Marks the line as piped and drains every remaining token into `pipes`.
    ///
    /// Called when an unquoted `|` token is seen; everything after it belongs
    /// to the downstream DSL, not to the command being completed.
    fn start_pipe<'a>(&mut self, iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>) {
        self.has_pipe = true;
        self.pipes.extend(iter.cloned());
    }

    /// After a literal `--`, treats every remaining token as positional —
    /// even `-`/`--`-prefixed ones — until a pipe hands off to DSL mode.
    fn collect_positional_tail<'a>(
        &mut self,
        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
    ) {
        while let Some(next) = iter.next() {
            if next == "|" {
                self.start_pipe(iter);
                break;
            }
            self.tail
                .push(crate::completion::model::TailItem::Positional(next.clone()));
        }
    }

    /// Consumes the rest of the token stream starting from the first
    /// flag-like token, recording flags, their values, `--`, and pipes.
    fn parse_flag_tail<'a>(
        &mut self,
        first_token: String,
        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
    ) {
        // Once the parser has seen the first flag-like token, the rest of the
        // line stays in "tail mode". From that point on we only distinguish
        // between more flags, their values, `--`, and a pipe into DSL mode.
        let mut current = first_token;
        loop {
            if current == "|" {
                self.start_pipe(iter);
                return;
            }

            if current == "--" {
                self.collect_positional_tail(iter);
                return;
            }

            // Inline `--flag=value` form: the value is attached to this token,
            // so following tokens are NOT consumed as values.
            if let Some((flag, value)) = split_inline_flag_value(&current) {
                let mut occurrence_values = Vec::new();
                if !value.is_empty() {
                    self.flag_values
                        .entry(flag.clone())
                        .or_default()
                        .push(value.clone());
                    occurrence_values.push(value);
                } else {
                    // `--flag=` records the flag as seen but valueless.
                    self.flag_values.entry(flag.clone()).or_default();
                }
                self.tail
                    .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
                        name: flag.clone(),
                        values: occurrence_values,
                    }));
                let Some(next) = iter.next().cloned() else {
                    break;
                };
                current = next;
                continue;
            }

            // Space-separated form: greedily consume following non-flag
            // tokens as this flag's values.
            let flag = current;
            let values = self.consume_flag_values(iter);
            self.tail
                .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
                    name: flag.clone(),
                    values: values.clone(),
                }));
            self.flag_values
                .entry(flag.clone())
                .or_default()
                .extend(values);

            let Some(next) = iter.next().cloned() else {
                break;
            };
            current = next;
        }
    }

    /// Peeks ahead and collects consecutive value tokens for the current flag.
    ///
    /// Stops (without consuming) at `|`, `--`, or the next flag-like token.
    /// Negative numbers like `-5` are treated as values, not flags — see
    /// `looks_like_flag_start`.
    fn consume_flag_values<'a>(
        &mut self,
        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
    ) -> Vec<String> {
        let mut values = Vec::new();

        while let Some(next) = iter.peek() {
            if *next == "|" || *next == "--" {
                break;
            }
            if looks_like_flag_start(next) {
                break;
            }

            values.push((*next).clone());
            iter.next();
        }

        values
    }
}
165
/// Shell-like parser used by the completion engine.
///
/// Stateless unit struct: every method takes its input explicitly, so values
/// can be constructed freely (`CommandLineParser`) and shared without care.
#[derive(Debug, Clone, Default)]
pub struct CommandLineParser;
169
/// Parsed command-line state for the full line and the cursor position.
///
/// Produced by [`CommandLineParser::analyze`]; bundles the lexical view of the
/// whole line with the cursor-local replacement span so callers never have to
/// re-derive cursor spans themselves.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCursorLine {
    /// Parsed tokens and command structure.
    pub parsed: ParsedLine,
    /// Cursor-local replacement information.
    pub cursor: CursorState,
}
178
/// Intermediate result of the single-pass, cursor-aware lexer
/// (`tokenize_with_cursor_inner`).
#[derive(Debug, Clone)]
struct CursorTokenization {
    /// Tokens for the entire line.
    full_tokens: Vec<String>,
    /// Tokens snapshotted the moment the scan reached the cursor offset.
    cursor_tokens: Vec<String>,
    /// Quote style active at the cursor, when the cursor sits inside a quote.
    cursor_quote_style: Option<QuoteStyle>,
}
185
186impl CommandLineParser {
187    /// Tokenizes a line using shell-like quoting rules.
188    ///
189    /// Tokenization is intentionally permissive for interactive use. If the
190    /// user is mid-quote while pressing tab, we retry with a synthetic closing
191    /// quote before finally falling back to whitespace splitting.
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// use osp_cli::completion::CommandLineParser;
197    ///
198    /// let parser = CommandLineParser;
199    ///
200    /// assert_eq!(
201    ///     parser.tokenize(r#"ldap user "alice smith""#),
202    ///     vec!["ldap", "user", "alice smith"]
203    /// );
204    /// ```
205    pub fn tokenize(&self, line: &str) -> Vec<String> {
206        self.tokenize_inner(line)
207            .or_else(|| self.tokenize_inner(&format!("{line}\"")))
208            .or_else(|| self.tokenize_inner(&format!("{line}'")))
209            .unwrap_or_else(|| line.split_whitespace().map(str::to_string).collect())
210    }
211
212    /// Tokenizes `line` and preserves byte spans for each token when possible.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// use osp_cli::completion::CommandLineParser;
218    ///
219    /// let spans = CommandLineParser.tokenize_with_spans("ldap user alice");
220    ///
221    /// assert_eq!(spans[0].value, "ldap");
222    /// assert_eq!(spans[1].start, 5);
223    /// assert_eq!(spans[2].end, 15);
224    /// ```
225    pub fn tokenize_with_spans(&self, line: &str) -> Vec<TokenSpan> {
226        self.tokenize_with_spans_inner(line)
227            .or_else(|| self.tokenize_with_spans_fallback(line))
228            .unwrap_or_default()
229    }
230
231    /// Parse the full line and the cursor-local prefix from one lexical walk.
232    ///
233    /// The common case keeps completion analysis in one tokenization pass. If
234    /// the line ends in an unmatched quote we fall back to the permissive
235    /// tokenization path so interactive behavior stays unchanged.
236    ///
237    /// # Examples
238    ///
239    /// ```
240    /// use osp_cli::completion::CommandLineParser;
241    ///
242    /// let parsed = CommandLineParser.analyze("ldap user ali", 13);
243    ///
244    /// assert_eq!(parsed.parsed.cursor_tokens, vec!["ldap", "user", "ali"]);
245    /// assert_eq!(parsed.cursor.token_stub, "ali");
246    /// ```
247    pub fn analyze(&self, line: &str, cursor: usize) -> ParsedCursorLine {
248        let safe_cursor = clamp_to_char_boundary(line, cursor.min(line.len()));
249        let before_cursor = &line[..safe_cursor];
250
251        if let Some(tokenized) = self.tokenize_with_cursor_inner(line, safe_cursor) {
252            let full_cmd = self.parse(&tokenized.full_tokens);
253            let cursor_cmd = self.parse(&tokenized.cursor_tokens);
254            let cursor = self.build_cursor_state(
255                before_cursor,
256                safe_cursor,
257                &tokenized.cursor_tokens,
258                tokenized.cursor_quote_style,
259            );
260
261            return ParsedCursorLine {
262                parsed: ParsedLine {
263                    safe_cursor,
264                    full_tokens: tokenized.full_tokens,
265                    cursor_tokens: tokenized.cursor_tokens,
266                    full_cmd,
267                    cursor_cmd,
268                },
269                cursor,
270            };
271        }
272
273        let full_tokens = self.tokenize(line);
274        let cursor_tokens = self.tokenize(before_cursor);
275        let full_cmd = self.parse(&full_tokens);
276        let cursor_cmd = self.parse(&cursor_tokens);
277        let cursor = self.cursor_state(before_cursor, safe_cursor);
278
279        ParsedCursorLine {
280            parsed: ParsedLine {
281                safe_cursor,
282                full_tokens,
283                cursor_tokens,
284                full_cmd,
285                cursor_cmd,
286            },
287            cursor,
288        }
289    }
290
291    fn tokenize_inner(&self, line: &str) -> Option<Vec<String>> {
292        let mut out = Vec::new();
293        let mut state = LexState::Normal;
294        let mut current = String::new();
295
296        for ch in line.chars() {
297            match state {
298                LexState::Normal => {
299                    if ch.is_whitespace() {
300                        push_current(&mut out, &mut current);
301                    } else {
302                        match ch {
303                            '|' => {
304                                push_current(&mut out, &mut current);
305                                out.push("|".to_string());
306                            }
307                            '\\' => state = LexState::EscapeNormal,
308                            '\'' => state = LexState::SingleQuote,
309                            '"' => state = LexState::DoubleQuote,
310                            _ => current.push(ch),
311                        }
312                    }
313                }
314                LexState::SingleQuote => {
315                    if ch == '\'' {
316                        state = LexState::Normal;
317                    } else {
318                        current.push(ch);
319                    }
320                }
321                LexState::DoubleQuote => match ch {
322                    '"' => state = LexState::Normal,
323                    '\\' => state = LexState::EscapeDouble,
324                    _ => current.push(ch),
325                },
326                LexState::EscapeNormal => {
327                    current.push(ch);
328                    state = LexState::Normal;
329                }
330                LexState::EscapeDouble => {
331                    current.push(ch);
332                    state = LexState::DoubleQuote;
333                }
334            }
335        }
336
337        match state {
338            LexState::Normal => {
339                push_current(&mut out, &mut current);
340                Some(out)
341            }
342            _ => None,
343        }
344    }
345
346    fn tokenize_with_spans_inner(&self, line: &str) -> Option<Vec<TokenSpan>> {
347        let mut out = Vec::new();
348        let mut state = LexState::Normal;
349        let mut current = String::new();
350        let mut current_start = None;
351
352        for (idx, ch) in line.char_indices() {
353            match state {
354                LexState::Normal => {
355                    if ch.is_whitespace() {
356                        push_current_span(&mut out, &mut current, &mut current_start, idx);
357                    } else {
358                        match ch {
359                            '|' => {
360                                push_current_span(&mut out, &mut current, &mut current_start, idx);
361                                out.push(TokenSpan {
362                                    value: "|".to_string(),
363                                    start: idx,
364                                    end: idx + ch.len_utf8(),
365                                });
366                            }
367                            '\\' => {
368                                current_start.get_or_insert(idx);
369                                state = LexState::EscapeNormal;
370                            }
371                            '\'' => {
372                                current_start.get_or_insert(idx);
373                                state = LexState::SingleQuote;
374                            }
375                            '"' => {
376                                current_start.get_or_insert(idx);
377                                state = LexState::DoubleQuote;
378                            }
379                            _ => {
380                                current_start.get_or_insert(idx);
381                                current.push(ch);
382                            }
383                        }
384                    }
385                }
386                LexState::SingleQuote => {
387                    if ch == '\'' {
388                        state = LexState::Normal;
389                    } else {
390                        current.push(ch);
391                    }
392                }
393                LexState::DoubleQuote => match ch {
394                    '"' => state = LexState::Normal,
395                    '\\' => state = LexState::EscapeDouble,
396                    _ => current.push(ch),
397                },
398                LexState::EscapeNormal => {
399                    current.push(ch);
400                    state = LexState::Normal;
401                }
402                LexState::EscapeDouble => {
403                    current.push(ch);
404                    state = LexState::DoubleQuote;
405                }
406            }
407        }
408
409        match state {
410            LexState::Normal => {
411                push_current_span(&mut out, &mut current, &mut current_start, line.len());
412                Some(out)
413            }
414            _ => None,
415        }
416    }
417
418    fn tokenize_with_spans_fallback(&self, line: &str) -> Option<Vec<TokenSpan>> {
419        let mut out = Vec::new();
420        let mut search_from = 0usize;
421        for token in line.split_whitespace() {
422            let rel = line.get(search_from..)?.find(token)?;
423            let start = search_from + rel;
424            let end = start + token.len();
425            out.push(TokenSpan {
426                value: token.to_string(),
427                start,
428                end,
429            });
430            search_from = end;
431        }
432        Some(out)
433    }
434
435    /// Parses tokens into command-path, flag, positional, and pipe segments.
436    ///
437    /// # Examples
438    ///
439    /// ```
440    /// use osp_cli::completion::CommandLineParser;
441    ///
442    /// let tokens = vec![
443    ///     "ldap".to_string(),
444    ///     "user".to_string(),
445    ///     "--json".to_string(),
446    ///     "|".to_string(),
447    ///     "P".to_string(),
448    /// ];
449    /// let parsed = CommandLineParser.parse(&tokens);
450    ///
451    /// assert_eq!(parsed.head(), &["ldap".to_string(), "user".to_string()]);
452    /// assert!(parsed.has_pipe());
453    /// ```
454    pub fn parse(&self, tokens: &[String]) -> CommandLine {
455        let mut state = ParseState::default();
456        let mut iter = tokens.iter().peekable();
457
458        while let Some(token) = iter.next() {
459            if token == "|" {
460                state.start_pipe(&mut iter);
461                return state.finish();
462            }
463            if token == "--" {
464                state.collect_positional_tail(&mut iter);
465                return state.finish();
466            }
467            if token.starts_with('-') {
468                state.parse_flag_tail(token.clone(), &mut iter);
469                return state.finish();
470            }
471            state.head.push(token.clone());
472        }
473
474        state.finish()
475    }
476
477    /// Computes the cursor replacement range and current token stub.
478    ///
479    /// # Examples
480    ///
481    /// ```
482    /// use osp_cli::completion::CommandLineParser;
483    ///
484    /// let cursor = CommandLineParser.cursor_state("ldap user ali", 13);
485    ///
486    /// assert_eq!(cursor.token_stub, "ali");
487    /// assert_eq!(cursor.replace_range, 10..13);
488    /// ```
489    pub fn cursor_state(&self, text_before_cursor: &str, safe_cursor: usize) -> CursorState {
490        let tokens = self.tokenize(text_before_cursor);
491        self.build_cursor_state(
492            text_before_cursor,
493            safe_cursor,
494            &tokens,
495            self.compute_stub_quote(text_before_cursor),
496        )
497    }
498
499    fn build_cursor_state(
500        &self,
501        text_before_cursor: &str,
502        safe_cursor: usize,
503        tokens: &[String],
504        quote_style: Option<QuoteStyle>,
505    ) -> CursorState {
506        let token_stub = self.compute_stub(text_before_cursor, tokens);
507        let replace_start = token_replace_start(text_before_cursor, safe_cursor, quote_style);
508        let raw_stub = text_before_cursor
509            .get(replace_start..safe_cursor)
510            .unwrap_or("")
511            .to_string();
512
513        CursorState::new(
514            token_stub,
515            raw_stub,
516            replace_start..safe_cursor,
517            quote_style,
518        )
519    }
520
521    fn tokenize_with_cursor_inner(
522        &self,
523        line: &str,
524        safe_cursor: usize,
525    ) -> Option<CursorTokenization> {
526        let mut out = Vec::new();
527        let mut state = LexState::Normal;
528        let mut current = String::new();
529        let mut cursor_tokens = None;
530        let mut cursor_quote_style = None;
531
532        for (idx, ch) in line.char_indices() {
533            if idx == safe_cursor && cursor_tokens.is_none() {
534                cursor_tokens = Some(snapshot_tokens(&out, &current));
535                cursor_quote_style = Some(quote_style_for_state(state));
536            }
537
538            match state {
539                LexState::Normal => {
540                    if ch.is_whitespace() {
541                        push_current(&mut out, &mut current);
542                    } else {
543                        match ch {
544                            '|' => {
545                                push_current(&mut out, &mut current);
546                                out.push("|".to_string());
547                            }
548                            '\\' => state = LexState::EscapeNormal,
549                            '\'' => state = LexState::SingleQuote,
550                            '"' => state = LexState::DoubleQuote,
551                            _ => current.push(ch),
552                        }
553                    }
554                }
555                LexState::SingleQuote => {
556                    if ch == '\'' {
557                        state = LexState::Normal;
558                    } else {
559                        current.push(ch);
560                    }
561                }
562                LexState::DoubleQuote => match ch {
563                    '"' => state = LexState::Normal,
564                    '\\' => state = LexState::EscapeDouble,
565                    _ => current.push(ch),
566                },
567                LexState::EscapeNormal => {
568                    current.push(ch);
569                    state = LexState::Normal;
570                }
571                LexState::EscapeDouble => {
572                    current.push(ch);
573                    state = LexState::DoubleQuote;
574                }
575            }
576        }
577
578        if safe_cursor == line.len() && cursor_tokens.is_none() {
579            cursor_tokens = Some(snapshot_tokens(&out, &current));
580            cursor_quote_style = Some(quote_style_for_state(state));
581        }
582
583        match state {
584            LexState::Normal => {
585                push_current(&mut out, &mut current);
586                Some(CursorTokenization {
587                    full_tokens: out,
588                    cursor_tokens: cursor_tokens.unwrap_or_default(),
589                    cursor_quote_style: cursor_quote_style.unwrap_or(None),
590                })
591            }
592            _ => None,
593        }
594    }
595
596    fn compute_stub(&self, text_before_cursor: &str, tokens: &[String]) -> String {
597        if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
598            return String::new();
599        }
600        let Some(last) = tokens.last() else {
601            return String::new();
602        };
603
604        if last.starts_with("--") && last.ends_with('=') && last.contains('=') {
605            return String::new();
606        }
607
608        last.clone()
609    }
610
611    /// Returns the active quote style for the token being edited, if any.
612    ///
613    /// # Examples
614    ///
615    /// ```
616    /// use osp_cli::completion::{CommandLineParser, QuoteStyle};
617    ///
618    /// assert_eq!(
619    ///     CommandLineParser.compute_stub_quote(r#"ldap user "ali"#),
620    ///     Some(QuoteStyle::Double)
621    /// );
622    /// ```
623    pub fn compute_stub_quote(&self, text_before_cursor: &str) -> Option<QuoteStyle> {
624        current_quote_state(text_before_cursor)
625    }
626}
627
/// Clones the completed tokens and appends the in-progress token, if any.
///
/// Used to freeze the lexer's view of the line at the cursor position.
fn snapshot_tokens(out: &[String], current: &str) -> Vec<String> {
    let pending = (!current.is_empty()).then(|| current.to_string());
    out.iter().cloned().chain(pending).collect()
}
635
/// Moves `cursor` left to the nearest UTF-8 char boundary in `input`.
///
/// Returns `cursor` unchanged when it already lies on a boundary; offset 0 is
/// always a boundary, so the scan always terminates.
fn clamp_to_char_boundary(input: &str, cursor: usize) -> usize {
    (0..=cursor)
        .rev()
        .find(|&idx| input.is_char_boundary(idx))
        .unwrap_or(0)
}
646
647fn quote_style_for_state(state: LexState) -> Option<QuoteStyle> {
648    match state {
649        LexState::SingleQuote => Some(QuoteStyle::Single),
650        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
651        LexState::Normal | LexState::EscapeNormal => None,
652    }
653}
654
/// Splits an inline `--flag=value` token into its flag and value parts.
///
/// Returns `None` for tokens that are not `--`-prefixed or carry no `=`, so
/// short options and positional values pass through untouched. An empty value
/// (`--os=`) is preserved as an empty string so the caller can record the flag
/// as seen-but-valueless. Only the first `=` splits; later ones stay in the
/// value.
fn split_inline_flag_value(token: &str) -> Option<(String, String)> {
    if !token.starts_with("--") {
        return None;
    }
    // `split_once` both tests for '=' and splits at its first occurrence,
    // replacing the previous `contains` check plus `splitn`/`unwrap_or` pair.
    let (flag, value) = token.split_once('=')?;
    Some((flag.to_string(), value.to_string()))
}
665
/// Flushes the in-progress token into `out`, leaving `current` empty.
///
/// Empty buffers (e.g. from runs of whitespace) produce no token.
fn push_current(out: &mut Vec<String>, current: &mut String) {
    if current.is_empty() {
        return;
    }
    out.push(std::mem::take(current));
}
671
672fn push_current_span(
673    out: &mut Vec<TokenSpan>,
674    current: &mut String,
675    current_start: &mut Option<usize>,
676    end: usize,
677) {
678    if !current.is_empty() {
679        out.push(TokenSpan {
680            value: std::mem::take(current),
681            start: current_start.take().unwrap_or(end),
682            end,
683        });
684    } else {
685        *current_start = None;
686    }
687}
688
689fn looks_like_flag_start(token: &str) -> bool {
690    token.starts_with('-') && token != "-" && !is_number(token)
691}
692
/// Returns true when `text` parses as a floating-point number (covers `-5`,
/// `3.14`, etc.).
fn is_number(text: &str) -> bool {
    matches!(text.parse::<f64>(), Ok(_))
}
696
/// Returns the quote style still open at the end of `text`, if any.
///
/// Replays the same state machine as `tokenize_inner` (without collecting
/// tokens); the two must stay in sync or stub-quote detection will disagree
/// with tokenization.
fn current_quote_state(text: &str) -> Option<QuoteStyle> {
    let mut state = LexState::Normal;

    for ch in text.chars() {
        match state {
            LexState::Normal => match ch {
                '\\' => state = LexState::EscapeNormal,
                '\'' => state = LexState::SingleQuote,
                '"' => state = LexState::DoubleQuote,
                _ => {}
            },
            LexState::SingleQuote => {
                if ch == '\'' {
                    state = LexState::Normal;
                }
            }
            LexState::DoubleQuote => match ch {
                '"' => state = LexState::Normal,
                '\\' => state = LexState::EscapeDouble,
                _ => {}
            },
            LexState::EscapeNormal => state = LexState::Normal,
            LexState::EscapeDouble => state = LexState::DoubleQuote,
        }
    }

    // Whatever state the scan ended in determines the open quote; an escape
    // inside double quotes still counts as double-quoted.
    match state {
        LexState::SingleQuote => Some(QuoteStyle::Single),
        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
        LexState::Normal | LexState::EscapeNormal => None,
    }
}
729
/// Finds the byte offset where completion replacement should begin.
///
/// Walks the prefix with the shared lexer states, tracking the start of the
/// last token and — separately — the position just after its opening quote.
/// When the cursor is inside a quote, replacement starts after that quote so
/// an accepted suggestion does not duplicate the quote character.
fn token_replace_start(
    text_before_cursor: &str,
    safe_cursor: usize,
    quote_style: Option<QuoteStyle>,
) -> usize {
    // Shortcut: a trailing ASCII space means an empty replace range at the
    // cursor. Other trailing whitespace is handled equivalently by the loop
    // below (the whitespace arm leaves `token_start` == `safe_cursor`).
    if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
        return safe_cursor;
    }

    let mut state = LexState::Normal;
    let mut token_start = 0usize;
    let mut token_active = false;
    let mut quote_start = None;

    for (idx, ch) in text_before_cursor.char_indices() {
        match state {
            LexState::Normal => {
                if ch.is_whitespace() {
                    // Token boundary: reset tracking so the next non-space
                    // char starts a fresh token.
                    token_active = false;
                    token_start = idx + ch.len_utf8();
                    quote_start = None;
                    continue;
                }

                if !token_active {
                    token_active = true;
                    token_start = idx;
                }

                match ch {
                    '\'' => {
                        // Record the position just past the opening quote.
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::SingleQuote;
                    }
                    '"' => {
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::DoubleQuote;
                    }
                    '\\' => state = LexState::EscapeNormal,
                    _ => {}
                }
            }
            LexState::SingleQuote => {
                if ch == '\'' {
                    state = LexState::Normal;
                }
            }
            LexState::DoubleQuote => match ch {
                '"' => state = LexState::Normal,
                '\\' => state = LexState::EscapeDouble,
                _ => {}
            },
            LexState::EscapeNormal => state = LexState::Normal,
            LexState::EscapeDouble => state = LexState::DoubleQuote,
        }
    }

    // Inside a quote: replace from just after the opening quote (falling back
    // to the token start if no quote position was recorded). Otherwise
    // replace the whole token.
    match quote_style {
        Some(_) => quote_start.unwrap_or(token_start),
        None => token_start,
    }
}
792
793#[cfg(test)]
794mod tests {
795    use crate::completion::model::{FlagOccurrence, QuoteStyle};
796
797    use super::CommandLineParser;
798
    // `CommandLineParser` is a stateless unit struct, so each test simply
    // constructs a fresh value.
    fn parser() -> CommandLineParser {
        CommandLineParser
    }
802
    mod scanner_contracts {
        use super::*;

        #[test]
        fn tokenization_preserves_pipes_and_recovers_from_unmatched_quotes() {
            let parser = parser();

            // A `|` inside single quotes stays literal; the unquoted one
            // becomes its own token.
            assert_eq!(
                parser.tokenize("orch provision --request 'name=a|b' | F name"),
                vec![
                    "orch",
                    "provision",
                    "--request",
                    "name=a|b",
                    "|",
                    "F",
                    "name",
                ]
            );
            // Unmatched quote: the synthetic-closing-quote retry drops the
            // quote from the token value.
            assert_eq!(parser.tokenize("--os 'alma"), vec!["--os", "alma"]);

            // The span fallback, by contrast, whitespace-splits and keeps the
            // raw quote character inside the value.
            let spans = parser.tokenize_with_spans("cmd --name 'alice");
            assert_eq!(spans.len(), 3);
            assert_eq!(spans[0].value, "cmd");
            assert_eq!(spans[1].value, "--name");
            assert_eq!(spans[2].value, "'alice");
        }

        #[test]
        fn span_tracking_preserves_offsets_for_balanced_quotes_and_pipes() {
            let parser = parser();
            let source = r#"ldap user "alice smith" | P uid"#;
            let spans = parser.tokenize_with_spans(source);

            assert_eq!(spans[0].value, "ldap");
            assert_eq!(spans[0].start, 0);
            // Value is unquoted, but the span covers the raw quoted text.
            assert_eq!(spans[2].value, "alice smith");
            assert_eq!(&source[spans[2].start..spans[2].end], "\"alice smith\"");
            assert_eq!(spans[3].value, "|");
        }
    }
844
845    mod command_shape_contracts {
846        use super::*;
847
        #[test]
        fn parse_tracks_flag_values_pipes_and_repeated_occurrence_boundaries() {
            let parser = parser();

            let tokens = parser.tokenize("orch provision --provider vmware --os rhel | F name");
            let cmd = parser.parse(&tokens);
            assert_eq!(cmd.head(), ["orch".to_string(), "provision".to_string()]);
            assert_eq!(
                cmd.flag_values("--provider"),
                Some(&["vmware".to_string()][..])
            );
            assert_eq!(cmd.flag_values("--os"), Some(&["rhel".to_string()][..]));
            assert!(cmd.has_pipe());
            assert_eq!(cmd.pipes(), ["F".to_string(), "name".to_string()]);

            // Repeated flags keep one FlagOccurrence per appearance, in
            // original order, rather than being merged.
            let repeated = parser.parse(&parser.tokenize("cmd --tag red --mode fast --tag blue"));
            assert_eq!(
                repeated.flag_occurrences().cloned().collect::<Vec<_>>(),
                vec![
                    FlagOccurrence {
                        name: "--tag".to_string(),
                        values: vec!["red".to_string()],
                    },
                    FlagOccurrence {
                        name: "--mode".to_string(),
                        values: vec!["fast".to_string()],
                    },
                    FlagOccurrence {
                        name: "--tag".to_string(),
                        values: vec!["blue".to_string()],
                    },
                ]
            );
        }
883        #[test]
884        fn parse_respects_option_boundaries_inline_values_and_negative_numbers() {
885            let parser = parser();
886
887            let after_double_dash = parser.parse(&parser.tokenize("cmd -- --not-a-flag"));
888            assert_eq!(after_double_dash.head(), ["cmd".to_string()]);
889            assert_eq!(
890                after_double_dash
891                    .positional_args()
892                    .cloned()
893                    .collect::<Vec<_>>(),
894                vec!["--not-a-flag".to_string()]
895            );
896
897            let negative_value = parser.parse(&parser.tokenize("cmd --count -5"));
898            assert_eq!(
899                negative_value.flag_values("--count"),
900                Some(&["-5".to_string()][..])
901            );
902
903            let inline = parser.parse(&parser.tokenize("cmd --format=json --os= --format=table"));
904            assert_eq!(inline.flag_values("--os"), Some(&[][..]));
905            assert_eq!(
906                inline.flag_occurrences().cloned().collect::<Vec<_>>(),
907                vec![
908                    FlagOccurrence {
909                        name: "--format".to_string(),
910                        values: vec!["json".to_string()],
911                    },
912                    FlagOccurrence {
913                        name: "--os".to_string(),
914                        values: vec![],
915                    },
916                    FlagOccurrence {
917                        name: "--format".to_string(),
918                        values: vec!["table".to_string()],
919                    },
920                ]
921            );
922        }
923
924        #[test]
925        fn parse_distinguishes_tail_mode_from_dsl_boundaries() {
926            let parser = parser();
927
928            let tail =
929                parser.parse(&parser.tokenize("ldap user --provider vmware region eu-central"));
930            assert_eq!(tail.head(), ["ldap".to_string(), "user".to_string()]);
931            assert_eq!(
932                tail.flag_values("--provider"),
933                Some(
934                    &[
935                        "vmware".to_string(),
936                        "region".to_string(),
937                        "eu-central".to_string(),
938                    ][..]
939                )
940            );
941
942            let dsl = parser.parse(&parser.tokenize("cmd -- literal | F name"));
943            assert_eq!(dsl.head(), ["cmd".to_string()]);
944            assert_eq!(
945                dsl.positional_args().cloned().collect::<Vec<_>>(),
946                vec!["literal".to_string()]
947            );
948            assert!(dsl.has_pipe());
949            assert_eq!(dsl.pipes(), ["F".to_string(), "name".to_string()]);
950        }
951    }
952
953    mod cursor_analysis_contracts {
954        use super::*;
955
956        #[test]
957        fn cursor_state_tracks_equals_boundaries_and_open_quote_ranges() {
958            let parser = parser();
959
960            let cursor = parser.cursor_state("cmd --flag=", "cmd --flag=".len());
961            assert_eq!(cursor.token_stub, "");
962
963            assert_eq!(
964                parser.compute_stub_quote("cmd --name \"al"),
965                Some(QuoteStyle::Double)
966            );
967            assert_eq!(
968                parser.compute_stub_quote("cmd --name 'al"),
969                Some(QuoteStyle::Single)
970            );
971            assert_eq!(parser.compute_stub_quote("cmd --name al"), None);
972
973            let line = "ldap user \"oi";
974            let cursor = parser.cursor_state(line, line.len());
975            assert_eq!(cursor.token_stub, "oi");
976            assert_eq!(cursor.raw_stub, "oi");
977            assert_eq!(cursor.replace_range, 11..13);
978            assert_eq!(cursor.quote_style, Some(QuoteStyle::Double));
979        }
980
981        #[test]
982        fn analyze_reuses_safe_cursor_snapshots_for_prefix_and_balanced_quotes() {
983            let parser = parser();
984
985            let line = "orch provision --provider vmware --os rhel | F name";
986            let cursor = "orch provision --provider vmware".len();
987            let analyzed = parser.analyze(line, cursor);
988            assert_eq!(
989                analyzed.parsed.full_tokens,
990                vec![
991                    "orch",
992                    "provision",
993                    "--provider",
994                    "vmware",
995                    "--os",
996                    "rhel",
997                    "|",
998                    "F",
999                    "name",
1000                ]
1001            );
1002            assert_eq!(
1003                analyzed.parsed.cursor_tokens,
1004                vec!["orch", "provision", "--provider", "vmware"]
1005            );
1006            assert_eq!(
1007                analyzed.parsed.cursor_cmd.flag_values("--provider"),
1008                Some(&["vmware".to_string()][..])
1009            );
1010
1011            let balanced = parser.analyze(
1012                r#"ldap user "oi ste" --format json"#,
1013                r#"ldap user "oi"#.len(),
1014            );
1015            assert_eq!(balanced.cursor.token_stub, "oi");
1016            assert_eq!(balanced.cursor.raw_stub, "oi");
1017            assert_eq!(balanced.cursor.quote_style, Some(QuoteStyle::Double));
1018        }
1019
1020        #[test]
1021        fn analyze_recovers_from_unbalanced_quotes_and_non_char_boundaries() {
1022            let parser = parser();
1023
1024            let unbalanced = parser.analyze(r#"ldap user "alice"#, r#"ldap user "alice"#.len());
1025            assert_eq!(unbalanced.parsed.full_tokens, vec!["ldap", "user", "alice"]);
1026            assert_eq!(
1027                unbalanced.parsed.cursor_tokens,
1028                vec!["ldap", "user", "alice"]
1029            );
1030            assert_eq!(unbalanced.cursor.quote_style, Some(QuoteStyle::Double));
1031            assert_eq!(unbalanced.cursor.token_stub, "alice");
1032
1033            let line = "ldap user å";
1034            let analyzed = parser.analyze(line, line.len() - 1);
1035            assert!(analyzed.parsed.safe_cursor < line.len());
1036            assert_eq!(analyzed.cursor.token_stub, "");
1037        }
1038    }
1039}