Skip to main content

osp_cli/completion/
parse.rs

//! Shell-like tokenization and cursor analysis for completion.
//!
//! This module exists to turn a partially typed input line plus a cursor offset
//! into the structured data the completion engine actually needs: command path,
//! tail items, pipe mode, and the active replacement span.
//!
//! Contract:
//!
//! - parsing here stays permissive for interactive use
//! - the parser owns lexical structure, not suggestion ranking
//! - callers should rely on `ParsedCursorLine` and `CursorState` rather than
//!   re-deriving cursor spans themselves

14use crate::completion::model::{CommandLine, CursorState, FlagOccurrence, ParsedLine, QuoteStyle};
15use std::collections::BTreeMap;
16
/// Token value with byte offsets into the original input line.
///
/// The offsets index the raw line (including any surrounding quote
/// characters), while `value` holds the unescaped text, so `value.len()`
/// may differ from `end - start`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Unescaped token text.
    pub value: String,
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset.
    pub end: usize,
}
27
/// State of the shell-like lexer while scanning a line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexState {
    /// Outside any quote or escape.
    Normal,
    /// Inside `'…'`; everything is literal until the closing quote.
    SingleQuote,
    /// Inside `"…"`; backslash escapes are honored.
    DoubleQuote,
    /// Immediately after a backslash in normal context.
    EscapeNormal,
    /// Immediately after a backslash inside double quotes.
    EscapeDouble,
}
36
/// Parsed line assembly after tokenization.
///
/// The parser keeps the command head separate until it sees the first option-like
/// token. After that point the rest of the line is interpreted as flags, args,
/// or pipes. That mirrors how the completer reasons about scope: command path
/// first, then option/value mode.
#[derive(Debug, Default)]
struct ParseState {
    /// Command-path tokens seen before the first flag-like token.
    head: Vec<String>,
    /// Flags and positionals in the order they appeared.
    tail: Vec<crate::completion::model::TailItem>,
    /// Flag name -> all values across every occurrence of that flag.
    flag_values: BTreeMap<String, Vec<String>>,
    /// Tokens after the first `|`.
    pipes: Vec<String>,
    /// Whether a `|` was seen at all, even with nothing after it.
    has_pipe: bool,
}
51
52impl ParseState {
53    fn finish(self) -> CommandLine {
54        CommandLine {
55            head: self.head,
56            tail: self.tail,
57            flag_values: self.flag_values,
58            pipes: self.pipes,
59            has_pipe: self.has_pipe,
60        }
61    }
62
63    fn start_pipe<'a>(&mut self, iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>) {
64        self.has_pipe = true;
65        self.pipes.extend(iter.cloned());
66    }
67
68    fn collect_positional_tail<'a>(
69        &mut self,
70        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
71    ) {
72        while let Some(next) = iter.next() {
73            if next == "|" {
74                self.start_pipe(iter);
75                break;
76            }
77            self.tail
78                .push(crate::completion::model::TailItem::Positional(next.clone()));
79        }
80    }
81
82    fn parse_flag_tail<'a>(
83        &mut self,
84        first_token: String,
85        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
86    ) {
87        // Once the parser has seen the first flag-like token, the rest of the
88        // line stays in "tail mode". From that point on we only distinguish
89        // between more flags, their values, `--`, and a pipe into DSL mode.
90        let mut current = first_token;
91        loop {
92            if current == "|" {
93                self.start_pipe(iter);
94                return;
95            }
96
97            if current == "--" {
98                self.collect_positional_tail(iter);
99                return;
100            }
101
102            if let Some((flag, value)) = split_inline_flag_value(&current) {
103                let mut occurrence_values = Vec::new();
104                if !value.is_empty() {
105                    self.flag_values
106                        .entry(flag.clone())
107                        .or_default()
108                        .push(value.clone());
109                    occurrence_values.push(value);
110                } else {
111                    self.flag_values.entry(flag.clone()).or_default();
112                }
113                self.tail
114                    .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
115                        name: flag.clone(),
116                        values: occurrence_values,
117                    }));
118                let Some(next) = iter.next().cloned() else {
119                    break;
120                };
121                current = next;
122                continue;
123            }
124
125            let flag = current;
126            let values = self.consume_flag_values(iter);
127            self.tail
128                .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
129                    name: flag.clone(),
130                    values: values.clone(),
131                }));
132            self.flag_values
133                .entry(flag.clone())
134                .or_default()
135                .extend(values);
136
137            let Some(next) = iter.next().cloned() else {
138                break;
139            };
140            current = next;
141        }
142    }
143
144    fn consume_flag_values<'a>(
145        &mut self,
146        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
147    ) -> Vec<String> {
148        let mut values = Vec::new();
149
150        while let Some(next) = iter.peek() {
151            if *next == "|" || *next == "--" {
152                break;
153            }
154            if looks_like_flag_start(next) {
155                break;
156            }
157
158            values.push((*next).clone());
159            iter.next();
160        }
161
162        values
163    }
164}
165
/// Shell-like parser used by the completion engine.
///
/// A stateless unit struct: every method takes the input line explicitly,
/// so a single value can be constructed inline and reused freely.
#[derive(Debug, Clone, Default)]
pub struct CommandLineParser;
169
/// Parsed command-line state for the full line and the cursor position.
///
/// Returned by `CommandLineParser::analyze`; bundles the whole-line parse
/// with the cursor-local parse so callers never re-derive cursor spans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCursorLine {
    /// Parsed tokens and command structure.
    pub parsed: ParsedLine,
    /// Cursor-local replacement information.
    pub cursor: CursorState,
}
178
/// Intermediate result of the single-pass cursor-aware tokenization.
#[derive(Debug, Clone)]
struct CursorTokenization {
    /// Tokens for the entire line.
    full_tokens: Vec<String>,
    /// Token snapshot taken at the cursor offset.
    cursor_tokens: Vec<String>,
    /// Quote context at the cursor, when the cursor sits inside a quote.
    cursor_quote_style: Option<QuoteStyle>,
}
185
186impl CommandLineParser {
187    /// Tokenizes a line using shell-like quoting rules.
188    ///
189    /// Tokenization is intentionally permissive for interactive use. If the
190    /// user is mid-quote while pressing tab, we retry with a synthetic closing
191    /// quote before finally falling back to whitespace splitting.
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// use osp_cli::completion::CommandLineParser;
197    ///
198    /// let parser = CommandLineParser;
199    ///
200    /// assert_eq!(
201    ///     parser.tokenize(r#"ldap user "alice smith""#),
202    ///     vec!["ldap", "user", "alice smith"]
203    /// );
204    /// ```
205    pub fn tokenize(&self, line: &str) -> Vec<String> {
206        self.tokenize_inner(line)
207            .or_else(|| self.tokenize_inner(&format!("{line}\"")))
208            .or_else(|| self.tokenize_inner(&format!("{line}'")))
209            .unwrap_or_else(|| line.split_whitespace().map(str::to_string).collect())
210    }
211
212    /// Tokenizes `line` and preserves byte spans for each token when possible.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// use osp_cli::completion::CommandLineParser;
218    ///
219    /// let spans = CommandLineParser.tokenize_with_spans("ldap user alice");
220    ///
221    /// assert_eq!(spans[0].value, "ldap");
222    /// assert_eq!(spans[1].start, 5);
223    /// assert_eq!(spans[2].end, 15);
224    /// ```
225    pub fn tokenize_with_spans(&self, line: &str) -> Vec<TokenSpan> {
226        self.tokenize_with_spans_inner(line)
227            .or_else(|| self.tokenize_with_spans_fallback(line))
228            .unwrap_or_default()
229    }
230
231    /// Parse the full line and the cursor-local prefix from one lexical walk.
232    ///
233    /// The common case keeps completion analysis in one tokenization pass. If
234    /// the line ends in an unmatched quote we fall back to the permissive
235    /// tokenization path so interactive behavior stays unchanged.
236    ///
237    /// `cursor` is clamped to the input length and to a valid UTF-8 character
238    /// boundary before the parser slices the line.
239    ///
240    /// # Examples
241    ///
242    /// ```
243    /// use osp_cli::completion::CommandLineParser;
244    ///
245    /// let parsed = CommandLineParser.analyze("ldap user ali", 13);
246    ///
247    /// assert_eq!(parsed.parsed.cursor_tokens, vec!["ldap", "user", "ali"]);
248    /// assert_eq!(parsed.cursor.token_stub, "ali");
249    /// ```
250    pub fn analyze(&self, line: &str, cursor: usize) -> ParsedCursorLine {
251        let safe_cursor = clamp_to_char_boundary(line, cursor.min(line.len()));
252        let before_cursor = &line[..safe_cursor];
253
254        if let Some(tokenized) = self.tokenize_with_cursor_inner(line, safe_cursor) {
255            let full_cmd = self.parse(&tokenized.full_tokens);
256            let cursor_cmd = self.parse(&tokenized.cursor_tokens);
257            let cursor = self.build_cursor_state(
258                before_cursor,
259                safe_cursor,
260                &tokenized.cursor_tokens,
261                tokenized.cursor_quote_style,
262            );
263
264            return ParsedCursorLine {
265                parsed: ParsedLine {
266                    safe_cursor,
267                    full_tokens: tokenized.full_tokens,
268                    cursor_tokens: tokenized.cursor_tokens,
269                    full_cmd,
270                    cursor_cmd,
271                },
272                cursor,
273            };
274        }
275
276        let full_tokens = self.tokenize(line);
277        let cursor_tokens = self.tokenize(before_cursor);
278        let full_cmd = self.parse(&full_tokens);
279        let cursor_cmd = self.parse(&cursor_tokens);
280        let cursor = self.cursor_state(before_cursor, safe_cursor);
281
282        ParsedCursorLine {
283            parsed: ParsedLine {
284                safe_cursor,
285                full_tokens,
286                cursor_tokens,
287                full_cmd,
288                cursor_cmd,
289            },
290            cursor,
291        }
292    }
293
294    fn tokenize_inner(&self, line: &str) -> Option<Vec<String>> {
295        let mut out = Vec::new();
296        let mut state = LexState::Normal;
297        let mut current = String::new();
298
299        for ch in line.chars() {
300            match state {
301                LexState::Normal => {
302                    if ch.is_whitespace() {
303                        push_current(&mut out, &mut current);
304                    } else {
305                        match ch {
306                            '|' => {
307                                push_current(&mut out, &mut current);
308                                out.push("|".to_string());
309                            }
310                            '\\' => state = LexState::EscapeNormal,
311                            '\'' => state = LexState::SingleQuote,
312                            '"' => state = LexState::DoubleQuote,
313                            _ => current.push(ch),
314                        }
315                    }
316                }
317                LexState::SingleQuote => {
318                    if ch == '\'' {
319                        state = LexState::Normal;
320                    } else {
321                        current.push(ch);
322                    }
323                }
324                LexState::DoubleQuote => match ch {
325                    '"' => state = LexState::Normal,
326                    '\\' => state = LexState::EscapeDouble,
327                    _ => current.push(ch),
328                },
329                LexState::EscapeNormal => {
330                    current.push(ch);
331                    state = LexState::Normal;
332                }
333                LexState::EscapeDouble => {
334                    current.push(ch);
335                    state = LexState::DoubleQuote;
336                }
337            }
338        }
339
340        match state {
341            LexState::Normal => {
342                push_current(&mut out, &mut current);
343                Some(out)
344            }
345            _ => None,
346        }
347    }
348
349    fn tokenize_with_spans_inner(&self, line: &str) -> Option<Vec<TokenSpan>> {
350        let mut out = Vec::new();
351        let mut state = LexState::Normal;
352        let mut current = String::new();
353        let mut current_start = None;
354
355        for (idx, ch) in line.char_indices() {
356            match state {
357                LexState::Normal => {
358                    if ch.is_whitespace() {
359                        push_current_span(&mut out, &mut current, &mut current_start, idx);
360                    } else {
361                        match ch {
362                            '|' => {
363                                push_current_span(&mut out, &mut current, &mut current_start, idx);
364                                out.push(TokenSpan {
365                                    value: "|".to_string(),
366                                    start: idx,
367                                    end: idx + ch.len_utf8(),
368                                });
369                            }
370                            '\\' => {
371                                current_start.get_or_insert(idx);
372                                state = LexState::EscapeNormal;
373                            }
374                            '\'' => {
375                                current_start.get_or_insert(idx);
376                                state = LexState::SingleQuote;
377                            }
378                            '"' => {
379                                current_start.get_or_insert(idx);
380                                state = LexState::DoubleQuote;
381                            }
382                            _ => {
383                                current_start.get_or_insert(idx);
384                                current.push(ch);
385                            }
386                        }
387                    }
388                }
389                LexState::SingleQuote => {
390                    if ch == '\'' {
391                        state = LexState::Normal;
392                    } else {
393                        current.push(ch);
394                    }
395                }
396                LexState::DoubleQuote => match ch {
397                    '"' => state = LexState::Normal,
398                    '\\' => state = LexState::EscapeDouble,
399                    _ => current.push(ch),
400                },
401                LexState::EscapeNormal => {
402                    current.push(ch);
403                    state = LexState::Normal;
404                }
405                LexState::EscapeDouble => {
406                    current.push(ch);
407                    state = LexState::DoubleQuote;
408                }
409            }
410        }
411
412        match state {
413            LexState::Normal => {
414                push_current_span(&mut out, &mut current, &mut current_start, line.len());
415                Some(out)
416            }
417            _ => None,
418        }
419    }
420
421    fn tokenize_with_spans_fallback(&self, line: &str) -> Option<Vec<TokenSpan>> {
422        let mut out = Vec::new();
423        let mut search_from = 0usize;
424        for token in line.split_whitespace() {
425            let rel = line.get(search_from..)?.find(token)?;
426            let start = search_from + rel;
427            let end = start + token.len();
428            out.push(TokenSpan {
429                value: token.to_string(),
430                start,
431                end,
432            });
433            search_from = end;
434        }
435        Some(out)
436    }
437
438    /// Parses tokens into command-path, flag, positional, and pipe segments.
439    ///
440    /// # Examples
441    ///
442    /// ```
443    /// use osp_cli::completion::CommandLineParser;
444    ///
445    /// let tokens = vec![
446    ///     "ldap".to_string(),
447    ///     "user".to_string(),
448    ///     "--json".to_string(),
449    ///     "|".to_string(),
450    ///     "P".to_string(),
451    /// ];
452    /// let parsed = CommandLineParser.parse(&tokens);
453    ///
454    /// assert_eq!(parsed.head(), &["ldap".to_string(), "user".to_string()]);
455    /// assert!(parsed.has_pipe());
456    /// ```
457    pub fn parse(&self, tokens: &[String]) -> CommandLine {
458        let mut state = ParseState::default();
459        let mut iter = tokens.iter().peekable();
460
461        while let Some(token) = iter.next() {
462            if token == "|" {
463                state.start_pipe(&mut iter);
464                return state.finish();
465            }
466            if token == "--" {
467                state.collect_positional_tail(&mut iter);
468                return state.finish();
469            }
470            if token.starts_with('-') {
471                state.parse_flag_tail(token.clone(), &mut iter);
472                return state.finish();
473            }
474            state.head.push(token.clone());
475        }
476
477        state.finish()
478    }
479
480    /// Computes the cursor replacement range and current token stub.
481    ///
482    /// # Examples
483    ///
484    /// ```
485    /// use osp_cli::completion::CommandLineParser;
486    ///
487    /// let cursor = CommandLineParser.cursor_state("ldap user ali", 13);
488    ///
489    /// assert_eq!(cursor.token_stub, "ali");
490    /// assert_eq!(cursor.replace_range, 10..13);
491    /// ```
492    pub fn cursor_state(&self, text_before_cursor: &str, safe_cursor: usize) -> CursorState {
493        let tokens = self.tokenize(text_before_cursor);
494        self.build_cursor_state(
495            text_before_cursor,
496            safe_cursor,
497            &tokens,
498            self.compute_stub_quote(text_before_cursor),
499        )
500    }
501
502    fn build_cursor_state(
503        &self,
504        text_before_cursor: &str,
505        safe_cursor: usize,
506        tokens: &[String],
507        quote_style: Option<QuoteStyle>,
508    ) -> CursorState {
509        let token_stub = self.compute_stub(text_before_cursor, tokens);
510        let replace_start = token_replace_start(text_before_cursor, safe_cursor, quote_style);
511        let raw_stub = text_before_cursor
512            .get(replace_start..safe_cursor)
513            .unwrap_or("")
514            .to_string();
515
516        CursorState::new(
517            token_stub,
518            raw_stub,
519            replace_start..safe_cursor,
520            quote_style,
521        )
522    }
523
524    fn tokenize_with_cursor_inner(
525        &self,
526        line: &str,
527        safe_cursor: usize,
528    ) -> Option<CursorTokenization> {
529        let mut out = Vec::new();
530        let mut state = LexState::Normal;
531        let mut current = String::new();
532        let mut cursor_tokens = None;
533        let mut cursor_quote_style = None;
534
535        for (idx, ch) in line.char_indices() {
536            if idx == safe_cursor && cursor_tokens.is_none() {
537                cursor_tokens = Some(snapshot_tokens(&out, &current));
538                cursor_quote_style = Some(quote_style_for_state(state));
539            }
540
541            match state {
542                LexState::Normal => {
543                    if ch.is_whitespace() {
544                        push_current(&mut out, &mut current);
545                    } else {
546                        match ch {
547                            '|' => {
548                                push_current(&mut out, &mut current);
549                                out.push("|".to_string());
550                            }
551                            '\\' => state = LexState::EscapeNormal,
552                            '\'' => state = LexState::SingleQuote,
553                            '"' => state = LexState::DoubleQuote,
554                            _ => current.push(ch),
555                        }
556                    }
557                }
558                LexState::SingleQuote => {
559                    if ch == '\'' {
560                        state = LexState::Normal;
561                    } else {
562                        current.push(ch);
563                    }
564                }
565                LexState::DoubleQuote => match ch {
566                    '"' => state = LexState::Normal,
567                    '\\' => state = LexState::EscapeDouble,
568                    _ => current.push(ch),
569                },
570                LexState::EscapeNormal => {
571                    current.push(ch);
572                    state = LexState::Normal;
573                }
574                LexState::EscapeDouble => {
575                    current.push(ch);
576                    state = LexState::DoubleQuote;
577                }
578            }
579        }
580
581        if safe_cursor == line.len() && cursor_tokens.is_none() {
582            cursor_tokens = Some(snapshot_tokens(&out, &current));
583            cursor_quote_style = Some(quote_style_for_state(state));
584        }
585
586        match state {
587            LexState::Normal => {
588                push_current(&mut out, &mut current);
589                Some(CursorTokenization {
590                    full_tokens: out,
591                    cursor_tokens: cursor_tokens.unwrap_or_default(),
592                    cursor_quote_style: cursor_quote_style.unwrap_or(None),
593                })
594            }
595            _ => None,
596        }
597    }
598
599    fn compute_stub(&self, text_before_cursor: &str, tokens: &[String]) -> String {
600        if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
601            return String::new();
602        }
603        let Some(last) = tokens.last() else {
604            return String::new();
605        };
606
607        if last.starts_with("--") && last.ends_with('=') && last.contains('=') {
608            return String::new();
609        }
610
611        last.clone()
612    }
613
614    /// Returns the active quote style for the token being edited, if any.
615    ///
616    /// # Examples
617    ///
618    /// ```
619    /// use osp_cli::completion::{CommandLineParser, QuoteStyle};
620    ///
621    /// assert_eq!(
622    ///     CommandLineParser.compute_stub_quote(r#"ldap user "ali"#),
623    ///     Some(QuoteStyle::Double)
624    /// );
625    /// ```
626    pub fn compute_stub_quote(&self, text_before_cursor: &str) -> Option<QuoteStyle> {
627        current_quote_state(text_before_cursor)
628    }
629}
630
/// Copies the finished tokens and appends the in-progress token when non-empty.
fn snapshot_tokens(out: &[String], current: &str) -> Vec<String> {
    let pending = (!current.is_empty()).then(|| current.to_string());
    out.iter().cloned().chain(pending).collect()
}
638
/// Clamps `cursor` down to the nearest valid UTF-8 character boundary in `input`.
fn clamp_to_char_boundary(input: &str, cursor: usize) -> usize {
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=cursor)
        .rev()
        .find(|&offset| input.is_char_boundary(offset))
        .unwrap_or(0)
}
649
650fn quote_style_for_state(state: LexState) -> Option<QuoteStyle> {
651    match state {
652        LexState::SingleQuote => Some(QuoteStyle::Single),
653        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
654        LexState::Normal | LexState::EscapeNormal => None,
655    }
656}
657
/// Splits a `--flag=value` token into its flag and value halves.
///
/// Returns `None` for tokens that are not `--`-prefixed or carry no `=`.
fn split_inline_flag_value(token: &str) -> Option<(String, String)> {
    if !token.starts_with("--") {
        return None;
    }
    let (flag, value) = token.split_once('=')?;
    Some((flag.to_string(), value.to_string()))
}
668
/// Moves the in-progress token into `out`, skipping empty tokens.
fn push_current(out: &mut Vec<String>, current: &mut String) {
    if current.is_empty() {
        return;
    }
    out.push(std::mem::take(current));
}
674
675fn push_current_span(
676    out: &mut Vec<TokenSpan>,
677    current: &mut String,
678    current_start: &mut Option<usize>,
679    end: usize,
680) {
681    if !current.is_empty() {
682        out.push(TokenSpan {
683            value: std::mem::take(current),
684            start: current_start.take().unwrap_or(end),
685            end,
686        });
687    } else {
688        *current_start = None;
689    }
690}
691
692fn looks_like_flag_start(token: &str) -> bool {
693    token.starts_with('-') && token != "-" && !is_number(token)
694}
695
/// True when the entire text parses as a floating-point number, so negative
/// values like `-5` are not mistaken for flags.
fn is_number(text: &str) -> bool {
    matches!(text.parse::<f64>(), Ok(_))
}
699
700fn current_quote_state(text: &str) -> Option<QuoteStyle> {
701    let mut state = LexState::Normal;
702
703    for ch in text.chars() {
704        match state {
705            LexState::Normal => match ch {
706                '\\' => state = LexState::EscapeNormal,
707                '\'' => state = LexState::SingleQuote,
708                '"' => state = LexState::DoubleQuote,
709                _ => {}
710            },
711            LexState::SingleQuote => {
712                if ch == '\'' {
713                    state = LexState::Normal;
714                }
715            }
716            LexState::DoubleQuote => match ch {
717                '"' => state = LexState::Normal,
718                '\\' => state = LexState::EscapeDouble,
719                _ => {}
720            },
721            LexState::EscapeNormal => state = LexState::Normal,
722            LexState::EscapeDouble => state = LexState::DoubleQuote,
723        }
724    }
725
726    match state {
727        LexState::SingleQuote => Some(QuoteStyle::Single),
728        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
729        LexState::Normal | LexState::EscapeNormal => None,
730    }
731}
732
/// Finds the byte offset where the token under the cursor starts, i.e. the
/// start of the span a completion should replace.
///
/// Walks the text before the cursor with the same quoting rules as the
/// tokenizer, tracking both where the current token began and where the text
/// inside its most recent opening quote began. When the cursor sits inside a
/// quoted region (`quote_style` is `Some`), the replacement starts just after
/// the opening quote; otherwise it starts at the token itself.
fn token_replace_start(
    text_before_cursor: &str,
    safe_cursor: usize,
    quote_style: Option<QuoteStyle>,
) -> usize {
    // Cursor right after a space (or empty line): a fresh, empty token starts
    // at the cursor itself.
    if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
        return safe_cursor;
    }

    let mut state = LexState::Normal;
    let mut token_start = 0usize;
    let mut token_active = false;
    let mut quote_start = None;

    for (idx, ch) in text_before_cursor.char_indices() {
        match state {
            LexState::Normal => {
                if ch.is_whitespace() {
                    // Token boundary: the next token (if any) starts after
                    // this character, and any prior quote no longer applies.
                    token_active = false;
                    token_start = idx + ch.len_utf8();
                    quote_start = None;
                    continue;
                }

                if !token_active {
                    token_active = true;
                    token_start = idx;
                }

                match ch {
                    '\'' => {
                        // Remember the position just inside the opening quote.
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::SingleQuote;
                    }
                    '"' => {
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::DoubleQuote;
                    }
                    '\\' => state = LexState::EscapeNormal,
                    _ => {}
                }
            }
            LexState::SingleQuote => {
                if ch == '\'' {
                    state = LexState::Normal;
                }
            }
            LexState::DoubleQuote => match ch {
                '"' => state = LexState::Normal,
                '\\' => state = LexState::EscapeDouble,
                _ => {}
            },
            LexState::EscapeNormal => state = LexState::Normal,
            LexState::EscapeDouble => state = LexState::DoubleQuote,
        }
    }

    match quote_style {
        // Mid-quote: replace only the quoted stub, keeping the opening quote.
        Some(_) => quote_start.unwrap_or(token_start),
        None => token_start,
    }
}
795
796#[cfg(test)]
797mod tests {
798    use crate::completion::model::{FlagOccurrence, QuoteStyle};
799
800    use super::CommandLineParser;
801
802    fn parser() -> CommandLineParser {
803        CommandLineParser
804    }
805
806    mod scanner_contracts {
807        use super::*;
808
809        #[test]
810        fn scanner_preserves_token_values_offsets_and_unmatched_quote_recovery() {
811            let parser = parser();
812
813            assert_eq!(
814                parser.tokenize("orch provision --request 'name=a|b' | F name"),
815                vec![
816                    "orch",
817                    "provision",
818                    "--request",
819                    "name=a|b",
820                    "|",
821                    "F",
822                    "name",
823                ]
824            );
825            assert_eq!(parser.tokenize("--os 'alma"), vec!["--os", "alma"]);
826
827            let spans = parser.tokenize_with_spans("cmd --name 'alice");
828            assert_eq!(spans.len(), 3);
829            assert_eq!(spans[0].value, "cmd");
830            assert_eq!(spans[1].value, "--name");
831            assert_eq!(spans[2].value, "'alice");
832            let source = r#"ldap user "alice smith" | P uid"#;
833            let spans = parser.tokenize_with_spans(source);
834
835            assert_eq!(spans[0].value, "ldap");
836            assert_eq!(spans[0].start, 0);
837            assert_eq!(spans[2].value, "alice smith");
838            assert_eq!(&source[spans[2].start..spans[2].end], "\"alice smith\"");
839            assert_eq!(spans[3].value, "|");
840        }
841    }
842
843    mod command_shape_contracts {
844        use super::*;
845
846        #[test]
847        fn parse_tracks_flag_values_pipes_and_repeated_occurrence_boundaries() {
848            let parser = parser();
849
850            let tokens = parser.tokenize("orch provision --provider vmware --os rhel | F name");
851            let cmd = parser.parse(&tokens);
852            assert_eq!(cmd.head(), ["orch".to_string(), "provision".to_string()]);
853            assert_eq!(
854                cmd.flag_values("--provider"),
855                Some(&["vmware".to_string()][..])
856            );
857            assert_eq!(cmd.flag_values("--os"), Some(&["rhel".to_string()][..]));
858            assert!(cmd.has_pipe());
859            assert_eq!(cmd.pipes(), ["F".to_string(), "name".to_string()]);
860
861            let repeated = parser.parse(&parser.tokenize("cmd --tag red --mode fast --tag blue"));
862            assert_eq!(
863                repeated.flag_occurrences().cloned().collect::<Vec<_>>(),
864                vec![
865                    FlagOccurrence {
866                        name: "--tag".to_string(),
867                        values: vec!["red".to_string()],
868                    },
869                    FlagOccurrence {
870                        name: "--mode".to_string(),
871                        values: vec!["fast".to_string()],
872                    },
873                    FlagOccurrence {
874                        name: "--tag".to_string(),
875                        values: vec!["blue".to_string()],
876                    },
877                ]
878            );
879        }
880
881        #[test]
882        fn parse_respects_option_boundaries_inline_values_and_negative_numbers() {
883            let parser = parser();
884
885            let after_double_dash = parser.parse(&parser.tokenize("cmd -- --not-a-flag"));
886            assert_eq!(after_double_dash.head(), ["cmd".to_string()]);
887            assert_eq!(
888                after_double_dash
889                    .positional_args()
890                    .cloned()
891                    .collect::<Vec<_>>(),
892                vec!["--not-a-flag".to_string()]
893            );
894
895            let negative_value = parser.parse(&parser.tokenize("cmd --count -5"));
896            assert_eq!(
897                negative_value.flag_values("--count"),
898                Some(&["-5".to_string()][..])
899            );
900
901            let inline = parser.parse(&parser.tokenize("cmd --format=json --os= --format=table"));
902            assert_eq!(inline.flag_values("--os"), Some(&[][..]));
903            assert_eq!(
904                inline.flag_occurrences().cloned().collect::<Vec<_>>(),
905                vec![
906                    FlagOccurrence {
907                        name: "--format".to_string(),
908                        values: vec!["json".to_string()],
909                    },
910                    FlagOccurrence {
911                        name: "--os".to_string(),
912                        values: vec![],
913                    },
914                    FlagOccurrence {
915                        name: "--format".to_string(),
916                        values: vec!["table".to_string()],
917                    },
918                ]
919            );
920        }
921
922        #[test]
923        fn parse_distinguishes_tail_mode_from_dsl_boundaries() {
924            let parser = parser();
925
926            let tail =
927                parser.parse(&parser.tokenize("ldap user --provider vmware region eu-central"));
928            assert_eq!(tail.head(), ["ldap".to_string(), "user".to_string()]);
929            assert_eq!(
930                tail.flag_values("--provider"),
931                Some(
932                    &[
933                        "vmware".to_string(),
934                        "region".to_string(),
935                        "eu-central".to_string(),
936                    ][..]
937                )
938            );
939
940            let dsl = parser.parse(&parser.tokenize("cmd -- literal | F name"));
941            assert_eq!(dsl.head(), ["cmd".to_string()]);
942            assert_eq!(
943                dsl.positional_args().cloned().collect::<Vec<_>>(),
944                vec!["literal".to_string()]
945            );
946            assert!(dsl.has_pipe());
947            assert_eq!(dsl.pipes(), ["F".to_string(), "name".to_string()]);
948        }
949    }
950
951    mod cursor_analysis_contracts {
952        use super::*;
953
954        #[test]
955        fn cursor_state_tracks_equals_boundaries_and_open_quote_ranges() {
956            let parser = parser();
957
958            let cursor = parser.cursor_state("cmd --flag=", "cmd --flag=".len());
959            assert_eq!(cursor.token_stub, "");
960
961            assert_eq!(
962                parser.compute_stub_quote("cmd --name \"al"),
963                Some(QuoteStyle::Double)
964            );
965            assert_eq!(
966                parser.compute_stub_quote("cmd --name 'al"),
967                Some(QuoteStyle::Single)
968            );
969            assert_eq!(parser.compute_stub_quote("cmd --name al"), None);
970
971            let line = "ldap user \"oi";
972            let cursor = parser.cursor_state(line, line.len());
973            assert_eq!(cursor.token_stub, "oi");
974            assert_eq!(cursor.raw_stub, "oi");
975            assert_eq!(cursor.replace_range, 11..13);
976            assert_eq!(cursor.quote_style, Some(QuoteStyle::Double));
977        }
978
979        #[test]
980        fn analyze_reuses_safe_cursor_snapshots_for_prefix_and_balanced_quotes() {
981            let parser = parser();
982
983            let line = "orch provision --provider vmware --os rhel | F name";
984            let cursor = "orch provision --provider vmware".len();
985            let analyzed = parser.analyze(line, cursor);
986            assert_eq!(
987                analyzed.parsed.full_tokens,
988                vec![
989                    "orch",
990                    "provision",
991                    "--provider",
992                    "vmware",
993                    "--os",
994                    "rhel",
995                    "|",
996                    "F",
997                    "name",
998                ]
999            );
1000            assert_eq!(
1001                analyzed.parsed.cursor_tokens,
1002                vec!["orch", "provision", "--provider", "vmware"]
1003            );
1004            assert_eq!(
1005                analyzed.parsed.cursor_cmd.flag_values("--provider"),
1006                Some(&["vmware".to_string()][..])
1007            );
1008
1009            let balanced = parser.analyze(
1010                r#"ldap user "oi ste" --format json"#,
1011                r#"ldap user "oi"#.len(),
1012            );
1013            assert_eq!(balanced.cursor.token_stub, "oi");
1014            assert_eq!(balanced.cursor.raw_stub, "oi");
1015            assert_eq!(balanced.cursor.quote_style, Some(QuoteStyle::Double));
1016        }
1017
1018        #[test]
1019        fn analyze_recovers_from_unbalanced_quotes_and_non_char_boundaries() {
1020            let parser = parser();
1021
1022            let unbalanced = parser.analyze(r#"ldap user "alice"#, r#"ldap user "alice"#.len());
1023            assert_eq!(unbalanced.parsed.full_tokens, vec!["ldap", "user", "alice"]);
1024            assert_eq!(
1025                unbalanced.parsed.cursor_tokens,
1026                vec!["ldap", "user", "alice"]
1027            );
1028            assert_eq!(unbalanced.cursor.quote_style, Some(QuoteStyle::Double));
1029            assert_eq!(unbalanced.cursor.token_stub, "alice");
1030
1031            let line = "ldap user å";
1032            let analyzed = parser.analyze(line, line.len() - 1);
1033            assert!(analyzed.parsed.safe_cursor < line.len());
1034            assert_eq!(analyzed.cursor.token_stub, "");
1035        }
1036    }
1037}