// osp_cli/completion/parse.rs
1//! Shell-like tokenization and cursor analysis for completion.
2//!
3//! This module exists to turn a partially typed input line plus a cursor offset
4//! into the structured data the completion engine actually needs: command path,
5//! tail items, pipe mode, and the active replacement span.
6//!
7//! Contract:
8//!
9//! - parsing here stays permissive for interactive use
10//! - the parser owns lexical structure, not suggestion ranking
11//! - callers should rely on `ParsedCursorLine` and `CursorState` rather than
12//!   re-deriving cursor spans themselves
13
use crate::completion::model::{
    CommandLine, CursorState, FlagOccurrence, ParsedLine, QuoteStyle, TailItem,
};
use std::collections::BTreeMap;
16
/// Token value with byte offsets into the original input line.
///
/// The span covers the token's full source extent — including any quote
/// characters — while `value` holds the unescaped text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Unescaped token text.
    pub value: String,
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset.
    pub end: usize,
}
27
/// Lexer state for the shell-like quote/escape machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexState {
    /// Outside any quote.
    Normal,
    /// Inside '...'; no escapes are recognized.
    SingleQuote,
    /// Inside "..."; backslash escapes are recognized.
    DoubleQuote,
    /// Just saw `\` outside quotes; the next char is taken literally.
    EscapeNormal,
    /// Just saw `\` inside double quotes; the next char is taken literally.
    EscapeDouble,
}
36
/// Parsed line assembly after tokenization.
///
/// The parser keeps the command head separate until it sees the first option-like
/// token. After that point the rest of the line is interpreted as flags, args,
/// or pipes. That mirrors how the completer reasons about scope: command path
/// first, then option/value mode.
#[derive(Debug, Default)]
struct ParseState {
    // Command-path tokens seen before the first flag-like token.
    head: Vec<String>,
    // Flags and positionals in source order.
    tail: Vec<crate::completion::model::TailItem>,
    // Values accumulated per flag name across all occurrences.
    flag_values: BTreeMap<String, Vec<String>>,
    // Tokens after the first unquoted `|`.
    pipes: Vec<String>,
    // Whether an unquoted `|` was seen anywhere in the line.
    has_pipe: bool,
}
51
52impl ParseState {
53    fn finish(self) -> CommandLine {
54        CommandLine {
55            head: self.head,
56            tail: self.tail,
57            flag_values: self.flag_values,
58            pipes: self.pipes,
59            has_pipe: self.has_pipe,
60        }
61    }
62
63    fn start_pipe<'a>(&mut self, iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>) {
64        self.has_pipe = true;
65        self.pipes.extend(iter.cloned());
66    }
67
68    fn collect_positional_tail<'a>(
69        &mut self,
70        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
71    ) {
72        while let Some(next) = iter.next() {
73            if next == "|" {
74                self.start_pipe(iter);
75                break;
76            }
77            self.tail
78                .push(crate::completion::model::TailItem::Positional(next.clone()));
79        }
80    }
81
82    fn parse_flag_tail<'a>(
83        &mut self,
84        first_token: String,
85        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
86    ) {
87        // Once the parser has seen the first flag-like token, the rest of the
88        // line stays in "tail mode". From that point on we only distinguish
89        // between more flags, their values, `--`, and a pipe into DSL mode.
90        let mut current = first_token;
91        loop {
92            if current == "|" {
93                self.start_pipe(iter);
94                return;
95            }
96
97            if current == "--" {
98                self.collect_positional_tail(iter);
99                return;
100            }
101
102            if let Some((flag, value)) = split_inline_flag_value(&current) {
103                let mut occurrence_values = Vec::new();
104                if !value.is_empty() {
105                    self.flag_values
106                        .entry(flag.clone())
107                        .or_default()
108                        .push(value.clone());
109                    occurrence_values.push(value);
110                } else {
111                    self.flag_values.entry(flag.clone()).or_default();
112                }
113                self.tail
114                    .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
115                        name: flag.clone(),
116                        values: occurrence_values,
117                    }));
118                let Some(next) = iter.next().cloned() else {
119                    break;
120                };
121                current = next;
122                continue;
123            }
124
125            let flag = current;
126            let values = self.consume_flag_values(iter);
127            self.tail
128                .push(crate::completion::model::TailItem::Flag(FlagOccurrence {
129                    name: flag.clone(),
130                    values: values.clone(),
131                }));
132            self.flag_values
133                .entry(flag.clone())
134                .or_default()
135                .extend(values);
136
137            let Some(next) = iter.next().cloned() else {
138                break;
139            };
140            current = next;
141        }
142    }
143
144    fn consume_flag_values<'a>(
145        &mut self,
146        iter: &mut std::iter::Peekable<std::slice::Iter<'a, String>>,
147    ) -> Vec<String> {
148        let mut values = Vec::new();
149
150        while let Some(next) = iter.peek() {
151            if *next == "|" || *next == "--" {
152                break;
153            }
154            if looks_like_flag_start(next) {
155                break;
156            }
157
158            values.push((*next).clone());
159            iter.next();
160        }
161
162        values
163    }
164}
165
/// Shell-like parser used by the completion engine.
///
/// A stateless unit struct: methods are called directly on the value, e.g.
/// `CommandLineParser.tokenize(...)`.
#[derive(Debug, Clone, Default)]
pub struct CommandLineParser;
169
/// Parsed command-line state for the full line and the cursor position.
///
/// Produced by [`CommandLineParser::analyze`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCursorLine {
    /// Parsed tokens and command structure.
    pub parsed: ParsedLine,
    /// Cursor-local replacement information.
    pub cursor: CursorState,
}
178
/// Tokens for the whole line plus a snapshot of the tokens seen up to the
/// cursor, captured during the same lexical walk.
#[derive(Debug, Clone)]
struct CursorTokenization {
    // All tokens in the line.
    full_tokens: Vec<String>,
    // Tokens (including the in-progress one) before the cursor.
    cursor_tokens: Vec<String>,
    // Quote state at the cursor, if it sits inside a quoted region.
    cursor_quote_style: Option<QuoteStyle>,
}
185
impl CommandLineParser {
    /// Tokenizes a line using shell-like quoting rules.
    ///
    /// Tokenization is intentionally permissive for interactive use. If the
    /// user is mid-quote while pressing tab, we retry with a synthetic closing
    /// quote before finally falling back to whitespace splitting.
    ///
    /// # Examples
    ///
    /// ```
    /// use osp_cli::completion::CommandLineParser;
    ///
    /// let parser = CommandLineParser;
    ///
    /// assert_eq!(
    ///     parser.tokenize(r#"ldap user "alice smith""#),
    ///     vec!["ldap", "user", "alice smith"]
    /// );
    /// ```
    pub fn tokenize(&self, line: &str) -> Vec<String> {
        self.tokenize_inner(line)
            .or_else(|| self.tokenize_inner(&format!("{line}\"")))
            .or_else(|| self.tokenize_inner(&format!("{line}'")))
            .unwrap_or_else(|| line.split_whitespace().map(str::to_string).collect())
    }

    /// Tokenizes `line` and preserves byte spans for each token when possible.
    ///
    /// # Examples
    ///
    /// ```
    /// use osp_cli::completion::CommandLineParser;
    ///
    /// let spans = CommandLineParser.tokenize_with_spans("ldap user alice");
    ///
    /// assert_eq!(spans[0].value, "ldap");
    /// assert_eq!(spans[1].start, 5);
    /// assert_eq!(spans[2].end, 15);
    /// ```
    pub fn tokenize_with_spans(&self, line: &str) -> Vec<TokenSpan> {
        self.tokenize_with_spans_inner(line)
            .or_else(|| self.tokenize_with_spans_fallback(line))
            .unwrap_or_default()
    }

    /// Parse the full line and the cursor-local prefix from one lexical walk.
    ///
    /// The common case keeps completion analysis in one tokenization pass. If
    /// the line ends in an unmatched quote we fall back to the permissive
    /// tokenization path so interactive behavior stays unchanged.
    ///
    /// `cursor` is clamped to the input length and to a valid UTF-8 character
    /// boundary before the parser slices the line.
    ///
    /// # Examples
    ///
    /// ```
    /// use osp_cli::completion::CommandLineParser;
    ///
    /// let parsed = CommandLineParser.analyze("ldap user ali", 13);
    ///
    /// assert_eq!(parsed.parsed.cursor_tokens, vec!["ldap", "user", "ali"]);
    /// assert_eq!(parsed.cursor.token_stub, "ali");
    /// ```
    pub fn analyze(&self, line: &str, cursor: usize) -> ParsedCursorLine {
        let safe_cursor = clamp_to_char_boundary(line, cursor.min(line.len()));
        let before_cursor = &line[..safe_cursor];
        let lexical = self.lex_cursor_line(line, before_cursor, safe_cursor);
        self.assemble_parsed_cursor_line(before_cursor, safe_cursor, lexical)
    }

    /// Single-pass shell-like lexer.
    ///
    /// Returns `None` when the line ends inside a quote (or a trailing
    /// escape), so `tokenize` can retry with a synthetic closing quote.
    fn tokenize_inner(&self, line: &str) -> Option<Vec<String>> {
        let mut out = Vec::new();
        let mut state = LexState::Normal;
        let mut current = String::new();

        for ch in line.chars() {
            match state {
                LexState::Normal => {
                    if ch.is_whitespace() {
                        push_current(&mut out, &mut current);
                    } else {
                        match ch {
                            // An unquoted `|` is always its own token, even
                            // without surrounding whitespace.
                            '|' => {
                                push_current(&mut out, &mut current);
                                out.push("|".to_string());
                            }
                            '\\' => state = LexState::EscapeNormal,
                            '\'' => state = LexState::SingleQuote,
                            '"' => state = LexState::DoubleQuote,
                            _ => current.push(ch),
                        }
                    }
                }
                LexState::SingleQuote => {
                    if ch == '\'' {
                        state = LexState::Normal;
                    } else {
                        current.push(ch);
                    }
                }
                LexState::DoubleQuote => match ch {
                    '"' => state = LexState::Normal,
                    '\\' => state = LexState::EscapeDouble,
                    _ => current.push(ch),
                },
                LexState::EscapeNormal => {
                    current.push(ch);
                    state = LexState::Normal;
                }
                LexState::EscapeDouble => {
                    current.push(ch);
                    state = LexState::DoubleQuote;
                }
            }
        }

        match state {
            LexState::Normal => {
                push_current(&mut out, &mut current);
                Some(out)
            }
            _ => None,
        }
    }

    /// Span-tracking variant of the single-pass lexer.
    ///
    /// `current_start` records the first byte of the token — including an
    /// opening quote or escape — so spans cover the full source extent even
    /// though `value` holds the unescaped text.
    fn tokenize_with_spans_inner(&self, line: &str) -> Option<Vec<TokenSpan>> {
        let mut out = Vec::new();
        let mut state = LexState::Normal;
        let mut current = String::new();
        let mut current_start = None;

        for (idx, ch) in line.char_indices() {
            match state {
                LexState::Normal => {
                    if ch.is_whitespace() {
                        push_current_span(&mut out, &mut current, &mut current_start, idx);
                    } else {
                        match ch {
                            '|' => {
                                push_current_span(&mut out, &mut current, &mut current_start, idx);
                                out.push(TokenSpan {
                                    value: "|".to_string(),
                                    start: idx,
                                    end: idx + ch.len_utf8(),
                                });
                            }
                            '\\' => {
                                current_start.get_or_insert(idx);
                                state = LexState::EscapeNormal;
                            }
                            '\'' => {
                                current_start.get_or_insert(idx);
                                state = LexState::SingleQuote;
                            }
                            '"' => {
                                current_start.get_or_insert(idx);
                                state = LexState::DoubleQuote;
                            }
                            _ => {
                                current_start.get_or_insert(idx);
                                current.push(ch);
                            }
                        }
                    }
                }
                LexState::SingleQuote => {
                    if ch == '\'' {
                        state = LexState::Normal;
                    } else {
                        current.push(ch);
                    }
                }
                LexState::DoubleQuote => match ch {
                    '"' => state = LexState::Normal,
                    '\\' => state = LexState::EscapeDouble,
                    _ => current.push(ch),
                },
                LexState::EscapeNormal => {
                    current.push(ch);
                    state = LexState::Normal;
                }
                LexState::EscapeDouble => {
                    current.push(ch);
                    state = LexState::DoubleQuote;
                }
            }
        }

        match state {
            LexState::Normal => {
                push_current_span(&mut out, &mut current, &mut current_start, line.len());
                Some(out)
            }
            _ => None,
        }
    }

    /// Whitespace-split fallback used when the quote-aware span lexer bails.
    ///
    /// Offsets are recovered by searching for each token after the end of the
    /// previous one, so repeated tokens map to successive occurrences.
    fn tokenize_with_spans_fallback(&self, line: &str) -> Option<Vec<TokenSpan>> {
        let mut out = Vec::new();
        let mut search_from = 0usize;
        for token in line.split_whitespace() {
            let rel = line.get(search_from..)?.find(token)?;
            let start = search_from + rel;
            let end = start + token.len();
            out.push(TokenSpan {
                value: token.to_string(),
                start,
                end,
            });
            search_from = end;
        }
        Some(out)
    }

    /// Parses tokens into command-path, flag, positional, and pipe segments.
    ///
    /// # Examples
    ///
    /// ```
    /// use osp_cli::completion::CommandLineParser;
    ///
    /// let tokens = vec![
    ///     "ldap".to_string(),
    ///     "user".to_string(),
    ///     "--json".to_string(),
    ///     "|".to_string(),
    ///     "P".to_string(),
    /// ];
    /// let parsed = CommandLineParser.parse(&tokens);
    ///
    /// assert_eq!(parsed.head(), &["ldap".to_string(), "user".to_string()]);
    /// assert!(parsed.has_pipe());
    /// ```
    pub fn parse(&self, tokens: &[String]) -> CommandLine {
        let mut state = ParseState::default();
        let mut iter = tokens.iter().peekable();

        while let Some(token) = iter.next() {
            if token == "|" {
                state.start_pipe(&mut iter);
                return state.finish();
            }
            if token == "--" {
                state.collect_positional_tail(&mut iter);
                return state.finish();
            }
            // First flag-like token ends the command head for good.
            if token.starts_with('-') {
                state.parse_flag_tail(token.clone(), &mut iter);
                return state.finish();
            }
            state.head.push(token.clone());
        }

        state.finish()
    }

    /// Computes the cursor replacement range and current token stub.
    ///
    /// # Examples
    ///
    /// ```
    /// use osp_cli::completion::CommandLineParser;
    ///
    /// let cursor = CommandLineParser.cursor_state("ldap user ali", 13);
    ///
    /// assert_eq!(cursor.token_stub, "ali");
    /// assert_eq!(cursor.replace_range, 10..13);
    /// ```
    pub fn cursor_state(&self, text_before_cursor: &str, safe_cursor: usize) -> CursorState {
        let tokens = self.tokenize(text_before_cursor);
        self.build_cursor_state(
            text_before_cursor,
            safe_cursor,
            &tokens,
            self.compute_stub_quote(text_before_cursor),
        )
    }

    /// Builds the cursor replacement span from pre-tokenized data.
    ///
    /// `token_stub` is the unescaped token under the cursor; `raw_stub` is
    /// the raw source slice of the replacement range.
    fn build_cursor_state(
        &self,
        text_before_cursor: &str,
        safe_cursor: usize,
        tokens: &[String],
        quote_style: Option<QuoteStyle>,
    ) -> CursorState {
        let token_stub = self.compute_stub(text_before_cursor, tokens);
        let replace_start = token_replace_start(text_before_cursor, safe_cursor, quote_style);
        let raw_stub = text_before_cursor
            .get(replace_start..safe_cursor)
            .unwrap_or("")
            .to_string();

        CursorState::new(
            token_stub,
            raw_stub,
            replace_start..safe_cursor,
            quote_style,
        )
    }

    /// Lexes the full line while snapshotting the token state at the cursor.
    ///
    /// The snapshot is taken before the character at `safe_cursor` is
    /// consumed; a cursor at end-of-line is handled after the loop. Returns
    /// `None` on an unterminated quote, like `tokenize_inner`.
    fn tokenize_with_cursor_inner(
        &self,
        line: &str,
        safe_cursor: usize,
    ) -> Option<CursorTokenization> {
        let mut out = Vec::new();
        let mut state = LexState::Normal;
        let mut current = String::new();
        let mut cursor_tokens = None;
        let mut cursor_quote_style = None;

        for (idx, ch) in line.char_indices() {
            if idx == safe_cursor && cursor_tokens.is_none() {
                cursor_tokens = Some(snapshot_tokens(&out, &current));
                cursor_quote_style = Some(quote_style_for_state(state));
            }

            match state {
                LexState::Normal => {
                    if ch.is_whitespace() {
                        push_current(&mut out, &mut current);
                    } else {
                        match ch {
                            '|' => {
                                push_current(&mut out, &mut current);
                                out.push("|".to_string());
                            }
                            '\\' => state = LexState::EscapeNormal,
                            '\'' => state = LexState::SingleQuote,
                            '"' => state = LexState::DoubleQuote,
                            _ => current.push(ch),
                        }
                    }
                }
                LexState::SingleQuote => {
                    if ch == '\'' {
                        state = LexState::Normal;
                    } else {
                        current.push(ch);
                    }
                }
                LexState::DoubleQuote => match ch {
                    '"' => state = LexState::Normal,
                    '\\' => state = LexState::EscapeDouble,
                    _ => current.push(ch),
                },
                LexState::EscapeNormal => {
                    current.push(ch);
                    state = LexState::Normal;
                }
                LexState::EscapeDouble => {
                    current.push(ch);
                    state = LexState::DoubleQuote;
                }
            }
        }

        // Cursor at the very end of the line never matches a char index.
        if safe_cursor == line.len() && cursor_tokens.is_none() {
            cursor_tokens = Some(snapshot_tokens(&out, &current));
            cursor_quote_style = Some(quote_style_for_state(state));
        }

        match state {
            LexState::Normal => {
                push_current(&mut out, &mut current);
                Some(CursorTokenization {
                    full_tokens: out,
                    cursor_tokens: cursor_tokens.unwrap_or_default(),
                    // Flatten Option<Option<_>>: an unreached cursor means no
                    // quote context.
                    cursor_quote_style: cursor_quote_style.unwrap_or(None),
                })
            }
            _ => None,
        }
    }

    /// Returns the token being typed at the cursor, or empty when the cursor
    /// sits right after whitespace or after an inline `--flag=` form.
    fn compute_stub(&self, text_before_cursor: &str, tokens: &[String]) -> String {
        if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
            return String::new();
        }
        let Some(last) = tokens.last() else {
            return String::new();
        };

        // `--flag=` means the (so far empty) value is being completed, not
        // the flag itself.
        // NOTE(review): `contains('=')` is implied by `ends_with('=')`.
        if last.starts_with("--") && last.ends_with('=') && last.contains('=') {
            return String::new();
        }

        last.clone()
    }

    /// Returns the active quote style for the token being edited, if any.
    ///
    /// # Examples
    ///
    /// ```
    /// use osp_cli::completion::{CommandLineParser, QuoteStyle};
    ///
    /// assert_eq!(
    ///     CommandLineParser.compute_stub_quote(r#"ldap user "ali"#),
    ///     Some(QuoteStyle::Double)
    /// );
    /// ```
    pub fn compute_stub_quote(&self, text_before_cursor: &str) -> Option<QuoteStyle> {
        current_quote_state(text_before_cursor)
    }

    /// Runs the single-pass cursor lexer, falling back to permissive double
    /// tokenization when the line has an unterminated quote.
    fn lex_cursor_line(
        &self,
        line: &str,
        before_cursor: &str,
        safe_cursor: usize,
    ) -> CursorLexicalState {
        match self.tokenize_with_cursor_inner(line, safe_cursor) {
            Some(tokenized) => CursorLexicalState::Structured(tokenized),
            None => CursorLexicalState::Fallback {
                full_tokens: self.tokenize(line),
                cursor_tokens: self.tokenize(before_cursor),
            },
        }
    }

    /// Turns lexed tokens into the final `ParsedCursorLine` for either the
    /// structured or the fallback lexical path.
    fn assemble_parsed_cursor_line(
        &self,
        before_cursor: &str,
        safe_cursor: usize,
        lexical: CursorLexicalState,
    ) -> ParsedCursorLine {
        match lexical {
            CursorLexicalState::Structured(tokenized) => {
                let full_cmd = self.parse(&tokenized.full_tokens);
                let cursor_cmd = self.parse(&tokenized.cursor_tokens);
                // Quote style was captured during the lexical walk; no rescan.
                let cursor = self.build_cursor_state(
                    before_cursor,
                    safe_cursor,
                    &tokenized.cursor_tokens,
                    tokenized.cursor_quote_style,
                );

                ParsedCursorLine {
                    parsed: ParsedLine {
                        safe_cursor,
                        full_tokens: tokenized.full_tokens,
                        cursor_tokens: tokenized.cursor_tokens,
                        full_cmd,
                        cursor_cmd,
                    },
                    cursor,
                }
            }
            CursorLexicalState::Fallback {
                full_tokens,
                cursor_tokens,
            } => {
                let full_cmd = self.parse(&full_tokens);
                let cursor_cmd = self.parse(&cursor_tokens);
                // Fallback recomputes the quote state from the prefix.
                let cursor = self.cursor_state(before_cursor, safe_cursor);

                ParsedCursorLine {
                    parsed: ParsedLine {
                        safe_cursor,
                        full_tokens,
                        cursor_tokens,
                        full_cmd,
                        cursor_cmd,
                    },
                    cursor,
                }
            }
        }
    }
}
658
/// Result of the cursor-aware lex: either the single-pass structured walk or
/// the permissive fallback used for unterminated quotes.
enum CursorLexicalState {
    /// Single-pass tokenization succeeded.
    Structured(CursorTokenization),
    /// Unterminated quote: tokens recovered via the permissive `tokenize` path.
    Fallback {
        full_tokens: Vec<String>,
        cursor_tokens: Vec<String>,
    },
}
666
/// Copies the tokens emitted so far, appending the in-progress token when one
/// is being built.
fn snapshot_tokens(out: &[String], current: &str) -> Vec<String> {
    let pending = if current.is_empty() {
        None
    } else {
        Some(current.to_string())
    };
    out.iter().cloned().chain(pending).collect()
}
674
/// Clamps `cursor` down to the nearest UTF-8 character boundary of `input`.
///
/// Walks downward from `cursor`; offset 0 is always a boundary, so the
/// fallback is unreachable in practice.
fn clamp_to_char_boundary(input: &str, cursor: usize) -> usize {
    (0..=cursor)
        .rev()
        .find(|&offset| input.is_char_boundary(offset))
        .unwrap_or(0)
}
685
/// Maps a lexer state to the quote style it represents, if any.
///
/// `EscapeDouble` still counts as double-quoted because the escape occurs
/// inside a `"..."` region.
fn quote_style_for_state(state: LexState) -> Option<QuoteStyle> {
    match state {
        LexState::SingleQuote => Some(QuoteStyle::Single),
        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
        LexState::Normal | LexState::EscapeNormal => None,
    }
}
693
/// Splits an inline `--flag=value` token into `("--flag", "value")`.
///
/// Returns `None` for tokens that are not `--`-prefixed or carry no `=`.
/// `--flag=` yields an empty value so callers can record the flag without a
/// value.
fn split_inline_flag_value(token: &str) -> Option<(String, String)> {
    if !token.starts_with("--") {
        return None;
    }
    // `split_once` returns `None` when there is no `=`, replacing the old
    // `splitn` + redundant `contains('=')` pre-check; only the first `=`
    // splits, so values may themselves contain `=`.
    let (flag, value) = token.split_once('=')?;
    Some((flag.to_string(), value.to_string()))
}
704
/// Flushes the in-progress token into `out`, leaving `current` empty.
///
/// Empty buffers are skipped so runs of whitespace emit no tokens.
fn push_current(out: &mut Vec<String>, current: &mut String) {
    if current.is_empty() {
        return;
    }
    out.push(std::mem::take(current));
}
710
711fn push_current_span(
712    out: &mut Vec<TokenSpan>,
713    current: &mut String,
714    current_start: &mut Option<usize>,
715    end: usize,
716) {
717    if !current.is_empty() {
718        out.push(TokenSpan {
719            value: std::mem::take(current),
720            start: current_start.take().unwrap_or(end),
721            end,
722        });
723    } else {
724        *current_start = None;
725    }
726}
727
/// Heuristic for "this token begins a new flag": dash-prefixed, not the bare
/// `-` token, and not a negative number (so `-5` can be a flag value).
fn looks_like_flag_start(token: &str) -> bool {
    if !token.starts_with('-') || token == "-" {
        return false;
    }
    // Same numeric check `is_number` performs, inlined here.
    token.parse::<f64>().is_err()
}
731
/// True when the whole token parses as a floating-point number.
fn is_number(text: &str) -> bool {
    matches!(text.parse::<f64>(), Ok(_))
}
735
736fn current_quote_state(text: &str) -> Option<QuoteStyle> {
737    let mut state = LexState::Normal;
738
739    for ch in text.chars() {
740        match state {
741            LexState::Normal => match ch {
742                '\\' => state = LexState::EscapeNormal,
743                '\'' => state = LexState::SingleQuote,
744                '"' => state = LexState::DoubleQuote,
745                _ => {}
746            },
747            LexState::SingleQuote => {
748                if ch == '\'' {
749                    state = LexState::Normal;
750                }
751            }
752            LexState::DoubleQuote => match ch {
753                '"' => state = LexState::Normal,
754                '\\' => state = LexState::EscapeDouble,
755                _ => {}
756            },
757            LexState::EscapeNormal => state = LexState::Normal,
758            LexState::EscapeDouble => state = LexState::DoubleQuote,
759        }
760    }
761
762    match state {
763        LexState::SingleQuote => Some(QuoteStyle::Single),
764        LexState::DoubleQuote | LexState::EscapeDouble => Some(QuoteStyle::Double),
765        LexState::Normal | LexState::EscapeNormal => None,
766    }
767}
768
/// Computes the byte offset where completion replacement should begin.
///
/// Rescans `text_before_cursor` with the quote/escape state machine to find
/// the start of the last token. When the cursor sits inside a quoted region
/// (`quote_style` is `Some`), replacement starts just after the opening quote
/// so the quote character itself is preserved.
fn token_replace_start(
    text_before_cursor: &str,
    safe_cursor: usize,
    quote_style: Option<QuoteStyle>,
) -> usize {
    // Cursor after whitespace (or empty line): a fresh token starts at the
    // cursor itself.
    // NOTE(review): only a literal space counts as a boundary here, unlike the
    // lexer's is_whitespace() — confirm tabs are not expected interactively.
    if text_before_cursor.is_empty() || text_before_cursor.ends_with(' ') {
        return safe_cursor;
    }

    let mut state = LexState::Normal;
    let mut token_start = 0usize;
    let mut token_active = false;
    let mut quote_start = None;

    for (idx, ch) in text_before_cursor.char_indices() {
        match state {
            LexState::Normal => {
                if ch.is_whitespace() {
                    // Token boundary: the next token would start right after
                    // this whitespace character.
                    token_active = false;
                    token_start = idx + ch.len_utf8();
                    quote_start = None;
                    continue;
                }

                if !token_active {
                    token_active = true;
                    token_start = idx;
                }

                match ch {
                    // Record the offset just past the opening quote.
                    '\'' => {
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::SingleQuote;
                    }
                    '"' => {
                        quote_start = Some(idx + ch.len_utf8());
                        state = LexState::DoubleQuote;
                    }
                    '\\' => state = LexState::EscapeNormal,
                    _ => {}
                }
            }
            LexState::SingleQuote => {
                if ch == '\'' {
                    state = LexState::Normal;
                }
            }
            LexState::DoubleQuote => match ch {
                '"' => state = LexState::Normal,
                '\\' => state = LexState::EscapeDouble,
                _ => {}
            },
            LexState::EscapeNormal => state = LexState::Normal,
            LexState::EscapeDouble => state = LexState::DoubleQuote,
        }
    }

    match quote_style {
        // Inside a quote: replace from just after the opening quote; fall
        // back to the token start if no quote offset was recorded.
        Some(_) => quote_start.unwrap_or(token_start),
        None => token_start,
    }
}
831
832#[cfg(test)]
833mod tests {
834    use crate::completion::model::{FlagOccurrence, QuoteStyle};
835
836    use super::CommandLineParser;
837
    // Shared constructor so each test reads uniformly; the parser is a
    // stateless unit struct, so this is free.
    fn parser() -> CommandLineParser {
        CommandLineParser
    }
841
    mod scanner_contracts {
        use super::*;

        #[test]
        fn scanner_preserves_token_values_offsets_and_unmatched_quote_recovery() {
            let parser = parser();

            // Quoted `|` stays inside the token; unquoted `|` splits.
            assert_eq!(
                parser.tokenize("orch provision --request 'name=a|b' | F name"),
                vec![
                    "orch",
                    "provision",
                    "--request",
                    "name=a|b",
                    "|",
                    "F",
                    "name",
                ]
            );
            // Unmatched quote: tokenize retries with a synthetic closing quote.
            assert_eq!(parser.tokenize("--os 'alma"), vec!["--os", "alma"]);

            // Span fallback path: unmatched quote keeps the raw token text.
            let spans = parser.tokenize_with_spans("cmd --name 'alice");
            assert_eq!(spans.len(), 3);
            assert_eq!(spans[0].value, "cmd");
            assert_eq!(spans[1].value, "--name");
            assert_eq!(spans[2].value, "'alice");
            let source = r#"ldap user "alice smith" | P uid"#;
            let spans = parser.tokenize_with_spans(source);

            assert_eq!(spans[0].value, "ldap");
            assert_eq!(spans[0].start, 0);
            // Spans cover the quotes; values hold the unescaped text.
            assert_eq!(spans[2].value, "alice smith");
            assert_eq!(&source[spans[2].start..spans[2].end], "\"alice smith\"");
            assert_eq!(spans[3].value, "|");
        }
    }
878
879    mod command_shape_contracts {
880        use super::*;
881
        #[test]
        fn parse_tracks_flag_values_pipes_and_repeated_occurrence_boundaries() {
            let parser = parser();

            // Head ends at the first flag; pipe tokens are captured verbatim.
            let tokens = parser.tokenize("orch provision --provider vmware --os rhel | F name");
            let cmd = parser.parse(&tokens);
            assert_eq!(cmd.head(), ["orch".to_string(), "provision".to_string()]);
            assert_eq!(
                cmd.flag_values("--provider"),
                Some(&["vmware".to_string()][..])
            );
            assert_eq!(cmd.flag_values("--os"), Some(&["rhel".to_string()][..]));
            assert!(cmd.has_pipe());
            assert_eq!(cmd.pipes(), ["F".to_string(), "name".to_string()]);

            // Repeated flags keep per-occurrence value boundaries in order.
            let repeated = parser.parse(&parser.tokenize("cmd --tag red --mode fast --tag blue"));
            assert_eq!(
                repeated.flag_occurrences().cloned().collect::<Vec<_>>(),
                vec![
                    FlagOccurrence {
                        name: "--tag".to_string(),
                        values: vec!["red".to_string()],
                    },
                    FlagOccurrence {
                        name: "--mode".to_string(),
                        values: vec!["fast".to_string()],
                    },
                    FlagOccurrence {
                        name: "--tag".to_string(),
                        values: vec!["blue".to_string()],
                    },
                ]
            );
        }
916
917        #[test]
918        fn parse_respects_option_boundaries_inline_values_and_negative_numbers() {
919            let parser = parser();
920
921            let after_double_dash = parser.parse(&parser.tokenize("cmd -- --not-a-flag"));
922            assert_eq!(after_double_dash.head(), ["cmd".to_string()]);
923            assert_eq!(
924                after_double_dash
925                    .positional_args()
926                    .cloned()
927                    .collect::<Vec<_>>(),
928                vec!["--not-a-flag".to_string()]
929            );
930
931            let negative_value = parser.parse(&parser.tokenize("cmd --count -5"));
932            assert_eq!(
933                negative_value.flag_values("--count"),
934                Some(&["-5".to_string()][..])
935            );
936
937            let inline = parser.parse(&parser.tokenize("cmd --format=json --os= --format=table"));
938            assert_eq!(inline.flag_values("--os"), Some(&[][..]));
939            assert_eq!(
940                inline.flag_occurrences().cloned().collect::<Vec<_>>(),
941                vec![
942                    FlagOccurrence {
943                        name: "--format".to_string(),
944                        values: vec!["json".to_string()],
945                    },
946                    FlagOccurrence {
947                        name: "--os".to_string(),
948                        values: vec![],
949                    },
950                    FlagOccurrence {
951                        name: "--format".to_string(),
952                        values: vec!["table".to_string()],
953                    },
954                ]
955            );
956        }
957
958        #[test]
959        fn parse_distinguishes_tail_mode_from_dsl_boundaries() {
960            let parser = parser();
961
962            let tail =
963                parser.parse(&parser.tokenize("ldap user --provider vmware region eu-central"));
964            assert_eq!(tail.head(), ["ldap".to_string(), "user".to_string()]);
965            assert_eq!(
966                tail.flag_values("--provider"),
967                Some(
968                    &[
969                        "vmware".to_string(),
970                        "region".to_string(),
971                        "eu-central".to_string(),
972                    ][..]
973                )
974            );
975
976            let dsl = parser.parse(&parser.tokenize("cmd -- literal | F name"));
977            assert_eq!(dsl.head(), ["cmd".to_string()]);
978            assert_eq!(
979                dsl.positional_args().cloned().collect::<Vec<_>>(),
980                vec!["literal".to_string()]
981            );
982            assert!(dsl.has_pipe());
983            assert_eq!(dsl.pipes(), ["F".to_string(), "name".to_string()]);
984        }
985    }
986
987    mod cursor_analysis_contracts {
988        use super::*;
989
990        #[test]
991        fn cursor_state_tracks_equals_boundaries_and_open_quote_ranges() {
992            let parser = parser();
993
994            let cursor = parser.cursor_state("cmd --flag=", "cmd --flag=".len());
995            assert_eq!(cursor.token_stub, "");
996
997            assert_eq!(
998                parser.compute_stub_quote("cmd --name \"al"),
999                Some(QuoteStyle::Double)
1000            );
1001            assert_eq!(
1002                parser.compute_stub_quote("cmd --name 'al"),
1003                Some(QuoteStyle::Single)
1004            );
1005            assert_eq!(parser.compute_stub_quote("cmd --name al"), None);
1006
1007            let line = "ldap user \"oi";
1008            let cursor = parser.cursor_state(line, line.len());
1009            assert_eq!(cursor.token_stub, "oi");
1010            assert_eq!(cursor.raw_stub, "oi");
1011            assert_eq!(cursor.replace_range, 11..13);
1012            assert_eq!(cursor.quote_style, Some(QuoteStyle::Double));
1013        }
1014
1015        #[test]
1016        fn analyze_reuses_safe_cursor_snapshots_for_prefix_and_balanced_quotes() {
1017            let parser = parser();
1018
1019            let line = "orch provision --provider vmware --os rhel | F name";
1020            let cursor = "orch provision --provider vmware".len();
1021            let analyzed = parser.analyze(line, cursor);
1022            assert_eq!(
1023                analyzed.parsed.full_tokens,
1024                vec![
1025                    "orch",
1026                    "provision",
1027                    "--provider",
1028                    "vmware",
1029                    "--os",
1030                    "rhel",
1031                    "|",
1032                    "F",
1033                    "name",
1034                ]
1035            );
1036            assert_eq!(
1037                analyzed.parsed.cursor_tokens,
1038                vec!["orch", "provision", "--provider", "vmware"]
1039            );
1040            assert_eq!(
1041                analyzed.parsed.cursor_cmd.flag_values("--provider"),
1042                Some(&["vmware".to_string()][..])
1043            );
1044
1045            let balanced = parser.analyze(
1046                r#"ldap user "oi ste" --format json"#,
1047                r#"ldap user "oi"#.len(),
1048            );
1049            assert_eq!(balanced.cursor.token_stub, "oi");
1050            assert_eq!(balanced.cursor.raw_stub, "oi");
1051            assert_eq!(balanced.cursor.quote_style, Some(QuoteStyle::Double));
1052        }
1053
1054        #[test]
1055        fn analyze_recovers_from_unbalanced_quotes_and_non_char_boundaries() {
1056            let parser = parser();
1057
1058            let unbalanced = parser.analyze(r#"ldap user "alice"#, r#"ldap user "alice"#.len());
1059            assert_eq!(unbalanced.parsed.full_tokens, vec!["ldap", "user", "alice"]);
1060            assert_eq!(
1061                unbalanced.parsed.cursor_tokens,
1062                vec!["ldap", "user", "alice"]
1063            );
1064            assert_eq!(unbalanced.cursor.quote_style, Some(QuoteStyle::Double));
1065            assert_eq!(unbalanced.cursor.token_stub, "alice");
1066
1067            let line = "ldap user å";
1068            let analyzed = parser.analyze(line, line.len() - 1);
1069            assert!(analyzed.parsed.safe_cursor < line.len());
1070            assert_eq!(analyzed.cursor.token_stub, "");
1071        }
1072    }
1073}