Skip to main content

reovim_driver_command/
parse.rs

//! Command-line parsing for ex-commands.
//!
//! Pure functions that split a command-line string into its components
//! and bind arguments to specs. No side effects, no execution -- mechanism only.
5
6use std::{collections::HashMap, fmt};
7
8use reovim_driver_command_types::{ArgKind, ArgSpec, ArgValue};
9
/// Structured form of one ex command line.
///
/// For example, `"write! filename.txt"` decomposes into name `"write"`,
/// bang `true`, args `["filename.txt"]`, and `raw_args` `"filename.txt"`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedCmdline {
    /// Command name with bang and arguments removed (e.g., `"write"`, `"q"`).
    pub name: String,
    /// `true` when a `!` directly followed the command name (e.g., `:q!`).
    pub bang: bool,
    /// Argument text split on whitespace, in order of appearance.
    pub args: Vec<String>,
    /// Argument text kept as one string, so interior quoting and spacing survive.
    pub raw_args: String,
}
26
27/// Parse a command-line string into name, bang, and args.
28///
29/// Grammar: `[name][!] [arg1 arg2 ...]`
30///
31/// Command names are alphabetic. Arguments start at the first non-alpha
32/// character after the name (or at whitespace). This allows vim-style
33/// commands like `:s/pat/rep/` where there is no space between the
34/// command name and arguments.
35///
36/// Returns `None` for empty or whitespace-only input.
37///
38/// # Examples
39///
40/// ```
41/// use reovim_driver_command::parse_cmdline;
42///
43/// let parsed = parse_cmdline("w filename.txt").unwrap();
44/// assert_eq!(parsed.name, "w");
45/// assert!(!parsed.bang);
46/// assert_eq!(parsed.args, vec!["filename.txt"]);
47/// assert_eq!(parsed.raw_args, "filename.txt");
48///
49/// let parsed = parse_cmdline("q!").unwrap();
50/// assert_eq!(parsed.name, "q");
51/// assert!(parsed.bang);
52/// assert!(parsed.args.is_empty());
53/// assert!(parsed.raw_args.is_empty());
54///
55/// let parsed = parse_cmdline("s/foo/bar/g").unwrap();
56/// assert_eq!(parsed.name, "s");
57/// assert_eq!(parsed.raw_args, "/foo/bar/g");
58///
59/// assert!(parse_cmdline("").is_none());
60/// ```
61#[must_use]
62pub fn parse_cmdline(input: &str) -> Option<ParsedCmdline> {
63    let input = input.trim();
64    if input.is_empty() {
65        return None;
66    }
67
68    // Find where the command name ends. Ex-command names are alphabetic.
69    // The name ends at the first character that is not a letter, giving us
70    // correct parsing for `:s/pat/rep/` (name="s", args="/pat/rep/").
71    let name_end = input
72        .find(|c: char| !c.is_ascii_alphabetic())
73        .unwrap_or(input.len());
74
75    let cmd_part = &input[..name_end];
76    let rest = &input[name_end..];
77
78    // Extract bang if the rest starts with '!'
79    let (bang, args_part) = rest
80        .strip_prefix('!')
81        .map_or_else(|| (false, rest.trim_start()), |after| (true, after.trim_start()));
82
83    let name = cmd_part;
84
85    let args = if args_part.is_empty() {
86        vec![]
87    } else {
88        args_part.split_whitespace().map(String::from).collect()
89    };
90
91    Some(ParsedCmdline {
92        name: name.to_string(),
93        bang,
94        args,
95        raw_args: args_part.to_string(),
96    })
97}
98
/// Split argument text into tokens, honoring quotes and backslash escapes.
///
/// Rules:
/// - Tokens are separated by spaces and tabs.
/// - Double quotes group text: `"foo bar"` -> `foo bar`. Inside them a
///   backslash makes the next character literal.
/// - Single quotes group text: `'foo bar'` -> `foo bar`. Backslashes inside
///   them are kept as-is.
/// - Outside quotes a backslash makes the next character literal:
///   `foo\ bar` -> `foo bar`.
/// - Quoted and unquoted pieces that touch form one token:
///   `foo "bar baz" qux` -> `["foo", "bar baz", "qux"]`.
///
/// An unclosed quote runs to the end of the input. A backslash with nothing
/// after it contributes no character.
#[must_use]
pub fn tokenize_args(input: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    // `Some` while a token is in progress. An empty `Some` is still a token,
    // which is how `""` produces an empty-string argument.
    let mut pending: Option<String> = None;
    let mut stream = input.chars();

    while let Some(ch) = stream.next() {
        if ch == ' ' || ch == '\t' {
            if let Some(done) = pending.take() {
                out.push(done);
            }
            continue;
        }

        // Any non-separator character starts (or continues) a token.
        let buf = pending.get_or_insert_with(String::new);
        match ch {
            '"' | '\'' => {
                // Consume up to the matching quote or the end of input.
                while let Some(inner) = stream.next() {
                    if inner == ch {
                        break;
                    }
                    if inner == '\\' && ch == '"' {
                        // Only double quotes interpret backslash escapes.
                        if let Some(escaped) = stream.next() {
                            buf.push(escaped);
                        }
                    } else {
                        buf.push(inner);
                    }
                }
            }
            '\\' => {
                if let Some(escaped) = stream.next() {
                    buf.push(escaped);
                }
            }
            other => buf.push(other),
        }
    }

    // Flush the token that was still open when the input ended.
    out.extend(pending);
    out
}
163
164/// Error type for argument binding failures.
165#[derive(Debug, Clone, PartialEq, Eq)]
166pub enum ArgError {
167    /// A required argument is missing.
168    MissingRequired {
169        /// The argument name.
170        name: &'static str,
171        /// The expected argument kind.
172        kind: ArgKind,
173    },
174    /// Too many arguments provided.
175    TooManyArgs {
176        /// Expected argument count.
177        expected: usize,
178        /// Actual argument count.
179        got: usize,
180    },
181    /// Argument value could not be parsed as expected type.
182    InvalidValue {
183        /// The argument name.
184        name: &'static str,
185        /// The expected argument kind.
186        kind: ArgKind,
187        /// The actual value that failed to parse.
188        value: String,
189    },
190}
191
192impl fmt::Display for ArgError {
193    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
194        match self {
195            Self::MissingRequired { name, kind } => {
196                write!(f, "E471: Missing required argument: {name} ({kind:?})")
197            }
198            Self::TooManyArgs { expected, got } => {
199                write!(f, "E488: Too many arguments (expected {expected}, got {got})")
200            }
201            Self::InvalidValue { name, kind, value } => {
202                write!(f, "E474: Invalid value for {name} ({kind:?}): \"{value}\"")
203            }
204        }
205    }
206}
207
208/// Bind arguments to specs, producing a map of name -> value.
209///
210/// Tokenizes `raw_args` and matches tokens to specs in order.
211/// Bang is handled separately via `parsed.bang` and not consumed from tokens.
212///
213/// # Errors
214///
215/// Returns `ArgError` if a required argument is missing, too many arguments
216/// are provided, or a value cannot be parsed as the expected type.
217pub fn bind_args(
218    specs: &[ArgSpec],
219    raw_args: &str,
220    bang: bool,
221) -> Result<HashMap<String, ArgValue>, ArgError> {
222    let tokens = tokenize_args(raw_args);
223    let mut result = HashMap::new();
224    let mut token_idx = 0;
225    // Track how many positional (non-bang) specs there are
226    let positional_count = specs.iter().filter(|s| s.kind != ArgKind::Bang).count();
227
228    // Track whether the last spec was Rest (which consumes everything)
229    let mut consumed_rest = false;
230
231    for spec in specs {
232        match spec.kind {
233            ArgKind::Bang => {
234                // Bang is from the parsed command, not from tokens
235                if bang {
236                    result.insert(spec.name.to_string(), ArgValue::Bang(true));
237                }
238            }
239            ArgKind::Rest => {
240                // Rest consumes all remaining raw text after already-consumed tokens
241                let remaining = remaining_raw(raw_args, token_idx);
242                if remaining.is_empty() {
243                    if spec.required {
244                        return Err(ArgError::MissingRequired {
245                            name: spec.name,
246                            kind: spec.kind,
247                        });
248                    }
249                } else {
250                    result.insert(spec.name.to_string(), ArgValue::String(remaining));
251                    token_idx = tokens.len(); // consume all
252                }
253                consumed_rest = true;
254            }
255            _ => {
256                if token_idx >= tokens.len() {
257                    if spec.required {
258                        return Err(ArgError::MissingRequired {
259                            name: spec.name,
260                            kind: spec.kind,
261                        });
262                    }
263                    continue;
264                }
265                let token = &tokens[token_idx];
266                token_idx += 1;
267                let value = parse_token(spec.name, spec.kind, token)?;
268                result.insert(spec.name.to_string(), value);
269            }
270        }
271    }
272
273    // Check for leftover tokens (unless last spec was Rest)
274    if !consumed_rest && token_idx < tokens.len() {
275        return Err(ArgError::TooManyArgs {
276            expected: positional_count,
277            got: positional_count + (tokens.len() - token_idx),
278        });
279    }
280
281    Ok(result)
282}
283
284/// Parse a single token into an `ArgValue` based on the expected kind.
285fn parse_token(name: &'static str, kind: ArgKind, token: &str) -> Result<ArgValue, ArgError> {
286    match kind {
287        ArgKind::FilePath => Ok(ArgValue::FilePath(token.to_string())),
288        ArgKind::String => Ok(ArgValue::String(token.to_string())),
289        ArgKind::Count => {
290            token
291                .parse::<usize>()
292                .map(ArgValue::Count)
293                .map_err(|_| ArgError::InvalidValue {
294                    name,
295                    kind,
296                    value: token.to_string(),
297                })
298        }
299        ArgKind::Bool => match token {
300            "true" => Ok(ArgValue::Bool(true)),
301            "false" => Ok(ArgValue::Bool(false)),
302            _ => Err(ArgError::InvalidValue {
303                name,
304                kind,
305                value: token.to_string(),
306            }),
307        },
308        ArgKind::Char => {
309            let mut chars = token.chars();
310            match (chars.next(), chars.next()) {
311                (Some(c), None) => Ok(ArgValue::Char(c)),
312                _ => Err(ArgError::InvalidValue {
313                    name,
314                    kind,
315                    value: token.to_string(),
316                }),
317            }
318        }
319        ArgKind::Register => {
320            let mut chars = token.chars();
321            match (chars.next(), chars.next()) {
322                (Some(c), None) => Ok(ArgValue::Register(c)),
323                _ => Err(ArgError::InvalidValue {
324                    name,
325                    kind,
326                    value: token.to_string(),
327                }),
328            }
329        }
330        // These kinds are not expected from ex-command text input
331        ArgKind::Bang | ArgKind::Rest | ArgKind::Motion | ArgKind::Range | ArgKind::BufferId => {
332            Err(ArgError::InvalidValue {
333                name,
334                kind,
335                value: token.to_string(),
336            })
337        }
338    }
339}
340
/// Return the raw text that follows the first `consumed` tokens of `raw_args`.
///
/// Token boundaries follow the same rules as `tokenize_args`: tokens are
/// separated by spaces and tabs, quoted sections (with backslash escapes
/// inside double quotes) and backslash-escaped characters belong to the
/// surrounding token, and adjacent quoted/unquoted pieces form one token.
///
/// The result is trimmed of surrounding whitespace. With `consumed == 0` the
/// whole (trimmed) input is returned; if there are `consumed` or fewer
/// tokens the result is empty.
///
/// The scan walks characters rather than bytes, so a trailing backslash or an
/// escaped multi-byte character can never push the cut position past the end
/// of the string or into the middle of a character.
fn remaining_raw(raw_args: &str, consumed: usize) -> String {
    if consumed == 0 {
        return raw_args.trim().to_string();
    }

    let is_separator = |c: char| c == ' ' || c == '\t';
    let mut chars = raw_args.char_indices().peekable();

    for _ in 0..consumed {
        // Skip the separators in front of the token.
        while chars.next_if(|&(_, c)| is_separator(c)).is_some() {}
        if chars.peek().is_none() {
            return String::new();
        }

        // Skip one token: everything up to the next unquoted, unescaped separator.
        while let Some((_, c)) = chars.next_if(|&(_, c)| !is_separator(c)) {
            match c {
                '"' | '\'' => {
                    // Run to the matching quote, or to the end if it is unclosed.
                    while let Some((_, inner)) = chars.next() {
                        if inner == c {
                            break;
                        }
                        if inner == '\\' && c == '"' {
                            chars.next(); // escaped character, if any
                        }
                    }
                }
                '\\' => {
                    chars.next(); // escaped character, if any
                }
                _ => {}
            }
        }
    }

    // `peek` yields the byte offset of the next unread character, which is
    // always a valid char boundary; at end of input fall back to the length.
    let rest_start = chars.peek().map_or(raw_args.len(), |&(idx, _)| idx);
    raw_args[rest_start..].trim().to_string()
}
389
390#[cfg(test)]
391#[path = "parse_tests.rs"]
392mod tests;