reovim_driver_command/parse.rs
1//! Command-line parsing for ex-commands.
2//!
3//! Pure functions that split a command-line string into its components
4//! and bind arguments to specs. No side effects, no execution -- mechanism only.
5
6use std::{collections::HashMap, fmt};
7
8use reovim_driver_command_types::{ArgKind, ArgSpec, ArgValue};
9
/// The structured form of one ex-command line.
///
/// For example, `"write! filename.txt"` breaks down into
/// `name = "write"`, `bang = true`, `args = ["filename.txt"]` and
/// `raw_args = "filename.txt"`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedCmdline {
    /// Command name, e.g. `"write"` or `"q"`.
    pub name: String,
    /// `true` when the name was immediately followed by `!` (as in `:q!`).
    pub bang: bool,
    /// Everything after the name (and bang), split on whitespace.
    pub args: Vec<String>,
    /// Everything after the name (and bang) as one string, with quoting and
    /// interior spacing left untouched.
    pub raw_args: String,
}
26
27/// Parse a command-line string into name, bang, and args.
28///
29/// Grammar: `[name][!] [arg1 arg2 ...]`
30///
31/// Command names are alphabetic. Arguments start at the first non-alpha
32/// character after the name (or at whitespace). This allows vim-style
33/// commands like `:s/pat/rep/` where there is no space between the
34/// command name and arguments.
35///
36/// Returns `None` for empty or whitespace-only input.
37///
38/// # Examples
39///
40/// ```
41/// use reovim_driver_command::parse_cmdline;
42///
43/// let parsed = parse_cmdline("w filename.txt").unwrap();
44/// assert_eq!(parsed.name, "w");
45/// assert!(!parsed.bang);
46/// assert_eq!(parsed.args, vec!["filename.txt"]);
47/// assert_eq!(parsed.raw_args, "filename.txt");
48///
49/// let parsed = parse_cmdline("q!").unwrap();
50/// assert_eq!(parsed.name, "q");
51/// assert!(parsed.bang);
52/// assert!(parsed.args.is_empty());
53/// assert!(parsed.raw_args.is_empty());
54///
55/// let parsed = parse_cmdline("s/foo/bar/g").unwrap();
56/// assert_eq!(parsed.name, "s");
57/// assert_eq!(parsed.raw_args, "/foo/bar/g");
58///
59/// assert!(parse_cmdline("").is_none());
60/// ```
61#[must_use]
62pub fn parse_cmdline(input: &str) -> Option<ParsedCmdline> {
63 let input = input.trim();
64 if input.is_empty() {
65 return None;
66 }
67
68 // Find where the command name ends. Ex-command names are alphabetic.
69 // The name ends at the first character that is not a letter, giving us
70 // correct parsing for `:s/pat/rep/` (name="s", args="/pat/rep/").
71 let name_end = input
72 .find(|c: char| !c.is_ascii_alphabetic())
73 .unwrap_or(input.len());
74
75 let cmd_part = &input[..name_end];
76 let rest = &input[name_end..];
77
78 // Extract bang if the rest starts with '!'
79 let (bang, args_part) = rest
80 .strip_prefix('!')
81 .map_or_else(|| (false, rest.trim_start()), |after| (true, after.trim_start()));
82
83 let name = cmd_part;
84
85 let args = if args_part.is_empty() {
86 vec![]
87 } else {
88 args_part.split_whitespace().map(String::from).collect()
89 };
90
91 Some(ParsedCmdline {
92 name: name.to_string(),
93 bang,
94 args,
95 raw_args: args_part.to_string(),
96 })
97}
98
/// Split argument text into tokens, honouring quotes and backslash escapes.
///
/// Rules:
/// - Tokens are separated by spaces and tabs.
/// - `"foo bar"` and `'foo bar'` each contribute `foo bar` to the current
///   token; the quote characters themselves are dropped.
/// - Outside quotes and inside double quotes, a backslash makes the next
///   character literal (`foo\ bar` -> `foo bar`). Inside single quotes a
///   backslash is an ordinary character.
/// - Quoted and unquoted pieces that touch belong to the same token.
/// - A quote that is never closed runs to the end of the input.
///
/// Example: `foo "bar baz" qux` -> `["foo", "bar baz", "qux"]`.
#[must_use]
pub fn tokenize_args(input: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut buf = String::new();
    // True once the current token has started. Tracked separately from
    // `buf` because an empty pair of quotes is still a (zero-length) token.
    let mut open = false;
    let mut rest = input.chars();

    while let Some(ch) = rest.next() {
        if ch == ' ' || ch == '\t' {
            // A separator closes the token in progress, if there is one.
            if open {
                out.push(std::mem::take(&mut buf));
                open = false;
            }
            continue;
        }

        open = true;
        match ch {
            '"' | '\'' => {
                // Copy characters up to the matching quote (or end of input).
                while let Some(inner) = rest.next() {
                    if inner == ch {
                        break;
                    }
                    if inner == '\\' && ch == '"' {
                        // Escape inside double quotes: keep the following
                        // character verbatim, if there is one.
                        buf.extend(rest.next());
                    } else {
                        buf.push(inner);
                    }
                }
            }
            // Bare escape: keep the following character verbatim, if any.
            '\\' => buf.extend(rest.next()),
            other => buf.push(other),
        }
    }

    if open {
        out.push(buf);
    }

    out
}
163
164/// Error type for argument binding failures.
165#[derive(Debug, Clone, PartialEq, Eq)]
166pub enum ArgError {
167 /// A required argument is missing.
168 MissingRequired {
169 /// The argument name.
170 name: &'static str,
171 /// The expected argument kind.
172 kind: ArgKind,
173 },
174 /// Too many arguments provided.
175 TooManyArgs {
176 /// Expected argument count.
177 expected: usize,
178 /// Actual argument count.
179 got: usize,
180 },
181 /// Argument value could not be parsed as expected type.
182 InvalidValue {
183 /// The argument name.
184 name: &'static str,
185 /// The expected argument kind.
186 kind: ArgKind,
187 /// The actual value that failed to parse.
188 value: String,
189 },
190}
191
192impl fmt::Display for ArgError {
193 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
194 match self {
195 Self::MissingRequired { name, kind } => {
196 write!(f, "E471: Missing required argument: {name} ({kind:?})")
197 }
198 Self::TooManyArgs { expected, got } => {
199 write!(f, "E488: Too many arguments (expected {expected}, got {got})")
200 }
201 Self::InvalidValue { name, kind, value } => {
202 write!(f, "E474: Invalid value for {name} ({kind:?}): \"{value}\"")
203 }
204 }
205 }
206}
207
208/// Bind arguments to specs, producing a map of name -> value.
209///
210/// Tokenizes `raw_args` and matches tokens to specs in order.
211/// Bang is handled separately via `parsed.bang` and not consumed from tokens.
212///
213/// # Errors
214///
215/// Returns `ArgError` if a required argument is missing, too many arguments
216/// are provided, or a value cannot be parsed as the expected type.
217pub fn bind_args(
218 specs: &[ArgSpec],
219 raw_args: &str,
220 bang: bool,
221) -> Result<HashMap<String, ArgValue>, ArgError> {
222 let tokens = tokenize_args(raw_args);
223 let mut result = HashMap::new();
224 let mut token_idx = 0;
225 // Track how many positional (non-bang) specs there are
226 let positional_count = specs.iter().filter(|s| s.kind != ArgKind::Bang).count();
227
228 // Track whether the last spec was Rest (which consumes everything)
229 let mut consumed_rest = false;
230
231 for spec in specs {
232 match spec.kind {
233 ArgKind::Bang => {
234 // Bang is from the parsed command, not from tokens
235 if bang {
236 result.insert(spec.name.to_string(), ArgValue::Bang(true));
237 }
238 }
239 ArgKind::Rest => {
240 // Rest consumes all remaining raw text after already-consumed tokens
241 let remaining = remaining_raw(raw_args, token_idx);
242 if remaining.is_empty() {
243 if spec.required {
244 return Err(ArgError::MissingRequired {
245 name: spec.name,
246 kind: spec.kind,
247 });
248 }
249 } else {
250 result.insert(spec.name.to_string(), ArgValue::String(remaining));
251 token_idx = tokens.len(); // consume all
252 }
253 consumed_rest = true;
254 }
255 _ => {
256 if token_idx >= tokens.len() {
257 if spec.required {
258 return Err(ArgError::MissingRequired {
259 name: spec.name,
260 kind: spec.kind,
261 });
262 }
263 continue;
264 }
265 let token = &tokens[token_idx];
266 token_idx += 1;
267 let value = parse_token(spec.name, spec.kind, token)?;
268 result.insert(spec.name.to_string(), value);
269 }
270 }
271 }
272
273 // Check for leftover tokens (unless last spec was Rest)
274 if !consumed_rest && token_idx < tokens.len() {
275 return Err(ArgError::TooManyArgs {
276 expected: positional_count,
277 got: positional_count + (tokens.len() - token_idx),
278 });
279 }
280
281 Ok(result)
282}
283
284/// Parse a single token into an `ArgValue` based on the expected kind.
285fn parse_token(name: &'static str, kind: ArgKind, token: &str) -> Result<ArgValue, ArgError> {
286 match kind {
287 ArgKind::FilePath => Ok(ArgValue::FilePath(token.to_string())),
288 ArgKind::String => Ok(ArgValue::String(token.to_string())),
289 ArgKind::Count => {
290 token
291 .parse::<usize>()
292 .map(ArgValue::Count)
293 .map_err(|_| ArgError::InvalidValue {
294 name,
295 kind,
296 value: token.to_string(),
297 })
298 }
299 ArgKind::Bool => match token {
300 "true" => Ok(ArgValue::Bool(true)),
301 "false" => Ok(ArgValue::Bool(false)),
302 _ => Err(ArgError::InvalidValue {
303 name,
304 kind,
305 value: token.to_string(),
306 }),
307 },
308 ArgKind::Char => {
309 let mut chars = token.chars();
310 match (chars.next(), chars.next()) {
311 (Some(c), None) => Ok(ArgValue::Char(c)),
312 _ => Err(ArgError::InvalidValue {
313 name,
314 kind,
315 value: token.to_string(),
316 }),
317 }
318 }
319 ArgKind::Register => {
320 let mut chars = token.chars();
321 match (chars.next(), chars.next()) {
322 (Some(c), None) => Ok(ArgValue::Register(c)),
323 _ => Err(ArgError::InvalidValue {
324 name,
325 kind,
326 value: token.to_string(),
327 }),
328 }
329 }
330 // These kinds are not expected from ex-command text input
331 ArgKind::Bang | ArgKind::Rest | ArgKind::Motion | ArgKind::Range | ArgKind::BufferId => {
332 Err(ArgError::InvalidValue {
333 name,
334 kind,
335 value: token.to_string(),
336 })
337 }
338 }
339}
340
/// Return the raw text that follows the first `consumed` tokens of `raw_args`.
///
/// The scan applies the same rules as `tokenize_args`, so both functions
/// agree on where each token ends:
///
/// - tokens are separated by spaces and tabs;
/// - single- and double-quoted sections may appear anywhere inside a token,
///   and adjacent quoted and unquoted pieces form one token;
/// - a backslash outside quotes or inside double quotes skips the next
///   character; inside single quotes it is an ordinary character;
/// - an unclosed quote runs to the end of the input.
///
/// The result is trimmed of surrounding whitespace. With `consumed == 0`
/// the whole (trimmed) input is returned. If `raw_args` holds `consumed`
/// tokens or fewer, the result is empty.
///
/// The scan walks `char`s rather than bytes and never advances past the end
/// of the input, so a trailing backslash (e.g. `foo\`) cannot produce an
/// out-of-range slice.
fn remaining_raw(raw_args: &str, consumed: usize) -> String {
    if consumed == 0 {
        return raw_args.trim().to_string();
    }

    let mut chars = raw_args.char_indices().peekable();
    // Number of tokens that have been fully scanned so far.
    let mut finished = 0usize;
    // True while the scan is inside a token.
    let mut in_token = false;

    while let Some(&(idx, ch)) = chars.peek() {
        match ch {
            ' ' | '\t' => {
                if in_token {
                    in_token = false;
                    finished += 1;
                    if finished == consumed {
                        // `idx` is the separator that ended the last consumed
                        // token; everything from here on is the remainder.
                        return raw_args[idx..].trim().to_string();
                    }
                }
                chars.next();
            }
            '"' | '\'' => {
                in_token = true;
                chars.next(); // opening quote
                loop {
                    match chars.next() {
                        // Unclosed quote: the token extends to end of input.
                        None => break,
                        Some((_, c)) if c == ch => break,
                        Some((_, '\\')) if ch == '"' => {
                            // Escaped character inside double quotes.
                            chars.next();
                        }
                        Some(_) => {}
                    }
                }
            }
            '\\' => {
                in_token = true;
                chars.next(); // backslash
                chars.next(); // escaped character, if any
            }
            _ => {
                in_token = true;
                chars.next();
            }
        }
    }

    // Input ended before a separator followed the last consumed token, so
    // nothing remains.
    String::new()
}
389
390#[cfg(test)]
391#[path = "parse_tests.rs"]
392mod tests;