debate_parser/
lib.rs

1#![no_std]
2
3/*!
4Low-level implementation of argument handling. Takes care of distinctions
5between flags, options, and positionals, that sort of thing. No type handling
6happens here. Usually this is too low level to use directly.
7*/
8
9#[cfg(feature = "std")]
10extern crate std;
11
12mod arg;
13mod populated_slice;
14
15use ::core::fmt::Debug;
16
17use populated_slice::PopulatedSlice;
18
19pub use crate::arg::Arg;
20
/**
The [`ArgumentsParser`] type operates by passing arguments it finds into a
[`Visitor`], to be handled.

Every `visit_*` method consumes the visitor, so a fresh visitor is supplied
for each parsed argument.
 */
pub trait Visitor<'arg> {
    /// The type returned by every `visit_*` method.
    type Value;

    /// A positional parameter.
    fn visit_positional(self, argument: &'arg Arg) -> Self::Value;

    /// A long option that definitely has an argument, because it was given
    /// as `--option=argument`. `option` is the name without the leading `--`,
    /// and `argument` is everything after the first `=`.
    fn visit_long_option(self, option: &'arg Arg, argument: &'arg Arg) -> Self::Value;

    /// A long option or flag, such as `--option`. `option` is the name without
    /// the leading `--`. If the option needs an argument, it can be requested
    /// through `arg`.
    fn visit_long(self, option: &'arg Arg, arg: impl ArgAccess<'arg>) -> Self::Value;

    /// A short option or flag, such as `-o`. `option` is the single byte
    /// naming the flag (without the leading `-`). If the option needs an
    /// argument, it can be requested through `arg`.
    fn visit_short(self, option: u8, arg: impl ArgAccess<'arg>) -> Self::Value;
}
41
/**
[`ArgAccess`] allows a visitor to decide if a given parameter needs an argument,
based on the identity of the flag or option.

Consider `--foo bar`. Is this a pair of parameters (the flag `--foo` and the
positional parameter `bar`) or a single option `--foo bar` that takes an
argument? Similarly, `-ab foo` could be `-a b`, `foo`; or `-a`, `-b foo`; or
`-a`, `-b`, `foo`. The [`ArgumentsParser`] can't independently classify a given
argument, so instead, a visitor can request an argument via this trait only for
options that need them and the [`ArgumentsParser`] takes care of the parsing
logic of actually determining where that argument comes from.
*/
pub trait ArgAccess<'arg>: Sized {
    /**
    Get an argument from the parser. This should only be called by options that
    need it; flags should simply ignore it, to ensure that the next command
    line argument can correctly be parsed independently.

    This returns [`None`] if all of the CLI arguments have been exhausted, or
    if there are known to only be positional parameters remaining (because
    a raw `--` was parsed at some point).

    Takes `self` by value, so at most one argument can be requested per
    option.
    */
    fn take(self) -> Option<&'arg Arg>;
}
66
/// Internal parsing state of an [`ArgumentsParser`], consulted at the start of
/// every `next_arg` call.
#[derive(Debug, Clone)]
enum State<'arg> {
    /// The next raw argument is classified normally (long, short, positional,
    /// or the `--` separator).
    Ready,
    /// A raw `--` has been seen; every remaining argument is handed to the
    /// visitor as a positional.
    PositionalOnly,
    /// A short argument such as `-abc` has been partially processed; the
    /// payload is the non-empty remainder that has not been visited yet.
    // TODO: reuse the better byte slice formatter here
    ShortInProgress(&'arg PopulatedSlice<u8>),
}
74
/**
An `ArgumentsParser` is the main entry point into `debate_parser`. It parses
arguments in each call to `next_arg`, sending those arguments to the given
[`Visitor`]. It handles distinguishing flags, options, and positionals; logic
related to how flags get their argument values; and the `--` separator, after
which every remaining argument is treated as a positional.

[debate-parser][crate] operates entirely on borrowed data, because we assume
that command-line arguments can be loaded early on in `main` and then handled
in a borrowed form for the rest of the program. The ubiquitous `'arg` lifetime
refers to this borrowed command line data.
*/
#[derive(Debug, Clone)]
pub struct ArgumentsParser<'arg, I> {
    /// Where the parser currently is: ready for a fresh argument, in
    /// positional-only mode, or part-way through a short argument.
    state: State<'arg>,
    /// The raw arguments that have not been consumed yet.
    args: I,
}
91
92pub fn parser<A: AsRef<[u8]>>(args: &[A]) -> ArgumentsParser<'_, impl Iterator<Item = &[u8]>> {
93    ArgumentsParser::new(args.iter().map(|arg| arg.as_ref()))
94}
95
96impl<'arg, I> ArgumentsParser<'arg, I>
97where
98    I: Iterator<Item = &'arg [u8]>,
99{
100    /**
101    Create a new [`ArgumentsParser`] from an iterator of byte slices, where
102    each byte slice is a single argument received from the command line. This
103    list should *exclude* the name of the program, which is commonly passed as
104    the first argument in the list.
105     */
106    #[inline]
107    #[must_use]
108    pub fn new(args: impl IntoIterator<IntoIter = I>) -> Self {
109        Self {
110            state: State::Ready,
111            args: args.into_iter(),
112        }
113    }
114
115    #[inline]
116    #[must_use]
117    pub fn new_from_slice(
118        slice: &[impl AsBytes],
119    ) -> ArgumentsParser<'_, impl Iterator<Item = &'_ [u8]>> {
120        ArgumentsParser::new(slice.iter().map(|arg| arg.as_bytes()))
121    }
122
123    /// Put `self` into a `PositionalOnly` state, then process a positional
124    /// argument
125    #[inline]
126    fn positional_only_arg<V>(&mut self, visitor: V) -> Option<V::Value>
127    where
128        V: Visitor<'arg>,
129    {
130        debug_assert!(!matches!(self.state, State::ShortInProgress(_)));
131
132        self.state = State::PositionalOnly;
133        self.args
134            .next()
135            .map(Arg::new)
136            .map(|arg| visitor.visit_positional(arg))
137    }
138
139    /// Put `self` into a `Ready` state, then return a ShortArgAccess
140    #[inline]
141    fn standard_arg(&mut self) -> StandardArgAccess<'_, 'arg, I> {
142        debug_assert!(!matches!(self.state, State::PositionalOnly));
143
144        self.state = State::Ready;
145        StandardArgAccess { parent: self }
146    }
147
148    /// Put `self` into a `ShortInProgress` state, then return a ShortArgAccess.
149    /// `short` must be non-empty.
150    #[inline]
151    fn short_arg(&mut self, short: &'arg PopulatedSlice<u8>) -> ShortArgAccess<'_, 'arg> {
152        debug_assert!(!matches!(self.state, State::PositionalOnly));
153
154        self.state = State::ShortInProgress(short);
155        ShortArgAccess {
156            short: short.get(),
157            state: &mut self.state,
158        }
159    }
160
161    /// Handle getting the argument for a `-s` short option. If there is
162    /// remaining content in the short, it's a candidate for the argument;
163    /// otherwise, the next argument in the input args is the candidate.
164    #[inline]
165    fn handle_short_argument<V>(&mut self, short: &'arg PopulatedSlice<u8>, visitor: V) -> V::Value
166    where
167        V: Visitor<'arg>,
168    {
169        let (&option, short) = short.split_first();
170
171        match PopulatedSlice::new(short) {
172            None => visitor.visit_short(option, self.standard_arg()),
173            Some(short) => visitor.visit_short(option, self.short_arg(short)),
174        }
175    }
176
177    pub fn next_arg<V>(&mut self, visitor: V) -> Option<V::Value>
178    where
179        V: Visitor<'arg>,
180    {
181        match self.state {
182            State::Ready => match self.args.next()? {
183                b"--" => self.positional_only_arg(visitor),
184                argument => Some(match argument {
185                    [b'-', b'-', option @ ..] => match split_once(option, b'=') {
186                        Some((option, argument)) => {
187                            visitor.visit_long_option(Arg::new(option), Arg::new(argument))
188                        }
189                        None => visitor.visit_long(Arg::new(option), self.standard_arg()),
190                    },
191                    [b'-', short @ ..] => match PopulatedSlice::new(short) {
192                        None => visitor.visit_positional(Arg::new(b"-")),
193                        Some(short) => self.handle_short_argument(short, visitor),
194                    },
195                    positional => visitor.visit_positional(Arg::new(positional)),
196                }),
197            },
198            State::PositionalOnly => self.positional_only_arg(visitor),
199            State::ShortInProgress(short) => Some(self.handle_short_argument(short, visitor)),
200        }
201    }
202}
203
204/// ArgAccess implementation that gets the value of an argument as the next
205/// whole argument from the input. Handles logic around `--`.
206struct StandardArgAccess<'a, 'arg, I> {
207    parent: &'a mut ArgumentsParser<'arg, I>,
208}
209
210impl<'arg, I> ArgAccess<'arg> for StandardArgAccess<'_, 'arg, I>
211where
212    I: Iterator<Item = &'arg [u8]>,
213{
214    fn take(self) -> Option<&'arg Arg> {
215        match self.parent.args.next()? {
216            b"--" if !matches!(self.parent.state, State::PositionalOnly) => {
217                self.parent.state = State::PositionalOnly;
218                None
219            }
220            arg => Some(Arg::new(arg)),
221        }
222    }
223}
224
225/// ArgAccess implementation that gets the remainder of a short argument.
226/// Handles things like `-ovalue`, which is equivalent to `-o value`.
227struct ShortArgAccess<'a, 'arg> {
228    short: &'arg [u8],
229    state: &'a mut State<'arg>,
230}
231
232impl<'arg> ArgAccess<'arg> for ShortArgAccess<'_, 'arg> {
233    fn take(self) -> Option<&'arg Arg> {
234        debug_assert!(
235            matches!(*self.state, State::ShortInProgress(short) if short.get() == self.short)
236        );
237
238        *self.state = State::Ready;
239        Some(Arg::new(self.short))
240    }
241}
242
243fn split_once(input: &[u8], delimiter: u8) -> Option<(&[u8], &[u8])> {
244    memchr::memchr(delimiter, input).map(|i| (&input[..i], &input[i + 1..]))
245}
246
/// Basically the same as `AsRef<[u8]>`, but we want it for OsString and OsStr,
/// too.
pub trait AsBytes {
    /// Borrow the contents of `self` as a byte slice.
    fn as_bytes(&self) -> &[u8];
}

impl AsBytes for [u8] {
    fn as_bytes(&self) -> &[u8] {
        self
    }
}

impl AsBytes for str {
    fn as_bytes(&self) -> &[u8] {
        // Explicitly the inherent `str::as_bytes`, not a recursive trait call.
        str::as_bytes(self)
    }
}

/// References forward to the referenced type.
///
/// The `?Sized` bound matters: type parameters are `Sized` by default, and
/// without opting out this impl would not apply to `&str` or `&[u8]`, making
/// the unsized impls above unreachable from a slice of borrowed arguments
/// such as `&["--flag", "value"]`.
impl<T: AsBytes + ?Sized> AsBytes for &T {
    fn as_bytes(&self) -> &[u8] {
        T::as_bytes(*self)
    }
}
270
/// [`AsBytes`] implementations for owned and OS-specific types that are only
/// available when the `std` feature is enabled.
#[cfg(feature = "std")]
mod std_impls {
    use super::*;

    use std::{
        ffi::{OsStr, OsString},
        string::String,
        vec::Vec,
    };

    impl AsBytes for Vec<u8> {
        fn as_bytes(&self) -> &[u8] {
            &self[..]
        }
    }

    impl AsBytes for String {
        fn as_bytes(&self) -> &[u8] {
            // The inherent `String::as_bytes`, not a recursive trait call.
            String::as_bytes(self)
        }
    }

    impl AsBytes for OsStr {
        fn as_bytes(&self) -> &[u8] {
            OsStr::as_encoded_bytes(self)
        }
    }

    impl AsBytes for OsString {
        fn as_bytes(&self) -> &[u8] {
            self.as_os_str().as_encoded_bytes()
        }
    }
}