rt_format/
parser.rs

1//! Provides support for parsing typical Rust formatting strings.
2//! 
3//! The parser supports all of the features of the formatting strings that are normally passed to
4//! the `format!` macro, except for the fill character.
5
6use regex::{Captures, Match};
7use std::convert::{TryFrom, TryInto};
8use std::fmt;
9
10use crate::argument::{
11    ArgumentFormatter, ArgumentSource, FormatArgument, NamedArguments, PositionalArguments
12};
13use crate::{format_value, Align, Format, Pad, Precision, Repr, Sign, Specifier, Width};
14
15/// A value and its formatting specifier.
16#[derive(Debug, Copy, Clone, PartialEq)]
17pub struct Substitution<'v, V: FormatArgument> {
18    specifier: Specifier,
19    value: &'v V,
20    _private: (),
21}
22
23impl<'v, V: FormatArgument> Substitution<'v, V> {
24    /// Create an `Substitution` if the given value supports the given format.
25    pub fn new(specifier: Specifier, value: &'v V) -> Result<Substitution<'v, V>, ()> {
26        if value.supports_format(&specifier) {
27            Ok(Substitution {
28                specifier,
29                value,
30                _private: (),
31            })
32        } else {
33            Err(())
34        }
35    }
36
37    /// A reference to the formatting specifier.
38    pub fn specifier(&self) -> &Specifier {
39        &self.specifier
40    }
41
42    /// A reference to the value to format.
43    pub fn value(&self) -> &'v V {
44        self.value
45    }
46}
47
48impl<'v, V: FormatArgument> fmt::Display for Substitution<'v, V> {
49    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
50        format_value(&self.specifier, &ArgumentFormatter(self.value), f)
51    }
52}
53
54/// A single segment of a formatting string.
55#[derive(Debug, Copy, Clone, PartialEq)]
56pub enum Segment<'s, V: FormatArgument> {
57    /// Text to be sent to the formatter.
58    Text(&'s str),
59    /// A value ready to be formatted.
60    Substitution(Substitution<'s, V>),
61}
62
63impl<'s, V: FormatArgument> fmt::Display for Segment<'s, V> {
64    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
65        match self {
66            Segment::Text(text) => f.write_str(text),
67            Segment::Substitution(arg) => arg.fmt(f),
68        }
69    }
70}
71
72/// A representation of the formatting string and associated values, ready to be formatted.
73#[derive(Debug, Clone, PartialEq)]
74pub struct ParsedFormat<'a, V: FormatArgument> {
75    /// A vector of formatting string segments.
76    pub segments: Vec<Segment<'a, V>>,
77}
78
79impl<'a, V: FormatArgument> ParsedFormat<'a, V> {
80    /// Parses the formatting string, using given positional and named arguments. Does not perform
81    /// any formatting. It just parses the formatting string, validates that all the arguments are
82    /// present, and that each argument supports the requested format.
83    pub fn parse<P, N>(format: &'a str, positional: &'a P, named: &'a N) -> Result<Self, usize>
84    where
85        P: PositionalArguments<'a, V> + ?Sized,
86        N: NamedArguments<V>,
87    {
88        let segments: Result<Vec<Segment<'a, V>>, usize> =
89            Parser::new(format, positional, named).collect();
90        Ok(ParsedFormat {
91            segments: segments?,
92        })
93    }
94}
95
96impl<'a, V: FormatArgument> fmt::Display for ParsedFormat<'a, V> {
97    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98        for segment in &self.segments {
99            segment.fmt(f)?
100        }
101        Ok(())
102    }
103}
104
105/// A specifier component that can be parsed from the corresponding part of the formatting string.
106trait Parseable<'m, V, S>
107where
108    V: FormatArgument,
109    S: ArgumentSource<V>,
110    Self: Sized,
111{
112    fn parse(capture: Option<Match<'m>>, value_src: &mut S) -> Result<Self, ()>;
113}
114
115impl<'m, V, S, T> Parseable<'m, V, S> for T
116where
117    V: FormatArgument,
118    S: ArgumentSource<V>,
119    T: Sized + TryFrom<&'m str, Error = ()>,
120{
121    fn parse(capture: Option<Match<'m>>, _: &mut S) -> Result<Self, ()> {
122        capture.map(|m| m.as_str()).unwrap_or("").try_into()
123    }
124}
125
126/// Parses a size specifier, such as width or precision. If the size is not hard-coded in the
127/// formatting string, looks up the corresponding argument and tries to convert it to `usize`.
128fn parse_size<'m, V, S>(text: &str, value_src: &S) -> Result<usize, ()>
129where
130    V: FormatArgument,
131    S: ArgumentSource<V>,
132{
133    if text.ends_with('$') {
134        let text = &text[..text.len() - 1];
135        let value = if text.as_bytes()[0].is_ascii_digit() {
136            text.parse()
137                .ok()
138                .and_then(|idx| value_src.lookup_argument_by_index(idx))
139        } else {
140            value_src.lookup_argument_by_name(text)
141        };
142        value.ok_or(()).and_then(FormatArgument::to_usize)
143    } else {
144        text.parse().map_err(|_| ())
145    }
146}
147
148impl<'m, V, S> Parseable<'m, V, S> for Width
149where
150    V: FormatArgument,
151    S: ArgumentSource<V>,
152{
153    fn parse(capture: Option<Match<'m>>, value_src: &mut S) -> Result<Self, ()> {
154        match capture.map(|m| m.as_str()).unwrap_or("") {
155            "" => Ok(Width::Auto),
156            s @ _ => parse_size(s, value_src).map(|width| Width::AtLeast { width }),
157        }
158    }
159}
160
161impl<'m, V, S> Parseable<'m, V, S> for Precision
162where
163    V: FormatArgument,
164    S: ArgumentSource<V>,
165{
166    fn parse(capture: Option<Match<'m>>, value_src: &mut S) -> Result<Self, ()> {
167        match capture.map(|m| m.as_str()).unwrap_or("") {
168            "" => Ok(Precision::Auto),
169            "*" => value_src
170                .next_argument()
171                .ok_or(())
172                .and_then(FormatArgument::to_usize)
173                .map(|precision| Precision::Exactly { precision }),
174            s @ _ => parse_size(s, value_src).map(|precision| Precision::Exactly { precision }),
175        }
176    }
177}
178
/// Expands to the regex fragment (written for verbose `(?x)` mode) that matches the specifier
/// part of a format argument. It is a macro rather than a constant so that it can be spliced
/// into larger patterns with `concat!`.
///
/// Named groups: `align`, `sign`, `repr`, `pad`, `width`, `precision` and `format`. Every
/// component is optional.
///
/// Width and precision may be a literal number, a positional reference (`<digits>$`) or a named
/// reference (`<identifier>$`). Identifiers follow the same `XID_Start`/`XID_Continue` grammar
/// that is used for argument names, so names containing underscores or non-ASCII letters can be
/// referenced.
macro_rules! SPEC_REGEX_FRAG {
    () => { r"
        (?P<align>[<^>])?
        (?P<sign>\+)?
        (?P<repr>\#)?
        (?P<pad>0)?
        (?P<width>
            (?:\d+\$?)|(?:[\p{XID_Start}_][\p{XID_Continue}]*\$)
        )?
        (?:\.(?P<precision>
            (?:\d+\$?)|(?:[\p{XID_Start}_][\p{XID_Continue}]*\$)|\*
        ))?
        (?P<format>[?oxXbeE])?
    " };
}
194
195fn parse_specifier_captures<V, S>(captures: &Captures, value_src: &mut S) -> Result<Specifier, ()>
196where
197    V: FormatArgument,
198    S: ArgumentSource<V>,
199{
200    Ok(Specifier {
201        align: Align::parse(captures.name("align"), value_src)?,
202        sign: Sign::parse(captures.name("sign"), value_src)?,
203        repr: Repr::parse(captures.name("repr"), value_src)?,
204        pad: Pad::parse(captures.name("pad"), value_src)?,
205        width: Width::parse(captures.name("width"), value_src)?,
206        precision: Precision::parse(captures.name("precision"), value_src)?,
207        format: Format::parse(captures.name("format"), value_src)?,
208    })
209}
210
211/// Parses only the format specifier portion of a format argument. For example, in a format
212/// argument specification `{foo:#X}`, this function would parse only the `#X` part.
213pub fn parse_specifier<V, S>(spec_str: &str, value_src: &mut S) -> Result<Specifier, ()>
214where
215    V: FormatArgument,
216    S: ArgumentSource<V>,
217{
218    use lazy_static::lazy_static;
219    use regex::Regex;
220
221    lazy_static! {
222        static ref SPEC_RE: Regex = Regex::new(concat!(r"(?x) ^", SPEC_REGEX_FRAG!())).unwrap();
223    }
224
225    match SPEC_RE.captures(spec_str) {
226        None => Err(()),
227        Some(captures) => parse_specifier_captures(&captures, value_src)
228    }
229}
230
231/// An iterator of `Segment`s that correspond to the parts of the formatting string being parsed.
232pub struct Parser<'p, V, P, N>
233where
234    V: FormatArgument,
235    P: PositionalArguments<'p, V> + ?Sized,
236    N: NamedArguments<V>,
237{
238    unparsed: &'p str,
239    parsed_len: usize,
240    positional: &'p P,
241    named: &'p N,
242    positional_iter: P::Iter,
243}
244
245impl<'p, V, P, N> Parser<'p, V, P, N>
246where
247    V: FormatArgument,
248    P: PositionalArguments<'p, V> + ?Sized,
249    N: NamedArguments<V>,
250{
251    /// Creates a new `Parser` for the given formatting string, positional arguments, and named
252    /// arguments.
253    pub fn new(format: &'p str, positional: &'p P, named: &'p N) -> Self {
254        Parser {
255            unparsed: format,
256            parsed_len: 0,
257            positional,
258            named,
259            positional_iter: positional.iter(),
260        }
261    }
262
263    fn advance_and_return<T>(&mut self, advance_by: usize, result: T) -> T {
264        self.unparsed = &self.unparsed[advance_by..];
265        self.parsed_len += advance_by;
266        result
267    }
268
269    fn error(&mut self) -> Result<Segment<'p, V>, usize> {
270        self.unparsed = "";
271        Err(self.parsed_len)
272    }
273
274    fn text_segment(&mut self, len: usize) -> Segment<'p, V> {
275        self.advance_and_return(len, Segment::Text(&self.unparsed[..len]))
276    }
277
278    fn parse_braces(&mut self) -> Result<Segment<'p, V>, usize> {
279        if self.unparsed.len() < 2 {
280            self.error()
281        } else if self.unparsed.as_bytes()[0] == self.unparsed.as_bytes()[1] {
282            Ok(self.advance_and_return(2, Segment::Text(&self.unparsed[..1])))
283        } else {
284            self.parse_substitution()
285        }
286    }
287
288    fn parse_substitution(&mut self) -> Result<Segment<'p, V>, usize> {
289        use lazy_static::lazy_static;
290        use regex::Regex;
291
292        lazy_static! {
293            static ref ARG_RE: Regex = Regex::new(
294                concat!(
295                    r"(?x)
296                        ^
297                        \{
298                            (?:(?P<index>\d+)|(?P<name>[\p{XID_Start}_][\p{XID_Continue}]*))?
299                            (?:
300                                :
301                    ",
302                    SPEC_REGEX_FRAG!(),
303                    r"
304                            )?
305                    \}"
306                )
307            )
308            .unwrap();
309        }
310
311        match ARG_RE.captures(self.unparsed) {
312            None => self.error(),
313            Some(captures) => match parse_specifier_captures(&captures, self) {
314                Ok(specifier) => self
315                    .lookup_argument(&captures)
316                    .ok_or(())
317                    .and_then(|value| Substitution::new(specifier, value))
318                    .map(|arg| {
319                        self.advance_and_return(
320                            captures.get(0).unwrap().end(),
321                            Segment::Substitution(arg),
322                        )
323                    })
324                    .or_else(|_| self.error()),
325                Err(_) => self.error(),
326            },
327        }
328    }
329
330    fn next_argument(&mut self) -> Option<&'p V> {
331        self.positional_iter.next()
332    }
333
334    fn lookup_argument_by_index(&self, idx: usize) -> Option<&'p V> {
335        self.positional.get(idx)
336    }
337
338    fn lookup_argument_by_name(&self, name: &str) -> Option<&'p V> {
339        self.named.get(name)
340    }
341
342    fn lookup_argument(&mut self, captures: &Captures) -> Option<&'p V> {
343        if let Some(idx) = captures.name("index") {
344            idx.as_str()
345                .parse::<usize>()
346                .ok()
347                .and_then(|idx| self.lookup_argument_by_index(idx))
348        } else if let Some(name) = captures.name("name") {
349            self.lookup_argument_by_name(name.as_str())
350        } else {
351            self.next_argument()
352        }
353    }
354}
355
356impl<'p, V, P, N> ArgumentSource<V> for Parser<'p, V, P, N>
357where
358    V: FormatArgument,
359    P: PositionalArguments<'p, V> + ?Sized,
360    N: NamedArguments<V>,
361{
362    fn next_argument(&mut self) -> Option<&V> {
363        (self as &mut Parser<'p, V, P, N>).next_argument()
364    }
365
366    fn lookup_argument_by_index(&self, idx: usize) -> Option<&V> {
367        (self as &Parser<'p, V, P, N>).lookup_argument_by_index(idx)
368    }
369
370    fn lookup_argument_by_name(&self, name: &str) -> Option<&V> {
371        (self as &Parser<'p, V, P, N>).lookup_argument_by_name(name)
372    }
373}
374
375impl<'p, V, P, N> Iterator for Parser<'p, V, P, N>
376where
377    V: FormatArgument,
378    P: PositionalArguments<'p, V> + ?Sized,
379    N: NamedArguments<V>,
380{
381    type Item = Result<Segment<'p, V>, usize>;
382
383    fn next(&mut self) -> Option<Self::Item> {
384        static BRACES: &[char] = &['{', '}'];
385
386        if self.unparsed.len() == 0 {
387            return None;
388        }
389
390        match self.unparsed.find(BRACES) {
391            None => Some(Ok(self.text_segment(self.unparsed.len()))),
392            Some(0) => Some(self.parse_braces()),
393            Some(brace_idx) => Some(Ok(self.text_segment(brace_idx))),
394        }
395    }
396}