dynfmt/
python.rs

1//! Implementation for old-style Python format strings.
2//!
3//! See [`PythonFormat`] for more information.
4//!
5//! [`PythonFormat`]: struct.PythonFormat.html
6
7use regex::{CaptureMatches, Captures, Regex};
8
9use crate::{Alignment, ArgumentResult, ArgumentSpec, Count, Error, Format, FormatType, Position};
10
11lazy_static::lazy_static! {
12/// The regular expression used for parsing python format strings.
13    static ref PYTHON_RE: Regex = Regex::new(r"(?x)
14        %
15        (?:\((?P<key>\w+)\))?         # Mapping key
16        (?P<flags>[\#0\- +]*)?        # Conversion flags
17        (?P<width>\*|\d+)?            # Minimum field width
18        (?:.(?P<precision>\*|\d+))?   # Precision after decimal point
19        [hlL]*                        # Ignored length modifier
20        (?P<type>[diouxXeEfFgGcrs%])  # Conversion type
21    ").unwrap();
22}
23
24fn parse_next(captures: Captures<'_>) -> ArgumentResult<'_> {
25    let group = captures.get(0).unwrap();
26
27    let position = captures
28        .name("key")
29        .map(|m| Position::Key(m.as_str()))
30        .unwrap_or_else(|| Position::Auto);
31
32    let format = match &captures["type"] {
33        "d" | "i" | "u" => FormatType::Display,
34        "o" => FormatType::Octal,
35        "x" => FormatType::LowerHex,
36        "X" => FormatType::UpperHex,
37        "e" => FormatType::LowerExp,
38        "E" => FormatType::UpperExp,
39        "f" | "F" | "g" | "G" => FormatType::Display,
40        "c" | "s" => FormatType::Display,
41        "r" => FormatType::Object,
42        "%" => FormatType::Literal("%"),
43        s => return Err(Error::BadFormat(s.chars().next().unwrap_or_default())),
44    };
45
46    let mut alternate = false;
47    let mut pad_zero = false;
48    let mut alignment = Alignment::Right;
49    let mut sign = false;
50
51    if let Some(flags) = captures.name("flags") {
52        for flag in flags.as_str().chars() {
53            match flag {
54                '#' => alternate = true,
55                '0' => pad_zero = true,
56                '-' => alignment = Alignment::Left,
57                ' ' => (), // blank between sign and number, not supported
58                '+' => sign = true,
59                c => unreachable!("unknown conversion flag \"{}\"", c),
60            }
61        }
62    }
63
64    let width = captures.name("width").and_then(|m| match m.as_str() {
65        "*" => Some(Count::Ref(Position::Auto)),
66        value => value.parse().ok().map(Count::Value),
67    });
68
69    let precision = captures.name("precision").and_then(|m| match m.as_str() {
70        "*" => Some(Count::Ref(Position::Auto)),
71        value => value.parse().ok().map(Count::Value),
72    });
73
74    let spec = ArgumentSpec::new(group.start(), group.end())
75        .with_position(position)
76        .with_format(format)
77        .with_alternate(alternate)
78        .with_zeros(pad_zero)
79        .with_alignment(alignment)
80        .with_sign(sign)
81        .with_width(width)
82        .with_precision(precision);
83
84    Ok(spec)
85}
86
/// Format argument iterator for [`PythonFormat`].
///
/// Wraps the regex capture iterator over a format string and yields one parsed argument
/// result per match.
///
/// [`PythonFormat`]: struct.PythonFormat.html
#[derive(Debug)]
pub struct PythonIter<'f> {
    /// Remaining regex matches in the format string, which is borrowed for `'f`.
    captures: CaptureMatches<'static, 'f>,
}
94
95impl<'f> PythonIter<'f> {
96    fn new(format: &'f str) -> Self {
97        PythonIter {
98            captures: PYTHON_RE.captures_iter(format),
99        }
100    }
101}
102
103impl<'f> Iterator for PythonIter<'f> {
104    type Item = ArgumentResult<'f>;
105
106    fn next(&mut self) -> Option<Self::Item> {
107        self.captures.next().map(parse_next)
108    }
109}
110
/// Format implementation for old-style Python formatting.
///
/// Python uses a syntax similar to `sprintf` in the C language. Each format argument contains two
/// or more characters and has the following components, which must occur in this order:
///
///  1. The `'%'` character, which marks the start of the specifier.
///  2. Mapping key (optional), consisting of a parenthesised sequence of characters (for example,
///     `(somename)`).
///  3. Conversion flags (optional), which affect the result of some conversion types.
///  4. Minimum field width (optional). If specified as an `'*'` (asterisk), the actual width is
///     read from the next element of the tuple in values, and the object to convert comes after the
///     minimum field width and optional precision.
///  5. Precision (optional), given as a `'.'` (dot) followed by the precision. If specified as
///     `'*'` (an asterisk), the actual precision is read from the next element of the tuple in
///     values, and the value to convert comes after the precision.
///  6. Length modifier (optional).
///  7. Conversion type.
///
/// Most of the conversion types are mapped to the standard `Display` trait. The `%r` conversion
/// type is implemented as JSON if the `json` feature is active, and will otherwise error.
///
/// For the full specification, please refer to the [Python string formatting docs].
///
/// # Example
///
/// ```rust
/// use dynfmt::{Format, PythonFormat};
///
/// let formatted = PythonFormat.format("hello, %s", &["world"]);
/// assert_eq!("hello, world", formatted.expect("formatting failed"));
/// ```
///
/// [Python string formatting docs]: https://docs.python.org/2/library/stdtypes.html#string-formatting-operations
#[derive(Debug)]
pub struct PythonFormat;
146
147impl<'f> Format<'f> for PythonFormat {
148    type Iter = PythonIter<'f>;
149
150    fn iter_args(&self, format: &'f str) -> Result<Self::Iter, Error<'f>> {
151        Ok(PythonIter::new(format))
152    }
153}