strfmt/
formatter.rs

1use std::fmt;
2use std::fmt::Write;
3use std::iter::Iterator;
4use std::str;
5use std::string::String;
6
7use types::*;
8
/// Parsed form of one replacement field (`key` or `key:spec`) together with
/// the buffer that output is written to. Built by `Formatter::from_str`.
#[derive(Debug, PartialEq)]
pub struct Formatter<'a, 'b> {
    /// Identifier part of the pattern: the text before the first `:`.
    pub key: &'a str,
    /// Fill character taken from the spec; a space when none was given.
    fill: char,
    align: Alignment, // default Right for numbers, Left for strings
    /// Sign option from the spec (`+`, `-`, space) or unspecified.
    sign: Sign,
    /// `true` when the `#` flag was present in the spec.
    alternate: bool,
    /// Width given in the spec, `None` when omitted.
    width: Option<usize>,
    /// `true` when the `,` flag was present in the spec.
    thousands: bool,
    /// Precision given after `.` in the spec, `None` when omitted.
    precision: Option<usize>,
    /// Type character from the end of the spec, `None` when omitted.
    ty: Option<char>,
    /// Output buffer that text is appended to.
    buff: &'b mut String,
    /// The complete, unparsed pattern; `skip` writes it back out unchanged.
    pattern: &'a str,
}
23
/// Returns `true` when `c` is one of the alignment markers `=`, `<`, `^` or `>`.
fn is_alignment_token(c: char) -> bool {
    ['=', '<', '^', '>'].contains(&c)
}
30
/// Returns `true` when `c` is one of the sign options: space, `-` or `+`.
fn is_sign_element(c: char) -> bool {
    [' ', '-', '+'].iter().any(|&candidate| candidate == c)
}
37
/// Returns `true` when `c` is an accepted type character:
/// `b`, `o`, `x`, `X`, `e`, `E`, `f`, `F`, `%`, `s` or `?`.
fn is_type_element(c: char) -> bool {
    "boxXeEfF%s?".contains(c)
}
44
/// Parses the run of ASCII decimal digits that starts at byte offset `pos` of `s`.
///
/// Returns `(consumed, value)`:
/// * `consumed` is the number of digit bytes found; it is 0 when the byte at
///   `pos` is not a digit or when `pos` is at or past the end of `s`.
/// * `value` is `Some(n)` with the parsed number, or `None` when there were no
///   digits or the digits do not fit in an `i64`.
fn get_integer(s: &[u8], pos: usize) -> (usize, Option<i64>) {
    // `get` rather than `split_at`: an offset past the end means "no digits"
    // instead of a panic.
    let rest = s.get(pos..).unwrap_or(&[]);
    let consumed = rest.iter().take_while(|b| b.is_ascii_digit()).count();
    if consumed == 0 {
        return (0, None);
    }
    // Accumulate with checked arithmetic so overflow yields `None`; this needs
    // neither a `str` conversion nor `unsafe`.
    let val = rest[..consumed].iter().try_fold(0i64, |acc, &b| {
        acc.checked_mul(10)
            .and_then(|shifted| shifted.checked_add(i64::from(b - b'0')))
    });
    (consumed, val)
}
71
/// The format struct as it is defined in the python source.
///
/// Options that were not present in the spec are stored as sentinel values
/// rather than `Option`s; see the individual fields.
#[derive(Debug)]
struct FmtPy {
    /// Fill character; a space unless the spec overrides it.
    pub fill: char,
    /// Alignment marker (`<`, `^`, `>`, `=`), or `'\0'` when unspecified.
    pub align: char,
    /// `true` when the `#` flag was present.
    pub alternate: bool,
    /// Sign option (`+`, `-`, space), or `'\0'` when unspecified.
    pub sign: char,
    /// Field width, or `-1` when unspecified.
    pub width: i64,
    /// `true` when the `,` flag was present.
    pub thousands: bool,
    /// Precision, or `-1` when unspecified.
    pub precision: i64,
    /// Type character, or `'\0'` when unspecified.
    pub ty: char,
}
84
85fn parse_like_python(rest: &str) -> Result<FmtPy> {
86    // The rest of this was pretty much strait up copied from python's format parser
87    // All credit goes to python source file: formatter_unicode.c
88    //
89
90    let mut format = FmtPy {
91        fill: ' ',
92        align: '\0',
93        alternate: false,
94        sign: '\0',
95        width: -1,
96        thousands: false,
97        precision: -1,
98        ty: '\0',
99    };
100    let mut chars = rest.chars();
101    let fake_fill = match chars.next() {
102        Some(c) => c,
103        None => return Ok(format),
104    };
105    // from now on all format characters MUST be valid
106    // ASCII characters (fill and identifier were the
107    // only ones that weren't.
108    // Therefore we can use bytes for the rest
109    let rest = rest.as_bytes();
110    let mut align_specified = false;
111    let mut fill_specified = false;
112
113    let end: usize = rest.len();
114    let mut pos: usize = 0;
115
116    // If the second char is an alignment token,
117    // then fake_fill as fill
118    if end - pos >= 1 + fake_fill.len_utf8()
119        && is_alignment_token(rest[pos + fake_fill.len_utf8()] as char)
120    {
121        format.align = rest[pos + fake_fill.len_utf8()] as char;
122        format.fill = fake_fill;
123        fill_specified = true;
124        align_specified = true;
125        pos += 1 + fake_fill.len_utf8();
126    } else if end - pos >= 1 && is_alignment_token(fake_fill) {
127        format.align = fake_fill;
128        pos += fake_fill.len_utf8();
129    }
130
131    // Parse the various sign options
132    if end - pos >= 1 && is_sign_element(rest[pos] as char) {
133        format.sign = rest[pos] as char;
134        pos += 1;
135    }
136
137    // If the next character is #, we're in alternate mode.  This only
138    // applies to integers.
139    if end - pos >= 1 && rest[pos] as char == '#' {
140        format.alternate = true;
141        pos += 1;
142    }
143
144    // The special case for 0-padding (backwards compat)
145    if !fill_specified && end - pos >= 1 && rest[pos] == '0' as u8 {
146        format.fill = '0';
147        if !align_specified {
148            format.align = '=';
149        }
150        pos += 1;
151    }
152
153    // check to make sure that val is good
154    let (consumed, val) = get_integer(rest, pos);
155    pos += consumed;
156    if consumed != 0 {
157        match val {
158            None => {
159                return Err(FmtError::Invalid(
160                    "overflow error when parsing width".to_string(),
161                ))
162            }
163            Some(v) => {
164                format.width = v;
165            }
166        }
167    }
168
169    // Comma signifies add thousands separators
170    if end - pos > 0 && rest[pos] as char == ',' {
171        format.thousands = true;
172        pos += 1;
173    }
174
175    // Parse field precision
176    if end - pos > 0 && rest[pos] as char == '.' {
177        pos += 1;
178
179        let (consumed, val) = get_integer(rest, pos);
180        if consumed != 0 {
181            match val {
182                None => {
183                    return Err(FmtError::Invalid(
184                        "overflow error when parsing precision".to_string(),
185                    ))
186                }
187                Some(v) => {
188                    format.precision = v;
189                }
190            }
191        } else {
192            // Not having a precision after a dot is an error.
193            if consumed == 0 {
194                return Err(FmtError::Invalid(
195                    "Format specifier missing precision".to_string(),
196                ));
197            }
198        }
199        pos += consumed;
200    }
201
202    // Finally, parse the type field.
203    if end - pos > 1 {
204        // More than one char remain, invalid format specifier.
205        return Err(FmtError::Invalid("Invalid format specifier".to_string()));
206    }
207
208    if end - pos == 1 {
209        format.ty = rest[pos] as char;
210        if !is_type_element(format.ty) {
211            let mut msg = String::new();
212            write!(msg, "Invalid type specifier: {:?}", format.ty).unwrap();
213            return Err(FmtError::TypeError(msg));
214        }
215        // pos+=1;
216    }
217
218    // Do as much validating as we can, just by looking at the format
219    // specifier.  Do not take into account what type of formatting
220    // we're doing (int, float, string).
221    if format.thousands {
222        match format.ty {
223            'd' | 'e' | 'f' | 'g' | 'E' | 'G' | '%' | 'F' | '\0' => {} /* These are allowed. See PEP 378.*/
224
225            _ => {
226                let mut msg = String::new();
227                write!(msg, "Invalid comma type: {}", format.ty).unwrap();
228                return Err(FmtError::Invalid(msg));
229            }
230        }
231    }
232    Ok(format)
233}
234
235impl<'a, 'b> Formatter<'a, 'b> {
236    /// create Formatter from format string
237    pub fn from_str(s: &'a str, buff: &'b mut String) -> Result<Formatter<'a, 'b>> {
238        let mut found_colon = false;
239        let mut chars = s.chars();
240        let mut c = match chars.next() {
241            Some(':') | None => {
242                return Err(FmtError::Invalid("must specify identifier".to_string()))
243            }
244            Some(c) => c,
245        };
246        let mut consumed = 0;
247        // find the identifier
248        loop {
249            consumed += c.len_utf8();
250            if c == ':' {
251                found_colon = true;
252                break;
253            }
254            c = match chars.next() {
255                Some(c) => c,
256                None => {
257                    break;
258                }
259            };
260        }
261        let (identifier, rest) = s.split_at(consumed);
262        let identifier = if found_colon {
263            let (i, _) = identifier.split_at(identifier.len() - 1); // get rid of ':'
264            i
265        } else {
266            identifier
267        };
268
269        let format = parse_like_python(rest)?;
270
271        Ok(Formatter {
272            key: identifier,
273            fill: format.fill,
274            align: match format.align {
275                '\0' => Alignment::Unspecified,
276                '<' => Alignment::Left,
277                '^' => Alignment::Center,
278                '>' => Alignment::Right,
279                '=' => Alignment::Equal,
280                _ => unreachable!(),
281            },
282            sign: match format.sign {
283                '\0' => Sign::Unspecified,
284                '+' => Sign::Plus,
285                '-' => Sign::Minus,
286                ' ' => Sign::Space,
287                _ => unreachable!(),
288            },
289            alternate: format.alternate,
290            width: match format.width {
291                -1 => None,
292                _ => Some(format.width as usize),
293            },
294            thousands: format.thousands,
295            precision: match format.precision {
296                -1 => None,
297                _ => Some(format.precision as usize),
298            },
299            ty: match format.ty {
300                '\0' => None,
301                _ => Some(format.ty),
302            },
303            buff: buff,
304            pattern: s,
305        })
306    }
307
308    /// call this to re-write the original format string verbatum
309    /// back to the output
310    pub fn skip(mut self) -> Result<()> {
311        self.buff.push('{');
312        self.write_str(self.pattern).unwrap();
313        self.buff.push('}');
314        Ok(())
315    }
316
317    /// fill getter
318    pub fn fill(&self) -> char {
319        self.fill
320    }
321
322    /// align getter
323    pub fn align(&self) -> Alignment {
324        self.align.clone()
325    }
326
327    // provide default for unspecified alignment
328    pub fn set_default_align(&mut self, align: Alignment) {
329        if self.align == Alignment::Unspecified {
330            self.align = align
331        }
332    }
333
334    /// width getter
335    pub fn width(&self) -> Option<usize> {
336        self.width
337    }
338
339    /// thousands getter
340    pub fn thousands(&self) -> bool {
341        self.thousands
342    }
343
344    /// precision getter
345    pub fn precision(&self) -> Option<usize> {
346        self.precision
347    }
348
349    /// set precision to None, used for formatting int, float, etc
350    pub fn set_precision(&mut self, precision: Option<usize>) {
351        self.precision = precision;
352    }
353
354    /// sign getter
355    pub fn sign(&self) -> Sign {
356        self.sign.clone()
357    }
358
359    /// sign plus getter
360    /// here because it is in fmt::Formatter
361    pub fn sign_plus(&self) -> bool {
362        self.sign == Sign::Plus
363    }
364
365    /// sign minus getter
366    /// here because it is in fmt::Formatter
367    pub fn sign_minus(&self) -> bool {
368        self.sign == Sign::Minus
369    }
370
371    /// alternate getter
372    pub fn alternate(&self) -> bool {
373        self.alternate
374    }
375
376    // sign_aware_zero_pad // Not supported
377
378    /// type getter
379    pub fn ty(&self) -> Option<char> {
380        self.ty
381    }
382
383    /// UNSTABLE: in the future, this may return true if all validty
384    ///   checks for a float return true
385    /// return true if ty is valid for formatting integers
386    pub fn is_int_type(&self) -> bool {
387        match self.ty {
388            None => true,
389            Some(c) => match c {
390                'b' | 'o' | 'x' | 'X' => true,
391                _ => false,
392            },
393        }
394    }
395
396    /// UNSTABLE: in the future, this may return true if all validty
397    ///   checks for a float return true
398    /// return true if ty is valid for formatting floats
399    pub fn is_float_type(&self) -> bool {
400        match self.ty {
401            None => true,
402            Some(c) => match c {
403                'f' | 'e' | 'E' => true,
404                _ => false,
405            },
406        }
407    }
408}
409
410impl<'a, 'b> fmt::Write for Formatter<'a, 'b> {
411    fn write_str(&mut self, s: &str) -> fmt::Result {
412        self.buff.write_str(s)
413    }
414}