1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
use std::str;
use std::fmt;
use std::fmt::Write;
use std::iter::Iterator;
use std::string::String;

use types::*;

/// A parsed replacement field together with the buffer its output goes to.
///
/// Built by `Formatter::from_str`, which splits the field at the first `:`
/// into an identifier (`key`) and a Python-style format spec.
#[derive(Debug, PartialEq)]
pub struct Formatter<'a, 'b> {
    /// Identifier part of the field: the text before the first `:`.
    pub key: &'a str,
    /// Padding character; `' '` unless the spec provides one.
    fill: char,
    /// Requested alignment; `Alignment::Right` when the spec gives none.
    align: Alignment, // default Right
    /// Sign option; `Sign::Unspecified` when the spec gives none.
    sign: Sign,
    /// True when the `#` (alternate form) flag is present.
    alternate: bool,
    /// Field width, `None` when the spec has no width.
    width: Option<usize>,
    /// True when the `,` (thousands separator) flag is present.
    thousands: bool,
    /// Precision (digits after `.`), `None` when not specified.
    precision: Option<usize>,
    /// Type character (e.g. `x`, `f`, `s`), `None` when not specified.
    ty: Option<char>,
    /// Output buffer; `write_str` and `skip` append to it.
    buff: &'b mut String,
    /// The complete, unparsed field text; `skip` writes it back verbatim.
    pattern: &'a str,
}

/// Returns true when `c` is one of the alignment characters
/// `=`, `<`, `^` or `>`.
fn is_alignment_token(c: char) -> bool {
    c == '=' || c == '<' || c == '^' || c == '>'
}

/// Returns true when `c` is one of the sign options: space, `-` or `+`.
fn is_sign_element(c: char) -> bool {
    [' ', '-', '+'].contains(&c)
}

/// Returns true when `c` is an accepted type character:
/// `b`, `o`, `x`, `X`, `e`, `E`, `f`, `F`, `%`, `s` or `?`.
fn is_type_element(c: char) -> bool {
    "boxXeEfF%s?".contains(c)
}

/// Read a run of ASCII decimal digits from `s`, starting at byte offset `pos`.
///
/// Returns `(consumed, value)`:
///
/// * `(0, None)` when no digit is found at `pos` (including `pos == s.len()`);
/// * `(n, Some(v))` when `n` digit bytes were read and the number fits an `i64`;
/// * `(n, None)` when `n` digit bytes were read but the number overflows `i64`.
///
/// # Panics
/// Panics if `pos > s.len()`.
fn get_integer(s: &[u8], pos: usize) -> (usize, Option<i64>) {
    let rest = &s[pos..];
    // Count the leading bytes that are ASCII '0'..'9'.
    let consumed = rest.iter()
        .take_while(|b| (**b as char).is_digit(10))
        .count();
    if consumed == 0 {
        return (0, None);
    }
    // The slice holds ASCII digits only, so the checked UTF-8 conversion
    // cannot fail; no `unsafe` is needed. A parse failure here can only be
    // an overflow and is reported as `None`.
    let val = str::from_utf8(&rest[..consumed])
        .ok()
        .and_then(|digits| digits.parse::<i64>().ok());
    (consumed, val)
}


#[derive(Debug)]
/// The format struct as it is defined in the Python source.
///
/// Filled in by `parse_like_python`. Fields that the spec leaves out keep
/// sentinel defaults, which `Formatter::from_str` later turns into
/// `None` / `Unspecified`.
struct FmtPy {
    /// Padding character; defaults to `' '`.
    pub fill: char,
    /// Alignment character (`<`, `^`, `>` or `=`); defaults to `'>'`.
    pub align: char,
    /// True when the `#` flag is present.
    pub alternate: bool,
    /// Sign character (`+`, `-` or space); `'\0'` when not specified.
    pub sign: char,
    /// Field width; `-1` when not specified.
    pub width: i64,
    /// True when the `,` flag is present.
    pub thousands: bool,
    /// Precision; `-1` when not specified.
    pub precision: i64,
    /// Type character; `'\0'` when not specified.
    pub ty: char,
}

fn parse_like_python(rest: &str) -> Result<FmtPy> {
    // The rest of this was pretty much strait up copied from python's format parser
    // All credit goes to python source file: formatter_unicode.c
    //

    let mut format = FmtPy {
        fill: ' ',
        align: '>',
        alternate: false,
        sign: '\0',
        width: -1,
        thousands: false,
        precision: -1,
        ty: '\0',
    };
    let mut chars = rest.chars();
    let fake_fill = match chars.next() {
        Some(c) => c,
        None => return Ok(format),
    };
    // from now on all format characters MUST be valid
    // ASCII characters (fill and identifier were the
    // only ones that weren't.
    // Therefore we can use bytes for the rest
    let rest = rest.as_bytes();
    let mut align_specified = false;
    let mut fill_specified = false;

    let end: usize = rest.len();
    let mut pos: usize = 0;

    // If the second char is an alignment token,
    // then fake_fill as fill
    if end - pos >= 1 + fake_fill.len_utf8() &&
       is_alignment_token(rest[pos + fake_fill.len_utf8()] as char) {
        format.align = rest[pos + fake_fill.len_utf8()] as char;
        format.fill = fake_fill;
        fill_specified = true;
        align_specified = true;
        pos += 1 + fake_fill.len_utf8();
    } else if end - pos >= 1 && is_alignment_token(fake_fill) {
        format.align = fake_fill;
        pos += fake_fill.len_utf8();
    }

    // Parse the various sign options
    if end - pos >= 1 && is_sign_element(rest[pos] as char) {
        format.sign = rest[pos] as char;
        pos += 1;
    }

    // If the next character is #, we're in alternate mode.  This only
    // applies to integers.
    if end - pos >= 1 && rest[pos] as char == '#' {
        format.alternate = true;
        pos += 1;
    }

    // The special case for 0-padding (backwards compat)
    if !fill_specified && end - pos >= 1 && rest[pos] == '0' as u8 {
        format.fill = '0';
        if !align_specified {
            format.align = '=';
        }
        pos += 1;
    }

    // check to make sure that val is good
    let (consumed, val) = get_integer(rest, pos);
    pos += consumed;
    if consumed != 0 {
        match val {
            None => return Err(FmtError::Invalid("overflow error when parsing width".to_string())),
            Some(v) => {
                format.width = v;
            }
        }
    }

    // Comma signifies add thousands separators
    if end - pos > 0 && rest[pos] as char == ',' {
        format.thousands = true;
        pos += 1;
    }

    // Parse field precision
    if end - pos > 0 && rest[pos] as char == '.' {
        pos += 1;

        let (consumed, val) = get_integer(rest, pos);
        if consumed != 0 {
            match val {
                None => {
                    return Err(FmtError::Invalid("overflow error when parsing precision"
                                                     .to_string()))
                }
                Some(v) => {
                    format.precision = v;
                }
            }
        } else {
            // Not having a precision after a dot is an error.
            if consumed == 0 {
                return Err(FmtError::Invalid("Format specifier missing precision".to_string()));
            }
        }
        pos += consumed;

    }

    // Finally, parse the type field.
    if end - pos > 1 {
        // More than one char remain, invalid format specifier.
        return Err(FmtError::Invalid("Invalid format specifier".to_string()));
    }

    if end - pos == 1 {
        format.ty = rest[pos] as char;
        if !is_type_element(format.ty) {
            let mut msg = String::new();
            write!(msg, "Invalid type specifier: {:?}", format.ty).unwrap();
            return Err(FmtError::TypeError(msg));
        }
        // pos+=1;
    }

    // Do as much validating as we can, just by looking at the format
    // specifier.  Do not take into account what type of formatting
    // we're doing (int, float, string).
    if format.thousands {
        match format.ty {
            'd' |
            'e' |
            'f' |
            'g' |
            'E' |
            'G' |
            '%' |
            'F' |
            '\0' => {} /* These are allowed. See PEP 378.*/

            _ => {
                let mut msg = String::new();
                write!(msg, "Invalid comma type: {}", format.ty).unwrap();
                return Err(FmtError::Invalid(msg));
            }
        }
    }
    Ok(format)
}

impl<'a, 'b> Formatter<'a, 'b> {
    /// create Formatter from format string
    pub fn from_str(s: &'a str, buff: &'b mut String) -> Result<Formatter<'a, 'b>> {
        let mut found_colon = false;
        let mut chars = s.chars();
        let mut c = match chars.next() {
            Some(':') | None => {
                return Err(FmtError::Invalid("must specify identifier".to_string()))
            }
            Some(c) => c,
        };
        let mut consumed = 0;
        // find the identifier
        loop {
            consumed += c.len_utf8();
            if c == ':' {
                found_colon = true;
                break;
            }
            c = match chars.next() {
                Some(c) => c,
                None => {
                    break;
                }
            };
        }
        let (identifier, rest) = s.split_at(consumed);
        let identifier = if found_colon {
            let (i, _) = identifier.split_at(identifier.len() - 1); // get rid of ':'
            i
        } else {
            identifier
        };

        let format = try!(parse_like_python(rest));

        Ok(Formatter {
            key: identifier,
            fill: format.fill,
            align: match format.align {
                '<' => Alignment::Left,
                '^' => Alignment::Center,
                '>' => Alignment::Right,
                '=' => Alignment::Equal,
                _ => unreachable!(),
            },
            sign: match format.sign {
                '\0' => Sign::Unspecified,
                '+' => Sign::Plus,
                '-' => Sign::Minus,
                ' ' => Sign::Space,
                _ => unreachable!(),
            },
            alternate: format.alternate,
            width: match format.width {
                -1 => None,
                _ => Some(format.width as usize),
            },
            thousands: format.thousands,
            precision: match format.precision {
                -1 => None,
                _ => Some(format.precision as usize),
            },
            ty: match format.ty {
                '\0' => None,
                _ => Some(format.ty),
            },
            buff: buff,
            pattern: s,
        })
    }

    /// call this to re-write the original format string verbatum
    /// back to the output
    pub fn skip(mut self) -> Result<()> {
        self.buff.push('{');
        self.write_str(self.pattern).unwrap();
        self.buff.push('}');
        Ok(())
    }


    /// fill getter
    pub fn fill(&self) -> char {
        self.fill
    }

    /// align getter
    pub fn align(&self) -> Alignment {
        self.align.clone()
    }

    /// width getter
    pub fn width(&self) -> Option<usize> {
        self.width
    }

    /// thousands getter
    pub fn thousands(&self) -> bool {
        self.thousands
    }

    /// precision getter
    pub fn precision(&self) -> Option<usize> {
        self.precision
    }

    /// set precision to None, used for formatting int, float, etc
    pub fn set_precision(&mut self, precision: Option<usize>) {
        self.precision = precision;
    }

    /// sign getter
    pub fn sign(&self) -> Sign {
        self.sign.clone()
    }

    /// sign plus getter
    /// here because it is in fmt::Formatter
    pub fn sign_plus(&self) -> bool {
        self.sign == Sign::Plus
    }

    /// sign minus getter
    /// here because it is in fmt::Formatter
    pub fn sign_minus(&self) -> bool {
        self.sign == Sign::Minus
    }

    /// alternate getter
    pub fn alternate(&self) -> bool {
        self.alternate
    }

    // sign_aware_zero_pad // Not supported

    /// type getter
    pub fn ty(&self) -> Option<char> {
        self.ty
    }

    /// UNSTABLE: in the future, this may return true if all validty
    ///   checks for a float return true
    /// return true if ty is valid for formatting integers
    pub fn is_int_type(&self) -> bool {
        match self.ty {
            None => true,
            Some(c) => match c {
                'b' | 'o' | 'x' | 'X' => true,
                _ => false,
            }
        }
    }

    /// UNSTABLE: in the future, this may return true if all validty
    ///   checks for a float return true
    /// return true if ty is valid for formatting floats
    pub fn is_float_type(&self) -> bool {
        match self.ty {
            None => true,
            Some(c) => match c {
                'f' | 'e' | 'E' => true,
                _ => false,
            }
        }
    }
}


/// Lets `write!` target a `Formatter`: text is appended to its buffer.
impl<'a, 'b> fmt::Write for Formatter<'a, 'b> {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        // Appending to a `String` cannot fail.
        self.buff.push_str(s);
        Ok(())
    }
}