1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
//! Provides tools to parse an INI file

use std::iter::Fuse;
use crate::errors::{Error, error_kinds::*};

/// Reads a string formatted by [`dump_str`](../dump/fn.dump_str.html "dump::dump_str") and unescapes the escaped characters
/// 
/// # Return value
/// `Ok(string)` with `string` as the result once parsed
/// 
/// `Err(err)` In case of error with `err` as the error code
/// 
/// # Encoding issues
/// Only allows ASCII because Unicode or other encodings musn't appear in an INI file (except in comments but this function is not intended to parse whole files)
/// 
/// # Examples
/// ```
/// use mininip::parse::parse_str;
/// 
/// assert!(parse_str("Bad because ends with a ;").is_err());
/// assert_eq!(parse_str(r"abc\=123\; \x00263a").unwrap(), "abc=123; \u{263a}");
/// ```
pub fn parse_str(content: &str) -> Result<String, Error> {
    // new will never be wider than content
    let mut new = String::with_capacity(content.len());

    static FORBIDDEN: [char; 13] = ['\x07', '\x08', '\t', '\r', '\n', '\0', '\\', '\'', '\"', ';', ':', '=', '#'];

    // `next` is the index (as bytes) of the next escape sequence in content
    let mut next = 0;
    for i in TokenIterator::from(content.chars()) {
        let escape = match i {
            Token::Char(c) => {
                let n = next;
                next += 1;

                if FORBIDDEN.contains(&c) || !c.is_ascii() {
                    let escape = crate::dump::dump_str(&format!("{}", c));
                    let err = Error::from(ExpectedEscape::new(String::from(content), n, escape));
                    return Err(err);
                }

                new.push(c);
                continue;
            },
            Token::Escape(s) => s,
        };

        next += escape.len();

        match escape.as_str() {
            "\\a"  => new.push('\x07'),
            "\\b"  => new.push('\x08'),
            "\\t"  => new.push('\t'),
            "\\r"  => new.push('\r'),
            "\\n"  => new.push('\n'),
            "\\0"  => new.push('\0'),
            r"\\"  => new.push('\\'),
            "\\'"  => new.push('\''),
            "\\\"" => new.push('\"'),
            "\\;"  => new.push(';'),
            "\\:"  => new.push(':'),
            "\\="  => new.push('='),
            "\\#"  => new.push('#'),

            _ if escape.len() == 8 => {
                debug_assert!(escape.starts_with("\\x"));

                let values = &escape[2..];
                let code = match u32::from_str_radix(values, 16) {
                    Ok(val) => val,
                    Err(_)  => return Err(Error::from(InvalidEscape::new(String::from(content), escape))),
                };
                let character = match std::char::from_u32(code) {
                    Some(val) => val,
                    None      => return Err(Error::from(InvalidEscape::new(String::from(content), escape))),
                };
                new.push(character);
            },

            _ => return Err(Error::from(InvalidEscape::new(String::from(content), escape))),
        }
    }

    Ok(new)
}


/// A token which is either a single character or an escape sequence starting with `\`
///
/// This is the item type yielded by `TokenIterator`
#[derive(PartialEq, Debug)]
enum Token {
    /// A character which was read outside of any escape sequence
    Char(char),
    /// An escape sequence, stored with its leading `\`
    ///
    /// The sequence is not validated: it may be unknown, or cut short by the end of the input
    Escape(String),
}

/// An iterator over the characters of an INI file
///
/// Yields `Token`s which can be either a character or an escape sequence
///
/// A `\` followed by any character other than `x` forms a two-character escape sequence. A sequence starting
/// with `\x` keeps absorbing characters until it is at least 8 bytes long
///
/// # Safety
/// These characters are NOT TRUSTED, for example, you may receive a `\é` sequence which is illegal in INI
///
/// If an escape sequence is left unfinished, it is returned as is in a `Token::Escape` object, even though it is invalid
struct TokenIterator<T> {
    /// The underlying source of characters, fused by the `From` implementation
    iterator: Fuse<T>,
}

impl<T: Iterator> From<T> for TokenIterator<T> {
    fn from(iterator: T) -> TokenIterator<T> {
        TokenIterator {
            iterator: iterator.fuse(),
        }
    }
}

impl<T: Iterator<Item = char>> Iterator for TokenIterator<T> {
    type Item = Token;

    /// Reads the next token from the underlying characters
    ///
    /// # Return value
    /// `None` once the underlying iterator is exhausted
    ///
    /// `Some(Token::Char(c))` when the next character `c` is not a `\`
    ///
    /// `Some(Token::Escape(seq))` when the next character is a `\`. `seq` holds that `\` and the following
    /// character, or, when it starts with `\x`, as many characters as needed to reach a length of 8 bytes.
    /// If the input ends first, the unfinished sequence is returned as is
    fn next(&mut self) -> Option<Token> {
        let first = self.iterator.next()?;
        if first != '\\' {
            return Some(Token::Char(first));
        }

        // The buffer is only allocated once an escape sequence has actually started, so plain characters cost nothing
        let mut escape_seq = String::with_capacity(8);
        escape_seq.push(first);

        // `by_ref` lets the loop stop early and leave the remaining characters for the next call
        for c in self.iterator.by_ref() {
            escape_seq.push(c);

            // Only a `\x` sequence spans more than two characters, and only until it is 8 bytes long
            let hex_unfinished = escape_seq.starts_with(r"\x") && escape_seq.len() < 8;
            if !hex_unfinished {
                break;
            }
        }

        Some(Token::Escape(escape_seq))
    }
}


/// Finds the first non-escaped occurrence of `pattern` in `string`. Currently only accepts `char`s
///
/// A character is considered escaped when it directly follows a `\`, or when it is one of the six characters
/// following a `\x`
///
/// # Return value
/// `Some(index)` with `index` as the index (as bytes) of the first occurrence of `pattern`
///
/// `None` if `pattern` could not be found as a non-escaped form
pub fn find_unescaped(string: &str, pattern: char) -> Option<usize> {
    /// Where the scan stands relative to escape sequences
    enum State {
        /// The current character is not part of any escape sequence
        Plain,
        /// The previous character was the `\` opening an escape sequence
        AfterBackslash,
        /// Inside a `\x` sequence, with this many characters still to ignore (current one included)
        InHex(u8),
    }

    let mut state = State::Plain;
    for (index, current) in string.char_indices() {
        state = match state {
            // `\x` is followed by six characters which belong to the sequence
            State::AfterBackslash if current == 'x' => State::InHex(6),
            // Any other escaped character ends the sequence by itself
            State::AfterBackslash => State::Plain,

            State::InHex(1) => State::Plain,
            State::InHex(left) => State::InHex(left - 1),

            // A `\` always opens an escape sequence, even when it is the pattern searched for
            State::Plain if current == '\\' => State::AfterBackslash,
            State::Plain if current == pattern => return Some(index),
            State::Plain => State::Plain,
        };
    }

    None
}


mod parser;
pub use parser::*;


#[cfg(test)]
mod tests;