config_lib/parsers/
conf.rs

1//! # CONF Format Parser
2//!
3//! High-performance parser for standard .conf configuration files.
4//! 
5//! Supports the common configuration format used by many Unix/Linux applications:
6//! 
7//! ```conf
8//! # Comments start with #
9//! key = value
10//! quoted_value = "string with spaces"
11//! number = 42
12//! float = 3.14
13//! boolean = true
14//! 
15//! # Sections
16//! [section]
17//! nested_key = value
18//! 
19//! # Arrays (space or comma separated)
20//! array = item1 item2 item3
21//! comma_array = item1, item2, item3
22//! ```
23
24use crate::error::{Error, Result};
25use crate::value::Value;
26use std::collections::BTreeMap;
27
28/// Parse CONF format configuration
29pub fn parse(source: &str) -> Result<Value> {
30    let mut parser = ConfParser::new(source);
31    parser.parse()
32}
33
34/// High-performance CONF parser with zero-allocation lexing
35/// CONF parser state
36struct ConfParser<'a> {
37    input: &'a str,
38    position: usize,
39    line: usize,
40    column: usize,
41}
42
43impl<'a> ConfParser<'a> {
44    /// Create a new parser
45    fn new(input: &'a str) -> Self {
46        Self {
47            input,
48            position: 0,
49            line: 1,
50            column: 1,
51        }
52    }
53
54    /// Parse the entire configuration
55    fn parse(&mut self) -> Result<Value> {
56        let mut root = BTreeMap::new();
57        let mut current_section = None;
58
59        while !self.is_at_end() {
60            self.skip_whitespace_and_comments();
61            
62            if self.is_at_end() {
63                break;
64            }
65
66            // Check for section header
67            if self.peek() == Some('[') {
68                current_section = Some(self.parse_section_header()?);
69                continue;
70            }
71
72            // Parse key-value pair
73            let (key, value) = self.parse_key_value()?;
74            
75            match &current_section {
76                Some(section) => {
77                    // Add to section
78                    let section_table = root.entry(section.clone())
79                        .or_insert_with(|| Value::table(BTreeMap::new()));
80                    
81                    if let Value::Table(table) = section_table {
82                        table.insert(key, value);
83                    }
84                }
85                None => {
86                    // Add to root
87                    root.insert(key, value);
88                }
89            }
90        }
91
92        Ok(Value::table(root))
93    }
94
95    /// Parse a section header like [section_name]
96    fn parse_section_header(&mut self) -> Result<String> {
97        self.expect('[')?;
98        let start = self.position;
99        
100        // Find the closing bracket
101        while let Some(ch) = self.peek() {
102            if ch == ']' {
103                break;
104            }
105            if ch == '\n' {
106                return Err(Error::parse(
107                    "Unterminated section header",
108                    self.line,
109                    self.column,
110                ));
111            }
112            self.advance();
113        }
114        
115        let section_name = self.input[start..self.position].trim().to_string();
116        self.expect(']')?;
117        
118        Ok(section_name)
119    }
120
121    /// Parse a key-value pair
122    fn parse_key_value(&mut self) -> Result<(String, Value)> {
123        let key = self.parse_key()?;
124        self.skip_whitespace();
125        self.expect('=')?;
126        self.skip_whitespace();
127        let value = self.parse_value()?;
128        
129        Ok((key, value))
130    }
131
132    /// Parse a configuration key
133    fn parse_key(&mut self) -> Result<String> {
134        let start = self.position;
135        
136        while let Some(ch) = self.peek() {
137            if ch.is_alphanumeric() || ch == '_' || ch == '-' || ch == '.' {
138                self.advance();
139            } else {
140                break;
141            }
142        }
143        
144        if start == self.position {
145            return Err(Error::parse(
146                "Expected key name",
147                self.line,
148                self.column,
149            ));
150        }
151        
152        Ok(self.input[start..self.position].to_string())
153    }
154
155    /// Parse a configuration value
156    fn parse_value(&mut self) -> Result<Value> {
157        self.skip_whitespace();
158        
159        match self.peek() {
160            Some('"') => self.parse_quoted_string(),
161            Some('\'') => self.parse_single_quoted_string(),
162            Some('[') => self.parse_array(),
163            _ => {
164                // For all other cases (including numbers), use unquoted value parsing
165                // which handles space-separated arrays
166                self.parse_unquoted_value()
167            }
168        }
169    }
170
171    /// Parse a quoted string
172    fn parse_quoted_string(&mut self) -> Result<Value> {
173        self.expect('"')?;
174        let _start = self.position;
175        let mut result = String::new();
176        
177        while let Some(ch) = self.peek() {
178            if ch == '"' {
179                break;
180            }
181            if ch == '\\' {
182                self.advance();
183                match self.peek() {
184                    Some('n') => result.push('\n'),
185                    Some('t') => result.push('\t'),
186                    Some('r') => result.push('\r'),
187                    Some('\\') => result.push('\\'),
188                    Some('"') => result.push('"'),
189                    Some(other) => {
190                        result.push('\\');
191                        result.push(other);
192                    }
193                    None => return Err(Error::parse(
194                        "Unterminated escape sequence",
195                        self.line,
196                        self.column,
197                    )),
198                }
199                self.advance();
200            } else {
201                result.push(ch);
202                self.advance();
203            }
204        }
205        
206        self.expect('"')?;
207        Ok(Value::string(result))
208    }
209
210    /// Parse a single-quoted string (no escape sequences)
211    fn parse_single_quoted_string(&mut self) -> Result<Value> {
212        self.expect('\'')?;
213        let start = self.position;
214        
215        while let Some(ch) = self.peek() {
216            if ch == '\'' {
217                break;
218            }
219            self.advance();
220        }
221        
222        let content = self.input[start..self.position].to_string();
223        self.expect('\'')?;
224        Ok(Value::string(content))
225    }
226
227    /// Parse an array [item1, item2, item3]
228    fn parse_array(&mut self) -> Result<Value> {
229        self.expect('[')?;
230        let mut items = Vec::new();
231        
232        self.skip_whitespace();
233        
234        if self.peek() == Some(']') {
235            self.advance();
236            return Ok(Value::array(items));
237        }
238        
239        loop {
240            items.push(self.parse_value()?);
241            self.skip_whitespace();
242            
243            match self.peek() {
244                Some(',') => {
245                    self.advance();
246                    self.skip_whitespace();
247                }
248                Some(']') => {
249                    self.advance();
250                    break;
251                }
252                _ => return Err(Error::parse(
253                    "Expected ',' or ']' in array",
254                    self.line,
255                    self.column,
256                )),
257            }
258        }
259        
260        Ok(Value::array(items))
261    }
262
263    /// Parse a number (integer or float)
264    #[allow(dead_code)]
265    fn parse_number(&mut self) -> Result<Value> {
266        let start = self.position;
267        let mut has_dot = false;
268        
269        // Handle sign
270        if self.peek() == Some('-') || self.peek() == Some('+') {
271            self.advance();
272        }
273        
274        // Parse digits and optional decimal point
275        while let Some(ch) = self.peek() {
276            if ch.is_ascii_digit() {
277                self.advance();
278            } else if ch == '.' && !has_dot {
279                has_dot = true;
280                self.advance();
281            } else {
282                break;
283            }
284        }
285        
286        let number_str = &self.input[start..self.position];
287        
288        if has_dot {
289            number_str.parse::<f64>()
290                .map(Value::float)
291                .map_err(|_| Error::parse(
292                    format!("Invalid float: {}", number_str),
293                    self.line,
294                    self.column,
295                ))
296        } else {
297            number_str.parse::<i64>()
298                .map(Value::integer)
299                .map_err(|_| Error::parse(
300                    format!("Invalid integer: {}", number_str),
301                    self.line,
302                    self.column,
303                ))
304        }
305    }
306
307    /// Parse an unquoted value (string, boolean, or array)
308    fn parse_unquoted_value(&mut self) -> Result<Value> {
309        let start = self.position;
310        
311        // Read until end of line, comment, or special character
312        while let Some(ch) = self.peek() {
313            if ch == '\n' || ch == '\r' || ch == '#' {
314                break;
315            }
316            self.advance();
317        }
318        
319        let raw_value = self.input[start..self.position].trim();
320        
321        if raw_value.is_empty() {
322            return Ok(Value::null());
323        }
324        
325        // Try to parse as boolean
326        match raw_value.to_lowercase().as_str() {
327            "true" | "yes" | "on" => return Ok(Value::bool(true)),
328            "false" | "no" | "off" => return Ok(Value::bool(false)),
329            "null" | "nil" | "" => return Ok(Value::null()),
330            _ => {}
331        }
332        
333        // Check if it's a space or comma separated array
334        if raw_value.contains(' ') || raw_value.contains(',') {
335            let items: Vec<Value> = raw_value
336                .split(|c| c == ' ' || c == ',')
337                .map(|s| s.trim())
338                .filter(|s| !s.is_empty())
339                .map(|s| self.parse_simple_value(s))
340                .collect::<Result<Vec<_>>>()?;
341            
342            if items.len() > 1 {
343                return Ok(Value::array(items));
344            }
345        }
346        
347        // Parse as simple value
348        self.parse_simple_value(raw_value)
349    }
350
351    /// Parse a simple value (no arrays or complex types)
352    fn parse_simple_value(&self, value: &str) -> Result<Value> {
353        // Try integer
354        if let Ok(i) = value.parse::<i64>() {
355            return Ok(Value::integer(i));
356        }
357        
358        // Try float
359        if let Ok(f) = value.parse::<f64>() {
360            return Ok(Value::float(f));
361        }
362        
363        // Default to string
364        Ok(Value::string(value.to_string()))
365    }
366
367    /// Skip whitespace but not newlines
368    fn skip_whitespace(&mut self) {
369        while let Some(ch) = self.peek() {
370            if ch == ' ' || ch == '\t' {
371                self.advance();
372            } else {
373                break;
374            }
375        }
376    }
377
378    /// Skip whitespace and comments
379    fn skip_whitespace_and_comments(&mut self) {
380        loop {
381            self.skip_whitespace();
382            
383            // Skip comments
384            if self.peek() == Some('#') {
385                while let Some(ch) = self.peek() {
386                    self.advance();
387                    if ch == '\n' {
388                        break;
389                    }
390                }
391                continue;
392            }
393            
394            // Skip newlines
395            if self.peek() == Some('\n') || self.peek() == Some('\r') {
396                self.advance();
397                continue;
398            }
399            
400            break;
401        }
402    }
403
404    /// Peek at the current character
405    fn peek(&self) -> Option<char> {
406        self.input.chars().nth(self.position)
407    }
408
409    /// Advance to the next character
410    fn advance(&mut self) -> Option<char> {
411        if let Some(ch) = self.peek() {
412            self.position += 1;
413            if ch == '\n' {
414                self.line += 1;
415                self.column = 1;
416            } else {
417                self.column += 1;
418            }
419            Some(ch)
420        } else {
421            None
422        }
423    }
424
425    /// Expect a specific character
426    fn expect(&mut self, expected: char) -> Result<()> {
427        match self.advance() {
428            Some(ch) if ch == expected => Ok(()),
429            Some(ch) => Err(Error::parse(
430                format!("Expected '{}', found '{}'", expected, ch),
431                self.line,
432                self.column,
433            )),
434            None => Err(Error::parse(
435                format!("Expected '{}', found end of input", expected),
436                self.line,
437                self.column,
438            )),
439        }
440    }
441
442    /// Check if we're at the end of input
443    fn is_at_end(&self) -> bool {
444        self.position >= self.input.len()
445    }
446}
447
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare `key = value` line produces a string entry.
    #[test]
    fn test_simple_key_value() {
        let parsed = parse("key = value").unwrap();
        let entry = parsed.get("key").unwrap();
        assert_eq!(entry.as_string().unwrap(), "value");
    }

    /// Unquoted numerals are typed as integers or floats.
    #[test]
    fn test_numbers() {
        let parsed = parse("int = 42\nfloat = 3.14").unwrap();

        let int_entry = parsed.get("int").unwrap();
        assert_eq!(int_entry.as_integer().unwrap(), 42);

        let float_entry = parsed.get("float").unwrap();
        assert_eq!(float_entry.as_float().unwrap(), 3.14);
    }

    /// `true` and `false` are recognised as booleans.
    #[test]
    fn test_booleans() {
        let parsed = parse("bool1 = true\nbool2 = false").unwrap();

        let first = parsed.get("bool1").unwrap();
        assert_eq!(first.as_bool().unwrap(), true);

        let second = parsed.get("bool2").unwrap();
        assert_eq!(second.as_bool().unwrap(), false);
    }

    /// Double-quoted values keep their inner spaces.
    #[test]
    fn test_quoted_strings() {
        let parsed = parse(r#"quoted = "hello world""#).unwrap();
        let entry = parsed.get("quoted").unwrap();
        assert_eq!(entry.as_string().unwrap(), "hello world");
    }

    /// Keys under a `[section]` header are reachable via a dotted path.
    #[test]
    fn test_sections() {
        let parsed = parse("[section]\nkey = value").unwrap();
        let entry = parsed.get("section.key").unwrap();
        assert_eq!(entry.as_string().unwrap(), "value");
    }

    /// Space-separated unquoted values become an array.
    #[test]
    fn test_arrays() {
        let parsed = parse("arr = item1 item2 item3").unwrap();
        let items = parsed.get("arr").unwrap().as_array().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_string().unwrap(), "item1");
    }

    /// Full-line and inline `#` comments are ignored.
    #[test]
    fn test_comments() {
        let parsed = parse("# This is a comment\nkey = value # inline comment").unwrap();
        let entry = parsed.get("key").unwrap();
        assert_eq!(entry.as_string().unwrap(), "value");
    }
}