confget 5.0.1

Parse configuration files.
Documentation
/*
 * Copyright (c) 2021, 2022  Peter Pentchev <roam@ringlet.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
//! Parse INI-style configuration files.

use anyhow::anyhow;
use regex::{Captures, Regex};

use crate::backend::{Backend, DataRead};
use crate::defs::{ConfgetError, Config, FileData, SectionData};

/// A backend type for parsing INI-style configuration files.
///
/// The backend only borrows its configuration: both fields share
/// the `'cfg` lifetime of the [`Config`] reference it was built from.
#[derive(Debug)]
#[non_exhaustive]
// NOTE(review): presumably needed because the type name repeats the name of
// the enclosing module — confirm against the module layout.
#[allow(clippy::module_name_repetitions)]
pub struct IniREBackend<'cfg> {
    /// Configuration settings, e.g. filename and section.
    pub config: &'cfg Config,

    /// The filename specified in the configuration.
    ///
    /// `from_config()` refuses to build a backend when the configuration
    /// has no filename, so this is always a real value borrowed from `config`.
    filename: &'cfg str,
}

/// The current state of the INI-style file parser.
///
/// A state value is consumed by `State::feed_line()` for every input line
/// and a new one is returned, so it carries everything that must survive
/// from one line to the next.
#[derive(Debug)]
struct State {
    /// The regular expression to use to detect comments.
    /// Lines matching it are skipped unless they continue a previous line.
    re_comment: Regex,
    /// The regular expression to use to detect a section header.
    /// Its `name` capture group holds the section name.
    re_section: Regex,
    /// The regular expression to use to detect a variable definition.
    /// The parser reads its `name`, `value`, `ws`, and `cont` capture groups.
    re_variable: Regex,
    /// The name of the input file to read.
    /// Only used for reporting file format errors.
    filename: String,
    /// The name of the first section in the file, if there was one at all.
    /// It is only recorded if no variable or section was seen before it and
    /// no value has been stored here already.
    first_section: Option<String>,
    /// The name of the current section.
    /// New variables are stored into the data for this section.
    section: String,
    /// If this is a continuation line, the name and value of the current variable.
    /// The value is the text accumulated so far from the preceding lines.
    cont: Option<(String, String)>,
    /// Have we found any variables or sections at all already?
    /// Used when determining whether the first section should be the default one
    /// or there were any variables defined before that.
    found: bool,
}

impl State {
    /// Store a fully assembled variable into the data of the current section.
    ///
    /// # Errors
    ///
    /// Returns [`ConfgetError::Internal`] if `res` holds no data for
    /// the current section.
    fn store_variable(
        &self,
        res: &mut FileData,
        name: String,
        value: String,
    ) -> Result<(), ConfgetError> {
        match res.get_mut(&self.section) {
            Some(data) => {
                data.insert(name, value);
                Ok(())
            }
            None => Err(ConfgetError::Internal(format!(
                "Internal error: no data for section {}",
                self.section
            ))),
        }
    }

    /// Consume a single line of input and return the parser state that follows it.
    ///
    /// The line is handled as the first applicable one of:
    /// - the continuation of a variable started on a previous line;
    /// - a comment or blank line, which is skipped;
    /// - a section header, which makes that section the current one;
    /// - a variable definition, which is either stored into `res` right away
    ///   or kept in the state if it ends with a continuation backslash.
    ///
    /// # Errors
    ///
    /// Returns [`ConfgetError::FileFormat`] if the line is none of the above.
    /// Returns [`ConfgetError::Internal`] if an expected capture group or
    /// the data for the current section is missing.
    fn feed_line(mut self, line: &str, res: &mut FileData) -> Result<Self, ConfgetError> {
        /// Fetch a named capture group that the pattern is expected to always provide.
        fn group<'data>(
            caps: &'data Captures<'_>,
            name: &str,
        ) -> Result<&'data str, ConfgetError> {
            match caps.name(name) {
                Some(found) => Ok(found.as_str()),
                None => Err(ConfgetError::Internal(format!(
                    "Internal error: no '{}' in {:?}",
                    name, caps
                ))),
            }
        }

        // A pending continuation takes precedence over everything else:
        // the line is appended to the value no matter what it looks like.
        if let Some((name, mut value)) = self.cont.take() {
            return match line.strip_suffix('\\') {
                Some(stripped) => {
                    // Still more to come; keep accumulating.
                    value.push_str(stripped);
                    self.cont = Some((name, value));
                    Ok(self)
                }
                None => {
                    // This was the last piece; the variable is complete now.
                    value.push_str(line.trim_end());
                    self.store_variable(res, name, value)?;
                    Ok(self)
                }
            };
        }

        if self.re_comment.is_match(line) {
            return Ok(self);
        }

        if let Some(caps) = self.re_section.captures(line) {
            let name = group(&caps, "name")?;
            res.entry(name.to_owned()).or_insert_with(SectionData::new);
            // Only a section that comes before anything else in the file
            // may become the default one.
            if self.first_section.is_none() && !self.found {
                self.first_section = Some(name.to_owned());
            }
            self.section = name.to_owned();
            self.found = true;
            return Ok(self);
        }

        let caps = match self.re_variable.captures(line) {
            Some(caps) => caps,
            None => {
                return Err(ConfgetError::FileFormat(
                    self.filename,
                    anyhow!(format!("Unexpected line: '{}'", line.escape_debug())),
                ))
            }
        };
        let name = group(&caps, "name")?;
        let value = group(&caps, "value")?;
        if caps.name("cont").is_some() {
            // Keep the whitespace before the backslash: it is part of
            // the value once the next line is appended.
            let ws = group(&caps, "ws")?;
            self.cont = Some((name.to_owned(), format!("{}{}", value, ws)));
        } else {
            self.store_variable(res, name.to_owned(), value.to_owned())?;
        }
        self.found = true;
        Ok(self)
    }
}

/// The regular expression to use for matching comment lines.
///
/// Written in verbose (`(?x)`) mode. It matches lines that are empty or
/// consist of whitespace only, as well as lines whose first
/// non-whitespace character is `#` or `;`.
static RE_COMMENT: &str = r"(?x) ^ \s* (?: [\#;] .* )?  $ ";

/// The regular expression to use for matching section headers.
///
/// Written in verbose (`(?x)`) mode. It matches a line consisting of
/// a bracketed name (`[name]`), optionally surrounded by whitespace both
/// outside and inside the brackets. The `name` capture group holds
/// the section name, which must consist of at least one character and
/// may not contain a closing bracket.
static RE_SECTION: &str = r"(?x)
    ^ \s*
    \[ \s*
    (?P<name> [^\]]+? )
    \s* \]
    \s* $ ";

/// The regular expression to use for matching var=value lines.
///
/// Written in verbose (`(?x)`) mode. The capture groups are:
/// - `name`: the variable name, one or more characters that are neither
///   whitespace nor `=`;
/// - `value`: the value, matched lazily so that any trailing whitespace
///   ends up in `ws` instead;
/// - `ws`: the (possibly empty) whitespace at the end of the line or
///   right before the continuation backslash;
/// - `cont`: only present if the line ends with a backslash, i.e.
///   the value continues on the next line.
static RE_VARIABLE: &str = r"(?x)
    ^ \s*
    (?P<name> [^\s=]+ )
    \s* = \s*
    (?P<value> .*? )
    (?P<ws> \s* )
    (?P<cont> [\\] )?
    $ ";

impl<'cfg> Backend<'cfg> for IniREBackend<'cfg> {
    /// Initialize an INI-style backend object.
    ///
    /// # Errors
    ///
    /// Returns [`ConfgetError`] if no filename is specified in the config.
    #[inline]
    fn from_config(config: &'cfg Config) -> Result<Self, ConfgetError> {
        let filename: &str = config
            .filename
            .as_ref()
            .ok_or_else(|| ConfgetError::Config("No filename supplied".to_owned()))?;
        Ok(Self { config, filename })
    }

    /// Parse an INI-style file consisting of zero or more sections.
    ///
    /// # Errors
    ///
    /// Returns a [`ConfgetError`] error on
    /// configuration errors or if the file's contents does not
    /// follow the expected format.
    /// Propagates errors returned by filesystem operations.
    #[inline]
    fn read_file(&self) -> Result<DataRead, ConfgetError> {
        let mut res = FileData::new();
        res.insert(String::new(), SectionData::new());

        let init_state = State {
            re_comment: Regex::new(RE_COMMENT).map_err(|err| {
                ConfgetError::Internal(format!(
                    "Could not compile the '{}' regular expression: {}",
                    RE_COMMENT, err
                ))
            })?,
            re_section: Regex::new(RE_SECTION).map_err(|err| {
                ConfgetError::Internal(format!(
                    "Could not compile the '{}' regular expression: {}",
                    RE_SECTION, err
                ))
            })?,
            re_variable: Regex::new(RE_VARIABLE).map_err(|err| {
                ConfgetError::Internal(format!(
                    "Could not compile the '{}' regular expression: {}",
                    RE_VARIABLE, err
                ))
            })?,
            filename: self.filename.to_owned(),
            first_section: self
                .config
                .section_specified
                .then(|| self.config.section.clone()),
            section: String::new(),
            cont: None,
            found: false,
        };

        let final_state = super::get_file_lines(self.filename, &self.config.encoding)?
            .iter()
            .try_fold(init_state, |state, line| state.feed_line(line, &mut res))?;
        if final_state.cont.is_some() {
            return Err(ConfgetError::FileFormat(
                self.filename.to_owned(),
                anyhow!("Line continuation on the last line"),
            ));
        }
        Ok((
            res,
            final_state
                .first_section
                .unwrap_or_else(|| self.config.section.clone()),
        ))
    }
}