confget 5.0.1

Parse configuration files.
Documentation
/*
 * Copyright (c) 2022  Peter Pentchev <roam@ringlet.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
//! Parse INI-style files using a Nom-based parser.

use std::collections::HashMap;
use std::iter;

use anyhow::{anyhow, Context};
use itertools::Itertools;
use nom::{
    character::complete::{char, none_of, one_of},
    combinator::{all_consuming, opt},
    multi::{many0, many1},
    sequence::{pair, tuple},
    IResult,
};

use crate::defs::{ConfgetError, Config, SectionData};

use super::{Backend, DataRead};

/// A single parsed section: its name paired with the variables defined within it.
type SectionRaw = (String, SectionData);

/// Consume a (possibly empty) run of space and tab characters.
///
/// The characters that were matched are handed back as an owned string.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_spc(input: &str) -> IResult<&str, String> {
    many0(one_of(" \t"))(input)
        .map(|(rest, blanks)| (rest, blanks.into_iter().collect::<String>()))
}

/// Consume a newline and the spaces and tabs that follow it.
///
/// Only the whitespace after the newline is returned; the newline itself
/// is dropped.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_nl(input: &str) -> IResult<&str, String> {
    pair(char('\n'), p_spc)(input).map(|(rest, (_newline, indent))| (rest, indent))
}

/// Consume a variable name and return it as an owned string.
///
/// A name is one or more characters, none of which is a space, a tab,
/// `[`, `#`, `;`, `=`, or a newline.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_var(input: &str) -> IResult<&str, String> {
    many1(none_of(" \t[#;=\n"))(input)
        .map(|(rest, letters)| (rest, letters.into_iter().collect::<String>()))
}

/// Consume the text of a value up to a newline or a line-continuation backslash.
///
/// A backslash followed by a character that is neither another backslash nor
/// a newline is kept verbatim together with that character. Parsing stops
/// before a newline and before any backslash that does not start such a pair.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_backslashes(input: &str) -> IResult<&str, String> {
    // Ordinary characters, then a backslash and the single character it precedes.
    let escaped_group = tuple((many0(none_of("\\\n")), char('\\'), none_of("\\\n")));
    let (r_input, (groups, tail)) = pair(many0(escaped_group), many0(none_of("\\\n")))(input)?;

    let text = groups
        .into_iter()
        .map(|(plain, backslash, escaped)| {
            plain
                .into_iter()
                .chain(iter::once(backslash))
                .chain(iter::once(escaped))
                .collect::<String>()
        })
        .chain(iter::once(tail.into_iter().collect::<String>()))
        .join("");
    Ok((r_input, text))
}

/// Consume one line that ends in a continuation backslash.
///
/// The result is the text before the backslash followed by the spaces and
/// tabs that start the next line; the backslash and the newline are dropped.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_continued(input: &str) -> IResult<&str, String> {
    let (r_input, (mut text, _backslash, indent)) =
        tuple((p_backslashes, char('\\'), p_nl))(input)?;
    text.push_str(&indent);
    Ok((r_input, text))
}

/// Consume the final (or only) line of a value, one not ending in a backslash.
///
/// Trailing whitespace is removed from the text, and the whitespace at
/// the start of the following line is consumed and discarded.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_not_continued(input: &str) -> IResult<&str, String> {
    pair(p_backslashes, p_nl)(input).map(|(rest, (mut text, _indent))| {
        // Trim in place instead of copying the trimmed slice.
        let kept = text.trim_end().len();
        text.truncate(kept);
        (rest, text)
    })
}

/// Consume a `var = value` definition, gluing continuation lines together.
///
/// Returns the variable name and the full value assembled from every
/// continued line followed by the final, non-continued one.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_line_var(input: &str) -> IResult<&str, (String, String)> {
    // The `name =` prefix, including the optional blanks around the equals sign.
    let lhs = tuple((p_var, p_spc, char('='), p_spc));
    let (r_input, ((name, _, _, _), mut parts, last)) =
        tuple((lhs, many0(p_continued), p_not_continued))(input)?;
    parts.push(last);
    Ok((r_input, (name, parts.concat())))
}

/// Consume one line that carries no data: an empty line or a comment.
///
/// A comment starts with `#` or `;` and runs to the end of the line.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_line_skip(input: &str) -> IResult<&str, ()> {
    let comment = pair(one_of("#;"), many0(none_of("\n")));
    pair(opt(comment), p_nl)(input).map(|(rest, _)| (rest, ()))
}

/// Consume any number (including zero) of empty or comment lines.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_skip_lines(input: &str) -> IResult<&str, ()> {
    many0(p_line_skip)(input).map(|(rest, _skipped)| (rest, ()))
}

/// Consume a `[section name]` header line and return the section name.
///
/// Blanks right after the opening bracket are skipped, and trailing
/// whitespace is removed from the name.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_section_start(input: &str) -> IResult<&str, String> {
    let opening = tuple((char('['), p_spc));
    let closing = tuple((char(']'), p_spc, p_nl));
    let (r_input, (_, letters, _)) =
        tuple((opening, many1(none_of("]\n")), closing))(input)?;

    let mut name = letters.into_iter().collect::<String>();
    let kept = name.trim_end().len();
    name.truncate(kept);
    Ok((r_input, name))
}

/// Consume zero or more variable definitions, each optionally followed by
/// empty or comment lines, and return the name/value pairs in file order.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_section_contents(input: &str) -> IResult<&str, Vec<(String, String)>> {
    let (r_input, entries) = many0(pair(p_line_var, p_skip_lines))(input)?;
    let mut vars = Vec::with_capacity(entries.len());
    for (var_val, ()) in entries {
        vars.push(var_val);
    }
    Ok((r_input, vars))
}

/// Consume a whole section: its header line, any skipped lines, and its variables.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_section(input: &str) -> IResult<&str, SectionRaw> {
    let (r_input, (name, (), vars)) =
        tuple((p_section_start, p_skip_lines, p_section_contents))(input)?;
    let data = vars.into_iter().collect();
    Ok((r_input, (name, data)))
}

/// Consume an entire file: the variables of the leading unnamed section,
/// then every named section. The whole input must be consumed.
///
/// # Errors
///
/// Raises none of its own; failures of the underlying parsers are passed on.
fn p_ini(input: &str) -> IResult<&str, (SectionData, Vec<SectionRaw>)> {
    // Leading blanks and skipped lines before the first variable or section.
    let preamble = tuple((p_spc, p_skip_lines));
    let body = tuple((p_section_contents, many0(p_section)));
    all_consuming(tuple((preamble, body)))(input).map(|(rest, (_, (globals, sections)))| {
        (rest, (globals.into_iter().collect(), sections))
    })
}

/// A backend that reads INI-style files using the Nom-based parser in this module.
#[derive(Debug)]
#[allow(clippy::module_name_repetitions)]
pub struct IniNomBackend<'cfg> {
    /// The configuration settings.
    ///
    /// This backend reads the `filename`, `encoding`, `section_specified`,
    /// and `section` fields.
    config: &'cfg Config,

    /// The filename extracted from the configuration settings.
    filename: &'cfg str,
}

impl<'cfg> Backend<'cfg> for IniNomBackend<'cfg> {
    /// Build a backend for the file named in the configuration.
    ///
    /// # Errors
    ///
    /// [`ConfgetError::Config`] if the configuration holds no filename.
    #[inline]
    fn from_config(config: &'cfg Config) -> Result<Self, ConfgetError> {
        config
            .filename
            .as_ref()
            .map(|filename| Self { config, filename })
            .ok_or_else(|| ConfgetError::Config("No filename supplied".to_owned()))
    }

    /// Read and parse the file, returning the variables grouped by section
    /// together with the name of the section to use first.
    ///
    /// # Errors
    ///
    /// Passes on any error from reading the file.
    /// [`ConfgetError::FileFormat`] if the contents cannot be parsed.
    #[inline]
    fn read_file(&self) -> Result<DataRead, ConfgetError> {
        // The parser expects every line, including the last, to end in a newline.
        let mut contents =
            super::get_file_lines(self.filename, &self.config.encoding)?.join("\n");
        if !contents.ends_with('\n') {
            contents.push('\n');
        }

        let (r_input, (first_section, sections)) = p_ini(&contents)
            .map_err(|err| err.map_input(std::borrow::ToOwned::to_owned))
            .context("Parse error")
            .map_err(|err| ConfgetError::FileFormat(self.filename.to_owned(), err))?;
        if !r_input.is_empty() {
            let leftover = anyhow!(
                "{} characters left over after parsing valid lines: {}",
                r_input.len(),
                r_input.escape_debug()
            );
            return Err(ConfgetError::FileFormat(self.filename.to_owned(), leftover));
        }

        // An explicit section wins; otherwise use the unnamed section if it has
        // variables, else the first named section, else the empty name.
        let first_name = if self.config.section_specified {
            self.config.section.clone()
        } else if first_section.is_empty() {
            sections
                .first()
                .map_or_else(String::new, |(name, _)| name.clone())
        } else {
            String::new()
        };

        let mut res = HashMap::new();
        if !first_section.is_empty() {
            res.insert(String::new(), first_section);
        }
        // Sections sharing a name are merged, later values replacing earlier ones.
        for (name, values) in sections {
            res.entry(name).or_default().extend(values);
        }

        Ok((res, first_name))
    }
}