confget 5.1.2

Parse configuration files.
Documentation
/*
 * SPDX-FileCopyrightText: Peter Pentchev <roam@ringlet.net>
 * SPDX-License-Identifier: BSD-2-Clause
 */
//! Parse INI-style files using a Nom-based parser.

use std::collections::HashMap;
use std::iter;

use anyhow::{anyhow, Context};
use itertools::Itertools;
use nom::{
    character::complete::{char, none_of, one_of},
    combinator::{all_consuming, opt},
    multi::{many0, many1},
    sequence::{pair, tuple},
    IResult,
};

use crate::defs::{ConfgetError, Config, SectionData};

use super::{Backend, DataRead};

/// A section name and the variables defined within it.
///
/// This is the shape in which `p_section` returns a single parsed section and
/// in which `p_ini` returns the list of named sections.
type SectionRaw = (String, SectionData);

/// Consume a (possibly empty) run of spaces and tabs and hand it back as a string.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_spc(input: &str) -> IResult<&str, String> {
    many0(one_of(" \t"))(input)
        .map(|(rest, blanks)| (rest, blanks.into_iter().collect::<String>()))
}

/// Consume a newline and the spaces and tabs that follow it.
///
/// Only the whitespace after the newline is returned; the newline itself is dropped.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_nl(input: &str) -> IResult<&str, String> {
    pair(char('\n'), p_spc)(input).map(|(rest, (_newline, indent))| (rest, indent))
}

/// Read a variable name: one or more characters other than space, tab, `[`,
/// `#`, `;`, `=`, and newline.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_var(input: &str) -> IResult<&str, String> {
    let (rest, letters) = many1(none_of(" \t[#;=\n"))(input)?;
    let name: String = letters.into_iter().collect();
    Ok((rest, name))
}

/// Read the text of a value up to, but not including, a continuation backslash.
///
/// The text is made up of zero or more groups of "ordinary characters, a
/// backslash, one character that is neither a backslash nor a newline",
/// followed by a final run of ordinary characters. Everything matched is
/// returned verbatim, backslashes included.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_backslashes(input: &str) -> IResult<&str, String> {
    let (rest, (escaped, tail)) = pair(
        many0(tuple((many0(none_of("\\\n")), char('\\'), none_of("\\\n")))),
        many0(none_of("\\\n")),
    )(input)?;

    // Stitch the matched pieces back together in the order they were read.
    let mut text = String::new();
    for (prefix, backslash, follower) in escaped {
        text.extend(prefix);
        text.push(backslash);
        text.push(follower);
    }
    text.extend(tail);

    Ok((rest, text))
}

/// Read one line that ends in a continuation backslash.
///
/// The result is the line's text followed by the leading whitespace of the
/// next line; the backslash and the newline themselves are discarded.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_continued(input: &str) -> IResult<&str, String> {
    let (rest, (mut text, _backslash, indent)) = tuple((p_backslashes, char('\\'), p_nl))(input)?;
    text.push_str(&indent);
    Ok((rest, text))
}

/// Read the final (or only) line of a value, i.e. one not ending in a backslash.
///
/// Trailing whitespace is stripped from the returned text.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_not_continued(input: &str) -> IResult<&str, String> {
    let (rest, (mut text, _indent)) = pair(p_backslashes, p_nl)(input)?;
    // Drop the trailing whitespace in place.
    let kept = text.trim_end().len();
    text.truncate(kept);
    Ok((rest, text))
}

/// Read a complete `var = value` assignment.
///
/// Any number of backslash-continued lines may precede the final line of the
/// value; all the pieces are concatenated into a single string.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_line_var(input: &str) -> IResult<&str, (String, String)> {
    tuple((
        p_var,
        p_spc,
        char('='),
        p_spc,
        many0(p_continued),
        p_not_continued,
    ))(input)
    .map(|(rest, (name, _, _equals, _, continued, last))| {
        let value = continued.into_iter().chain(iter::once(last)).join("");
        (rest, (name, value))
    })
}

/// Consume one line that carries no data: an empty line, or a comment
/// introduced by `#` or `;`.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_line_skip(input: &str) -> IResult<&str, ()> {
    pair(opt(pair(one_of("#;"), many0(none_of("\n")))), p_nl)(input)
        .map(|(rest, _ignored)| (rest, ()))
}

/// Consume as many consecutive empty or comment lines as there are (maybe none).
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_skip_lines(input: &str) -> IResult<&str, ()> {
    many0(p_line_skip)(input).map(|(rest, _skipped)| (rest, ()))
}

/// Read a `[section name]` header line and return the section name.
///
/// Whitespace right after the opening bracket is skipped by the parser and
/// whitespace before the closing bracket is trimmed from the result.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_section_start(input: &str) -> IResult<&str, String> {
    let (rest, (_open, _, raw_name, _close, _, _)) = tuple((
        char('['),
        p_spc,
        many1(none_of("]\n")),
        char(']'),
        p_spc,
        p_nl,
    ))(input)?;

    let untrimmed: String = raw_name.into_iter().collect();
    Ok((rest, untrimmed.trim_end().to_owned()))
}

/// Read the body of a section: any number of variable assignments, each
/// optionally followed by empty or comment lines that are thrown away.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_section_contents(input: &str) -> IResult<&str, Vec<(String, String)>> {
    many0(pair(p_line_var, p_skip_lines))(input).map(|(rest, entries)| {
        let assignments = entries
            .into_iter()
            .map(|(assignment, ())| assignment)
            .collect::<Vec<_>>();
        (rest, assignments)
    })
}

/// Parse a section: a name and zero or more variables.
///
/// # Errors
///
/// None by itself, propagates parse failures.
fn p_section(input: &str) -> IResult<&str, SectionRaw> {
    let (r_input, (name, _, contents)) =
        tuple((p_section_start, p_skip_lines, p_section_contents))(input)?;
    Ok((r_input, (name, contents.into_iter().collect())))
}

/// Read an entire INI file.
///
/// The file consists of optional leading whitespace and skipped lines, the
/// variables of an unnamed first section, and then any number of named
/// sections. The whole input must be consumed for the parse to succeed.
///
/// # Errors
///
/// Raises none of its own; only passes along failures of the underlying parsers.
fn p_ini(input: &str) -> IResult<&str, (SectionData, Vec<SectionRaw>)> {
    all_consuming(tuple((
        p_spc,
        p_skip_lines,
        p_section_contents,
        many0(p_section),
    )))(input)
    .map(|(rest, (_indent, (), unnamed, named))| {
        (rest, (unnamed.into_iter().collect::<SectionData>(), named))
    })
}

/// Parse INI-style files using a Nom-based parser.
///
/// The backend only borrows the configuration (for the lifetime `'cfg`);
/// the file itself is read and parsed when `read_file` is invoked.
#[derive(Debug)]
// NOTE(review): presumably needed because the type name repeats the name of
// the enclosing module - confirm.
#[allow(clippy::module_name_repetitions)]
pub struct IniNomBackend<'cfg> {
    /// The configuration settings.
    ///
    /// Currently, only `filename`, `encoding`, `section_specified`, and
    /// `section` are used.
    config: &'cfg Config,

    /// The filename extracted from the configuration settings.
    filename: &'cfg str,
}

impl<'cfg> Backend<'cfg> for IniNomBackend<'cfg> {
    /// Build a backend for the file named in the configuration.
    ///
    /// # Errors
    ///
    /// [`ConfgetError::Config`] if the configuration does not name a file.
    #[inline]
    fn from_config(config: &'cfg Config) -> Result<Self, ConfgetError> {
        let filename: &str = config
            .filename
            .as_ref()
            .ok_or_else(|| ConfgetError::Config("No filename supplied".to_owned()))?;
        Ok(Self { config, filename })
    }

    /// Read and parse the INI file, returning the variables grouped by section
    /// together with the name of the section to use by default.
    ///
    /// Variables that appear before any `[section]` header are stored under
    /// the empty section name. Sections that appear more than once are merged.
    ///
    /// The default section name is the configured one if a section was
    /// specified explicitly; otherwise it is the empty string if the unnamed
    /// section has any variables, and the name of the first named section
    /// (or the empty string if there are none) if it does not.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading the file's lines.
    /// [`ConfgetError::FileFormat`] if the contents cannot be parsed.
    #[inline]
    fn read_file(&self) -> Result<DataRead, ConfgetError> {
        // The grammar requires every line, the last one included, to be
        // terminated by a newline.
        let contents = {
            let mut res = super::get_file_lines(self.filename, &self.config.encoding)?.join("\n");
            if !res.ends_with('\n') {
                res.push('\n');
            }
            res
        };
        let (r_input, (first_section, sections)) = p_ini(&contents)
            // The parse error borrows from `contents`; make it own its input.
            .map_err(|err| err.map_input(std::borrow::ToOwned::to_owned))
            .context("Parse error")
            .map_err(|err| ConfgetError::FileFormat(self.filename.to_owned(), err))?;
        // `p_ini` wraps its grammar in `all_consuming`, so leftover input
        // should already have been reported as a parse error; this check is
        // kept as a safeguard.
        if !r_input.is_empty() {
            return Err(ConfgetError::FileFormat(
                self.filename.to_owned(),
                anyhow!(
                    "{len} characters left over after parsing valid lines: {escaped}",
                    len = r_input.len(),
                    escaped = r_input.escape_debug()
                ),
            ));
        }

        let first_name = if self.config.section_specified {
            self.config.section.clone()
        } else if first_section.is_empty() {
            // No variables in the unnamed section: fall back to the first
            // named section, if there is one.
            sections
                .first()
                .map(|sect| sect.0.clone())
                .unwrap_or_default()
        } else {
            String::new()
        };

        let mut res = HashMap::new();
        if !first_section.is_empty() {
            res.insert(String::new(), first_section);
        }
        for (name, values) in sections {
            // Merge repeated sections; later definitions override earlier ones.
            res.entry(name).or_default().extend(values);
        }

        Ok((res, first_name))
    }
}