// confget 5.1.2
//
// Parse configuration files.
// Documentation
/*
 * SPDX-FileCopyrightText: Peter Pentchev <roam@ringlet.net>
 * SPDX-License-Identifier: BSD-2-Clause
 */
//! Abstract definitions for data parsing backends.

use std::borrow::ToOwned;
use std::ffi::{CStr, CString};
use std::fs;
use std::io::{self, Read};

use anyhow::{anyhow, Context};
use encoding::label as enc_label;
use encoding::DecoderTrap;
use once_cell::sync::Lazy;

use crate::defs::{ConfgetError, Config, FileData};

#[cfg(feature = "ini-nom")]
pub mod ini_nom;

#[cfg(feature = "ini-regex")]
pub mod ini_re;

/// The variables read from the input data and the first section name.
///
/// This is the success value of [`Backend::read_file`], a two-element tuple:
///
/// 1. a mapping of section names to variable => value mappings;
/// 2. the name of the first section encountered in the input data.
///
/// The second element is particularly useful for the "ini" backend, where
/// `confget` will behave differently depending on the
/// [`Config::section_override`][`crate::defs::Config::section_override`]
/// setting.
pub type DataRead = (FileData, String);

/// A backend that implements parsing a specific type of configuration data.
///
/// A `Backend` object may be constructed manually using [`Backend::from_config`],
/// but it may be preferable to use the [`get_backend`][`crate::get_backend`] or
/// [`read_ini_file`][`crate::read_ini_file`] functions instead.
///
/// The `'cfg` lifetime parameter is the lifetime of the [`Config`] reference
/// that is handed to [`Backend::from_config`].
pub trait Backend<'cfg> {
    /// Initialize a backend object, performing config checks if necessary.
    ///
    /// The configuration is passed by reference and is borrowed for `'cfg`.
    /// Because of the `Self: Sized` bound, this constructor is only available
    /// on concrete backend types, not through a `dyn Backend` trait object.
    ///
    /// # Errors
    ///
    /// See the documentation of the individual backends.
    fn from_config(config: &'cfg Config) -> Result<Self, ConfgetError>
    where
        Self: Sized;

    /// Obtain and parse the input data in a backend-specific way.
    ///
    /// On success, returns a [`DataRead`] pair: the parsed data and
    /// the name of the first section encountered in the input.
    ///
    /// # Errors
    ///
    /// See the documentation of the individual backends.
    fn read_file(&self) -> Result<DataRead, ConfgetError>;
}

/// The codeset name reported for the process's `LC_CTYPE` locale category.
///
/// The value is computed lazily, the first time this static is dereferenced:
/// the initializer asks `setlocale()` to select the `LC_CTYPE` locale named by
/// an empty string and then queries `nl_langinfo(CODESET)`.
/// On success the codeset name is stored; on failure a human-readable
/// description of what went wrong is stored instead.
///
/// `get_file_lines` uses this as the character encoding of the input data
/// when it is called with an empty encoding name.
static DEFAULT_ENCODING: Lazy<Result<String, String>> = Lazy::new(|| {
    // The locale name passed to `setlocale()` below is the empty string.
    let env_locale = CString::from_vec_with_nul(vec![0])
        .map_err(|err| format!("Could not build an empty C-style string: {err}"))?;

    // SAFETY: `env_locale` is a NUL-terminated string that stays alive for
    // the duration of the call; `once_cell::sync` should prevent data races.
    let locale_ptr = unsafe { libc::setlocale(libc::LC_CTYPE, env_locale.as_c_str().as_ptr()) };
    if locale_ptr.is_null() {
        return Err("setlocale() failed".to_owned());
    }

    // SAFETY: `once_cell::sync` should prevent data races.
    let codeset_ptr = unsafe { libc::nl_langinfo(libc::CODESET) };
    if codeset_ptr.is_null() {
        return Err("nl_langinfo(CODESET) returned a null pointer".to_owned());
    }

    // SAFETY: the pointer was checked for null right above.
    let codeset = unsafe { CStr::from_ptr(codeset_ptr) }
        .to_str()
        .map_err(|err| {
            format!("nl_langinfo(CODESET) returned a non-UTF-8 string: {err}")
        })?;

    // An empty codeset name cannot be used to look up a decoder later on.
    if codeset.is_empty() {
        return Err("nl_langinfo(CODESET) returned an empty string".to_owned());
    }
    Ok(codeset.to_owned())
});

/// Fetch the whole input, decode it, and split the text into lines.
///
/// A `filename` of `-` means "read the standard input until end-of-file";
/// any other value is treated as the path of a file that is read in one go.
/// An empty `encoding` selects the locale-derived [`DEFAULT_ENCODING`];
/// otherwise `encoding` is looked up as a WHATWG encoding label.
/// Decoding is strict: the first undecodable byte sequence is an error.
/// The returned strings do not include the line terminators.
///
/// # Errors
/// - [`ConfgetError::Internal`] if the default encoding is needed but
///   could not be determined.
/// - [`ConfgetError::ReadData`] if the encoding is not supported,
///   the input cannot be read, or the bytes cannot be decoded.
fn get_file_lines(filename: &str, encoding: &str) -> Result<Vec<String>, ConfgetError> {
    // Settle on the encoding label before touching the input at all.
    let label = if encoding.is_empty() {
        match DEFAULT_ENCODING.as_ref() {
            Ok(detected) => detected.as_str(),
            Err(reason) => return Err(ConfgetError::Internal(String::clone(reason))),
        }
    } else {
        encoding
    };
    let codec = enc_label::encoding_from_whatwg_label(label)
        .with_context(|| format!("Unsupported encoding '{escaped}'", escaped = label.escape_debug()))
        .map_err(ConfgetError::ReadData)?;

    // Slurp the raw bytes from whichever source was requested.
    let raw = match filename {
        "-" => {
            let mut stdin_bytes = Vec::new();
            let mut stdin = io::stdin().lock();
            stdin
                .read_to_end(&mut stdin_bytes)
                .context("Could not read from the standard input")
                .map_err(ConfgetError::ReadData)?;
            stdin_bytes
        }
        path => fs::read(path)
            .with_context(|| {
                format!(
                    "Could not read from the '{escaped}' file",
                    escaped = path.escape_debug()
                )
            })
            .map_err(ConfgetError::ReadData)?,
    };

    // Strict decoding: refuse the input instead of substituting characters.
    let text = match codec.decode(&raw, DecoderTrap::Strict) {
        Ok(decoded) => decoded,
        Err(err) => {
            return Err(ConfgetError::ReadData(anyhow!(
                "Could not decode the {} input bytes using the '{}' encoding: {}",
                raw.len(),
                label,
                err
            )))
        }
    };

    let lines = text.lines().map(String::from).collect();
    Ok(lines)
}