// libhashfindutils 0.1.0
//
// Search for file hashes
// Documentation
/*
    libhashfindutils – Internal library for the hashfindutils
    Copyright (C) 2023  Matthias Kaak

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

#![warn(
    clippy::all,
    clippy::pedantic,
    clippy::nursery,
    clippy::cargo_common_metadata
)]
// Anachronism
#![allow(clippy::non_ascii_literal)]
// Checked more or less manually, and the documentation agrees with me
// that these lints are usually not needed.
#![allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_lossless
)]
// Explicitly decided against; I think `let _ = …` is better than
// `mem::drop(…)`. TODO: align my opinion and community's one with
// each other.
#![allow(let_underscore_drop)]

use std::{
    ffi::OsString,
    fs::File,
    io::Read,
    os::unix::prelude::OsStringExt,
    path::{Path, PathBuf},
};

use anyhow::{anyhow, Context, Error};
use home::home_dir;
use nix::unistd::Uid;

/// Raw contents of one config file, as collected by
/// `InternalConfigFile::parse` before any defaults are applied.
struct InternalConfigFile
{
    /// Value of the `db_path` key; `None` if the key was absent (it
    /// may appear at most once).
    db_path: Option<PathBuf>,
    /// One entry per `search_path` line, in file order.
    search_paths: Vec<PathBuf>,
    /// One entry per `exclude_path` line, in file order.
    exclude_paths: Vec<PathBuf>,
    /// Value of the `no_global` key (`true`/`false`); `None` if the
    /// key was absent (it may appear at most once).
    no_global: Option<bool>,
}

/// Config for updatehashdb
///
/// Built by `UpdateDbConfig::new` from the global or the local config
/// file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UpdateDbConfig
{
    /// Database path: either the `db_path` entry of the config file or
    /// the built-in default.
    pub db_path: PathBuf,
    /// All `search_path` entries of the config file, in file order.
    pub search_paths: Vec<PathBuf>,
    /// All `exclude_path` entries of the config file, in file order.
    pub exclude_paths: Vec<PathBuf>,
}

/// Config for hashfind
///
/// Built by `HashfindConfig::new` from the local and the global config
/// file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashfindConfig
{
    /// Database paths; the local one (if any) comes before the global
    /// one (if any).
    pub db_paths: Vec<PathBuf>,
}

impl InternalConfigFile
{
    /// Reads and parses the config file at `path`.
    ///
    /// The file consists of `key=value` lines; each line is split at
    /// its first `=`.  Lines without a `=` are ignored.  Recognized
    /// keys are `db_path` and `no_global` (each at most once) as well
    /// as `search_path` and `exclude_path` (any number of times).
    ///
    /// Returns `Ok(None)` if the file couldn't be opened.
    ///
    /// # Errors
    /// Returns an error if the file can't be read as UTF-8, a key is
    /// unknown, a once-only key is repeated, a path value can't be
    /// parsed or `no_global` is neither `true` nor `false`.
    // rustfmt doesn't support let else yet, so …
    #[allow(clippy::manual_let_else)]
    fn parse(path: &Path) -> Result<Option<Self>, Error>
    {
        let mut file = match File::open(path)
        {
            Ok(file) => file,
            Err(_) => return Ok(None),
        };

        let mut contents = String::new();
        file.read_to_string(&mut contents)
            .with_context(|| format!("Couldn't read file {path:?} completely to UTF8 string"))?;

        let mut config = Self {
            db_path: None,
            search_paths: Vec::new(),
            exclude_paths: Vec::new(),
            no_global: None,
        };

        // Lines without a `=` carry no setting and are skipped.
        let settings = contents.lines().filter_map(|line| line.split_once('='));

        for (key, value) in settings
        {
            match key
            {
                "db_path" =>
                {
                    if config.db_path.is_some()
                    {
                        return Err(anyhow!("Database path was specified multiple times"));
                    }
                    let parsed = str_to_path(value)
                        .with_context(|| format!("Couldn't parse database path: {value:?}"))?;
                    config.db_path = Some(parsed);
                }
                "search_path" =>
                {
                    let parsed = str_to_path(value)
                        .with_context(|| format!("Couldn't parse search path: {value:?}"))?;
                    config.search_paths.push(parsed);
                }
                "exclude_path" =>
                {
                    let parsed = str_to_path(value)
                        .with_context(|| format!("Couldn't parse exclude path: {value:?}"))?;
                    config.exclude_paths.push(parsed);
                }
                "no_global" =>
                {
                    if config.no_global.is_some()
                    {
                        return Err(anyhow!("The no_global option was already specified"));
                    }
                    config.no_global = match value
                    {
                        "true" => Some(true),
                        "false" => Some(false),
                        _ =>
                        {
                            return Err(anyhow!(
                                "Unrecognized truth value: {value:?}; \
                                 please note that it can only be \"true\" or \"false\""
                            ));
                        }
                    };
                }
                _ =>
                {
                    return Err(anyhow!(
                        "Invalid key in config file: key = {key:?} ; value = {value:?}"
                    ));
                }
            }
        }

        Ok(Some(config))
    }

    /// Parses the system-wide config file `/etc/hashfindutils`.
    fn global() -> Result<Option<Self>, Error>
    {
        let location = Path::new("/etc/hashfindutils");
        Self::parse(location)
    }

    /// Parses the per-user config file
    /// `.zvavybir/hashfindutils/config` below the home directory.
    ///
    /// # Errors
    /// Additionally to the errors of `parse`, fails if the home
    /// directory can't be determined.
    fn local() -> Result<Option<Self>, Error>
    {
        let home = home_dir().context("Couldn't get home directory")?;
        Self::parse(&home.join(".zvavybir/hashfindutils/config"))
    }
}

impl UpdateDbConfig
{
    /// Creates new config for updatehashdb
    ///
    /// # Errors
    /// Returns error if something went wrong.
    pub fn new() -> Result<Self, Error>
    {
        let (internal, db_path) = if Uid::effective().is_root()
        {
            let mut internal = InternalConfigFile::global()
                .context("Couldn't read global config file")?
                .context("Couldn't open global config file")?;

            let db_path = internal
                .db_path
                .take()
                .unwrap_or_else(|| PathBuf::from("/usr/share/hashfindutils/db"));

            (internal, db_path)
        }
        else
        {
            let mut internal = InternalConfigFile::local()
                .context("Couldn't read local config file")?
                .context("Couldn't open local config file")?;

            let db_path = get_local_db_path(&mut internal)?;

            (internal, db_path)
        };

        Ok(Self {
            db_path,
            search_paths: internal.search_paths,
            exclude_paths: internal.exclude_paths,
        })
    }
}

impl HashfindConfig
{
    /// Creates new config for hashfind
    ///
    /// # Errors
    /// Returns error if something went wrong.
    pub fn new() -> Result<Self, Error>
    {
        let mut db_paths = vec![];

        if let Some(mut local) = InternalConfigFile::local()?
        {
            db_paths.push(get_local_db_path(&mut local)?);
            if local.no_global.unwrap_or(false)
            {
                return Ok(Self { db_paths });
            }
        }

        if let Some(db_path) = InternalConfigFile::global()?.and_then(|global| global.db_path)
        {
            db_paths.push(db_path);
        }

        Ok(Self { db_paths })
    }
}

/// Converts an escaped path string from a config file into an
/// absolute [`PathBuf`].
///
/// Characters are copied as their UTF-8 bytes, except for the
/// following backslash escapes, which allow arbitrary bytes in paths:
///
/// * `\\` – a literal backslash
/// * `\n` – a newline byte
/// * `\DDD` – the byte with the decimal value `DDD`; exactly three
///   digits are required and the value must fit into a byte
///
/// # Errors
/// Returns an error if an escape sequence is unknown, unfinished or
/// out of range, or if the resulting path is not absolute.
fn str_to_path(s: &str) -> Result<PathBuf, Error>
{
    let mut path = Vec::with_capacity(s.len());
    let mut s = s.chars();

    while let Some(c) = s.next()
    {
        if c == '\\'
        {
            match s.next()
            {
                Some('\\') => path.push(b'\\'),
                Some('n') => path.push(b'\n'),
                Some(c) if c.is_ascii_digit() =>
                {
                    let hundreds = c;
                    if hundreds >= '3'
                    {
                        return Err(anyhow!(
                            "Lead digit in numeric escape sequence was too big; \
                             were you trying to encode a character instead of a byte or to omit \"unnecessary\" lead zeros?"
                        ));
                    }
                    let tens = s.next().context(
                        "Numeric escape sequence was only one digit instead of three digits long",
                    )?;
                    if !tens.is_ascii_digit()
                    {
                        return Err(anyhow!(
                            "Second digit of numeric escape sequence wasn't an digit but {tens:?}"
                        ));
                    }
                    let ones = s.next().context(
                        "Numeric escape sequence was only two digit instead of three digits long",
                    )?;
                    if !ones.is_ascii_digit()
                    {
                        // Report the character that actually failed
                        // the check, i.e. the third one.
                        return Err(anyhow!(
                            "Third digit of numeric escape sequence wasn't an digit but {ones:?}"
                        ));
                    }
                    // All three are ASCII digits, so the casts are
                    // lossless.
                    let hundreds = hundreds as u8 - b'0';
                    let tens = tens as u8 - b'0';
                    let ones = ones as u8 - b'0';

                    // `hundreds * 100` does not need to be `checked_`
                    // because it was already checked to not be too
                    // big.
                    path.push((hundreds * 100).checked_add(tens * 10 + ones).context(
                        "Numeric escape sequence too big for a byte; \
                         were you trying to encode a whole character instead of a single byte?",
                    )?);
                }
                Some(c) => return Err(anyhow!("Unsupported escape sequence in path: {c:?}")),
                None => return Err(anyhow!("Unfinished escape sequence in path")),
            }
        }
        else
        {
            // Encode into a stack buffer instead of allocating a
            // `String` for every single character.
            path.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes());
        }
    }

    let osstr: OsString = OsStringExt::from_vec(path);
    let path = PathBuf::from(osstr);
    if !path.is_absolute()
    {
        return Err(anyhow!("Path must be absolute"));
    }
    Ok(path)
}

/// Determines the database path belonging to a local config file.
///
/// An explicitly configured `db_path` is moved out of `internal` and
/// returned; otherwise `.zvavybir/hashfindutils/db` below the home
/// directory is used.
///
/// # Errors
/// Fails if no path was configured and the home directory is unknown.
fn get_local_db_path(internal: &mut InternalConfigFile) -> Result<PathBuf, Error>
{
    if let Some(configured) = internal.db_path.take()
    {
        return Ok(configured);
    }

    let home = home_dir().context(
        "Database path wasn't neither specified nor could the \
         default value be calculated since the home directory is unknown",
    )?;

    Ok(home.join(".zvavybir/hashfindutils/db"))
}

#[cfg(test)]
mod tests
{
    use std::path::PathBuf;

    use crate::parser::str_to_path;

    /// Checks `str_to_path` against accepted, normalization-sensitive
    /// and rejected inputs.
    ///
    /// The "ä" cases verify that no Unicode normalization happens: the
    /// precomposed form (NFC, U+00E4) and the decomposed form (NFD,
    /// `a` followed by U+0308) must each map to themselves and never
    /// to each other.  Both forms are written as explicit escapes,
    /// because they look identical as literals and are silently
    /// merged by tools that normalize text.
    // The mix of NFC and NFD strings is intentional!
    #[allow(clippy::unicode_not_nfc)]
    #[test]
    fn str_to_path_tests()
    {
        let tests = [
            ("/home/user", PathBuf::from("/home/user")),
            ("/home/user/", PathBuf::from("/home/user/")),
            // NFC stays NFC.
            ("/home/u\u{e4}ser", PathBuf::from("/home/u\u{e4}ser")),
            // NFD stays NFD.
            ("/home/ua\u{308}ser", PathBuf::from("/home/ua\u{308}ser")),
            ("/home/uäßüöser", PathBuf::from("/home/uäßüöser")),
            ("/home/u\\\\ser", PathBuf::from("/home/u\\ser")),
            ("/home/u\\nser", PathBuf::from("/home/u\nser")),
            ("/home/u\\\\", PathBuf::from("/home/u\\")),
            ("/home/u\\n", PathBuf::from("/home/u\n")),
            ("/home/u\\115er", PathBuf::from("/home/user")),
            ("/home/u\\083er", PathBuf::from("/home/uSer")),
            ("/home/u\tser", PathBuf::from("/home/u\tser")),
            ("/home/u\rser", PathBuf::from("/home/u\rser")),
        ];

        let unequal_tests = [
            // NFC input must not become NFD …
            ("/home/u\u{e4}ser", PathBuf::from("/home/ua\u{308}ser")),
            // … and NFD input must not become NFC.
            ("/home/ua\u{308}ser", PathBuf::from("/home/u\u{e4}ser")),
        ];

        let failing_tests = [
            "\\\\ser",
            "\\nser",
            "home/user",
            "/home/u\\83er",
            "/home/user\\83",
            "/home/u\\8ser",
            "/home/user\\8",
            "\\83ser",
            "\\83",
            "\\8ser",
            "\\8",
            "/home/us\\ter",
            "/home/user\\",
        ];

        for (input, output) in tests
        {
            assert_eq!(str_to_path(input).unwrap(), output);
        }
        for (input, output) in unequal_tests
        {
            assert_ne!(str_to_path(input).unwrap(), output);
        }
        for input in failing_tests
        {
            assert!(str_to_path(input).is_err());
        }
    }
}