// onefetch 2.27.1
//
// Command-line Git information tool
// Documentation
use crate::info::utils::info_field::InfoField;
use anyhow::{Result, bail};
use askalono::{Store, TextData};
use onefetch_manifest::Manifest;
use serde::Serialize;
use std::path::Path;
use std::{ffi::OsStr, fs};

// File-name prefixes that mark a directory entry as a candidate license file
// (see `is_license_file`, which checks whether a name starts with one of these).
const LICENSE_FILES: [&str; 3] = ["LICENSE", "LICENCE", "COPYING"];

// License store cache embedded into the binary at compile time from
// `resources/license.cache.zstd` under the crate's manifest directory.
// NOTE(review): presumably zstd-compressed, going by the file extension.
static CACHE_DATA: &[u8] = include_bytes!(concat!(
    env!("CARGO_MANIFEST_DIR"),
    "/resources/license.cache.zstd"
));
// Minimum match score a store analysis must reach for `Detector::analyze`
// to report the matched license name.
const MIN_THRESHOLD: f32 = 0.8;

/// Detects the license of a repository by matching the text of its license
/// files against the embedded askalono license store.
pub struct Detector {
    /// License store loaded from `CACHE_DATA`.
    store: Store,
}

impl Detector {
    /// Builds a detector from the license cache embedded in the binary.
    ///
    /// # Errors
    ///
    /// Returns an error when the embedded cache cannot be loaded into a
    /// [`Store`].
    pub fn new() -> Result<Self> {
        match Store::from_cache(CACHE_DATA) {
            Ok(store) => Ok(Self { store }),
            Err(e) => bail!("Could not initialize the license detector: {}", e),
        }
    }

    /// Returns the license of the repository located at `dir`.
    ///
    /// A non-empty license declared in `manifest` takes precedence. Otherwise
    /// every regular file directly inside `dir` whose name looks like a
    /// license file is analyzed, and the distinct matched license names are
    /// returned sorted and joined with `", "` (an empty string when nothing
    /// matched).
    ///
    /// # Errors
    ///
    /// Returns an error when `dir` cannot be listed. Individual directory
    /// entries or files that cannot be read are skipped.
    fn get_license(&self, dir: &Path, manifest: Option<&Manifest>) -> Result<String> {
        // The manifest wins whenever it declares a non-empty license.
        if let Some(declared) = manifest
            .and_then(|m| m.license.as_deref())
            .filter(|license| !license.is_empty())
        {
            return Ok(declared.to_owned());
        }

        let mut licenses: Vec<String> = fs::read_dir(dir)?
            .filter_map(std::result::Result::ok)
            .map(|entry| entry.path())
            .filter(|path| {
                path.is_file()
                    && path
                        .file_name()
                        .map(OsStr::to_string_lossy)
                        .is_some_and(is_license_file)
            })
            .filter_map(|path| {
                // Skip files that cannot be read as UTF-8 text rather than
                // running the matcher on an empty string.
                let contents = fs::read_to_string(path).ok()?;
                self.analyze(&contents)
            })
            .collect();

        // Sorting first makes `dedup` remove every duplicate, and keeps the
        // output independent of directory iteration order.
        licenses.sort_unstable();
        licenses.dedup();
        Ok(licenses.join(", "))
    }

    /// Matches `text` against the license store.
    ///
    /// Returns the name of the matched license when its score reaches
    /// `MIN_THRESHOLD`, and `None` otherwise.
    fn analyze(&self, text: &str) -> Option<String> {
        let matched = self.store.analyze(&TextData::from(text));
        (matched.score >= MIN_THRESHOLD).then(|| matched.name.into())
    }
}

/// Returns `true` when `file_name` starts with one of the `LICENSE_FILES`
/// prefixes.
///
/// The comparison ignores ASCII case, so names such as `license.md` or
/// `License.txt` are recognized in addition to the upper-case spellings.
fn is_license_file<S: AsRef<str>>(file_name: S) -> bool {
    let file_name = file_name.as_ref();
    LICENSE_FILES.iter().any(|&name| {
        // `get` yields `None` when the name is shorter than the prefix or the
        // cut would fall inside a multi-byte character; neither can match.
        file_name
            .get(..name.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(name))
    })
}

/// Info field holding the license of a repository.
#[derive(Serialize)]
pub struct LicenseInfo {
    /// License string as produced by `Detector::get_license`.
    pub license: String,
}

impl LicenseInfo {
    /// Resolves the license for the repository at `repo_path`, passing the
    /// optional `manifest` through to the detector.
    ///
    /// # Errors
    ///
    /// Propagates any error from creating the detector or from the license
    /// lookup itself.
    pub fn new(repo_path: &Path, manifest: Option<&Manifest>) -> Result<Self> {
        let detector = Detector::new()?;
        detector
            .get_license(repo_path, manifest)
            .map(|license| Self { license })
    }
}

#[typetag::serialize]
impl InfoField for LicenseInfo {
    /// The license string held by this field.
    fn value(&self) -> String {
        self.license.clone()
    }

    /// The label for this field.
    fn title(&self) -> String {
        String::from("License")
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use onefetch_manifest::ManifestType;

    /// Without a manifest, detection over the current directory yields MIT.
    #[test]
    fn test_get_license() -> Result<()> {
        let detected = Detector::new()?.get_license(Path::new("."), None)?;
        assert_eq!(detected, "MIT");
        Ok(())
    }

    /// Every known prefix is accepted; an unrelated name is rejected.
    #[test]
    fn test_is_license_file() {
        assert!(LICENSE_FILES.iter().all(|name| is_license_file(name)));
        assert!(!is_license_file("NOT_LICENSE"));
    }

    /// The text of `LICENSE.md` in the current directory is matched as MIT.
    #[test]
    fn test_analyze() -> Result<()> {
        let detector = Detector::new()?;
        let text = fs::read_to_string("LICENSE.md")?;
        assert_eq!(detector.analyze(&text).as_deref(), Some("MIT"));
        Ok(())
    }

    /// A license declared in the manifest is returned as-is.
    #[test]
    fn should_read_from_manifest_first() -> Result<()> {
        let manifest = Manifest {
            manifest_type: ManifestType::Cargo,
            name: None,
            description: None,
            number_of_dependencies: 0,
            version: None,
            license: Some("LICENSE".into()),
        };
        let license_info = LicenseInfo::new(Path::new("."), Some(&manifest))?;
        assert_eq!(license_info.value(), "LICENSE");
        Ok(())
    }
}