minutes-core 0.18.7

Core library for minutes — audio capture, transcription, and meeting memory
use crate::knowledge::slugify;
use crate::markdown::EntityRef;
use std::collections::{HashMap, HashSet};

/// The identity a raw person name resolves to.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct PersonIdentity {
    /// Slugified identifier for the person.
    pub slug: String,
    /// Display name (the trimmed label, or the trimmed raw input when no entity matched).
    pub name: String,
    /// Alternate names for the person; empty for identities synthesized from a raw name.
    pub aliases: Vec<String>,
}

/// A known person together with the scores used to break name collisions.
#[derive(Clone, Debug)]
struct PersonCandidate {
    /// Identity returned when this candidate wins a lookup.
    identity: PersonIdentity,
    /// The larger of the candidate's exact-key count and slug-key count.
    alias_score: usize,
    /// Starts at 1 and grows by one for every context name that resolves to this candidate.
    support_score: usize,
}

/// Maps raw person names onto canonical identities built from known entities.
#[derive(Clone, Debug, Default)]
pub(crate) struct PersonCanonicalizer {
    /// ASCII-lowercased slug / label / alias -> indices into `candidates`.
    exact_matches: HashMap<String, Vec<usize>>,
    /// Slugified slug / label / alias -> indices into `candidates`.
    slug_matches: HashMap<String, Vec<usize>>,
    /// Every known person, in the order the entities were supplied.
    candidates: Vec<PersonCandidate>,
}

impl PersonCanonicalizer {
    /// Builds a canonicalizer from the known person entities and the raw names
    /// observed in the surrounding context.
    ///
    /// Entities whose slug is empty after `slugify` are skipped. Every other
    /// entity becomes a candidate with a support score of 1 and is indexed
    /// under each of its exact keys and slug keys. Each context name that
    /// resolves to a single best candidate (exact key first, slug second) then
    /// adds one point of support to that candidate.
    ///
    /// Context names are applied in iteration order; support added by an
    /// earlier name is already visible when a later name is matched.
    pub(crate) fn new<I, S>(entities: &[EntityRef], context_names: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let mut canonicalizer = Self::default();

        for entity in entities {
            let Some(identity) = normalize_entity_identity(entity) else {
                continue;
            };

            let exact_keys = exact_keys_for_entity(entity);
            let slug_keys = slug_keys_for_entity(entity);

            let idx = canonicalizer.candidates.len();
            canonicalizer.candidates.push(PersonCandidate {
                identity,
                alias_score: exact_keys.len().max(slug_keys.len()),
                support_score: 1,
            });

            for key in exact_keys {
                canonicalizer
                    .exact_matches
                    .entry(key)
                    .or_default()
                    .push(idx);
            }
            for key in slug_keys {
                canonicalizer.slug_matches.entry(key).or_default().push(idx);
            }
        }

        // Context names are consumed lazily: nothing here needs them buffered
        // or owned, so no intermediate `Vec<String>` is built.
        for raw in context_names {
            let Some((_, name)) = normalize_raw_name(raw.as_ref()) else {
                continue;
            };

            let exact_hit = canonicalizer.pick_best_index(canonicalizer.lookup_exact(name));
            if let Some(idx) = exact_hit {
                canonicalizer.candidates[idx].support_score += 1;
                continue;
            }

            let slug = slugify(name);
            if slug.is_empty() {
                continue;
            }

            let slug_hit = canonicalizer.pick_best_index(canonicalizer.lookup_slug(&slug));
            if let Some(idx) = slug_hit {
                canonicalizer.candidates[idx].support_score += 1;
            }
        }

        canonicalizer
    }

    /// Resolves a raw name to an identity.
    ///
    /// Lookup order: exact (ASCII case-insensitive) key, then slug key. When
    /// neither yields a single best candidate, an identity is synthesized from
    /// the input itself (slugified name, trimmed name, no aliases).
    ///
    /// Returns `None` when `raw` is blank after trimming whitespace and leading
    /// `@`, or when it has no exact match and slugifies to an empty string.
    pub(crate) fn resolve(&self, raw: &str) -> Option<PersonIdentity> {
        let (_, name) = normalize_raw_name(raw)?;

        if let Some(idx) = self.pick_best_index(self.lookup_exact(name)) {
            return Some(self.candidates[idx].identity.clone());
        }

        let slug = slugify(name);
        if slug.is_empty() {
            return None;
        }

        if let Some(idx) = self.pick_best_index(self.lookup_slug(&slug)) {
            return Some(self.candidates[idx].identity.clone());
        }

        // No unambiguous candidate: fall back to an identity derived from the input.
        Some(PersonIdentity {
            slug,
            name: name.to_string(),
            aliases: Vec::new(),
        })
    }

    /// Resolves an entity by its label, then by its slug, and finally by
    /// normalizing the entity itself.
    ///
    /// Because [`Self::resolve`] synthesizes a fallback identity for any name
    /// that slugifies to something non-empty, the slug and normalization steps
    /// only run when the label produces `None`.
    // NOTE(review): a label that matches no candidate therefore wins over a
    // slug that would have matched one — confirm this precedence is intended.
    pub(crate) fn resolve_entity(&self, entity: &EntityRef) -> Option<PersonIdentity> {
        self.resolve(&entity.label)
            .or_else(|| self.resolve(&entity.slug))
            .or_else(|| normalize_entity_identity(entity))
    }

    /// Candidate indices registered under the ASCII-lowercased form of `raw`.
    fn lookup_exact(&self, raw: &str) -> &[usize] {
        match self.exact_matches.get(&raw.to_ascii_lowercase()) {
            Some(indices) => indices.as_slice(),
            None => &[],
        }
    }

    /// Candidate indices registered under `slug`.
    fn lookup_slug(&self, slug: &str) -> &[usize] {
        match self.slug_matches.get(slug) {
            Some(indices) => indices.as_slice(),
            None => &[],
        }
    }

    /// Picks the single best candidate among `indices`, or `None` when the
    /// choice is ambiguous (or `indices` is empty).
    ///
    /// Candidates are ranked by `(support_score, alias_score)`. When a
    /// candidate exactly ties the current leader the result becomes ambiguous,
    /// and only a later candidate with strictly more *support* clears that; a
    /// larger alias count alone does not break an established tie.
    ///
    /// Panics if an index is out of bounds for `self.candidates`.
    fn pick_best_index(&self, indices: &[usize]) -> Option<usize> {
        // (index, support, alias) of the current leader.
        let mut best: Option<(usize, usize, usize)> = None;
        let mut ambiguous = false;

        for &idx in indices {
            let candidate = &self.candidates[idx];
            let support = candidate.support_score;
            let alias = candidate.alias_score;

            let Some((_, best_support, best_alias)) = best else {
                best = Some((idx, support, alias));
                continue;
            };

            if ambiguous {
                // Support is the primary signal, so strictly stronger support
                // must win regardless of alias count. Requiring the alias score
                // to be higher as well made the outcome depend on the order of
                // `indices`: a clear support leader listed after a tie was lost.
                if support > best_support {
                    best = Some((idx, support, alias));
                    ambiguous = false;
                }
            } else if (support, alias) > (best_support, best_alias) {
                // Tuple comparison is lexicographic: support first, then alias.
                best = Some((idx, support, alias));
            } else if (support, alias) == (best_support, best_alias) {
                ambiguous = true;
            }
        }

        if ambiguous {
            None
        } else {
            best.map(|(idx, _, _)| idx)
        }
    }
}

/// Strips surrounding whitespace and any leading `@` characters from `raw`.
///
/// Returns the untouched input paired with the cleaned name, or `None` when
/// nothing remains after cleaning.
fn normalize_raw_name(raw: &str) -> Option<(&str, &str)> {
    let name = raw.trim().trim_start_matches('@').trim();
    (!name.is_empty()).then_some((raw, name))
}

/// Derives a [`PersonIdentity`] directly from an entity.
///
/// Returns `None` when the entity's slug is empty after `slugify`. The name is
/// the trimmed label, or the trimmed slug when the label is blank; aliases are
/// trimmed and de-duplicated.
fn normalize_entity_identity(entity: &EntityRef) -> Option<PersonIdentity> {
    let slug = Some(slugify(&entity.slug)).filter(|slug| !slug.is_empty())?;

    let name = match entity.label.trim() {
        "" => entity.slug.trim(),
        label => label,
    };

    Some(PersonIdentity {
        slug,
        name: name.to_string(),
        aliases: unique_aliases(entity.aliases.iter().cloned()),
    })
}

/// Collects the exact-match keys for an entity: its slug, label, and aliases,
/// each trimmed and ASCII-lowercased, with blank values dropped.
fn exact_keys_for_entity(entity: &EntityRef) -> HashSet<String> {
    let fixed = [entity.slug.as_str(), entity.label.as_str()];

    fixed
        .iter()
        .copied()
        .chain(entity.aliases.iter().map(String::as_str))
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .map(str::to_ascii_lowercase)
        .collect()
}

fn slug_keys_for_entity(entity: &EntityRef) -> HashSet<String> {
    let mut keys = HashSet::new();

    for value in std::iter::once(entity.slug.as_str())
        .chain(std::iter::once(entity.label.as_str()))
        .chain(entity.aliases.iter().map(String::as_str))
    {
        let slug = slugify(value);
        if !slug.is_empty() {
            keys.insert(slug);
        }
    }

    keys
}

/// Trims each alias, drops blank ones, and removes ASCII case-insensitive
/// duplicates, keeping the first spelling seen and the original order.
fn unique_aliases<I>(aliases: I) -> Vec<String>
where
    I: IntoIterator<Item = String>,
{
    let mut seen = HashSet::new();

    aliases
        .into_iter()
        .filter_map(|alias| {
            let trimmed = alias.trim();
            // `insert` returns false for a spelling already recorded.
            let is_new = !trimmed.is_empty() && seen.insert(trimmed.to_ascii_lowercase());
            is_new.then(|| trimmed.to_string())
        })
        .collect()
}

// Unit tests for name resolution and for the collision tie-breaking rules.
#[cfg(test)]
mod tests {
    use super::*;

    // Single entity whose aliases differ only by case ("Dan" / "dan").
    fn dan_entities() -> Vec<EntityRef> {
        vec![EntityRef {
            slug: "dan-benamoz".into(),
            label: "Dan Benamoz".into(),
            aliases: vec!["Dan".into(), "dan".into()],
        }]
    }

    // An alias resolves to the owning entity's slug and label.
    #[test]
    fn resolves_raw_name_through_alias_table() {
        let resolver = PersonCanonicalizer::new(&dan_entities(), ["Dan"]);
        let identity = resolver.resolve("Dan").expect("resolved identity");
        assert_eq!(identity.slug, "dan-benamoz");
        assert_eq!(identity.name, "Dan Benamoz");
    }

    // With no entities at all, the identity is synthesized from the raw name.
    #[test]
    fn falls_back_to_raw_slug_when_no_entity_matches() {
        let resolver = PersonCanonicalizer::new(&[], ["Dan"]);
        let identity = resolver.resolve("Dan").expect("fallback identity");
        assert_eq!(identity.slug, "dan");
        assert_eq!(identity.name, "Dan");
    }

    // Both entities share the alias "Dan"; the context also names "Dan Benamoz"
    // and "DB", which only the first entity carries, so the first one must win.
    #[test]
    fn chooses_stronger_context_when_aliases_collide() {
        let resolver = PersonCanonicalizer::new(
            &[
                EntityRef {
                    slug: "dan-benamoz".into(),
                    label: "Dan Benamoz".into(),
                    aliases: vec!["Dan".into(), "DB".into(), "Daniel".into()],
                },
                EntityRef {
                    slug: "dan-smith".into(),
                    label: "Dan Smith".into(),
                    aliases: vec!["Dan".into()],
                },
            ],
            ["Dan", "Dan Benamoz", "DB"],
        );

        let identity = resolver.resolve("Dan").expect("collision resolution");
        assert_eq!(identity.slug, "dan-benamoz");
    }

    // Exact-key lookup ignores ASCII case.
    #[test]
    fn case_insensitive_matching_works() {
        let resolver = PersonCanonicalizer::new(&dan_entities(), ["DAN"]);
        let identity = resolver.resolve("DAN").expect("case-insensitive identity");
        assert_eq!(identity.slug, "dan-benamoz");
    }

    // Two entities with the same number of keys and no distinguishing context:
    // neither is chosen and the raw-name fallback identity is returned instead.
    #[test]
    fn ambiguous_collision_without_stronger_signal_falls_back() {
        let resolver = PersonCanonicalizer::new(
            &[
                EntityRef {
                    slug: "dan-benamoz".into(),
                    label: "Dan Benamoz".into(),
                    aliases: vec!["Dan".into()],
                },
                EntityRef {
                    slug: "dan-smith".into(),
                    label: "Dan Smith".into(),
                    aliases: vec!["Dan".into()],
                },
            ],
            ["Dan"],
        );

        let identity = resolver.resolve("Dan").expect("ambiguous fallback");
        assert_eq!(identity.slug, "dan");
        assert_eq!(identity.name, "Dan");
    }

    // Builds a bare candidate with the given scores; slug and name merely
    // encode the scores so failures are easy to read.
    fn candidate(alias_score: usize, support_score: usize) -> PersonCandidate {
        PersonCandidate {
            identity: PersonIdentity {
                slug: format!("candidate-{alias_score}-{support_score}"),
                name: format!("Candidate {alias_score}/{support_score}"),
                aliases: vec![],
            },
            alias_score,
            support_score,
        }
    }

    // After candidates 0 and 1 tie, a third candidate with a higher alias score
    // but the same support must not clear the ambiguity.
    #[test]
    fn pick_best_index_keeps_ambiguity_latched_after_equal_top_tie() {
        let canonicalizer = PersonCanonicalizer {
            candidates: vec![candidate(2, 1), candidate(2, 1), candidate(3, 1)],
            ..Default::default()
        };

        assert_eq!(canonicalizer.pick_best_index(&[0, 1, 2]), None);
    }

    // After the same tie, a third candidate that is higher on both alias score
    // and support is selected.
    #[test]
    fn pick_best_index_returns_strictly_higher_scoring_candidate() {
        let canonicalizer = PersonCanonicalizer {
            candidates: vec![candidate(2, 1), candidate(2, 1), candidate(3, 2)],
            ..Default::default()
        };

        assert_eq!(canonicalizer.pick_best_index(&[0, 1, 2]), Some(2));
    }

    // Three identical candidates: nothing can be chosen.
    #[test]
    fn pick_best_index_returns_none_when_all_top_candidates_tie() {
        let canonicalizer = PersonCanonicalizer {
            candidates: vec![candidate(2, 1), candidate(2, 1), candidate(2, 1)],
            ..Default::default()
        };

        assert_eq!(canonicalizer.pick_best_index(&[0, 1, 2]), None);
    }
}