milli-core 1.15.1

Meilisearch HTTP server
Documentation
use super::matching_words::WordId;
use super::{Match, MatchPosition};

struct MatchIntervalWithScore {
    interval: [usize; 2],
    score: [i16; 3],
}

// count score for phrases
fn tally_phrase_scores(fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16) {
    let words_in_phrase_minus_one = (lwp - fwp) as i16;
    // will always be ordered, so +1 for each space between words
    *order_score += words_in_phrase_minus_one;
    // distance will always be 1, so -1 for each space between words
    *distance_score -= words_in_phrase_minus_one;
}

/// Compute the score of a match interval:
/// 1) count unique matches
/// 2) calculate distance between matches
/// 3) count ordered matches
fn get_interval_score(matches: &[Match]) -> [i16; 3] {
    let mut ids: Vec<WordId> = Vec::with_capacity(matches.len());
    let mut order_score = 0;
    let mut distance_score = 0;

    let mut iter = matches.iter().peekable();
    while let Some(m) = iter.next() {
        if let Some(next_match) = iter.peek() {
            // if matches are ordered
            if next_match.ids.iter().min() > m.ids.iter().min() {
                order_score += 1;
            }

            let m_last_word_pos = match m.position {
                MatchPosition::Word { word_position, .. } => word_position,
                MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => {
                    tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
                    lwp
                }
            };
            let next_match_first_word_pos = next_match.get_first_word_pos();

            // compute distance between matches
            distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16;
        } else if let MatchPosition::Phrase { word_positions: [fwp, lwp], .. } = m.position {
            // in case last match is a phrase, count score for its words
            tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
        }

        ids.extend(m.ids.iter());
    }

    ids.sort_unstable();
    ids.dedup();
    let uniq_score = ids.len() as i16;

    // rank by unique match count, then by distance between matches, then by ordered match count.
    [uniq_score, distance_score, order_score]
}

/// Returns the first and last match where the score computed by match_interval_score is the best.
pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match; 2] {
    if matches.is_empty() {
        panic!("`matches` should not be empty at this point");
    }

    // positions of the first and the last match of the best matches interval in `matches`.
    let mut best_interval: Option<MatchIntervalWithScore> = None;

    let mut save_best_interval = |interval_first, interval_last| {
        let interval_score = get_interval_score(&matches[interval_first..=interval_last]);
        let is_interval_score_better = &best_interval
            .as_ref()
            .is_none_or(|MatchIntervalWithScore { score, .. }| interval_score > *score);

        if *is_interval_score_better {
            best_interval = Some(MatchIntervalWithScore {
                interval: [interval_first, interval_last],
                score: interval_score,
            });
        }
    };

    // we compute the matches interval if we have at least 2 matches.
    // current interval positions.
    let mut interval_first = 0;
    let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();

    for (index, next_match) in matches.iter().enumerate() {
        // if next match would make interval gross more than crop_size,
        // we compare the current interval with the best one,
        // then we increase `interval_first` until next match can be added.
        let next_match_last_word_pos = next_match.get_last_word_pos();

        // if the next match would mean that we pass the crop size window,
        // we take the last valid match, that didn't pass this boundry, which is `index` - 1,
        // and calculate a score for it, and check if it's better than our best so far
        if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size {
            // if index is 0 there is no last viable match
            if index != 0 {
                let interval_last = index - 1;
                // keep interval if it's the best
                save_best_interval(interval_first, interval_last);
            }

            // advance start of the interval while interval is longer than crop_size.
            loop {
                interval_first += 1;
                if interval_first == matches.len() {
                    interval_first -= 1;
                    break;
                }

                interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();

                if interval_first_match_first_word_pos > next_match_last_word_pos
                    || next_match_last_word_pos - interval_first_match_first_word_pos < crop_size
                {
                    break;
                }
            }
        }
    }

    // compute the last interval score and compare it to the best one.
    let interval_last = matches.len() - 1;
    // if it's the last match with itself, we need to make sure it's
    // not a phrase longer than the crop window
    if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
        save_best_interval(interval_first, interval_last);
    }

    // if none of the matches fit the criteria above, default to the first one
    best_interval.map_or(
        [&matches[0], &matches[0]],
        |MatchIntervalWithScore { interval: [first, last], .. }| [&matches[first], &matches[last]],
    )
}