iwes 0.1.8 - Docs.rs

use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher};
use itertools::Itertools;
use liwe::{
    graph::{Graph, GraphContext},
    model::{
        node::{NodeIter, NodePointer},
        Key, NodeId,
    },
};
use rayon::prelude::*;

#[derive(Clone, Debug, Default)]
pub struct SearchPath {
    pub search_text: String,
    pub node_rank: usize,
    pub key: Key,
    pub root: bool,
    pub line: u32,
    pub title: String,
    pub parent_titles: Vec<String>,
}

#[derive(Clone, Default)]
pub struct SearchIndex {
    paths: Vec<SearchPath>,
}

impl SearchIndex {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn update(&mut self, graph: &Graph) {
        let graph_ctx: &Graph = graph;
        self.paths = graph
            .nodes()
            .par_iter()
            .filter(|graph_node| graph_node.is_section())
            .filter_map(|graph_node| {
                let node_id = graph_node.id();
                let node = graph_ctx.node(node_id);

                let parent_is_document = node.to_parent().map(|p| p.is_document()).unwrap_or(false);
                if !parent_is_document {
                    return None;
                }

                let key = graph_ctx.get_node_key(node_id)?;

                let title = graph_ctx.get_text(node_id).trim().to_string();

                if title.is_empty() {
                    return None;
                }

                let parent_titles: Vec<String> = graph_ctx
                    .get_inclusion_edges_to(&key)
                    .iter()
                    .filter_map(|ref_id| {
                        let parent_key = graph_ctx.node(*ref_id).to_document()?.document_key()?;
                        graph_ctx.get_ref_text(&parent_key)
                    })
                    .sorted()
                    .collect();

                let has_parents = !parent_titles.is_empty();

                Some(SearchPath {
                    search_text: render_search_text(&title, &parent_titles, &key),
                    node_rank: node_rank(graph_ctx, node_id),
                    key,
                    root: !has_parents,
                    line: graph_ctx.node_line_number(node_id).unwrap_or(0) as u32,
                    title,
                    parent_titles,
                })
            })
            .collect::<Vec<_>>()
            .into_iter()
            .sorted_by(|a, b| {
                b.node_rank
                    .cmp(&a.node_rank)
                    .then_with(|| a.key.cmp(&b.key))
                    .then_with(|| a.line.cmp(&b.line))
            })
            .unique_by(|p| (p.key.clone(), p.line))
            .collect::<Vec<_>>();
    }

    pub fn search(&self, query: &str) -> Vec<SearchPath> {
        let matcher = SkimMatcherV2::default();
        assert_eq!(None, matcher.fuzzy_match("abc", "abx"));

        self.paths
            .par_iter()
            .map(|path| {
                (
                    path,
                    matcher.fuzzy_match(&path.search_text, query).unwrap_or(0),
                )
            })
            .collect::<Vec<_>>()
            .into_iter()
            .sorted_by(|(path_a, rank_a), (path_b, rank_b)| {
                if query.is_empty() {
                    path_b
                        .node_rank
                        .cmp(&path_a.node_rank)
                        .then_with(|| path_a.search_text.len().cmp(&path_b.search_text.len()))
                        .then_with(|| path_a.key.cmp(&path_b.key))
                        .then_with(|| path_a.line.cmp(&path_b.line))
                } else {
                    rank_b
                        .cmp(rank_a)
                        .then_with(|| path_a.search_text.len().cmp(&path_b.search_text.len()))
                        .then_with(|| path_b.node_rank.cmp(&path_a.node_rank))
                        .then_with(|| path_a.key.cmp(&path_b.key))
                        .then_with(|| path_a.line.cmp(&path_b.line))
                }
            })
            .map(|(path, _)| path)
            .take(100)
            .cloned()
            .collect_vec()
    }

    pub fn paths(&self) -> Vec<SearchPath> {
        self.paths.clone()
    }
}

fn render_search_text(title: &str, parent_titles: &[String], key: &Key) -> String {
    let mut all_titles = vec![title.to_string()];
    all_titles.extend(parent_titles.iter().cloned());
    all_titles.push(key.relative_path.to_string());
    all_titles
        .join(" ")
        .chars()
        .filter(|c| c.is_alphabetic() || c.is_numeric() || c.is_whitespace() || *c == '/')
        .collect::<String>()
}

fn node_rank(graph: &Graph, id: NodeId) -> usize {
    use liwe::model::node::NodePointer;

    if !graph.node(id).is_primary_section() {
        return 0;
    }

    let inline_refs_count = graph
        .node(id)
        .to_document()
        .and_then(|doc| doc.document_key())
        .map(|key| graph.get_reference_edges_to(&key).len())
        .unwrap_or(0);

    let block_refs_count = graph
        .node(id)
        .to_document()
        .and_then(|doc| doc.document_key())
        .map(|key| graph.get_inclusion_edges_to(&key).len())
        .unwrap_or(0);

    inline_refs_count + block_refs_count
}