cruxlines 0.2.1

Ranks symbol definitions by cross-file references using tree-sitter.
Documentation
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};

use tree_sitter::{Node, Parser, Tree};

/// A named position inside a source file.
///
/// Used both for symbol definitions and for symbol usages.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Location {
    /// File the identifier was found in.
    pub path: PathBuf,
    /// Line of the identifier; 1-based when built by `location_from_node`.
    pub line: usize,
    /// Column of the identifier; 1-based when built by `location_from_node`.
    pub column: usize,
    /// Text of the identifier as it appears in the source.
    pub name: String,
}

/// A link from a symbol usage to a definition that carries the same name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ReferenceEdge {
    /// Where the symbol is defined.
    pub definition: Location,
    /// Where the symbol is used.
    pub usage: Location,
    /// Ecosystem whose symbol table produced this edge.
    pub ecosystem: crate::languages::Ecosystem,
}

/// Outcome of a `find_references` scan.
///
/// Derives `Debug` and `Clone` so callers can log and duplicate scan results; both
/// field types already support these traits.
#[derive(Debug, Clone)]
pub struct ReferenceScan {
    /// One edge for every (definition, usage) pair that shares a name within an ecosystem.
    pub edges: Vec<ReferenceEdge>,
    /// Text of the source line each definition sits on, with trailing whitespace removed.
    pub definition_lines: HashMap<Location, String>,
}

/// Symbols gathered from all files that belong to one ecosystem.
struct EcosystemSymbols {
    /// Definition locations grouped by symbol name.
    definitions: HashMap<String, Vec<Location>>,
    /// `(path, line, column)` of every recorded definition; lets the scan skip usages
    /// that sit exactly on a definition.
    definition_positions: HashSet<(PathBuf, usize, usize)>,
    /// Every usage location emitted for the ecosystem's files.
    references: Vec<Location>,
    /// Source line text for each recorded definition.
    definition_lines: HashMap<Location, String>,
}

pub fn find_references<I, P>(files: I) -> Result<ReferenceScan, crate::io::CruxlinesError>
where
    I: IntoIterator<Item = Result<(P, String), crate::io::CruxlinesError>>,
    P: Into<PathBuf>,
{
    let mut symbols_by_ecosystem: HashMap<crate::languages::Ecosystem, EcosystemSymbols> =
        HashMap::new();
    for item in files {
        let (path, source) = item?;
        let path = path.into();
        let Some(language) = crate::languages::language_for_path(&path) else {
            continue;
        };
        let Some(tree) = parse_tree(&language, &source) else {
            continue;
        };
        let ecosystem = crate::languages::ecosystem_for_language(language);
        let entry = symbols_by_ecosystem.entry(ecosystem).or_insert_with(|| EcosystemSymbols {
            definitions: HashMap::new(),
            definition_positions: HashSet::new(),
            references: Vec::new(),
            definition_lines: HashMap::new(),
        });
        match language {
            crate::languages::Language::Java => crate::languages::java::emit_definitions(
                &path,
                &source,
                &tree,
                |location| {
                    record_definition(
                        location.clone(),
                        &mut entry.definitions,
                        &mut entry.definition_positions,
                    );
                    record_definition_line(&location, &source, &mut entry.definition_lines);
                },
            ),
            crate::languages::Language::Kotlin => crate::languages::kotlin::emit_definitions(
                &path,
                &source,
                &tree,
                |location| {
                    record_definition(
                        location.clone(),
                        &mut entry.definitions,
                        &mut entry.definition_positions,
                    );
                    record_definition_line(&location, &source, &mut entry.definition_lines);
                },
            ),
            crate::languages::Language::Python => crate::languages::python::emit_definitions(
                &path,
                &source,
                &tree,
                |location| {
                    record_definition(
                        location.clone(),
                        &mut entry.definitions,
                        &mut entry.definition_positions,
                    );
                    record_definition_line(&location, &source, &mut entry.definition_lines);
                },
            ),
            crate::languages::Language::JavaScript
            | crate::languages::Language::TypeScript
            | crate::languages::Language::TypeScriptReact => {
                crate::languages::javascript::emit_definitions(
                    &path,
                    &source,
                    &tree,
                    |location| {
                        record_definition(
                            location.clone(),
                            &mut entry.definitions,
                            &mut entry.definition_positions,
                        );
                        record_definition_line(&location, &source, &mut entry.definition_lines);
                    },
                )
            }
            crate::languages::Language::Rust => crate::languages::rust::emit_definitions(
                &path,
                &source,
                &tree,
                |location| {
                    record_definition(
                        location.clone(),
                        &mut entry.definitions,
                        &mut entry.definition_positions,
                    );
                    record_definition_line(&location, &source, &mut entry.definition_lines);
                },
            ),
        }
        match language {
            crate::languages::Language::Java => crate::languages::java::emit_references(
                &path,
                &source,
                &tree,
                |location| entry.references.push(location),
            ),
            crate::languages::Language::Kotlin => crate::languages::kotlin::emit_references(
                &path,
                &source,
                &tree,
                |location| entry.references.push(location),
            ),
            crate::languages::Language::Python => crate::languages::python::emit_references(
                &path,
                &source,
                &tree,
                |location| entry.references.push(location),
            ),
            crate::languages::Language::JavaScript
            | crate::languages::Language::TypeScript
            | crate::languages::Language::TypeScriptReact => {
                crate::languages::javascript::emit_references(
                    &path,
                    &source,
                    &tree,
                    |location| entry.references.push(location),
                )
            }
            crate::languages::Language::Rust => crate::languages::rust::emit_references(
                &path,
                &source,
                &tree,
                |location| entry.references.push(location),
            ),
        }
    }

    let mut edges = Vec::new();
    let mut definition_lines = HashMap::new();
    for (ecosystem, symbols) in &symbols_by_ecosystem {
        for reference in &symbols.references {
            record_reference(
                reference.clone(),
                *ecosystem,
                &symbols.definitions,
                &symbols.definition_positions,
                &mut edges,
            );
        }
        for (location, line) in &symbols.definition_lines {
            definition_lines
                .entry(location.clone())
                .or_insert_with(|| line.clone());
        }
    }

    Ok(ReferenceScan {
        edges,
        definition_lines,
    })
}

/// Parses `source` with the tree-sitter grammar for `language`.
///
/// Returns `None` when the grammar cannot be loaded into the parser or when the
/// parser does not produce a tree.
fn parse_tree(language: &crate::languages::Language, source: &str) -> Option<Tree> {
    let grammar = crate::languages::tree_sitter_language(*language);
    let mut parser = Parser::new();
    match parser.set_language(&grammar) {
        Ok(()) => parser.parse(source, None),
        Err(_) => None,
    }
}

/// Calls `visit` on every node of `tree`, starting with the root.
///
/// Traversal is depth-first over an explicit stack. Children are pushed in order and
/// popped from the end, so among siblings the last child is visited first.
pub(crate) fn walk_tree(tree: &Tree, mut visit: impl FnMut(Node)) {
    let mut pending = Vec::new();
    pending.push(tree.root_node());
    while let Some(current) = pending.pop() {
        visit(current);
        let children = (0..current.child_count()).filter_map(|index| current.child(index));
        pending.extend(children);
    }
}

/// Reports every `identifier` node at or below `node` to `on_ident`.
///
/// A node is reported only if its kind is `"identifier"` and its text can be read
/// from `source` as UTF-8. Identifier children are reported while their parent is
/// being scanned and are never descended into; all other children are queued for a
/// later scan.
pub(crate) fn collect_identifier_nodes<F>(node: Node, source: &str, mut on_ident: F)
where
    F: FnMut(Node),
{
    let bytes = source.as_bytes();

    // The starting node itself may be an identifier.
    if node.kind() == "identifier" && node.utf8_text(bytes).is_ok() {
        on_ident(node);
    }

    let mut pending = vec![node];
    while let Some(parent) = pending.pop() {
        for child in (0..parent.child_count()).filter_map(|index| parent.child(index)) {
            if child.kind() != "identifier" {
                pending.push(child);
                continue;
            }
            if child.utf8_text(bytes).is_ok() {
                on_ident(child);
            }
        }
    }
}

/// Builds a [`Location`] for `node`, using the node's text as the name.
///
/// Returns `None` when the node's text cannot be read from `source` as UTF-8.
pub(crate) fn location_from_node(path: &Path, source: &str, node: Node) -> Option<Location> {
    let text = node.utf8_text(source.as_bytes()).ok()?;
    let (line, column) = position(node);
    let location = Location {
        path: path.to_path_buf(),
        line,
        column,
        name: text.to_owned(),
    };
    Some(location)
}

/// Registers `location` as a definition of the symbol it names.
///
/// The location is appended to the list kept for `location.name` unless that list
/// already holds an entry with the same path, line and column. Its position is added
/// to `definition_positions` in either case.
fn record_definition(
    location: Location,
    definitions: &mut HashMap<String, Vec<Location>>,
    definition_positions: &mut HashSet<(PathBuf, usize, usize)>,
) {
    let same_name = definitions.entry(location.name.clone()).or_default();
    let already_known = same_name.iter().any(|known| {
        known.line == location.line
            && known.column == location.column
            && known.path == location.path
    });
    // Build the position key before the location is possibly moved into the list.
    let position = (location.path.clone(), location.line, location.column);
    if !already_known {
        same_name.push(location);
    }
    definition_positions.insert(position);
}

/// Adds one edge to `edges` for every definition that shares the usage's name.
///
/// Nothing is added when the usage sits exactly on a recorded definition position,
/// or when no definition with that name exists.
fn record_reference(
    location: Location,
    ecosystem: crate::languages::Ecosystem,
    definitions: &HashMap<String, Vec<Location>>,
    definition_positions: &HashSet<(PathBuf, usize, usize)>,
    edges: &mut Vec<ReferenceEdge>,
) {
    // A definition's own identifier is not a usage of itself.
    let usage_position = (location.path.clone(), location.line, location.column);
    if definition_positions.contains(&usage_position) {
        return;
    }
    let Some(candidates) = definitions.get(&location.name) else {
        return;
    };
    edges.extend(candidates.iter().map(|definition| ReferenceEdge {
        definition: definition.clone(),
        usage: location.clone(),
        ecosystem,
    }));
}

/// Returns the `(line, column)` at which `node` starts, each one more than the row and
/// column reported by tree-sitter.
fn position(node: Node) -> (usize, usize) {
    let start = node.start_position();
    let line = start.row + 1;
    let column = start.column + 1;
    (line, column)
}

/// Stores the text of the source line that `location` points at.
///
/// `location.line` is treated as 1-based. The stored text has trailing whitespace
/// removed and is empty when `source` has no such line. A location that already has
/// an entry in `lines` is left unchanged.
fn record_definition_line(
    location: &Location,
    source: &str,
    lines: &mut HashMap<Location, String>,
) {
    if !lines.contains_key(location) {
        let index = location.line.saturating_sub(1);
        let text = match source.lines().nth(index) {
            Some(raw) => raw.trim_end(),
            None => "",
        };
        lines.insert(location.clone(), text.to_owned());
    }
}

#[cfg(test)]
mod tests {
    use super::walk_tree;
    use tree_sitter::Parser;

    /// Walking a one-statement Python module must reach both the root node and an
    /// identifier leaf.
    #[test]
    fn walk_tree_visits_nodes() {
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_python::LANGUAGE.into())
            .expect("set language");
        let tree = parser.parse("x = 1\n", None).expect("parse");

        let mut visited: Vec<&'static str> = Vec::new();
        walk_tree(&tree, |node| visited.push(node.kind()));

        assert!(visited.contains(&"module"));
        assert!(visited.contains(&"identifier"));
    }
}