readseek 0.2.14

structural source reader with stable line hashes
use crate::cache::Cache;
use crate::cli::{DefinitionCommand, ReferencesCommand};
use crate::lang::{AnalysisEngine, Language};
use crate::output::is_identifier_byte;
use crate::paths::{command_paths, definition_candidate_paths};
use crate::source::{
    SourceFile, Symbol, source_from_text, source_map_with_cache, symbol_at_line_in_map,
};
use crate::{
    CompactLocation, CompactOutput, DefinitionLocation, DefinitionOutput, ReferenceLocation,
    ReferencesOutput, symbols,
};
use anyhow::{Context, Result, bail};
use serde::Deserialize;
use std::fs;
use std::io::{self, Read as _};
use tree_sitter::{Node, Parser};

#[derive(Debug, Deserialize)]
struct IdentifyInput {
    identifier: Option<IdentifierInput>,
    symbol: Option<SymbolInput>,
}

#[derive(Debug, Deserialize)]
struct IdentifierInput {
    text: String,
}

#[derive(Debug, Deserialize)]
struct SymbolInput {
    qualified_name: String,
}

pub(crate) fn definition_output(command: &DefinitionCommand) -> Result<DefinitionOutput> {
    let name = definition_name(command)?;
    let search_name = definition_search_name(&name);
    let mut candidates = Vec::new();
    let mut macro_definitions = Vec::new();
    for path in definition_candidate_paths(command, search_name)? {
        let Ok(text) = fs::read_to_string(&path) else {
            continue;
        };
        if !text.contains(search_name) {
            continue;
        }

        candidates.push((path, text));
    }

    for (path, text) in &candidates {
        if !text
            .lines()
            .any(|line| macro_definition_name(line) == Some(search_name))
        {
            continue;
        }
        let Ok(source) = source_from_text(path, text, command.language, false, None) else {
            continue;
        };
        macro_definitions.extend(macro_definition_locations(&source, search_name));
    }

    if !macro_definitions.is_empty() {
        return Ok(DefinitionOutput {
            definitions: macro_definitions,
        });
    }

    let mut cache = Cache::new();
    let mut definitions = Vec::new();
    for (path, text) in candidates {
        let Ok(source) = source_from_text(&path, &text, command.language, false, None) else {
            continue;
        };
        let Ok(source_map) = source_map_with_cache(&source, &mut cache) else {
            continue;
        };
        for symbol in source_map.symbols {
            if symbol.qualified_name != name && symbol.name != search_name {
                continue;
            }
            let line = source
                .lines
                .get(symbol.start_line.saturating_sub(1))
                .context("definition symbol line is out of range")?;
            definitions.push(DefinitionLocation {
                file: source.path.clone(),
                language: source.detection.language,
                engine: source.detection.engine,
                file_hash: source.file_hash.clone(),
                line_hash: line.hash.clone(),
                text: line.text.clone(),
                symbol,
            });
        }
    }

    Ok(DefinitionOutput { definitions })
}

pub(crate) fn compact_definitions(output: &DefinitionOutput) -> CompactOutput {
    CompactOutput {
        locations: output
            .definitions
            .iter()
            .map(|definition| CompactLocation {
                file: definition.file.clone(),
                line: definition.symbol.start_line,
                column: 1,
                line_hash: definition.line_hash.clone(),
                text: definition.text.clone(),
                kind: Some(definition.symbol.kind.clone()),
                name: Some(definition.symbol.name.clone()),
                qualified_name: Some(definition.symbol.qualified_name.clone()),
            })
            .collect(),
    }
}

fn definition_name(command: &DefinitionCommand) -> Result<String> {
    match (command.name.as_ref(), command.stdin) {
        (Some(name), _) => Ok(name.clone()),
        (None, false) => bail!("definition requires a name or --stdin identify context"),
        (None, true) => definition_name_from_stdin(),
    }
}

fn definition_search_name(name: &str) -> &str {
    name.rsplit('.')
        .next()
        .filter(|part| !part.is_empty())
        .unwrap_or(name)
}

fn macro_definition_locations(source: &SourceFile, name: &str) -> Vec<DefinitionLocation> {
    if !matches!(source.detection.language, Language::C | Language::Cpp) {
        return Vec::new();
    }

    source
        .lines
        .iter()
        .filter(|line| macro_definition_name(&line.text) == Some(name))
        .map(|line| DefinitionLocation {
            file: source.path.clone(),
            language: source.detection.language,
            engine: source.detection.engine,
            file_hash: source.file_hash.clone(),
            symbol: Symbol {
                kind: "macro".to_owned(),
                name: name.to_owned(),
                qualified_name: name.to_owned(),
                start_line: line.number,
                end_line: line.number,
                start_hash: line.hash.clone(),
                end_hash: line.hash.clone(),
            },
            line_hash: line.hash.clone(),
            text: line.text.clone(),
        })
        .collect()
}

fn macro_definition_name(line: &str) -> Option<&str> {
    let rest = line.trim_start().strip_prefix("#define")?;
    if !rest.starts_with(char::is_whitespace) {
        return None;
    }

    let rest = rest.trim_start();
    let name_len = rest
        .find(|ch: char| !matches!(ch, 'A'..='Z' | 'a'..='z' | '0'..='9' | '_'))
        .unwrap_or(rest.len());
    if name_len == 0 {
        return None;
    }

    Some(&rest[..name_len])
}

fn definition_name_from_stdin() -> Result<String> {
    let mut text = String::new();
    io::stdin()
        .read_to_string(&mut text)
        .context("read identify context from stdin")?;
    let input: IdentifyInput = serde_json::from_str(&text).context("parse identify context")?;
    if let Some(identifier) = input.identifier {
        return Ok(identifier.text);
    }
    if let Some(symbol) = input.symbol {
        return Ok(symbol.qualified_name);
    }
    bail!("identify context has no symbol or identifier")
}

pub(crate) fn references_output(command: &ReferencesCommand) -> Result<ReferencesOutput> {
    validate_reference_name(&command.name)?;
    let name = &command.name;
    let mut cache = Cache::new();
    let mut parser = tree_sitter::Parser::new();
    let mut references = Vec::new();
    for path in command_paths(
        &command.target,
        command.cached,
        command.others,
        command.ignored,
    )? {
        let Ok(text) = fs::read_to_string(&path) else {
            continue;
        };
        if !text.contains(name) {
            continue;
        }
        let Ok(source) = source_from_text(&path, &text, command.language, false, None) else {
            continue;
        };
        references.extend(references_in_source(&source, name, &mut parser, &mut cache));
    }

    Ok(ReferencesOutput { references })
}

pub(crate) fn compact_references(output: &ReferencesOutput) -> CompactOutput {
    CompactOutput {
        locations: output
            .references
            .iter()
            .map(|reference| {
                let symbol = reference.symbol.as_ref();
                CompactLocation {
                    file: reference.file.clone(),
                    line: reference.line,
                    column: reference.column,
                    line_hash: reference.line_hash.clone(),
                    text: reference.text.clone(),
                    kind: symbol.map(|symbol| symbol.kind.clone()),
                    name: symbol.map(|symbol| symbol.name.clone()),
                    qualified_name: symbol.map(|symbol| symbol.qualified_name.clone()),
                }
            })
            .collect(),
    }
}

fn validate_reference_name(name: &str) -> Result<()> {
    if name.is_empty() {
        bail!("reference name must not be empty");
    }
    if !name.bytes().all(is_identifier_byte) {
        bail!("reference name must be an ASCII identifier");
    }
    Ok(())
}

fn references_in_source(
    source: &SourceFile,
    name: &str,
    parser: &mut Parser,
    cache: &mut Cache,
) -> Vec<ReferenceLocation> {
    let source_map = source_map_with_cache(source, cache).ok();
    let ignored_ranges = reference_ignored_ranges(source, parser);
    let line_starts = line_start_offsets(&source.text);
    let mut references = Vec::new();
    for line in &source.lines {
        let columns = reference_columns(&line.text, name);
        if columns.is_empty() {
            continue;
        }
        let symbol = source_map
            .as_ref()
            .and_then(|source_map| symbol_at_line_in_map(source_map, line.number));
        for column in columns {
            let byte_offset = line_starts
                .get(line.number.saturating_sub(1))
                .map_or(column - 1, |line_start| line_start + column - 1);
            if is_ignored_reference(byte_offset, &ignored_ranges) {
                continue;
            }
            references.push(ReferenceLocation {
                file: source.path.clone(),
                language: source.detection.language,
                engine: source.detection.engine,
                file_hash: source.file_hash.clone(),
                line: line.number,
                column,
                line_hash: line.hash.clone(),
                text: line.text.clone(),
                symbol: symbol.clone(),
            });
        }
    }
    references
}

fn reference_ignored_ranges(source: &SourceFile, parser: &mut Parser) -> Vec<(usize, usize)> {
    if !matches!(source.detection.language, Language::C | Language::Cpp) {
        return Vec::new();
    }
    if source.detection.engine != AnalysisEngine::TreeSitter {
        return Vec::new();
    }
    let Some(language) = symbols::tree_sitter_language(source.detection.language) else {
        return Vec::new();
    };

    if parser.set_language(&language).is_err() {
        return Vec::new();
    }
    let Some(tree) = parser.parse(&source.text, None) else {
        return Vec::new();
    };

    let mut ranges = Vec::new();
    collect_reference_ignored_ranges(tree.root_node(), &mut ranges);
    ranges
}

fn collect_reference_ignored_ranges(node: Node<'_>, ranges: &mut Vec<(usize, usize)>) {
    if is_reference_noise_node(node.kind()) {
        ranges.push((node.start_byte(), node.end_byte()));
        return;
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_reference_ignored_ranges(child, ranges);
    }
}

fn is_reference_noise_node(kind: &str) -> bool {
    kind == "comment" || kind.ends_with("string_literal") || kind == "char_literal"
}

fn line_start_offsets(text: &str) -> Vec<usize> {
    let mut offsets = vec![0];
    for (index, byte) in text.bytes().enumerate() {
        if byte == b'\n' && index + 1 < text.len() {
            offsets.push(index + 1);
        }
    }

    offsets
}

fn is_ignored_reference(byte_offset: usize, ranges: &[(usize, usize)]) -> bool {
    ranges
        .iter()
        .any(|&(start, end)| start <= byte_offset && byte_offset < end)
}

fn reference_columns(text: &str, name: &str) -> Vec<usize> {
    memchr::memmem::find_iter(text.as_bytes(), name.as_bytes())
        .filter(|&index| {
            let bytes = text.as_bytes();
            let before = index.checked_sub(1).map(|before_index| bytes[before_index]);
            let after = bytes.get(index + name.len()).copied();
            !before.is_some_and(is_identifier_byte) && !after.is_some_and(is_identifier_byte)
        })
        .map(|index| index + 1)
        .collect()
}