opengrep 1.1.0

Advanced AST-aware code search tool with tree-sitter parsing and AI integration capabilities
Documentation
//! Abstract Syntax Tree analysis and context extraction

use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::ops::Range;
use tree_sitter::{Language, Node, Parser, Point, Tree};

pub mod context;
pub mod walker;

pub use context::{AstContext, ContextNode};
pub use walker::AstWalker;

/// Position in source code, given as a line/column pair plus a byte offset.
///
/// The `From<Point>` conversion in this module only copies the line and
/// column; it leaves `offset` at 0, so code that needs a real offset has to
/// fill that field in itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Position {
    /// Line number (0-indexed)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
    /// Byte offset (0 when the value came straight from a `Point` conversion)
    pub offset: usize,
}

/// Node metadata for additional context.
///
/// Derives `Default`, so a default value has every flag set to `false` and
/// both optional fields set to `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NodeMetadata {
    /// Whether this node defines a scope
    pub is_scope: bool,
    /// Whether this node is a definition
    pub is_definition: bool,
    /// Whether this node is a declaration
    pub is_declaration: bool,
    /// Visibility modifier if any (`None` when absent or not determined)
    pub visibility: Option<String>,
    /// Documentation comment if any (`None` when absent or not determined)
    pub documentation: Option<String>,
}

/// Language-specific configuration for AST analysis.
///
/// Every field is a list of plain strings. In the built-in tables these are
/// node kind names such as `"function_item"` (Rust) or `"function"` (the
/// generic fallback); see `LanguageConfig::for_language` and the `Default`
/// implementation for the concrete values.
#[derive(Debug, Clone)]
pub struct LanguageConfig {
    /// Node types that define scopes
    pub scope_types: Vec<String>,
    /// Node types that provide context
    pub context_types: Vec<String>,
    /// Node types that are definitions
    pub definition_types: Vec<String>,
    /// Node types that are declarations
    pub declaration_types: Vec<String>,
    /// Field names that contain node names (e.g. `"name"`, `"identifier"`)
    pub name_fields: Vec<String>,
}

impl LanguageConfig {
    /// Returns the analysis configuration for the language named `lang`.
    ///
    /// Only `"rust"` has a dedicated table. Any other name, including the
    /// empty string, yields the generic [`Default`] configuration.
    pub fn for_language(lang: &str) -> Self {
        /// Converts a list of string literals into owned `String`s.
        fn owned(kinds: &[&str]) -> Vec<String> {
            kinds.iter().map(|kind| (*kind).to_owned()).collect()
        }

        if lang != "rust" {
            return Self::default();
        }

        // For Rust the scope-defining kinds and the context-providing kinds
        // are currently the same list, so it is written once and copied.
        let container_kinds = [
            "function_item",
            "impl_item",
            "struct_item",
            "enum_item",
            "trait_item",
            "mod_item",
        ];

        Self {
            scope_types: owned(&container_kinds),
            context_types: owned(&container_kinds),
            definition_types: owned(&[
                "function_item",
                "struct_item",
                "enum_item",
                "trait_item",
            ]),
            declaration_types: owned(&["function_signature", "trait_item"]),
            name_fields: owned(&["name", "identifier"]),
        }
    }
}

impl Default for LanguageConfig {
    /// Builds the generic, language-agnostic configuration that is used
    /// whenever no language-specific table exists.
    fn default() -> Self {
        /// Converts a list of string literals into owned `String`s.
        fn owned(kinds: &[&str]) -> Vec<String> {
            kinds.iter().map(|kind| (*kind).to_owned()).collect()
        }

        // Scope-defining kinds and context-providing kinds are currently the
        // same list, so it is written once and copied into both fields.
        let container_kinds = ["function", "method", "class", "struct", "interface"];

        Self {
            scope_types: owned(&container_kinds),
            context_types: owned(&container_kinds),
            definition_types: owned(&["function", "class", "struct"]),
            declaration_types: owned(&["function_declaration", "class_declaration"]),
            name_fields: owned(&["name", "identifier"]),
        }
    }
}

impl From<Point> for Position {
    fn from(point: Point) -> Self {
        Self {
            line: point.row,
            column: point.column,
            offset: 0, // Will be set separately
        }
    }
}

/// AST node information.
///
/// Nodes are kept in a flat `Vec<AstNode>`; `parent` and `children` refer to
/// other nodes by their index in that vector rather than by pointer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AstNode {
    /// Node type (e.g., "function", "class", "method")
    pub kind: String,
    /// Node name if available
    pub name: Option<String>,
    /// Start position
    pub start: Position,
    /// End position
    pub end: Position,
    /// Byte range of the node within the source code
    pub range: Range<usize>,
    /// Depth in the AST (the node extraction starts from has depth 0)
    pub depth: usize,
    /// Parent node index (`None` for the node extraction started from)
    pub parent: Option<usize>,
    /// Child node indices, in the order the children were visited
    pub children: Vec<usize>,
    /// Additional metadata
    pub metadata: NodeMetadata,
}

/// Analyzes the Abstract Syntax Tree (AST) of source code.
///
/// Wraps a tree-sitter `Parser` that is configured once, in `AstAnalyzer::new`,
/// for a single language.
pub struct AstAnalyzer {
    /// Parser used for every `analyze`/`parse` call.
    parser: Parser,
    /// Language the parser was configured with. It is stored at construction
    /// but not read by the code in this file, hence the leading underscore.
    _language: Language,
}

/// Parsed AST result: the raw tree, the flattened node list extracted from
/// it, and an owned copy of the text that was parsed.
#[derive(Debug)]
pub struct ParsedAst {
    /// The tree-sitter tree
    pub tree: Tree,
    /// Extracted AST nodes (a flat list; nodes reference each other by index)
    pub nodes: Vec<AstNode>,
    /// Source code
    pub source: String,
}

impl ParsedAst {
    /// Collects the AST context surrounding a single source line.
    ///
    /// Every extracted node whose line span (`start.line..=end.line`)
    /// includes `line_idx` is added to a fresh [`AstContext`], in the order
    /// the nodes are stored. `line_idx` is compared directly against the
    /// stored line numbers, so it uses the same indexing as `Position::line`.
    ///
    /// Returns `None` when the context holds no nodes after that pass;
    /// otherwise the context is built against the stored source and returned.
    pub fn get_context_for_line(&self, line_idx: usize) -> Option<AstContext> {
        let mut ctx = AstContext::new();

        let enclosing = self
            .nodes
            .iter()
            .filter(|candidate| (candidate.start.line..=candidate.end.line).contains(&line_idx));
        for candidate in enclosing {
            ctx.add_node(candidate, &self.nodes);
        }

        if ctx.nodes.is_empty() {
            return None;
        }

        ctx.build(&self.source);
        Some(ctx)
    }
}

impl AstAnalyzer {
    /// Creates a new `AstAnalyzer`.
    pub fn new(language: Language) -> Result<Self> {
        let mut parser = Parser::new();
        parser.set_language(language)?;
        Ok(Self {
            parser,
            _language: language,
        })
    }

    /// Analyze source code and return the AST
    pub fn analyze(&mut self, code: &str) -> Option<Tree> {
        self.parser.parse(code, None)
    }

    /// Parse source code and return parsed AST
    pub fn parse(&mut self, source: &str) -> Result<ParsedAst> {
        let tree = self.analyze(source)
            .ok_or_else(|| anyhow::anyhow!("Failed to parse source code"))?;
        
        let nodes = self.extract_nodes(&tree, source);
        
        Ok(ParsedAst {
            tree,
            nodes,
            source: source.to_string(),
        })
    }

    /// Extracts all relevant nodes from a tree
    pub fn extract_nodes(&self, tree: &Tree, code: &str) -> Vec<AstNode> {
        let mut nodes = Vec::new();
        let walker = tree.walk();
        self.extract_nodes_recursive(walker.node(), code, &mut nodes, 0, None);
        nodes
    }

    fn extract_nodes_recursive(
        &self,
        node: Node,
        code: &str,
        nodes: &mut Vec<AstNode>,
        depth: usize,
        parent_index: Option<usize>,
    ) -> usize {
        let node_index = nodes.len();

        let new_node = AstNode {
            kind: node.kind().to_string(),
            name: self.get_node_name(node, code),
            start: node.start_position().into(),
            end: node.end_position().into(),
            range: node.byte_range(),
            depth,
            parent: parent_index,
            children: Vec::new(),
            metadata: NodeMetadata::default(),
        };

        nodes.push(new_node);

        for child in node.children(&mut node.walk()) {
            let child_index =
                self.extract_nodes_recursive(child, code, nodes, depth + 1, Some(node_index));
            if let Some(parent_node) = nodes.get_mut(node_index) {
                parent_node.children.push(child_index);
            }
        }

        node_index
    }

    // Helper to get name of a node if it has one (e.g. function name)
    fn get_node_name(&self, node: Node, code: &str) -> Option<String> {
        // This is language-specific and can get complicated.
        // For now, let's look for a child node with type 'identifier'.
        let name_node = node
            .children(&mut node.walk())
            .find(|n| n.kind().ends_with("identifier"));

        if let Some(name_node) = name_node {
            return Some(code[name_node.byte_range()].to_string());
        }

        None
    }
}