#[cfg(feature = "documents")]
pub mod doc;
pub mod l1;
pub mod l2;
pub mod l3;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::lang::{LangError, LangId, ParseOutcome, parse_with_default_timeout, with_parser};
use l1::extract_l1_from_tree;
use l2::extract_l2_from_tree;
/// Parses `source` once for `lang` and extracts the L1 map, plus the L2 map
/// when requested.
///
/// The same parse tree feeds both extraction passes, so asking for L2 here
/// does not trigger a second parse.
///
/// * `lang` - language handed to the parser and to both extractors.
/// * `source` - raw bytes of the file to analyse.
/// * `eager_l2` - when `true`, L2 extraction is attempted as well; when
///   `false`, the second tuple element is always `None`.
///
/// L2 extraction is best-effort: if it fails, its error is discarded and
/// `None` is returned in its place while the L1 result is still delivered.
///
/// # Errors
///
/// * Any error produced by `with_parser` is propagated (converted into
///   [`ExtractError`]).
/// * [`ExtractError::ParseFailure`] when the parser reports
///   `ParseOutcome::Failed`.
/// * [`ExtractError::ParseTimeout`], carrying
///   `crate::lang::DEFAULT_PARSE_TIMEOUT`, when the parser reports
///   `ParseOutcome::TimedOut`.
/// * Any error produced by L1 extraction is propagated.
pub fn extract_l1_l2(
    lang: LangId,
    source: &[u8],
    eager_l2: bool,
) -> Result<(FileMapL1, Option<FileMapL2>), ExtractError> {
    let tree = match with_parser(lang, |parser| parse_with_default_timeout(parser, source))? {
        ParseOutcome::Ok(parsed) => parsed,
        ParseOutcome::Failed => return Err(ExtractError::ParseFailure),
        ParseOutcome::TimedOut => {
            return Err(ExtractError::ParseTimeout(
                crate::lang::DEFAULT_PARSE_TIMEOUT,
            ));
        }
    };

    // L1 is mandatory: its failure aborts the whole call before L2 is tried.
    let level_one = extract_l1_from_tree(lang, &tree, source)?;

    // L2 is optional twice over: skipped unless requested, and a failure is
    // downgraded to `None` rather than surfaced.
    let level_two = eager_l2
        .then(|| extract_l2_from_tree(lang, &tree, source).ok())
        .flatten();

    Ok((level_one, level_two))
}
/// Schema version number for this module's data model.
///
/// Defined as the crate's `RELEASE_MINOR`, so it changes whenever that
/// constant does.
// NOTE(review): presumably this is the value written into the `schema_ver`
// fields of `FileMapL1` / `FileMapL2` — confirm in the `l1` / `l2` extractors.
pub const SCHEMA_VER: u16 = crate::version::RELEASE_MINOR;
/// Errors reported by the extraction entry points in this module.
///
/// `Display` text comes from the `#[error(...)]` attributes (via `thiserror`).
#[derive(Debug, Error)]
pub enum ExtractError {
/// The source could not be treated as UTF-8.
// NOTE(review): not constructed in the visible part of this file; raised
// elsewhere — confirm the exact condition at the construction site.
#[error("non-utf8 source")]
NonUtf8,
/// The parser reported a failed parse (`ParseOutcome::Failed`).
#[error("tree-sitter parse failure")]
ParseFailure,
/// The parser reported a timeout (`ParseOutcome::TimedOut`).
///
/// The payload is the duration shown in the message; `extract_l1_l2` fills
/// it with `crate::lang::DEFAULT_PARSE_TIMEOUT`.
#[error("tree-sitter parse timed out (> {0:?}) — file likely pathological")]
ParseTimeout(std::time::Duration),
/// A `LangError` passed through unchanged.
///
/// `transparent` forwards `Display` and `source()` to the inner error, and
/// `#[from]` generates the `From<LangError>` impl that lets `?` convert it.
#[error(transparent)]
Lang(#[from] LangError),
/// Document extraction failed; the payload is the failure message.
///
/// Only compiled when the `documents` feature is enabled.
#[cfg(feature = "documents")]
#[error("kreuzberg extraction failed: {0}")]
Document(String),
}
/// Level-1 map of a single file, as returned by `extract_l1_l2` (produced by
/// `l1::extract_l1_from_tree`).
///
/// Serializable with serde. Field descriptions below are inferred from names
/// and types; the authoritative meaning is whatever the `l1` extractor writes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FileMapL1 {
/// Schema version of this record (presumably `SCHEMA_VER` — confirm in `l1`).
pub schema_ver: u16,
/// Language identifier as a string.
pub language: String,
/// Size of the analysed source in bytes (presumably `source.len()` — confirm).
pub size_bytes: u64,
/// Whether errors were encountered while processing the file.
// NOTE(review): likely "the parse tree contains error nodes" — confirm in `l1`.
pub had_errors: bool,
/// Number of errors counted for this file.
pub error_count: u32,
/// Symbols found in the file.
pub symbols: Vec<Symbol>,
/// Imports found in the file.
pub imports: Vec<Import>,
/// Trait/type implementation records found in the file.
///
/// Omitted from serialized output when empty, and defaults to an empty
/// vector when the key is absent on input.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub implementations: Vec<Implementation>,
}
/// One "type implements trait" record, stored in `FileMapL1::implementations`.
///
/// Field descriptions are inferred from names; confirm against the `l1`
/// extractor that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Implementation {
/// Name of the implemented trait.
pub trait_name: String,
/// Name of the implementing type.
pub impl_type: String,
/// Starting byte offset of the record in the source (presumably 0-based).
pub start_byte: u32,
/// Starting row of the record (presumably 0-based — TODO confirm).
pub start_row: u32,
/// Starting column of the record (unit — bytes vs. chars — TODO confirm).
pub start_col: u32,
}
/// A named definition found in a file, stored in `FileMapL1::symbols`.
///
/// Field descriptions are inferred from names; confirm against the `l1`
/// extractor that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Symbol {
/// Symbol name.
pub name: String,
/// Category of the symbol.
pub kind: SymbolKind,
/// Starting byte offset of the symbol in the source.
pub start_byte: u32,
/// Ending byte offset of the symbol (presumably exclusive — TODO confirm).
pub end_byte: u32,
/// Starting row of the symbol (presumably 0-based — TODO confirm).
pub start_row: u32,
/// Starting column of the symbol (unit — bytes vs. chars — TODO confirm).
pub start_col: u32,
/// Signature text, when one is available for this symbol.
pub signature: Option<String>,
/// Decorators attached to the symbol.
///
/// Omitted from serialized output when empty, and defaults to an empty
/// vector when the key is absent on input.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub decorators: Vec<String>,
}
/// Category of a [`Symbol`].
///
/// Serialized by serde using snake_case variant names (for example
/// `EnumVariant` becomes `"enum_variant"`). `Unknown` is the fallback that
/// `SymbolKind::from_capture_suffix` returns for unrecognised input.
// NOTE(review): variant order can matter for non-self-describing serde formats
// and for any code relying on discriminant values — confirm before reordering.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum SymbolKind {
Function,
Method,
Struct,
Enum,
Class,
Interface,
Trait,
Type,
Const,
Module,
Macro,
Impl,
Namespace,
Getter,
Setter,
Unknown,
Field,
Variable,
EnumVariant,
Constructor,
Decorator,
}
impl SymbolKind {
pub fn from_capture_suffix(suffix: &str) -> Self {
match suffix {
"function" => Self::Function,
"method" => Self::Method,
"struct" => Self::Struct,
"enum" => Self::Enum,
"class" => Self::Class,
"interface" => Self::Interface,
"trait" => Self::Trait,
"type" => Self::Type,
"const" | "constant" => Self::Const,
"module" => Self::Module,
"macro" => Self::Macro,
"impl" => Self::Impl,
"namespace" => Self::Namespace,
"getter" => Self::Getter,
"setter" => Self::Setter,
"field" => Self::Field,
"variable" | "var" => Self::Variable,
"enum_variant" | "variant" => Self::EnumVariant,
"constructor" => Self::Constructor,
"decorator" => Self::Decorator,
_ => Self::Unknown,
}
}
pub(crate) fn specificity(self) -> u8 {
use SymbolKind::*;
match self {
Unknown => 0,
Const | Variable | Field | Decorator => 1,
Function | Method | Struct | Enum | Class | Interface | Trait | Type | Module
| Macro | Impl | Namespace | Getter | Setter | EnumVariant | Constructor => 2,
}
}
}
/// An import statement found in a file, stored in `FileMapL1::imports`.
///
/// Field descriptions are inferred from names; confirm against the `l1`
/// extractor that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Import {
/// Imported module name, when one could be determined.
pub module: Option<String>,
/// Raw text of the import (presumably the statement as written in the source).
pub raw: String,
/// Starting byte offset of the import in the source.
pub start_byte: u32,
/// Ending byte offset of the import (presumably exclusive — TODO confirm).
pub end_byte: u32,
}
/// Level-2 map of a single file, as optionally returned by `extract_l1_l2`
/// (produced by `l2::extract_l2_from_tree`).
///
/// Serializable with serde. Field descriptions below are inferred from names
/// and types; the authoritative meaning is whatever the `l2` extractor writes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FileMapL2 {
/// Schema version of this record (presumably `SCHEMA_VER` — confirm in `l2`).
pub schema_ver: u16,
/// Language identifier as a string.
pub language: String,
/// Call sites found in the file.
pub calls: Vec<Call>,
/// Documentation comments found in the file.
pub docs: Vec<DocComment>,
}
/// A call site found in a file, stored in `FileMapL2::calls`.
///
/// Field descriptions are inferred from names; confirm against the `l2`
/// extractor that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Call {
/// Name of the callee as recorded by the extractor.
pub callee: String,
/// Starting byte offset of the call in the source.
pub start_byte: u32,
/// Ending byte offset of the call (presumably exclusive — TODO confirm).
pub end_byte: u32,
/// Starting row of the call; deserializes to `0` when absent from the input.
// NOTE(review): the `default` presumably keeps older serialized records
// (written before row/col existed) loadable — confirm.
#[serde(default)]
pub start_row: u32,
/// Starting column of the call; deserializes to `0` when absent from the input.
#[serde(default)]
pub start_col: u32,
}
/// A documentation comment found in a file, stored in `FileMapL2::docs`.
///
/// Field descriptions are inferred from names; confirm against the `l2`
/// extractor that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DocComment {
/// Text of the comment.
// NOTE(review): unclear from here whether comment markers are stripped — confirm.
pub text: String,
/// Starting byte offset of the comment in the source.
pub start_byte: u32,
/// Ending byte offset of the comment (presumably exclusive — TODO confirm).
pub end_byte: u32,
}