dossier_core/
lib.rs

1use std::{
2    fmt::Display,
3    path::{Path, PathBuf},
4    str::Utf8Error,
5};
6
7use serde::Serialize;
8use thiserror::Error;
9
10pub use indexmap;
11pub use serde_json;
12pub use tree_sitter;
13
14pub type Result<T> = std::result::Result<T, DossierError>;
15
16#[derive(Error, Debug)]
17pub enum DossierError {
18    UTF8Error(Utf8Error),
19}
20
21impl Display for DossierError {
22    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23        use DossierError::*;
24        match &self {
25            UTF8Error(error) => {
26                write!(f, "UTF8Error: {:?}", error)
27            }
28        }
29    }
30}
31
/// A `String` whose contents are meant to be read as Markdown.
pub type MarkdownString = String;
/// A `String` holding the fully qualified name of an entity.
pub type FullyQualifiedName = String;
34
35#[derive(Debug, Clone, Serialize, PartialEq)]
36pub enum Identity {
37    /// The fully qualified name of an entity
38    #[serde(rename = "fqn")]
39    FQN(FullyQualifiedName),
40    /// A reference to another entity via its fully qualified name
41    #[serde(rename = "refers_to")]
42    Reference(FullyQualifiedName),
43    #[serde(skip_serializing)]
44    Anonymous,
45}
46
47impl Identity {
48    pub fn is_anonymous(&self) -> bool {
49        matches!(self, Identity::Anonymous)
50    }
51}
52
53#[derive(Debug, Clone, Serialize, PartialEq)]
54pub struct Entity {
55    /// The title for the entity. Usually the name of the class/function/module, etc.
56    #[serde(skip_serializing_if = "Option::is_none")]
57    pub title: Option<String>,
58    /// A description for the entity. Supports Markdown.
59    pub description: MarkdownString,
60    /// The type of the entity. E.g. function, class, module.
61    /// Each language will have a different set of entities.
62    pub kind: String,
63    /// The identity of the entity: either its fully qualified name, or a reference to another entity
64    /// via its fully qualified name.
65    ///
66    /// E.g. a class declaration will have an identity of its fully qualified name, but a
67    /// function's return position will have an reference to another entity that describes its type.
68    #[serde(flatten)]
69    #[serde(skip_serializing_if = "Identity::is_anonymous")]
70    pub identity: Identity,
71    /// Child entities. E.g. classes may contain functions, modules may have child modules, etc.
72    #[serde(skip_serializing_if = "Vec::is_empty")]
73    pub members: Vec<Entity>,
74    /// What context the entity is in. E.g. a type may be describing a parameter to a function, or a return type.
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub member_context: Option<String>,
77    /// The language of the entity
78    pub language: String,
79    /// The language of the entity
80    pub source: Source,
81    /// Arbitrary metadata different types of entities need to store
82    #[serde(skip_serializing_if = "value_is_empty")]
83    pub meta: serde_json::Value,
84}
85
86fn value_is_empty(value: &serde_json::Value) -> bool {
87    value.is_null() || value.as_object().map(|o| o.is_empty()).unwrap_or(false)
88}
89
90#[derive(Debug, Clone, Serialize, PartialEq)]
91/// Position in a source file.
92///
93/// Contains the row and column number, as well as the byte offset from the start of the file,
94/// since different situations may call for one of the other.
95pub struct Position {
96    /// The line number of the entity in the source file
97    pub row: usize,
98    /// The column number on the line
99    pub column: usize,
100    /// Byte offset from the start of the file for the entity
101    pub byte_offset: usize,
102}
103
104#[derive(Debug, Clone, Serialize, PartialEq)]
105/// Metadata about the source of an `Entity`
106pub struct Source {
107    pub file: PathBuf,
108    /// Start position in the source file
109    pub start: Position,
110    /// End position in the source file
111    pub end: Position,
112    /// Optional: Git repository URL for the file
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub repository: Option<String>,
115}
116
/// A context passed into parsers.
///
/// It currently tracks the stack of namespaces being visited, which
/// [`Context::generate_fqn`] inserts into every fully qualified name it builds.
/// In the future it could also carry information about the parsing context,
/// such as the current repository.
#[derive(Debug, Clone, Default)]
pub struct Context {
    /// Namespace segments, outermost first.
    namespace: Vec<String>,
}

impl Context {
    /// Creates a context with an empty namespace stack.
    pub fn new() -> Self {
        Self::default()
    }

    /// Generates a fully qualified name (FQN) from a path, the current namespace,
    /// and a list of parts.
    ///
    /// The result is the path, followed by every namespace segment and then every
    /// part, each introduced by `::`.
    ///
    /// For example, a file `src/foo/bar.ts` and parts of `[Interface, methodName]`
    /// (with an empty namespace) yield `src/foo/bar.ts::Interface::methodName`.
    ///
    /// This function is operating-system independent: every backslash in the
    /// displayed path is replaced, so `/` is always the path separator.
    pub fn generate_fqn<'a, T>(&self, path: &Path, parts: T) -> String
    where
        T: IntoIterator<Item = &'a str>,
    {
        let mut fqn = path.display().to_string().replace('\\', "/");

        // Append in place rather than formatting a temporary `String` per segment.
        for segment in &self.namespace {
            fqn.push_str("::");
            fqn.push_str(segment);
        }

        for part in parts {
            fqn.push_str("::");
            fqn.push_str(part);
        }

        fqn
    }

    /// Pushes `namespace` as the new innermost namespace segment.
    pub fn push_namespace(&mut self, namespace: &str) {
        self.namespace.push(namespace.to_owned());
    }

    /// Removes the innermost namespace segment; does nothing when the stack is empty.
    pub fn pop_namespace(&mut self) {
        self.namespace.pop();
    }
}
164
165/// The trait for implementing language-specific parsers
166pub trait DocsParser {
167    /// Given a pathname to an entry point, return a list of entities
168    fn parse<'a, P: Into<&'a Path>, T: IntoIterator<Item = P>>(
169        &self,
170        paths: T,
171        ctx: &mut Context,
172    ) -> Result<Vec<Entity>>;
173}
174
/// A source that file contents can be read from by path.
pub trait FileSource {
    /// Returns the contents of the file at `path` as a `String`, or an I/O error.
    fn read_file<'a, P>(&self, path: P) -> std::io::Result<String>
    where
        P: Into<&'a Path>;
}
178
179pub struct FileSystem;
180
181impl FileSource for FileSystem {
182    fn read_file<'a, P: Into<&'a Path>>(&self, path: P) -> std::io::Result<String> {
183        std::fs::read_to_string(path.into())
184    }
185}
186
187pub struct InMemoryFileSystem {
188    pub files: indexmap::IndexMap<PathBuf, String>,
189}
190
191impl FileSource for InMemoryFileSystem {
192    fn read_file<'a, P: Into<&'a Path>>(&self, path: P) -> std::io::Result<String> {
193        let path: &Path = path.into();
194        self.files
195            .get(path)
196            .map(|s| s.to_owned())
197            .ok_or(std::io::Error::new(
198                std::io::ErrorKind::NotFound,
199                format!("File not found: {}", path.display()),
200            ))
201    }
202}
203
204pub mod helpers {
205    use super::*;
206    use tree_sitter::{Node, Query, QueryCapture};
207
208    pub fn node_for_capture<'a>(
209        name: &str,
210        captures: &'a [QueryCapture<'a>],
211        query: &Query,
212    ) -> Option<Node<'a>> {
213        query
214            .capture_index_for_name(name)
215            .and_then(|index| captures.iter().find(|c| c.index == index))
216            .map(|c| c.node)
217    }
218
219    pub fn get_string_from_match<'a>(
220        captures: &'a [QueryCapture],
221        index: u32,
222        code: &'a str,
223    ) -> Option<crate::Result<&'a str>> {
224        captures.iter().find(|c| c.index == index).map(|capture| {
225            capture
226                .node
227                .utf8_text(code.as_bytes())
228                .map_err(DossierError::UTF8Error)
229        })
230    }
231}