Skip to main content

argyph_parse/
types.rs

1use camino::Utf8PathBuf;
2
/// A half-open span of bytes, `[start, end)`, within a source file.
///
/// Nothing enforces `start <= end`; a span whose `end` does not exceed its
/// `start` is simply treated as empty by [`ByteRange::len`] and
/// [`ByteRange::is_empty`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ByteRange {
    /// Offset of the first byte covered by the span.
    pub start: usize,
    /// Offset one past the last byte covered by the span.
    pub end: usize,
}

impl ByteRange {
    /// Builds a span from its two offsets, storing them exactly as given.
    #[must_use]
    pub fn new(start: usize, end: usize) -> Self {
        ByteRange { start, end }
    }

    /// Number of bytes covered by the span; `0` when `end <= start`.
    #[must_use]
    pub fn len(&self) -> usize {
        if self.is_empty() {
            0
        } else {
            // `end > start` is guaranteed here, so this cannot underflow.
            self.end - self.start
        }
    }

    /// Returns `true` when the span covers no bytes (`end <= start`).
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.end <= self.start
    }
}
26
27/// Stable identifier for a symbol within a codebase.
28///
29/// Formed from the file path, symbol name, and byte range so it remains
30/// stable across re-indexes unless the symbol itself moves or is renamed.
31#[derive(Debug, Clone, PartialEq, Eq, Hash)]
32pub struct SymbolId(String);
33
34impl SymbolId {
35    #[must_use]
36    pub fn new(file: &Utf8PathBuf, name: &str, start: usize) -> Self {
37        Self(format!("{file}::{name}::{start}"))
38    }
39
40    #[must_use]
41    pub fn as_str(&self) -> &str {
42        &self.0
43    }
44
45    #[must_use]
46    pub fn from_raw(raw: String) -> Self {
47        Self(raw)
48    }
49}
50
51impl std::fmt::Display for SymbolId {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        f.write_str(&self.0)
54    }
55}
56
/// Category of a code symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SymbolKind {
    Function,
    Method,
    Struct,
    Enum,
    Trait,
    Impl,
    Class,
    Module,
    Variable,
    TypeAlias,
    Constant,
    Interface,
    Macro,
    Static,
}

impl SymbolKind {
    /// Returns the lowercase, snake_case name of the kind
    /// (for example `"type_alias"` for [`SymbolKind::TypeAlias`]).
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        // Deliberately exhaustive: adding a variant must force a new arm here.
        match *self {
            SymbolKind::Function => "function",
            SymbolKind::Method => "method",
            SymbolKind::Struct => "struct",
            SymbolKind::Enum => "enum",
            SymbolKind::Trait => "trait",
            SymbolKind::Impl => "impl",
            SymbolKind::Class => "class",
            SymbolKind::Module => "module",
            SymbolKind::Variable => "variable",
            SymbolKind::TypeAlias => "type_alias",
            SymbolKind::Constant => "constant",
            SymbolKind::Interface => "interface",
            SymbolKind::Macro => "macro",
            SymbolKind::Static => "static",
        }
    }
}
97
/// A single code symbol extracted from a source file.
#[derive(Debug, Clone)]
pub struct Symbol {
    /// Identifier of this symbol (see [`SymbolId`]).
    ///
    /// NOTE(review): presumably built with `SymbolId::new` from `file`,
    /// `name`, and `range.start` — confirm at the construction site.
    pub id: SymbolId,
    /// The symbol's name.
    pub name: String,
    /// What kind of construct the symbol is.
    pub kind: SymbolKind,
    /// Path of the source file the symbol was extracted from.
    pub file: Utf8PathBuf,
    /// Byte range of the symbol within `file` (presumably its whole
    /// definition — confirm against the extractor).
    pub range: ByteRange,
    /// The symbol's signature text (e.g. function parameters, class declaration).
    pub signature: Option<String>,
    /// Parent symbol ID, if this symbol is nested inside another.
    pub parent: Option<SymbolId>,
}
111
112/// Content-addressed identifier for a chunk.
113///
114/// Computed as the BLAKE3 hash of whitespace-normalized chunk text.
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
116pub struct ChunkId(pub [u8; 32]);
117
118impl ChunkId {
119    #[must_use]
120    pub fn from_text(text: &str) -> Self {
121        let normalized = normalize_chunk_text(text);
122        Self(blake3::hash(normalized.as_bytes()).into())
123    }
124
125    #[must_use]
126    pub fn as_bytes(&self) -> &[u8; 32] {
127        &self.0
128    }
129}
130
131impl std::fmt::Display for ChunkId {
132    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
133        for byte in &self.0 {
134            write!(f, "{byte:02x}")?;
135        }
136        Ok(())
137    }
138}
139
/// The kind of a chunk.
///
/// Records which sort of source construct a chunk covers, or that the chunk
/// came from the character-based fallback splitter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChunkKind {
    /// A function or method body.
    FunctionBody,
    /// A type definition (struct, enum, trait, class, interface).
    TypeDef,
    /// Top-level code that doesn't fit into a named construct.
    TopLevel,
    /// A character-based fallback split (for oversized nodes).
    Fallback,
}
152
/// An AST-aware chunk of source text, ready for embedding.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// Content-addressed identifier of the chunk (see [`ChunkId`]).
    ///
    /// NOTE(review): presumably `ChunkId::from_text(&text)` — confirm at the
    /// construction site.
    pub id: ChunkId,
    /// Path of the source file the chunk belongs to.
    pub file: Utf8PathBuf,
    /// Byte range within `file` (presumably the span `text` was taken from —
    /// confirm against the chunker).
    pub range: ByteRange,
    /// The chunk's source text.
    pub text: String,
    /// Which kind of construct the chunk covers.
    pub kind: ChunkKind,
    /// Language tag for the chunk, using the `argyph_fs` language type.
    pub language: argyph_fs::Language,
}
163
/// A raw import statement, unresolved.
///
/// This type only records what the statement says; resolution into
/// cross-file edges is the responsibility of `argyph-graph`.
#[derive(Debug, Clone)]
pub struct Import {
    /// The raw import text.
    pub raw: String,
    /// The module path being imported (split on `.` or `/`).
    pub module_path: Vec<String>,
    /// Specific items imported, if any (presumably empty when the statement
    /// names no individual items — confirm against the extractor).
    pub items: Vec<String>,
    /// Byte range of the import statement in the source file.
    pub range: ByteRange,
}
178
/// The result of parsing a single file.
///
/// Bundles the three kinds of output collected for that file.
#[derive(Debug, Clone)]
pub struct ParsedFile {
    /// Symbols extracted from the file.
    pub symbols: Vec<Symbol>,
    /// Chunks of source text produced for the file.
    pub chunks: Vec<Chunk>,
    /// Raw, unresolved import statements found in the file.
    pub imports: Vec<Import>,
}
186
/// Computes the normalized text used for content-addressed chunk IDs.
///
/// Every run of Unicode whitespace (spaces, tabs, newlines, ...) is collapsed
/// to a single ASCII space, and leading/trailing whitespace is dropped, so two
/// chunks that differ only in whitespace layout normalize to the same string.
///
/// The output must stay byte-for-byte stable: it is the input to the chunk
/// hash, so any change here changes every chunk ID.
fn normalize_chunk_text(text: &str) -> String {
    // Each whitespace run shrinks to at most one byte, so the result is never
    // longer than the input and a single allocation suffices.
    let mut normalized = String::with_capacity(text.len());
    // `split_whitespace` uses the same Unicode White_Space definition as
    // `char::is_whitespace` and never yields empty words.
    for word in text.split_whitespace() {
        // Separate words with exactly one space; never emit a leading one.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}