Skip to main content

seshat_core/
ir.rs

1use serde::{Deserialize, Serialize};
2use std::fmt;
3use std::path::PathBuf;
4
5use crate::error::ParseEnumError;
6
/// Supported programming languages.
///
/// Each variant has three textual forms in this file: the serde name produced
/// by `rename_all = "snake_case"`, the lowercase token returned by
/// `Language::as_str` (and accepted by the `FromStr` impl), and the
/// capitalised `Display` name.
///
/// NOTE(review): serde's snake_case rule is expected to serialise the
/// multi-word variants as `type_script` / `java_script`, which is not the same
/// as the `typescript` / `javascript` tokens used by `as_str` — confirm that
/// no caller mixes the two encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    /// Rust (`.rs` files).
    Rust,
    /// TypeScript (`.ts`, `.tsx` files).
    TypeScript,
    /// JavaScript (`.js`, `.jsx`, `.mjs`, `.cjs` files).
    JavaScript,
    /// Python (`.py` files).
    Python,
}
16
17impl Language {
18    /// Return the canonical snake_case representation.
19    pub fn as_str(&self) -> &'static str {
20        match self {
21            Self::Rust => "rust",
22            Self::TypeScript => "typescript",
23            Self::JavaScript => "javascript",
24            Self::Python => "python",
25        }
26    }
27}
28
29impl fmt::Display for Language {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        match self {
32            Self::Rust => write!(f, "Rust"),
33            Self::TypeScript => write!(f, "TypeScript"),
34            Self::JavaScript => write!(f, "JavaScript"),
35            Self::Python => write!(f, "Python"),
36        }
37    }
38}
39
40impl std::str::FromStr for Language {
41    type Err = ParseEnumError;
42
43    fn from_str(s: &str) -> Result<Self, Self::Err> {
44        match s {
45            "rust" => Ok(Self::Rust),
46            "typescript" => Ok(Self::TypeScript),
47            "javascript" => Ok(Self::JavaScript),
48            "python" => Ok(Self::Python),
49            _ => Err(ParseEnumError {
50                type_name: "Language",
51                value: s.to_owned(),
52            }),
53        }
54    }
55}
56
57impl Language {
58    /// Returns file extensions associated with this language.
59    pub fn extensions(&self) -> &'static [&'static str] {
60        match self {
61            Self::Rust => &["rs"],
62            Self::TypeScript => &["ts", "tsx"],
63            Self::JavaScript => &["js", "jsx", "mjs", "cjs"],
64            Self::Python => &["py"],
65        }
66    }
67
68    /// All supported language variants for iteration.
69    pub fn all() -> &'static [Language] {
70        &[Self::Rust, Self::TypeScript, Self::JavaScript, Self::Python]
71    }
72
73    /// Detect language from a file extension (without the leading dot).
74    ///
75    /// Returns `None` for unrecognised extensions.
76    pub fn from_extension(ext: &str) -> Option<Self> {
77        match ext {
78            "rs" => Some(Self::Rust),
79            "ts" | "tsx" => Some(Self::TypeScript),
80            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
81            "py" => Some(Self::Python),
82            _ => None,
83        }
84    }
85}
86
/// Normalized intermediate representation of a parsed source file.
///
/// Common fields are shared across all languages. Language-specific
/// details live in the `language_ir` enum variant.
///
/// Only `file_doc` carries `#[serde(default)]`; every other field must be
/// present in the serialised form for deserialisation to succeed.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct ProjectFile {
    /// Location of the source file.
    ///
    /// NOTE(review): whether this is absolute or project-relative is not
    /// established in this file — confirm against the scanner.
    pub path: PathBuf,
    /// Language the file was parsed as.
    pub language: Language,
    /// Hash of the file contents (algorithm and encoding are not specified
    /// in this file).
    pub content_hash: String,
    /// Import statements extracted from the file.
    pub imports: Vec<Import>,
    /// Export declarations extracted from the file.
    pub exports: Vec<Export>,
    /// Function and method definitions extracted from the file.
    pub functions: Vec<Function>,
    /// Type definitions extracted from the file.
    pub types: Vec<TypeDef>,
    /// Dependency usage references found in the file.
    pub dependencies_used: Vec<DependencyUsage>,
    /// Language-specific details; presumably the variant corresponds to
    /// `language`, but nothing in this type enforces that — verify at the
    /// construction site.
    pub language_ir: LanguageIR,
    /// File-level doc comment extracted by the parser.
    ///
    /// - Rust: `//!` inner doc comment at the top of the file.
    /// - Python: module-level docstring (first `"""..."""` or `'''...'''`).
    /// - TypeScript/JavaScript: leading `/** ... */` or `//` comment block.
    ///
    /// `None` when no file-level documentation is present or the parser
    /// has not yet been updated to extract it.
    #[serde(default)]
    pub file_doc: Option<String>,
}
114
/// An import statement extracted from source code.
///
/// All fields are required when deserialising (none has `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct Import {
    /// Module the names are imported from (the unit test in this file uses
    /// `"std::io"`).
    pub module: String,
    /// Names brought into scope from `module` (e.g. `["Read"]`).
    pub names: Vec<String>,
    /// Whether the import is type-only — presumably TypeScript's
    /// `import type`; confirm against the parsers.
    pub is_type_only: bool,
    /// Source line of the import statement (presumably 1-indexed like the
    /// other line fields in this file — confirm).
    pub line: usize,
}
124
/// An export declaration extracted from source code.
///
/// `line` and `end_line` together give the inclusive source range of the
/// declaration.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct Export {
    /// Name of the exported symbol.
    pub name: String,
    /// Whether this is a default export — presumably `export default` in
    /// TypeScript/JavaScript; confirm against the parsers.
    pub is_default: bool,
    /// Whether the export is type-only — presumably `export type`; confirm
    /// against the parsers.
    pub is_type_only: bool,
    /// Source line where the export declaration starts (the lower bound of
    /// the `[line, end_line]` range described below).
    pub line: usize,
    /// 1-indexed source line where the export declaration ends.
    ///
    /// Equals [`Self::line`] for single-line statements such as
    /// `pub use foo::*;`, `export { Foo };`, or `type Alias = X;`. For
    /// multi-line declarations (e.g. `export class Foo { ... }`) this is the
    /// closing line of the declaration node — matching the existing
    /// [`Function::end_line`] semantics. Hunk-intersection logic in
    /// `map_diff_impact` uses `[line, end_line]` as the symbol's range.
    ///
    /// Required (no `#[serde(default)]`): IR_SCHEMA_VERSION 8 added this
    /// field; older v7 IR rows fail StaleIR detection and are re-scanned,
    /// so deserialisation here should never legitimately encounter a
    /// missing value. Failing loudly surfaces actual data corruption.
    pub end_line: usize,
}
148
/// A function or method definition.
///
/// `parameters` and `doc_comment` are optional on deserialisation
/// (`#[serde(default)]`); the remaining fields are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct Function {
    /// Name of the function or method.
    pub name: String,
    /// Whether the function is publicly visible (the per-language rule is
    /// decided by the parser and is not visible in this file).
    pub is_public: bool,
    /// Whether the function is declared `async`.
    pub is_async: bool,
    /// Source line where the definition starts.
    pub line: usize,
    /// Source line where the definition ends.
    pub end_line: usize,
    /// Parameter names extracted by tree-sitter (empty if not yet extracted).
    #[serde(default)]
    pub parameters: Vec<String>,
    /// Doc comment / docstring attached to this function.
    ///
    /// - Rust: consecutive `///` lines immediately preceding the function.
    /// - Python: triple-quoted string as the first statement of the body.
    /// - TypeScript/JavaScript: JSDoc `/** ... */` comment preceding the function.
    ///
    /// `None` when absent or when the parser has not yet been updated.
    #[serde(default)]
    pub doc_comment: Option<String>,
}
171
/// A type definition (struct, enum, interface, class, type alias).
///
/// Only `doc_comment` is optional on deserialisation; `end_line` is
/// deliberately required (see its field documentation).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct TypeDef {
    /// Name of the defined type.
    pub name: String,
    /// Which kind of definition this is.
    pub kind: TypeDefKind,
    /// Whether the type is publicly visible (the per-language rule is decided
    /// by the parser and is not visible in this file).
    pub is_public: bool,
    /// Source line where the definition starts (the lower bound of the
    /// `[line, end_line]` range described below).
    pub line: usize,
    /// 1-indexed source line where the type definition ends.
    ///
    /// Equals [`Self::line`] for single-line type aliases (`type Alias = X;`).
    /// For multi-line declarations (struct, enum, trait, interface, class)
    /// this is the closing line of the declaration node — matching the
    /// existing [`Function::end_line`] semantics. Hunk-intersection logic in
    /// `map_diff_impact` uses `[line, end_line]` as the symbol's range.
    ///
    /// Required (no `#[serde(default)]`): IR_SCHEMA_VERSION 8 added this
    /// field; older v7 IR rows fail StaleIR detection and are re-scanned,
    /// so deserialisation here should never legitimately encounter a
    /// missing value. Failing loudly surfaces actual data corruption.
    pub end_line: usize,
    /// Doc comment attached to this type definition.
    ///
    /// Same conventions as [`Function::doc_comment`].
    /// `None` when absent or parser not yet updated.
    #[serde(default)]
    pub doc_comment: Option<String>,
}
200
/// The kind of a type definition.
///
/// Serialised with serde's `snake_case` variant naming (so `TypeAlias` is
/// expected to appear as `type_alias`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TypeDefKind {
    /// A `struct` definition.
    Struct,
    /// An `enum` definition.
    Enum,
    /// A `trait` definition.
    Trait,
    /// An `interface` definition.
    Interface,
    /// A `class` definition.
    Class,
    /// A type alias (e.g. `type Alias = X;`).
    TypeAlias,
}
212
/// A dependency usage reference found in source code.
///
/// All fields are required when deserialising.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct DependencyUsage {
    /// Name of the dependency package being used.
    pub package: String,
    /// Import path through which the package is referenced — presumably the
    /// path as written in source; confirm against the parsers.
    pub import_path: String,
    /// Source line of the usage.
    pub line: usize,
}
221
/// Language-specific IR details.
///
/// One variant per [`Language`], each wrapping that language's IR struct.
/// No serde tagging attribute is set, so serde's default enum representation
/// applies, with variant names rewritten by `rename_all = "snake_case"`.
///
/// NOTE(review): under that rule the multi-word variants are expected to be
/// keyed as `type_script` / `java_script` — confirm before hand-writing or
/// querying serialised IR.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LanguageIR {
    /// Details for a Rust file.
    Rust(RustIR),
    /// Details for a TypeScript file.
    TypeScript(TypeScriptIR),
    /// Details for a JavaScript file.
    JavaScript(JavaScriptIR),
    /// Details for a Python file.
    Python(PythonIR),
}
231
/// A `mod foo;` or `mod foo { ... }` declaration in a Rust file.
///
/// Both forms are recorded the same way; this type does not distinguish an
/// out-of-line module from an inline one. Equality is structural
/// (derived `PartialEq`/`Eq` over `name` and `line`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct ModDeclaration {
    /// Name of the declared module (e.g. `"config"`, `"tests"`).
    pub name: String,
    /// 1-indexed source line of the `mod` keyword.
    pub line: usize,
}
241
/// A macro invocation in a Rust file (e.g. `tracing::info!(...)`, `vec![...]`).
///
/// Stores the full macro path as written in source and the call-site line so
/// that detectors can point to real usage rather than import declarations.
///
/// Equality is structural (derived `PartialEq`/`Eq` over `name` and `line`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct MacroCall {
    /// Full macro name as written, e.g. `"tracing::info"`, `"vec"`
    /// (the examples omit the trailing `!`).
    pub name: String,
    /// 1-indexed source line of the macro invocation.
    pub line: usize,
}
254
/// A function or method call-site in a source file.
///
/// Used by the Rust, TypeScript, JavaScript and Python IR structs alike.
/// Stores **one example per unique callee name** (deduplication happens in the
/// parser).  The snippet captures a window around the call so that MCP clients
/// can show real usage patterns without additional disk I/O at query time.
///
/// # Snippet layout
///
/// ```text
/// [2 lines of context before the opening line]
/// [all lines of the call expression — may span many lines for multi-arg calls]
/// [4 lines of context after the closing line]
/// ```
///
/// Hard cap: 30 lines total.  Lines are taken from the raw source file and
/// include the original indentation.
///
/// All four fields are required when deserialising.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct FunctionCall {
    /// Full callee name as written in source, e.g. `"scan_project"`,
    /// `"Arc::new"`, `"db.execute"`, `"tracing::info"`.
    pub callee: String,
    /// 1-indexed line of the **opening** of the call expression (function name
    /// position).
    pub line: usize,
    /// 1-indexed line of the **closing parenthesis** of the call expression.
    /// Equals `line` for single-line calls.
    pub end_line: usize,
    /// Multi-line snippet surrounding the call site (see type-level docs for
    /// the exact window).
    /// Empty string if source was unavailable at parse time.
    pub snippet: String,
}
287
/// Rust-specific IR details.
///
/// `Default` yields an IR with every list empty. On deserialisation only
/// `macro_calls` and `function_calls` may be omitted (`#[serde(default)]`);
/// the first four fields are required.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct RustIR {
    /// `mod` declarations found in this file.
    pub mod_declarations: Vec<ModDeclaration>,
    /// `#[derive(...)]` usages found in this file.
    pub derive_macros: Vec<DeriveUsage>,
    /// `impl Trait for Type` blocks found in this file.
    pub trait_implementations: Vec<TraitImpl>,
    /// Names of error types found in this file (the detection criteria live
    /// in the parser and are not visible here).
    pub error_types: Vec<String>,
    /// All macro invocations found in this file.
    ///
    /// Populated by the Rust tree-sitter parser.  Detectors use this to
    /// produce call-site evidence (e.g. `tracing::info!` lines) instead of
    /// pointing at import declarations.
    #[serde(default)]
    pub macro_calls: Vec<MacroCall>,
    /// Function and method call-sites found in this file.
    ///
    /// Deduplicated by callee name — at most one example per unique callee.
    /// Hard limit: 500 entries per file.  Used by `query_code_pattern` to
    /// return real call-site snippets alongside symbol definitions.
    #[serde(default)]
    pub function_calls: Vec<FunctionCall>,
}
311
/// A `#[derive(...)]` usage.
///
/// All fields are required when deserialising.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct DeriveUsage {
    /// Name of the type the derive attribute is attached to.
    pub type_name: String,
    /// Names listed inside the `derive(...)` attribute.
    pub derives: Vec<String>,
    /// Source line of the usage (whether this is the attribute line or the
    /// type's own line is decided by the parser — confirm there).
    pub line: usize,
}
320
/// A trait implementation (`impl Trait for Type`).
///
/// All fields are required when deserialising.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct TraitImpl {
    /// The trait being implemented (`Trait` in `impl Trait for Type`).
    pub trait_name: String,
    /// The implementing type (`Type` in `impl Trait for Type`).
    pub type_name: String,
    /// Source line of the `impl` block.
    pub line: usize,
}
329
/// TypeScript-specific IR details.
///
/// `Default` yields all flags `false` and all lists empty. Only
/// `function_calls` may be omitted on deserialisation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct TypeScriptIR {
    /// Whether the file contains barrel exports — presumably re-exports such
    /// as `export * from "./x"`; confirm against the TypeScript parser.
    pub has_barrel_exports: bool,
    /// Type-only imports found in the file (whether entries are module paths
    /// or imported names is not visible here — confirm).
    pub type_only_imports: Vec<String>,
    /// Decorators used in the file.
    pub decorators: Vec<String>,
    /// Whether the file has a default export.
    pub default_export: bool,
    /// Function and method call-sites found in this file (v7+).
    ///
    /// Deduplicated by callee name — at most one example per unique callee.
    /// Hard limit: 500 entries per file.
    #[serde(default)]
    pub function_calls: Vec<FunctionCall>,
}
345
/// JavaScript-specific IR details.
///
/// `Default` yields `ModuleSystem::Unknown`, `false` flags and empty lists.
/// Only `function_calls` may be omitted on deserialisation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct JavaScriptIR {
    /// Module system detected for the file.
    pub module_system: ModuleSystem,
    /// Whether the file uses `module.exports` — presumed from the field
    /// name; confirm against the JavaScript parser.
    pub has_module_exports: bool,
    /// `require(...)` calls found in the file (presumably the required
    /// module specifiers — confirm).
    pub require_calls: Vec<String>,
    /// Function and method call-sites found in this file (v7+).
    ///
    /// Deduplicated by callee name — at most one example per unique callee.
    /// Hard limit: 500 entries per file.  `require` calls are excluded
    /// (already captured in `require_calls`).
    #[serde(default)]
    pub function_calls: Vec<FunctionCall>,
}
361
/// JavaScript module system.
///
/// Defaults to [`ModuleSystem::Unknown`].
///
/// NOTE(review): with `rename_all = "snake_case"` serde inserts an underscore
/// before every interior capital, so these variants are expected to serialise
/// as `unknown`, `common_j_s` and `e_s_m` — confirm before relying on the
/// serialised names (changing them would invalidate stored IR).
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ModuleSystem {
    /// Module system could not be determined.
    #[default]
    Unknown,
    /// CommonJS modules.
    CommonJS,
    /// ECMAScript modules.
    ESM,
}
371
/// Python-specific IR details.
///
/// `Default` yields all flags `false` and all lists empty. Only
/// `function_calls` may be omitted on deserialisation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct PythonIR {
    /// Whether the module declares its exports — presumably via `__all__`;
    /// confirm against the Python parser.
    pub has_all_export: bool,
    /// Whether the file is a package init file — presumably `__init__.py`;
    /// confirm against the Python parser.
    pub is_init_file: bool,
    /// Whether type hints are used anywhere in the file.
    pub type_hints_used: bool,
    /// Decorators used in the file.
    pub decorators: Vec<String>,
    /// Function and method call-sites found in this file (v7+).
    ///
    /// Deduplicated by callee name — at most one example per unique callee.
    /// Hard limit: 500 entries per file.
    #[serde(default)]
    pub function_calls: Vec<FunctionCall>,
}
387
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` yields the conventionally capitalised language names.
    #[test]
    fn language_display() {
        let expected = [
            (Language::Rust, "Rust"),
            (Language::TypeScript, "TypeScript"),
            (Language::JavaScript, "JavaScript"),
            (Language::Python, "Python"),
        ];
        for (language, label) in expected {
            assert_eq!(language.to_string(), label);
        }
    }

    /// `as_str` output parses back to the same variant.
    #[test]
    fn language_roundtrip_str() {
        for language in [
            Language::Rust,
            Language::TypeScript,
            Language::JavaScript,
            Language::Python,
        ] {
            let reparsed = language.as_str().parse::<Language>().unwrap();
            assert_eq!(reparsed, language);
        }
    }

    /// Tokens outside the supported set are rejected.
    #[test]
    fn language_parse_unknown() {
        let outcome = "go".parse::<Language>();
        assert!(outcome.is_err());
    }

    /// Spot-checks the per-language extension tables.
    #[test]
    fn language_extensions() {
        let typescript = Language::TypeScript.extensions();
        let javascript = Language::JavaScript.extensions();

        assert_eq!(Language::Rust.extensions(), &["rs"]);
        assert!(typescript.contains(&"tsx"));
        assert!(javascript.contains(&"mjs"));
        assert_eq!(Language::Python.extensions(), &["py"]);
    }

    /// Every known extension maps to its language; unknown ones map to `None`.
    #[test]
    fn language_from_extension() {
        let cases = [
            ("rs", Some(Language::Rust)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("js", Some(Language::JavaScript)),
            ("jsx", Some(Language::JavaScript)),
            ("mjs", Some(Language::JavaScript)),
            ("cjs", Some(Language::JavaScript)),
            ("py", Some(Language::Python)),
            ("go", None),
            ("", None),
        ];
        for (ext, expected) in cases {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    /// `all()` lists each of the four variants.
    #[test]
    fn language_all() {
        let every = Language::all();
        assert_eq!(every.len(), 4);
        for language in [
            Language::Rust,
            Language::TypeScript,
            Language::JavaScript,
            Language::Python,
        ] {
            assert!(every.contains(&language));
        }
    }

    /// Each `LanguageIR` variant can be built from its default payload.
    #[test]
    fn language_ir_enum_covers_all_languages() {
        let _variants = [
            LanguageIR::Rust(RustIR::default()),
            LanguageIR::TypeScript(TypeScriptIR::default()),
            LanguageIR::JavaScript(JavaScriptIR::default()),
            LanguageIR::Python(PythonIR::default()),
        ];
    }

    /// A `ProjectFile` survives a JSON serialise/deserialise cycle.
    #[test]
    fn project_file_serialization_roundtrip() {
        let io_import = Import {
            module: "std::io".to_owned(),
            names: vec!["Read".to_owned()],
            is_type_only: false,
            line: 1,
        };
        let main_fn = Function {
            name: "main".to_owned(),
            is_public: false,
            is_async: false,
            line: 3,
            end_line: 5,
            parameters: Vec::new(),
            doc_comment: None,
        };
        let original = ProjectFile {
            path: PathBuf::from("src/main.rs"),
            language: Language::Rust,
            content_hash: "abc123".to_owned(),
            imports: vec![io_import],
            exports: Vec::new(),
            functions: vec![main_fn],
            types: Vec::new(),
            dependencies_used: Vec::new(),
            language_ir: LanguageIR::Rust(RustIR::default()),
            file_doc: None,
        };

        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: ProjectFile = serde_json::from_str(&encoded).expect("deserialize");

        assert_eq!(decoded.path, original.path);
        assert_eq!(decoded.language, original.language);
        assert_eq!(decoded.content_hash, original.content_hash);
        assert_eq!(decoded.imports.len(), 1);
        assert_eq!(decoded.functions.len(), 1);
    }

    /// The derived default for `ModuleSystem` is `Unknown`.
    #[test]
    fn module_system_default_is_unknown() {
        let fallback = ModuleSystem::default();
        assert_eq!(fallback, ModuleSystem::Unknown);
    }
}