Skip to main content

seshat_core/
ir.rs

1use serde::{Deserialize, Serialize};
2use std::fmt;
3use std::path::PathBuf;
4
5use crate::error::ParseEnumError;
6
7/// Supported programming languages.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum Language {
11    Rust,
12    TypeScript,
13    JavaScript,
14    Python,
15}
16
17impl Language {
18    /// Return the canonical snake_case representation.
19    pub fn as_str(&self) -> &'static str {
20        match self {
21            Self::Rust => "rust",
22            Self::TypeScript => "typescript",
23            Self::JavaScript => "javascript",
24            Self::Python => "python",
25        }
26    }
27}
28
29impl fmt::Display for Language {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        match self {
32            Self::Rust => write!(f, "Rust"),
33            Self::TypeScript => write!(f, "TypeScript"),
34            Self::JavaScript => write!(f, "JavaScript"),
35            Self::Python => write!(f, "Python"),
36        }
37    }
38}
39
40impl std::str::FromStr for Language {
41    type Err = ParseEnumError;
42
43    fn from_str(s: &str) -> Result<Self, Self::Err> {
44        match s {
45            "rust" => Ok(Self::Rust),
46            "typescript" => Ok(Self::TypeScript),
47            "javascript" => Ok(Self::JavaScript),
48            "python" => Ok(Self::Python),
49            _ => Err(ParseEnumError {
50                type_name: "Language",
51                value: s.to_owned(),
52            }),
53        }
54    }
55}
56
57impl Language {
58    /// Returns file extensions associated with this language.
59    pub fn extensions(&self) -> &'static [&'static str] {
60        match self {
61            Self::Rust => &["rs"],
62            Self::TypeScript => &["ts", "tsx"],
63            Self::JavaScript => &["js", "jsx", "mjs", "cjs"],
64            Self::Python => &["py"],
65        }
66    }
67
68    /// All supported language variants for iteration.
69    pub fn all() -> &'static [Language] {
70        &[Self::Rust, Self::TypeScript, Self::JavaScript, Self::Python]
71    }
72
73    /// Detect language from a file extension (without the leading dot).
74    ///
75    /// Returns `None` for unrecognised extensions.
76    pub fn from_extension(ext: &str) -> Option<Self> {
77        match ext {
78            "rs" => Some(Self::Rust),
79            "ts" | "tsx" => Some(Self::TypeScript),
80            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
81            "py" => Some(Self::Python),
82            _ => None,
83        }
84    }
85
86    /// Visibility/export marker rendered before a public symbol in this
87    /// language's syntax. Empty string when the symbol is private or when the
88    /// language has no syntactic visibility keyword.
89    ///
90    /// Used by [`crate::symbol_snippet`] so synthetic definition snippets read
91    /// natively for each language instead of always borrowing Rust's `pub`.
92    ///
93    /// - Rust: `pub ` for public, `""` for private.
94    /// - TypeScript / JavaScript: `export ` for public, `""` for private.
95    ///   (TS/JS parsers set `is_public = true` exactly when a symbol carries
96    ///   the `export` keyword.)
97    /// - Python: always `""` — Python has no syntactic visibility marker.
98    #[must_use]
99    pub fn visibility_keyword(self, is_public: bool) -> &'static str {
100        if !is_public {
101            return "";
102        }
103        match self {
104            Self::Rust => "pub ",
105            Self::TypeScript | Self::JavaScript => "export ",
106            Self::Python => "",
107        }
108    }
109
110    /// Source keyword for declaring a function in this language: `fn` /
111    /// `function` / `def`.
112    #[must_use]
113    pub fn function_keyword(self) -> &'static str {
114        match self {
115            Self::Rust => "fn",
116            Self::TypeScript | Self::JavaScript => "function",
117            Self::Python => "def",
118        }
119    }
120}
121
122/// Normalized intermediate representation of a parsed source file.
123///
124/// Common fields are shared across all languages. Language-specific
125/// details live in the `language_ir` enum variant.
126#[derive(Debug, Clone, Serialize, Deserialize)]
127#[serde(rename_all = "snake_case")]
128pub struct ProjectFile {
129    pub path: PathBuf,
130    pub language: Language,
131    pub content_hash: String,
132    pub imports: Vec<Import>,
133    pub exports: Vec<Export>,
134    pub functions: Vec<Function>,
135    pub types: Vec<TypeDef>,
136    pub dependencies_used: Vec<DependencyUsage>,
137    pub language_ir: LanguageIR,
138    /// File-level doc comment extracted by the parser.
139    ///
140    /// - Rust: `//!` inner doc comment at the top of the file.
141    /// - Python: module-level docstring (first `"""..."""` or `'''...'''`).
142    /// - TypeScript/JavaScript: leading `/** ... */` or `//` comment block.
143    ///
144    /// `None` when no file-level documentation is present or the parser
145    /// has not yet been updated to extract it.
146    #[serde(default)]
147    pub file_doc: Option<String>,
148}
149
150/// An import statement extracted from source code.
151#[derive(Debug, Clone, Serialize, Deserialize)]
152#[serde(rename_all = "snake_case")]
153pub struct Import {
154    pub module: String,
155    pub names: Vec<String>,
156    pub is_type_only: bool,
157    pub line: usize,
158}
159
160/// An export declaration extracted from source code.
161#[derive(Debug, Clone, Serialize, Deserialize)]
162#[serde(rename_all = "snake_case")]
163pub struct Export {
164    pub name: String,
165    pub is_default: bool,
166    pub is_type_only: bool,
167    pub line: usize,
168    /// 1-indexed source line where the export declaration ends.
169    ///
170    /// Equals [`Self::line`] for single-line statements such as
171    /// `pub use foo::*;`, `export { Foo };`, or `type Alias = X;`. For
172    /// multi-line declarations (e.g. `export class Foo { ... }`) this is the
173    /// closing line of the declaration node — matching the existing
174    /// [`Function::end_line`] semantics. Hunk-intersection logic in
175    /// `map_diff_impact` uses `[line, end_line]` as the symbol's range.
176    ///
177    /// Required (no `#[serde(default)]`): IR_SCHEMA_VERSION 8 added this
178    /// field; older v7 IR rows fail StaleIR detection and are re-scanned,
179    /// so deserialisation here should never legitimately encounter a
180    /// missing value. Failing loudly surfaces actual data corruption.
181    pub end_line: usize,
182}
183
184/// A function or method definition.
185#[derive(Debug, Clone, Serialize, Deserialize)]
186#[serde(rename_all = "snake_case")]
187pub struct Function {
188    pub name: String,
189    pub is_public: bool,
190    pub is_async: bool,
191    pub line: usize,
192    pub end_line: usize,
193    /// Parameter names extracted by tree-sitter (empty if not yet extracted).
194    #[serde(default)]
195    pub parameters: Vec<String>,
196    /// Doc comment / docstring attached to this function.
197    ///
198    /// - Rust: consecutive `///` lines immediately preceding the function.
199    /// - Python: triple-quoted string as the first statement of the body.
200    /// - TypeScript/JavaScript: JSDoc `/** ... */` comment preceding the function.
201    ///
202    /// `None` when absent or when the parser has not yet been updated.
203    #[serde(default)]
204    pub doc_comment: Option<String>,
205}
206
207/// A type definition (struct, enum, interface, class, type alias).
208#[derive(Debug, Clone, Serialize, Deserialize)]
209#[serde(rename_all = "snake_case")]
210pub struct TypeDef {
211    pub name: String,
212    pub kind: TypeDefKind,
213    pub is_public: bool,
214    pub line: usize,
215    /// 1-indexed source line where the type definition ends.
216    ///
217    /// Equals [`Self::line`] for single-line type aliases (`type Alias = X;`).
218    /// For multi-line declarations (struct, enum, trait, interface, class)
219    /// this is the closing line of the declaration node — matching the
220    /// existing [`Function::end_line`] semantics. Hunk-intersection logic in
221    /// `map_diff_impact` uses `[line, end_line]` as the symbol's range.
222    ///
223    /// Required (no `#[serde(default)]`): IR_SCHEMA_VERSION 8 added this
224    /// field; older v7 IR rows fail StaleIR detection and are re-scanned,
225    /// so deserialisation here should never legitimately encounter a
226    /// missing value. Failing loudly surfaces actual data corruption.
227    pub end_line: usize,
228    /// Doc comment attached to this type definition.
229    ///
230    /// Same conventions as [`Function::doc_comment`].
231    /// `None` when absent or parser not yet updated.
232    #[serde(default)]
233    pub doc_comment: Option<String>,
234}
235
236/// The kind of a type definition.
237#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
238#[serde(rename_all = "snake_case")]
239pub enum TypeDefKind {
240    Struct,
241    Enum,
242    Trait,
243    Interface,
244    Class,
245    TypeAlias,
246}
247
248impl TypeDefKind {
249    /// Source keyword used to declare a type of this kind: `struct` / `enum` /
250    /// `trait` / `interface` / `class` / `type`.
251    ///
252    /// Note `TypeAlias` renders as `type`, matching both Rust's `type Foo =
253    /// …;` and TS's `type Foo = …;`. The old debug-derived spelling
254    /// `typealias` was not valid syntax in any supported language.
255    #[must_use]
256    pub fn keyword(&self) -> &'static str {
257        match self {
258            Self::Struct => "struct",
259            Self::Enum => "enum",
260            Self::Trait => "trait",
261            Self::Interface => "interface",
262            Self::Class => "class",
263            Self::TypeAlias => "type",
264        }
265    }
266}
267
268/// A dependency usage reference found in source code.
269#[derive(Debug, Clone, Serialize, Deserialize)]
270#[serde(rename_all = "snake_case")]
271pub struct DependencyUsage {
272    pub package: String,
273    pub import_path: String,
274    pub line: usize,
275}
276
277/// Language-specific IR details.
278#[derive(Debug, Clone, Serialize, Deserialize)]
279#[serde(rename_all = "snake_case")]
280pub enum LanguageIR {
281    Rust(RustIR),
282    TypeScript(TypeScriptIR),
283    JavaScript(JavaScriptIR),
284    Python(PythonIR),
285}
286
287/// A `mod foo;` or `mod foo { ... }` declaration in a Rust file.
288#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
289#[serde(rename_all = "snake_case")]
290pub struct ModDeclaration {
291    /// Name of the declared module (e.g. `"config"`, `"tests"`).
292    pub name: String,
293    /// 1-indexed source line of the `mod` keyword.
294    pub line: usize,
295}
296
297/// A macro invocation in a Rust file (e.g. `tracing::info!(...)`, `vec![...]`).
298///
299/// Stores the full macro path as written in source and the call-site line so
300/// that detectors can point to real usage rather than import declarations.
301#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
302#[serde(rename_all = "snake_case")]
303pub struct MacroCall {
304    /// Full macro name as written, e.g. `"tracing::info"`, `"vec"`.
305    pub name: String,
306    /// 1-indexed source line of the macro invocation.
307    pub line: usize,
308}
309
310/// A function or method call-site in a Rust file.
311///
312/// Stores **one example per unique callee name** (deduplication happens in the
313/// parser).  The snippet captures a window around the call so that MCP clients
314/// can show real usage patterns without additional disk I/O at query time.
315///
316/// # Snippet layout
317///
318/// ```text
319/// [2 lines of context before the opening line]
320/// [all lines of the call expression — may span many lines for multi-arg calls]
321/// [4 lines of context after the closing line]
322/// ```
323///
324/// Hard cap: 30 lines total.  Lines are taken from the raw source file and
325/// include the original indentation.
326#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
327#[serde(rename_all = "snake_case")]
328pub struct FunctionCall {
329    /// Full callee name as written in source, e.g. `"scan_project"`,
330    /// `"Arc::new"`, `"db.execute"`, `"tracing::info"`.
331    pub callee: String,
332    /// 1-indexed line of the **opening** of the call expression (function name
333    /// position).
334    pub line: usize,
335    /// 1-indexed line of the **closing parenthesis** of the call expression.
336    /// Equals `line` for single-line calls.
337    pub end_line: usize,
338    /// Multi-line snippet centered on the call site (see type-level docs).
339    /// Empty string if source was unavailable at parse time.
340    pub snippet: String,
341}
342
343/// Rust-specific IR details.
344#[derive(Debug, Clone, Default, Serialize, Deserialize)]
345#[serde(rename_all = "snake_case")]
346pub struct RustIR {
347    pub mod_declarations: Vec<ModDeclaration>,
348    pub derive_macros: Vec<DeriveUsage>,
349    pub trait_implementations: Vec<TraitImpl>,
350    pub error_types: Vec<String>,
351    /// All macro invocations found in this file.
352    ///
353    /// Populated by the Rust tree-sitter parser.  Detectors use this to
354    /// produce call-site evidence (e.g. `tracing::info!` lines) instead of
355    /// pointing at import declarations.
356    #[serde(default)]
357    pub macro_calls: Vec<MacroCall>,
358    /// Function and method call-sites found in this file.
359    ///
360    /// Deduplicated by callee name — at most one example per unique callee.
361    /// Hard limit: 500 entries per file.  Used by `query_code_pattern` to
362    /// return real call-site snippets alongside symbol definitions.
363    #[serde(default)]
364    pub function_calls: Vec<FunctionCall>,
365}
366
367/// A `#[derive(...)]` usage.
368#[derive(Debug, Clone, Serialize, Deserialize)]
369#[serde(rename_all = "snake_case")]
370pub struct DeriveUsage {
371    pub type_name: String,
372    pub derives: Vec<String>,
373    pub line: usize,
374}
375
376/// A trait implementation (`impl Trait for Type`).
377#[derive(Debug, Clone, Serialize, Deserialize)]
378#[serde(rename_all = "snake_case")]
379pub struct TraitImpl {
380    pub trait_name: String,
381    pub type_name: String,
382    pub line: usize,
383}
384
385/// TypeScript-specific IR details.
386#[derive(Debug, Clone, Default, Serialize, Deserialize)]
387#[serde(rename_all = "snake_case")]
388pub struct TypeScriptIR {
389    pub has_barrel_exports: bool,
390    pub type_only_imports: Vec<String>,
391    pub decorators: Vec<String>,
392    pub default_export: bool,
393    /// Function and method call-sites found in this file (v7+).
394    ///
395    /// Deduplicated by callee name — at most one example per unique callee.
396    /// Hard limit: 500 entries per file.
397    #[serde(default)]
398    pub function_calls: Vec<FunctionCall>,
399}
400
401/// JavaScript-specific IR details.
402#[derive(Debug, Clone, Default, Serialize, Deserialize)]
403#[serde(rename_all = "snake_case")]
404pub struct JavaScriptIR {
405    pub module_system: ModuleSystem,
406    pub has_module_exports: bool,
407    pub require_calls: Vec<String>,
408    /// Function and method call-sites found in this file (v7+).
409    ///
410    /// Deduplicated by callee name — at most one example per unique callee.
411    /// Hard limit: 500 entries per file.  `require` calls are excluded
412    /// (already captured in `require_calls`).
413    #[serde(default)]
414    pub function_calls: Vec<FunctionCall>,
415}
416
417/// JavaScript module system.
418#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
419#[serde(rename_all = "snake_case")]
420pub enum ModuleSystem {
421    #[default]
422    Unknown,
423    CommonJS,
424    ESM,
425}
426
427/// Python-specific IR details.
428#[derive(Debug, Clone, Default, Serialize, Deserialize)]
429#[serde(rename_all = "snake_case")]
430pub struct PythonIR {
431    pub has_all_export: bool,
432    pub is_init_file: bool,
433    pub type_hints_used: bool,
434    pub decorators: Vec<String>,
435    /// Function and method call-sites found in this file (v7+).
436    ///
437    /// Deduplicated by callee name — at most one example per unique callee.
438    /// Hard limit: 500 entries per file.
439    #[serde(default)]
440    pub function_calls: Vec<FunctionCall>,
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` renders the conventionally capitalised language names.
    #[test]
    fn language_display() {
        assert_eq!(Language::Rust.to_string(), "Rust");
        assert_eq!(Language::TypeScript.to_string(), "TypeScript");
        assert_eq!(Language::JavaScript.to_string(), "JavaScript");
        assert_eq!(Language::Python.to_string(), "Python");
    }

    /// `as_str` output parses back to the same variant.
    #[test]
    fn language_roundtrip_str() {
        for &language in Language::all() {
            let parsed: Language = language.as_str().parse().unwrap();
            assert_eq!(parsed, language);
        }
    }

    #[test]
    fn language_parse_unknown() {
        assert!("go".parse::<Language>().is_err());
    }

    #[test]
    fn language_extensions() {
        assert_eq!(Language::Rust.extensions(), &["rs"]);
        assert!(Language::TypeScript.extensions().contains(&"tsx"));
        assert!(Language::JavaScript.extensions().contains(&"mjs"));
        assert_eq!(Language::Python.extensions(), &["py"]);
    }

    #[test]
    fn language_from_extension() {
        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
        assert_eq!(Language::from_extension("jsx"), Some(Language::JavaScript));
        assert_eq!(Language::from_extension("mjs"), Some(Language::JavaScript));
        assert_eq!(Language::from_extension("cjs"), Some(Language::JavaScript));
        assert_eq!(Language::from_extension("py"), Some(Language::Python));
        assert_eq!(Language::from_extension("go"), None);
        assert_eq!(Language::from_extension(""), None);
    }

    /// Every extension a language advertises must map back to that language.
    #[test]
    fn language_extensions_agree_with_from_extension() {
        for &language in Language::all() {
            for ext in language.extensions() {
                assert_eq!(Language::from_extension(ext), Some(language));
            }
        }
    }

    #[test]
    fn language_all() {
        let all = Language::all();
        assert_eq!(all.len(), 4);
        assert!(all.contains(&Language::Rust));
        assert!(all.contains(&Language::TypeScript));
        assert!(all.contains(&Language::JavaScript));
        assert!(all.contains(&Language::Python));
    }

    #[test]
    fn language_visibility_keyword() {
        assert_eq!(Language::Rust.visibility_keyword(true), "pub ");
        assert_eq!(Language::TypeScript.visibility_keyword(true), "export ");
        assert_eq!(Language::JavaScript.visibility_keyword(true), "export ");
        assert_eq!(Language::Python.visibility_keyword(true), "");
        for language in Language::all() {
            assert_eq!(language.visibility_keyword(false), "");
        }
    }

    #[test]
    fn language_function_keyword() {
        assert_eq!(Language::Rust.function_keyword(), "fn");
        assert_eq!(Language::TypeScript.function_keyword(), "function");
        assert_eq!(Language::JavaScript.function_keyword(), "function");
        assert_eq!(Language::Python.function_keyword(), "def");
    }

    #[test]
    fn type_def_kind_keyword() {
        assert_eq!(TypeDefKind::Struct.keyword(), "struct");
        assert_eq!(TypeDefKind::Enum.keyword(), "enum");
        assert_eq!(TypeDefKind::Trait.keyword(), "trait");
        assert_eq!(TypeDefKind::Interface.keyword(), "interface");
        assert_eq!(TypeDefKind::Class.keyword(), "class");
        assert_eq!(TypeDefKind::TypeAlias.keyword(), "type");
    }

    #[test]
    fn language_ir_enum_covers_all_languages() {
        // Verify each variant can be constructed
        let _rust = LanguageIR::Rust(RustIR::default());
        let _ts = LanguageIR::TypeScript(TypeScriptIR::default());
        let _js = LanguageIR::JavaScript(JavaScriptIR::default());
        let _py = LanguageIR::Python(PythonIR::default());
    }

    /// A `ProjectFile` survives a JSON serialize/deserialize cycle.
    #[test]
    fn project_file_serialization_roundtrip() {
        let pf = ProjectFile {
            path: PathBuf::from("src/main.rs"),
            language: Language::Rust,
            content_hash: "abc123".to_owned(),
            imports: vec![Import {
                module: "std::io".to_owned(),
                names: vec!["Read".to_owned()],
                is_type_only: false,
                line: 1,
            }],
            exports: Vec::new(),
            functions: vec![Function {
                name: "main".to_owned(),
                is_public: false,
                is_async: false,
                line: 3,
                end_line: 5,
                parameters: vec![],
                doc_comment: None,
            }],
            types: Vec::new(),
            dependencies_used: Vec::new(),
            language_ir: LanguageIR::Rust(RustIR::default()),
            file_doc: None,
        };

        let json = serde_json::to_string(&pf).expect("serialize");
        let deserialized: ProjectFile = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(deserialized.path, pf.path);
        assert_eq!(deserialized.language, pf.language);
        assert_eq!(deserialized.content_hash, pf.content_hash);
        assert_eq!(deserialized.imports.len(), 1);
        assert_eq!(deserialized.functions.len(), 1);
    }

    #[test]
    fn module_system_default_is_unknown() {
        assert_eq!(ModuleSystem::default(), ModuleSystem::Unknown);
    }
}