Skip to main content

seshat_core/
ir.rs

1use serde::{Deserialize, Serialize};
2use std::fmt;
3use std::path::PathBuf;
4
5use crate::error::ParseEnumError;
6
7/// Supported programming languages.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum Language {
11    Rust,
12    TypeScript,
13    JavaScript,
14    Python,
15}
16
17impl Language {
18    /// Return the canonical snake_case representation.
19    pub fn as_str(&self) -> &'static str {
20        match self {
21            Self::Rust => "rust",
22            Self::TypeScript => "typescript",
23            Self::JavaScript => "javascript",
24            Self::Python => "python",
25        }
26    }
27}
28
29impl fmt::Display for Language {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        match self {
32            Self::Rust => write!(f, "Rust"),
33            Self::TypeScript => write!(f, "TypeScript"),
34            Self::JavaScript => write!(f, "JavaScript"),
35            Self::Python => write!(f, "Python"),
36        }
37    }
38}
39
40impl std::str::FromStr for Language {
41    type Err = ParseEnumError;
42
43    fn from_str(s: &str) -> Result<Self, Self::Err> {
44        match s {
45            "rust" => Ok(Self::Rust),
46            "typescript" => Ok(Self::TypeScript),
47            "javascript" => Ok(Self::JavaScript),
48            "python" => Ok(Self::Python),
49            _ => Err(ParseEnumError {
50                type_name: "Language",
51                value: s.to_owned(),
52            }),
53        }
54    }
55}
56
57impl Language {
58    /// Returns file extensions associated with this language.
59    pub fn extensions(&self) -> &'static [&'static str] {
60        match self {
61            Self::Rust => &["rs"],
62            Self::TypeScript => &["ts", "tsx"],
63            Self::JavaScript => &["js", "jsx", "mjs", "cjs"],
64            Self::Python => &["py"],
65        }
66    }
67
68    /// All supported language variants for iteration.
69    pub fn all() -> &'static [Language] {
70        &[Self::Rust, Self::TypeScript, Self::JavaScript, Self::Python]
71    }
72
73    /// Detect language from a file extension (without the leading dot).
74    ///
75    /// Returns `None` for unrecognised extensions.
76    pub fn from_extension(ext: &str) -> Option<Self> {
77        match ext {
78            "rs" => Some(Self::Rust),
79            "ts" | "tsx" => Some(Self::TypeScript),
80            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
81            "py" => Some(Self::Python),
82            _ => None,
83        }
84    }
85}
86
87/// Normalized intermediate representation of a parsed source file.
88///
89/// Common fields are shared across all languages. Language-specific
90/// details live in the `language_ir` enum variant.
91#[derive(Debug, Clone, Serialize, Deserialize)]
92#[serde(rename_all = "snake_case")]
93pub struct ProjectFile {
94    pub path: PathBuf,
95    pub language: Language,
96    pub content_hash: String,
97    pub imports: Vec<Import>,
98    pub exports: Vec<Export>,
99    pub functions: Vec<Function>,
100    pub types: Vec<TypeDef>,
101    pub dependencies_used: Vec<DependencyUsage>,
102    pub language_ir: LanguageIR,
103    /// File-level doc comment extracted by the parser.
104    ///
105    /// - Rust: `//!` inner doc comment at the top of the file.
106    /// - Python: module-level docstring (first `"""..."""` or `'''...'''`).
107    /// - TypeScript/JavaScript: leading `/** ... */` or `//` comment block.
108    ///
109    /// `None` when no file-level documentation is present or the parser
110    /// has not yet been updated to extract it.
111    #[serde(default)]
112    pub file_doc: Option<String>,
113}
114
115/// An import statement extracted from source code.
116#[derive(Debug, Clone, Serialize, Deserialize)]
117#[serde(rename_all = "snake_case")]
118pub struct Import {
119    pub module: String,
120    pub names: Vec<String>,
121    pub is_type_only: bool,
122    pub line: usize,
123}
124
125/// An export declaration extracted from source code.
126#[derive(Debug, Clone, Serialize, Deserialize)]
127#[serde(rename_all = "snake_case")]
128pub struct Export {
129    pub name: String,
130    pub is_default: bool,
131    pub is_type_only: bool,
132    pub line: usize,
133}
134
135/// A function or method definition.
136#[derive(Debug, Clone, Serialize, Deserialize)]
137#[serde(rename_all = "snake_case")]
138pub struct Function {
139    pub name: String,
140    pub is_public: bool,
141    pub is_async: bool,
142    pub line: usize,
143    pub end_line: usize,
144    /// Parameter names extracted by tree-sitter (empty if not yet extracted).
145    #[serde(default)]
146    pub parameters: Vec<String>,
147    /// Doc comment / docstring attached to this function.
148    ///
149    /// - Rust: consecutive `///` lines immediately preceding the function.
150    /// - Python: triple-quoted string as the first statement of the body.
151    /// - TypeScript/JavaScript: JSDoc `/** ... */` comment preceding the function.
152    ///
153    /// `None` when absent or when the parser has not yet been updated.
154    #[serde(default)]
155    pub doc_comment: Option<String>,
156}
157
158/// A type definition (struct, enum, interface, class, type alias).
159#[derive(Debug, Clone, Serialize, Deserialize)]
160#[serde(rename_all = "snake_case")]
161pub struct TypeDef {
162    pub name: String,
163    pub kind: TypeDefKind,
164    pub is_public: bool,
165    pub line: usize,
166    /// Doc comment attached to this type definition.
167    ///
168    /// Same conventions as [`Function::doc_comment`].
169    /// `None` when absent or parser not yet updated.
170    #[serde(default)]
171    pub doc_comment: Option<String>,
172}
173
174/// The kind of a type definition.
175#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
176#[serde(rename_all = "snake_case")]
177pub enum TypeDefKind {
178    Struct,
179    Enum,
180    Trait,
181    Interface,
182    Class,
183    TypeAlias,
184}
185
186/// A dependency usage reference found in source code.
187#[derive(Debug, Clone, Serialize, Deserialize)]
188#[serde(rename_all = "snake_case")]
189pub struct DependencyUsage {
190    pub package: String,
191    pub import_path: String,
192    pub line: usize,
193}
194
195/// Language-specific IR details.
196#[derive(Debug, Clone, Serialize, Deserialize)]
197#[serde(rename_all = "snake_case")]
198pub enum LanguageIR {
199    Rust(RustIR),
200    TypeScript(TypeScriptIR),
201    JavaScript(JavaScriptIR),
202    Python(PythonIR),
203}
204
205/// A `mod foo;` or `mod foo { ... }` declaration in a Rust file.
206#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
207#[serde(rename_all = "snake_case")]
208pub struct ModDeclaration {
209    /// Name of the declared module (e.g. `"config"`, `"tests"`).
210    pub name: String,
211    /// 1-indexed source line of the `mod` keyword.
212    pub line: usize,
213}
214
215/// A macro invocation in a Rust file (e.g. `tracing::info!(...)`, `vec![...]`).
216///
217/// Stores the full macro path as written in source and the call-site line so
218/// that detectors can point to real usage rather than import declarations.
219#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
220#[serde(rename_all = "snake_case")]
221pub struct MacroCall {
222    /// Full macro name as written, e.g. `"tracing::info"`, `"vec"`.
223    pub name: String,
224    /// 1-indexed source line of the macro invocation.
225    pub line: usize,
226}
227
228/// A function or method call-site in a Rust file.
229///
230/// Stores **one example per unique callee name** (deduplication happens in the
231/// parser).  The snippet captures a window around the call so that MCP clients
232/// can show real usage patterns without additional disk I/O at query time.
233///
234/// # Snippet layout
235///
236/// ```text
237/// [2 lines of context before the opening line]
238/// [all lines of the call expression — may span many lines for multi-arg calls]
239/// [4 lines of context after the closing line]
240/// ```
241///
242/// Hard cap: 30 lines total.  Lines are taken from the raw source file and
243/// include the original indentation.
244#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
245#[serde(rename_all = "snake_case")]
246pub struct FunctionCall {
247    /// Full callee name as written in source, e.g. `"scan_project"`,
248    /// `"Arc::new"`, `"db.execute"`, `"tracing::info"`.
249    pub callee: String,
250    /// 1-indexed line of the **opening** of the call expression (function name
251    /// position).
252    pub line: usize,
253    /// 1-indexed line of the **closing parenthesis** of the call expression.
254    /// Equals `line` for single-line calls.
255    pub end_line: usize,
256    /// Multi-line snippet centered on the call site (see type-level docs).
257    /// Empty string if source was unavailable at parse time.
258    pub snippet: String,
259}
260
261/// Rust-specific IR details.
262#[derive(Debug, Clone, Default, Serialize, Deserialize)]
263#[serde(rename_all = "snake_case")]
264pub struct RustIR {
265    pub mod_declarations: Vec<ModDeclaration>,
266    pub derive_macros: Vec<DeriveUsage>,
267    pub trait_implementations: Vec<TraitImpl>,
268    pub error_types: Vec<String>,
269    /// All macro invocations found in this file.
270    ///
271    /// Populated by the Rust tree-sitter parser.  Detectors use this to
272    /// produce call-site evidence (e.g. `tracing::info!` lines) instead of
273    /// pointing at import declarations.
274    #[serde(default)]
275    pub macro_calls: Vec<MacroCall>,
276    /// Function and method call-sites found in this file.
277    ///
278    /// Deduplicated by callee name — at most one example per unique callee.
279    /// Hard limit: 500 entries per file.  Used by `query_code_pattern` to
280    /// return real call-site snippets alongside symbol definitions.
281    #[serde(default)]
282    pub function_calls: Vec<FunctionCall>,
283}
284
285/// A `#[derive(...)]` usage.
286#[derive(Debug, Clone, Serialize, Deserialize)]
287#[serde(rename_all = "snake_case")]
288pub struct DeriveUsage {
289    pub type_name: String,
290    pub derives: Vec<String>,
291    pub line: usize,
292}
293
294/// A trait implementation (`impl Trait for Type`).
295#[derive(Debug, Clone, Serialize, Deserialize)]
296#[serde(rename_all = "snake_case")]
297pub struct TraitImpl {
298    pub trait_name: String,
299    pub type_name: String,
300    pub line: usize,
301}
302
303/// TypeScript-specific IR details.
304#[derive(Debug, Clone, Default, Serialize, Deserialize)]
305#[serde(rename_all = "snake_case")]
306pub struct TypeScriptIR {
307    pub has_barrel_exports: bool,
308    pub type_only_imports: Vec<String>,
309    pub decorators: Vec<String>,
310    pub default_export: bool,
311    /// Function and method call-sites found in this file (v7+).
312    ///
313    /// Deduplicated by callee name — at most one example per unique callee.
314    /// Hard limit: 500 entries per file.
315    #[serde(default)]
316    pub function_calls: Vec<FunctionCall>,
317}
318
319/// JavaScript-specific IR details.
320#[derive(Debug, Clone, Default, Serialize, Deserialize)]
321#[serde(rename_all = "snake_case")]
322pub struct JavaScriptIR {
323    pub module_system: ModuleSystem,
324    pub has_module_exports: bool,
325    pub require_calls: Vec<String>,
326    /// Function and method call-sites found in this file (v7+).
327    ///
328    /// Deduplicated by callee name — at most one example per unique callee.
329    /// Hard limit: 500 entries per file.  `require` calls are excluded
330    /// (already captured in `require_calls`).
331    #[serde(default)]
332    pub function_calls: Vec<FunctionCall>,
333}
334
335/// JavaScript module system.
336#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
337#[serde(rename_all = "snake_case")]
338pub enum ModuleSystem {
339    #[default]
340    Unknown,
341    CommonJS,
342    ESM,
343}
344
345/// Python-specific IR details.
346#[derive(Debug, Clone, Default, Serialize, Deserialize)]
347#[serde(rename_all = "snake_case")]
348pub struct PythonIR {
349    pub has_all_export: bool,
350    pub is_init_file: bool,
351    pub type_hints_used: bool,
352    pub decorators: Vec<String>,
353    /// Function and method call-sites found in this file (v7+).
354    ///
355    /// Deduplicated by callee name — at most one example per unique callee.
356    /// Hard limit: 500 entries per file.
357    #[serde(default)]
358    pub function_calls: Vec<FunctionCall>,
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` yields the conventionally capitalised language name.
    #[test]
    fn language_display() {
        let expected = [
            (Language::Rust, "Rust"),
            (Language::TypeScript, "TypeScript"),
            (Language::JavaScript, "JavaScript"),
            (Language::Python, "Python"),
        ];
        for (lang, label) in expected {
            assert_eq!(lang.to_string(), label);
        }
    }

    /// `as_str` followed by `parse` gives back the same variant.
    #[test]
    fn language_roundtrip_str() {
        for original in [
            Language::Rust,
            Language::TypeScript,
            Language::JavaScript,
            Language::Python,
        ] {
            let reparsed: Language = original.as_str().parse().unwrap();
            assert_eq!(reparsed, original);
        }
    }

    /// Strings that name no variant are rejected.
    #[test]
    fn language_parse_unknown() {
        let outcome = "go".parse::<Language>();
        assert!(outcome.is_err());
    }

    /// Spot-checks the per-language extension lists.
    #[test]
    fn language_extensions() {
        assert_eq!(Language::Rust.extensions(), &["rs"]);
        assert_eq!(Language::Python.extensions(), &["py"]);

        let ts_exts = Language::TypeScript.extensions();
        assert!(ts_exts.contains(&"tsx"));

        let js_exts = Language::JavaScript.extensions();
        assert!(js_exts.contains(&"mjs"));
    }

    /// Every known extension maps to its language; others map to `None`.
    #[test]
    fn language_from_extension() {
        let cases = [
            ("rs", Some(Language::Rust)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("js", Some(Language::JavaScript)),
            ("jsx", Some(Language::JavaScript)),
            ("mjs", Some(Language::JavaScript)),
            ("cjs", Some(Language::JavaScript)),
            ("py", Some(Language::Python)),
            ("go", None),
            ("", None),
        ];
        for (ext, expected) in cases {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    /// `all` lists each of the four variants.
    #[test]
    fn language_all() {
        let variants = Language::all();
        assert_eq!(variants.len(), 4);
        for wanted in [
            Language::Rust,
            Language::TypeScript,
            Language::JavaScript,
            Language::Python,
        ] {
            assert!(variants.contains(&wanted));
        }
    }

    /// Each `LanguageIR` variant can be built from its payload's default.
    #[test]
    fn language_ir_enum_covers_all_languages() {
        let _py = LanguageIR::Python(PythonIR::default());
        let _js = LanguageIR::JavaScript(JavaScriptIR::default());
        let _ts = LanguageIR::TypeScript(TypeScriptIR::default());
        let _rust = LanguageIR::Rust(RustIR::default());
    }

    /// A `ProjectFile` survives a JSON serialize/deserialize cycle.
    #[test]
    fn project_file_serialization_roundtrip() {
        let io_import = Import {
            module: "std::io".to_owned(),
            names: vec!["Read".to_owned()],
            is_type_only: false,
            line: 1,
        };
        let main_fn = Function {
            name: "main".to_owned(),
            is_public: false,
            is_async: false,
            line: 3,
            end_line: 5,
            parameters: vec![],
            doc_comment: None,
        };
        let pf = ProjectFile {
            path: PathBuf::from("src/main.rs"),
            language: Language::Rust,
            content_hash: "abc123".to_owned(),
            imports: vec![io_import],
            exports: Vec::new(),
            functions: vec![main_fn],
            types: Vec::new(),
            dependencies_used: Vec::new(),
            language_ir: LanguageIR::Rust(RustIR::default()),
            file_doc: None,
        };

        let json = serde_json::to_string(&pf).expect("serialize");
        let deserialized: ProjectFile = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(deserialized.path, pf.path);
        assert_eq!(deserialized.language, pf.language);
        assert_eq!(deserialized.content_hash, pf.content_hash);
        assert_eq!(deserialized.imports.len(), 1);
        assert_eq!(deserialized.functions.len(), 1);
    }

    /// The derived `Default` for `ModuleSystem` is `Unknown`.
    #[test]
    fn module_system_default_is_unknown() {
        let fallback = ModuleSystem::default();
        assert_eq!(fallback, ModuleSystem::Unknown);
    }
}