Skip to main content

depyler_knowledge/
lib.rs

1//! Sovereign Type Database for Python Library Type Extraction
2//!
3//! This crate provides a "Type Truth Database" for Python libraries, enabling
4//! the Depyler transpiler to **know** types instead of **guessing** them.
5//!
6//! # Architecture (The Sovereign Stack)
7//!
8//! 1. **Harvester**: Uses `uv pip install --target` for deterministic package fetching
9//! 2. **Extractor**: Uses `rustpython_parser` for `.pyi` stub parsing
10//! 3. **Database**: Uses Apache Parquet via `arrow` crate for efficient queries
11//!
12//! # Peer-Reviewed Foundation
13//!
14//! - PEP 484 (van Rossum, Lehtosalo, 2014): Type Hints
15//! - PEP 561 (Smith, 2017): Stub Distribution (.pyi format)
16//! - PEP 585 (Langa, 2019): Generic Syntax
17//! - Apache Parquet Spec (2013): Columnar storage format
18//!
19//! # Example
20//!
21//! ```ignore
22//! use depyler_knowledge::{Harvester, Extractor, TypeDatabase};
23//!
24//! // Harvest the requests package
25//! let harvest = Harvester::new("/tmp/harvest")?.fetch("requests")?;
26//!
27//! // Extract type facts from .pyi stubs
28//! let facts = Extractor::new().extract_all(&harvest)?;
29//!
30//! // Store in Parquet database
31//! let db = TypeDatabase::new("types.parquet")?;
32//! db.write(&facts)?;
33//!
34//! // Query: Get signature for requests.get
35//! let sig = db.find_signature("requests", "get");
36//! assert!(sig.unwrap().contains("url: str"));
37//! ```
38
39#[cfg(feature = "parquet-storage")]
40pub mod database;
41pub mod error;
42pub mod extractor;
43pub mod harvester;
44#[cfg(feature = "parquet-storage")]
45pub mod query;
46
47#[cfg(feature = "parquet-storage")]
48pub use database::TypeDatabase;
49pub use error::{KnowledgeError, Result};
50pub use extractor::Extractor;
51pub use harvester::{HarvestResult, Harvester};
52#[cfg(feature = "parquet-storage")]
53pub use query::TypeQuery;
54
55use serde::{Deserialize, Serialize};
56
57/// The kind of symbol extracted from Python stubs.
58#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
59pub enum TypeFactKind {
60    /// A function (top-level or module-level)
61    Function,
62    /// A class definition
63    Class,
64    /// A method within a class
65    Method,
66    /// A class or module attribute
67    Attribute,
68}
69
70impl std::fmt::Display for TypeFactKind {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            Self::Function => write!(f, "function"),
74            Self::Class => write!(f, "class"),
75            Self::Method => write!(f, "method"),
76            Self::Attribute => write!(f, "attribute"),
77        }
78    }
79}
80
81impl std::str::FromStr for TypeFactKind {
82    type Err = KnowledgeError;
83
84    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
85        match s {
86            "function" => Ok(Self::Function),
87            "class" => Ok(Self::Class),
88            "method" => Ok(Self::Method),
89            "attribute" => Ok(Self::Attribute),
90            _ => Err(KnowledgeError::InvalidKind(s.to_string())),
91        }
92    }
93}
94
/// A single type fact extracted from Python stubs.
///
/// This is the core data structure of the Sovereign Type Database.
/// Each fact represents a symbol (function, class, method, attribute)
/// with its full type signature.
///
/// # Schema Rationale
///
/// - `module`: Fully qualified module path (e.g., "requests.api")
/// - `symbol`: Symbol name (e.g., "get"); `TypeFact::method` stores
///   methods as "Class.method"
/// - `kind`: Discriminant for symbol type
/// - `signature`: Full signature string for display/debugging;
///   `TypeFact::class` leaves it empty
/// - `return_type`: Parsed return type for codegen integration;
///   `TypeFact::class` stores the class's own "module.symbol" path here
///
/// All fields are public, so values built by hand (rather than through the
/// constructors) are not guaranteed to follow the conventions above.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TypeFact {
    /// Fully qualified module path (e.g., "requests.api")
    pub module: String,
    /// Symbol name (e.g., "get", or "Response.json" for a method)
    pub symbol: String,
    /// The kind of symbol (function, class, method, attribute)
    pub kind: TypeFactKind,
    /// Full signature string (e.g., "(url: str, params: dict = None) -> Response")
    pub signature: String,
    /// Return type for functions/methods (e.g., "requests.models.Response");
    /// for facts built with `TypeFact::class` this is "{module}.{symbol}"
    pub return_type: String,
}
121
122impl TypeFact {
123    /// Create a new TypeFact for a function.
124    pub fn function(module: &str, symbol: &str, signature: &str, return_type: &str) -> Self {
125        Self {
126            module: module.to_string(),
127            symbol: symbol.to_string(),
128            kind: TypeFactKind::Function,
129            signature: signature.to_string(),
130            return_type: return_type.to_string(),
131        }
132    }
133
134    /// Create a new TypeFact for a class.
135    pub fn class(module: &str, symbol: &str) -> Self {
136        Self {
137            module: module.to_string(),
138            symbol: symbol.to_string(),
139            kind: TypeFactKind::Class,
140            signature: String::new(),
141            return_type: format!("{module}.{symbol}"),
142        }
143    }
144
145    /// Create a new TypeFact for a method.
146    pub fn method(
147        module: &str,
148        class: &str,
149        method: &str,
150        signature: &str,
151        return_type: &str,
152    ) -> Self {
153        Self {
154            module: module.to_string(),
155            symbol: format!("{class}.{method}"),
156            kind: TypeFactKind::Method,
157            signature: signature.to_string(),
158            return_type: return_type.to_string(),
159        }
160    }
161
162    /// Get the fully qualified name of this symbol.
163    pub fn fqn(&self) -> String {
164        format!("{}.{}", self.module, self.symbol)
165    }
166}
167
#[cfg(test)]
mod tests {
    //! Unit tests for [`TypeFactKind`] (display, parsing, serde) and the
    //! [`TypeFact`] constructors and `fqn()` helper.

    use super::*;

    // ------------------------------------------------------------------
    // Constructors
    // ------------------------------------------------------------------

    #[test]
    fn test_type_fact_function() {
        let fact = TypeFact::function(
            "requests",
            "get",
            "(url: str, **kwargs) -> Response",
            "requests.models.Response",
        );
        assert_eq!(fact.module, "requests");
        assert_eq!(fact.symbol, "get");
        assert_eq!(fact.kind, TypeFactKind::Function);
        assert_eq!(fact.fqn(), "requests.get");
    }

    #[test]
    fn test_type_fact_class() {
        let fact = TypeFact::class("requests.models", "Response");
        assert_eq!(fact.kind, TypeFactKind::Class);
        assert_eq!(fact.return_type, "requests.models.Response");
    }

    #[test]
    fn test_type_fact_method() {
        let fact = TypeFact::method(
            "requests.models",
            "Response",
            "json",
            "(self) -> dict",
            "dict",
        );
        assert_eq!(fact.symbol, "Response.json");
        assert_eq!(fact.kind, TypeFactKind::Method);
    }

    // ------------------------------------------------------------------
    // TypeFactKind: Display / FromStr
    // ------------------------------------------------------------------

    #[test]
    fn test_type_fact_kind_display() {
        assert_eq!(TypeFactKind::Function.to_string(), "function");
        assert_eq!(TypeFactKind::Class.to_string(), "class");
        assert_eq!(TypeFactKind::Method.to_string(), "method");
        assert_eq!(TypeFactKind::Attribute.to_string(), "attribute");
    }

    #[test]
    fn test_type_fact_kind_from_str() {
        assert_eq!(
            "function".parse::<TypeFactKind>().unwrap(),
            TypeFactKind::Function
        );
        assert_eq!(
            "class".parse::<TypeFactKind>().unwrap(),
            TypeFactKind::Class
        );
        assert!("invalid".parse::<TypeFactKind>().is_err());
    }

    #[test]
    fn test_type_fact_kind_from_str_method() {
        assert_eq!(
            "method".parse::<TypeFactKind>().unwrap(),
            TypeFactKind::Method
        );
    }

    #[test]
    fn test_type_fact_kind_from_str_attribute() {
        assert_eq!(
            "attribute".parse::<TypeFactKind>().unwrap(),
            TypeFactKind::Attribute
        );
    }

    #[test]
    fn test_type_fact_kind_from_str_invalid_returns_error() {
        // The error message must echo the rejected input.
        let result = "unknown_kind".parse::<TypeFactKind>();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("unknown_kind"));
    }

    #[test]
    fn test_type_fact_kind_from_str_empty_string() {
        let result = "".parse::<TypeFactKind>();
        assert!(result.is_err());
    }

    #[test]
    fn test_type_fact_kind_from_str_case_sensitive() {
        // "Function" with capital F should fail
        let result = "Function".parse::<TypeFactKind>();
        assert!(result.is_err());
    }

    // ------------------------------------------------------------------
    // TypeFactKind: derived traits
    // ------------------------------------------------------------------

    #[test]
    fn test_type_fact_kind_clone_and_eq() {
        let kind = TypeFactKind::Function;
        let cloned = kind.clone();
        assert_eq!(kind, cloned);
    }

    #[test]
    fn test_type_fact_kind_debug() {
        let kind = TypeFactKind::Function;
        let debug_str = format!("{kind:?}");
        assert_eq!(debug_str, "Function");
    }

    // ------------------------------------------------------------------
    // TypeFact: fqn / field formats
    // ------------------------------------------------------------------

    #[test]
    fn test_type_fact_function_fqn() {
        let fact = TypeFact::function(
            "os.path",
            "join",
            "(path: str, *paths: str) -> str",
            "str",
        );
        assert_eq!(fact.fqn(), "os.path.join");
    }

    #[test]
    fn test_type_fact_class_return_type_format() {
        let fact = TypeFact::class("collections", "OrderedDict");
        assert_eq!(fact.return_type, "collections.OrderedDict");
        assert_eq!(fact.signature, "");
    }

    #[test]
    fn test_type_fact_method_symbol_format() {
        let fact = TypeFact::method(
            "http.client",
            "HTTPConnection",
            "request",
            "(self, method: str, url: str) -> None",
            "None",
        );
        assert_eq!(fact.symbol, "HTTPConnection.request");
        assert_eq!(fact.fqn(), "http.client.HTTPConnection.request");
    }

    #[test]
    fn test_type_fact_clone() {
        let original = TypeFact::function("mod", "func", "(x: int) -> int", "int");
        let cloned = original.clone();
        assert_eq!(original, cloned);
    }

    // ------------------------------------------------------------------
    // Serde round-trips
    // ------------------------------------------------------------------

    #[test]
    fn test_type_fact_serialization_json() {
        let fact = TypeFact::function("math", "sqrt", "(x: float) -> float", "float");
        let json = serde_json::to_string(&fact).unwrap();
        let deserialized: TypeFact = serde_json::from_str(&json).unwrap();
        assert_eq!(fact, deserialized);
    }

    #[test]
    fn test_type_fact_kind_serialization_json() {
        let kind = TypeFactKind::Method;
        let json = serde_json::to_string(&kind).unwrap();
        let deserialized: TypeFactKind = serde_json::from_str(&json).unwrap();
        assert_eq!(kind, deserialized);
    }

    // ------------------------------------------------------------------
    // Edge cases
    // ------------------------------------------------------------------

    #[test]
    fn test_type_fact_with_empty_strings() {
        // Empty parts are not special-cased: the separator is still emitted.
        let fact = TypeFact::function("", "", "", "");
        assert_eq!(fact.module, "");
        assert_eq!(fact.symbol, "");
        assert_eq!(fact.fqn(), ".");
    }

    #[test]
    fn test_type_fact_with_unicode_symbol() {
        let fact = TypeFact::function("mymod", "calc_\u{03c0}", "(n: int) -> float", "float");
        assert_eq!(fact.symbol, "calc_\u{03c0}");
        assert_eq!(fact.fqn(), "mymod.calc_\u{03c0}");
    }

    #[test]
    fn test_type_fact_method_class_with_dots() {
        // A nested class is passed with its dotted path; the dots must be
        // preserved verbatim in both `symbol` and `fqn()`.
        let fact = TypeFact::method(
            "pkg.submod",
            "Outer.Inner",
            "do_thing",
            "(self) -> None",
            "None",
        );
        assert_eq!(fact.symbol, "Outer.Inner.do_thing");
        assert_eq!(fact.fqn(), "pkg.submod.Outer.Inner.do_thing");
    }

    // ========================================================================
    // S9B7: Coverage tests for knowledge lib
    // ========================================================================

    #[test]
    fn test_s9b7_type_fact_function_empty_signature() {
        let fact = TypeFact::function("mod", "func", "", "int");
        assert_eq!(fact.signature, "");
        assert_eq!(fact.return_type, "int");
        assert_eq!(fact.kind, TypeFactKind::Function);
    }

    #[test]
    fn test_s9b7_type_fact_class_fqn() {
        let fact = TypeFact::class("pkg.sub", "Cls");
        assert_eq!(fact.fqn(), "pkg.sub.Cls");
    }

    #[test]
    fn test_s9b7_type_fact_method_return_type() {
        // The return type is stored as an opaque string; no validation occurs.
        let fact = TypeFact::method("mod", "Cls", "meth", "(self) -> Vec<i32>", "Vec<i32>");
        assert_eq!(fact.return_type, "Vec<i32>");
    }

    #[test]
    fn test_s9b7_type_fact_kind_ne() {
        assert_ne!(TypeFactKind::Function, TypeFactKind::Class);
        assert_ne!(TypeFactKind::Method, TypeFactKind::Attribute);
        assert_ne!(TypeFactKind::Function, TypeFactKind::Method);
    }

    #[test]
    fn test_s9b7_type_fact_ne() {
        let f1 = TypeFact::function("a", "b", "", "");
        let f2 = TypeFact::function("a", "c", "", "");
        assert_ne!(f1, f2);
    }

    #[test]
    fn test_s9b7_type_fact_kind_attribute_display() {
        assert_eq!(TypeFactKind::Attribute.to_string(), "attribute");
    }

    #[test]
    fn test_s9b7_type_fact_class_empty_signature() {
        let fact = TypeFact::class("mod", "Empty");
        assert_eq!(fact.signature, "");
    }

    #[test]
    fn test_s9b7_type_fact_debug() {
        let fact = TypeFact::function("m", "f", "sig", "ret");
        let debug = format!("{fact:?}");
        assert!(debug.contains("TypeFact"));
        assert!(debug.contains("Function"));
    }

    #[test]
    fn test_type_fact_kind_display_roundtrip() {
        // Display output must be accepted by FromStr for every variant.
        let kinds = [
            TypeFactKind::Function,
            TypeFactKind::Class,
            TypeFactKind::Method,
            TypeFactKind::Attribute,
        ];
        for kind in &kinds {
            let display_str = kind.to_string();
            let parsed: TypeFactKind = display_str.parse().unwrap();
            assert_eq!(*kind, parsed);
        }
    }
}