depyler_knowledge/
lib.rs

1//! Sovereign Type Database for Python Library Type Extraction
2//!
3//! This crate provides a "Type Truth Database" for Python libraries, enabling
4//! the Depyler transpiler to **know** types instead of **guessing** them.
5//!
6//! # Architecture (The Sovereign Stack)
7//!
8//! 1. **Harvester**: Uses `uv pip install --target` for deterministic package fetching
9//! 2. **Extractor**: Uses `rustpython_parser` for `.pyi` stub parsing
10//! 3. **Database**: Uses Apache Parquet via `arrow` crate for efficient queries
11//!
12//! # Peer-Reviewed Foundation
13//!
14//! - PEP 484 (van Rossum, Lehtosalo, 2014): Type Hints
15//! - PEP 561 (Smith, 2017): Stub Distribution (.pyi format)
16//! - PEP 585 (Langa, 2019): Generic Syntax
17//! - Apache Parquet Spec (2013): Columnar storage format
18//!
19//! # Example
20//!
21//! ```ignore
22//! use depyler_knowledge::{Harvester, Extractor, TypeDatabase};
23//!
24//! // Harvest the requests package
25//! let harvest = Harvester::new("/tmp/harvest")?.fetch("requests")?;
26//!
27//! // Extract type facts from .pyi stubs
28//! let facts = Extractor::new().extract_all(&harvest)?;
29//!
30//! // Store in Parquet database
31//! let db = TypeDatabase::new("types.parquet")?;
32//! db.write(&facts)?;
33//!
34//! // Query: Get signature for requests.get
35//! let sig = db.find_signature("requests", "get");
36//! assert!(sig.unwrap().contains("url: str"));
37//! ```
38
39pub mod database;
40pub mod error;
41pub mod extractor;
42pub mod harvester;
43pub mod query;
44
45pub use database::TypeDatabase;
46pub use error::{KnowledgeError, Result};
47pub use extractor::Extractor;
48pub use harvester::{HarvestResult, Harvester};
49pub use query::TypeQuery;
50
51use serde::{Deserialize, Serialize};
52
53/// The kind of symbol extracted from Python stubs.
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55pub enum TypeFactKind {
56    /// A function (top-level or module-level)
57    Function,
58    /// A class definition
59    Class,
60    /// A method within a class
61    Method,
62    /// A class or module attribute
63    Attribute,
64}
65
66impl std::fmt::Display for TypeFactKind {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        match self {
69            Self::Function => write!(f, "function"),
70            Self::Class => write!(f, "class"),
71            Self::Method => write!(f, "method"),
72            Self::Attribute => write!(f, "attribute"),
73        }
74    }
75}
76
77impl std::str::FromStr for TypeFactKind {
78    type Err = KnowledgeError;
79
80    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
81        match s {
82            "function" => Ok(Self::Function),
83            "class" => Ok(Self::Class),
84            "method" => Ok(Self::Method),
85            "attribute" => Ok(Self::Attribute),
86            _ => Err(KnowledgeError::InvalidKind(s.to_string())),
87        }
88    }
89}
90
91/// A single type fact extracted from Python stubs.
92///
93/// This is the core data structure of the Sovereign Type Database.
94/// Each fact represents a symbol (function, class, method, attribute)
95/// with its full type signature.
96///
97/// # Schema Rationale
98///
99/// - `module`: Fully qualified module path (e.g., "requests.api")
100/// - `symbol`: Symbol name (e.g., "get")
101/// - `kind`: Discriminant for symbol type
102/// - `signature`: Full signature string for display/debugging
103/// - `return_type`: Parsed return type for codegen integration
104#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
105pub struct TypeFact {
106    /// Fully qualified module path (e.g., "requests.api")
107    pub module: String,
108    /// Symbol name (e.g., "get")
109    pub symbol: String,
110    /// The kind of symbol (function, class, method, attribute)
111    pub kind: TypeFactKind,
112    /// Full signature string (e.g., "(url: str, params: dict = None) -> Response")
113    pub signature: String,
114    /// Return type for functions/methods (e.g., "requests.models.Response")
115    pub return_type: String,
116}
117
118impl TypeFact {
119    /// Create a new TypeFact for a function.
120    pub fn function(module: &str, symbol: &str, signature: &str, return_type: &str) -> Self {
121        Self {
122            module: module.to_string(),
123            symbol: symbol.to_string(),
124            kind: TypeFactKind::Function,
125            signature: signature.to_string(),
126            return_type: return_type.to_string(),
127        }
128    }
129
130    /// Create a new TypeFact for a class.
131    pub fn class(module: &str, symbol: &str) -> Self {
132        Self {
133            module: module.to_string(),
134            symbol: symbol.to_string(),
135            kind: TypeFactKind::Class,
136            signature: String::new(),
137            return_type: format!("{module}.{symbol}"),
138        }
139    }
140
141    /// Create a new TypeFact for a method.
142    pub fn method(
143        module: &str,
144        class: &str,
145        method: &str,
146        signature: &str,
147        return_type: &str,
148    ) -> Self {
149        Self {
150            module: module.to_string(),
151            symbol: format!("{class}.{method}"),
152            kind: TypeFactKind::Method,
153            signature: signature.to_string(),
154            return_type: return_type.to_string(),
155        }
156    }
157
158    /// Get the fully qualified name of this symbol.
159    pub fn fqn(&self) -> String {
160        format!("{}.{}", self.module, self.symbol)
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Every kind paired with the text it must format to and parse from.
    const KIND_NAMES: [(TypeFactKind, &str); 4] = [
        (TypeFactKind::Function, "function"),
        (TypeFactKind::Class, "class"),
        (TypeFactKind::Method, "method"),
        (TypeFactKind::Attribute, "attribute"),
    ];

    #[test]
    fn test_type_fact_function() {
        let fact = TypeFact::function(
            "requests",
            "get",
            "(url: str, **kwargs) -> Response",
            "requests.models.Response",
        );
        assert_eq!(fact.module, "requests");
        assert_eq!(fact.symbol, "get");
        assert_eq!(fact.kind, TypeFactKind::Function);
        assert_eq!(fact.signature, "(url: str, **kwargs) -> Response");
        assert_eq!(fact.return_type, "requests.models.Response");
        assert_eq!(fact.fqn(), "requests.get");
    }

    #[test]
    fn test_type_fact_class() {
        let fact = TypeFact::class("requests.models", "Response");
        assert_eq!(fact.module, "requests.models");
        assert_eq!(fact.symbol, "Response");
        assert_eq!(fact.kind, TypeFactKind::Class);
        // Classes carry no signature; their "return type" is their own qualified name.
        assert!(fact.signature.is_empty());
        assert_eq!(fact.return_type, "requests.models.Response");
        assert_eq!(fact.fqn(), "requests.models.Response");
    }

    #[test]
    fn test_type_fact_method() {
        let fact = TypeFact::method(
            "requests.models",
            "Response",
            "json",
            "(self) -> dict",
            "dict",
        );
        assert_eq!(fact.symbol, "Response.json");
        assert_eq!(fact.kind, TypeFactKind::Method);
        assert_eq!(fact.signature, "(self) -> dict");
        assert_eq!(fact.return_type, "dict");
        assert_eq!(fact.fqn(), "requests.models.Response.json");
    }

    #[test]
    fn test_type_fact_kind_display() {
        for (kind, name) in KIND_NAMES.iter() {
            assert_eq!(kind.to_string(), *name);
        }
    }

    #[test]
    fn test_type_fact_kind_from_str() {
        // All four variants must parse, not just a sample of them.
        for (kind, name) in KIND_NAMES.iter() {
            assert_eq!(&name.parse::<TypeFactKind>().unwrap(), kind);
        }
        assert!("invalid".parse::<TypeFactKind>().is_err());
    }

    #[test]
    fn test_type_fact_kind_from_str_is_exact() {
        // Matching is literal: no case folding, no trimming, no empty input.
        assert!("Function".parse::<TypeFactKind>().is_err());
        assert!(" class".parse::<TypeFactKind>().is_err());
        assert!("".parse::<TypeFactKind>().is_err());
    }

    #[test]
    fn test_type_fact_kind_from_str_error_payload() {
        match "invalid".parse::<TypeFactKind>() {
            Err(KnowledgeError::InvalidKind(rejected)) => assert_eq!(rejected, "invalid"),
            other => panic!("expected InvalidKind error, got {other:?}"),
        }
    }

    #[test]
    fn test_type_fact_kind_round_trip() {
        for (kind, _) in KIND_NAMES.iter() {
            let reparsed: TypeFactKind = kind.to_string().parse().unwrap();
            assert_eq!(&reparsed, kind);
        }
    }
}